├── project ├── build.properties └── plugins.sbt ├── app ├── views │ ├── indexkba.scala.html │ ├── index.scala.html │ ├── mainkba.scala.html │ ├── provs.scala.html │ └── main.scala.html ├── org │ └── sameersingh │ │ └── ervisualizer │ │ ├── Logging.scala │ │ ├── kba │ │ ├── KbaRunner.scala │ │ ├── KBADataObjects.scala │ │ ├── JsonWrites.scala │ │ ├── KBAStore.scala │ │ └── KBADB.scala │ │ ├── data │ │ ├── Provenance.scala │ │ ├── Entity.scala │ │ ├── DBStore.scala │ │ ├── SummaTextToHTML.scala │ │ ├── TestDocs.scala │ │ ├── DB.scala │ │ ├── JsonWrites.scala │ │ └── DocumentStore.scala │ │ └── nlp │ │ ├── ReadD2DDocs.scala │ │ └── ReadProcessedDocs.scala └── controllers │ ├── ApplicationKBA.scala │ └── Application.scala ├── docs └── img │ ├── entity.png │ ├── search.png │ └── relations.png ├── data └── test │ ├── docs.json.gz │ ├── ecounts.txt.gz │ ├── wcounts.txt.gz │ ├── ent.freebase │ ├── ent.head │ └── ent.info ├── public ├── images │ ├── favicon.png │ └── summa_logo.png ├── javascripts │ ├── bootstrap │ │ └── fonts │ │ │ ├── glyphicons-halflings-regular.eot │ │ │ ├── glyphicons-halflings-regular.ttf │ │ │ └── glyphicons-halflings-regular.woff │ ├── kba.js │ ├── d3 │ │ ├── topojson.v1.min.js │ │ └── d3.layout.cloud.js │ ├── main-kba.js │ └── listCollapse.js ├── html │ └── summa │ │ ├── chinese-amb.html │ │ ├── threat.html │ │ ├── randy.html │ │ ├── maiduguri.html │ │ ├── janara.html │ │ ├── iai │ │ └── title │ │ │ └── topic-6.html │ │ ├── boko.html │ │ └── index.html └── stylesheets │ ├── mainkba.css │ └── main.css ├── .gitignore ├── test ├── IntegrationSpec.scala ├── ApplicationSpec.scala └── org │ └── sameersingh │ └── ervisualizer │ └── data │ └── InMemoryDBTest.scala ├── LICENSE ├── conf ├── reference.conf └── routes └── README.md /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.5 2 | -------------------------------------------------------------------------------- 
/app/views/indexkba.scala.html: -------------------------------------------------------------------------------- 1 | @(message: String) 2 | 3 | @mainkba(message) -------------------------------------------------------------------------------- /app/views/index.scala.html: -------------------------------------------------------------------------------- 1 | @(message: String, db: String) 2 | 3 | @main(message, db) 4 | -------------------------------------------------------------------------------- /docs/img/entity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameersingh/er-visualizer/HEAD/docs/img/entity.png -------------------------------------------------------------------------------- /docs/img/search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameersingh/er-visualizer/HEAD/docs/img/search.png -------------------------------------------------------------------------------- /data/test/docs.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameersingh/er-visualizer/HEAD/data/test/docs.json.gz -------------------------------------------------------------------------------- /docs/img/relations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameersingh/er-visualizer/HEAD/docs/img/relations.png -------------------------------------------------------------------------------- /data/test/ecounts.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameersingh/er-visualizer/HEAD/data/test/ecounts.txt.gz -------------------------------------------------------------------------------- /data/test/wcounts.txt.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sameersingh/er-visualizer/HEAD/data/test/wcounts.txt.gz -------------------------------------------------------------------------------- /public/images/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameersingh/er-visualizer/HEAD/public/images/favicon.png -------------------------------------------------------------------------------- /public/images/summa_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameersingh/er-visualizer/HEAD/public/images/summa_logo.png -------------------------------------------------------------------------------- /public/javascripts/bootstrap/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameersingh/er-visualizer/HEAD/public/javascripts/bootstrap/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /public/javascripts/bootstrap/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameersingh/er-visualizer/HEAD/public/javascripts/bootstrap/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /public/javascripts/bootstrap/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameersingh/er-visualizer/HEAD/public/javascripts/bootstrap/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /data/test/ent.freebase: -------------------------------------------------------------------------------- 1 | {"id":"m_01w5m","types":["College/University"]} 2 | 
{"id":"m_025s5v9","types":["Celebrity"]} 3 | {"id":"m_02mjmr","types":["US President"]} 4 | {"id":"m_09c7w0","types":["Country"]} 5 | {"id":"m_02hrh0_","types":["City/Town/Village"]} 6 | -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/Logging.scala: -------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer 2 | 3 | import play.api.Logger 4 | 5 | /** 6 | * @author sameer 7 | * @since 7/26/15. 8 | */ 9 | trait Logging { 10 | val logger: Logger = Logger(this.getClass()) 11 | } 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | .cache/ 6 | .history/ 7 | .lib/ 8 | dist/* 9 | target/ 10 | lib_managed/ 11 | src_managed/ 12 | project/boot/ 13 | project/plugins/project/ 14 | 15 | # Scala-IDE specific 16 | .scala_dependencies 17 | .worksheet 18 | .idea 19 | .idea_modules -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/kba/KbaRunner.scala: -------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer.kba 2 | 3 | import com.typesafe.config.ConfigFactory 4 | 5 | 6 | object KbaRunner extends App { 7 | val jsonPath = ConfigFactory.load().getString("nlp.kba.jsonPath") 8 | println(jsonPath) 9 | } 10 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | // Comment to get more information during initialization 2 | logLevel := Level.Warn 3 | 4 | // The Typesafe repository 5 | resolvers += "Typesafe repository" at "http://repo.typesafe.com/typesafe/releases/" 6 | 7 | // Use the Play sbt plugin for Play projects 8 | 
addSbtPlugin("com.typesafe.play" % "sbt-plugin" % "2.3.8") -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/data/Provenance.scala: -------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer.data 2 | 3 | /** 4 | * @author sameer 5 | * @since 6/10/14. 6 | */ 7 | case class Provenance(docId: String, sentId: Int, tokPos: Seq[(Int, Int)], confidence: Double = 1.0) 8 | 9 | case class Sentence(docId: String, sentId: Int, string: String) 10 | 11 | case class Document(docId: String, path: String, title: String, cite: String, text: String, sents: Seq[Sentence]) -------------------------------------------------------------------------------- /data/test/ent.head: -------------------------------------------------------------------------------- 1 | {"id":"m_01w5m","name":"Columbia University","nerTag":"","popularity":0.0,"geo":[-73.961944,40.8075]} 2 | {"id":"m_025s5v9","name":"Michelle Obama","nerTag":"","popularity":0.0,"geo":[]} 3 | {"id":"m_02mjmr","name":"Barack Obama","nerTag":"","popularity":0.0,"geo":[]} 4 | {"id":"m_09c7w0","name":"United States of America","nerTag":"","popularity":0.0,"geo":[-77.016667,38.883333]} 5 | {"id":"m_02hrh0_","name":"Honolulu","nerTag":"","popularity":0.0,"geo":[-157.816667,21.3]} 6 | -------------------------------------------------------------------------------- /test/IntegrationSpec.scala: -------------------------------------------------------------------------------- 1 | import org.specs2.mutable._ 2 | import org.specs2.runner._ 3 | import org.junit.runner._ 4 | 5 | import play.api.test._ 6 | import play.api.test.Helpers._ 7 | 8 | /** 9 | * add your integration spec here. 
10 | * An integration test will fire up a whole play application in a real (or headless) browser 11 | */ 12 | @RunWith(classOf[JUnitRunner]) 13 | class IntegrationSpec extends Specification { 14 | 15 | "Application" should { 16 | 17 | "work from within a browser" in new WithBrowser { 18 | 19 | browser.goTo("http://localhost:" + port) 20 | 21 | browser.pageSource must contain("Your new application is ready.") 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /test/ApplicationSpec.scala: -------------------------------------------------------------------------------- 1 | import org.specs2.mutable._ 2 | import org.specs2.runner._ 3 | import org.junit.runner._ 4 | 5 | import play.api.test._ 6 | import play.api.test.Helpers._ 7 | 8 | /** 9 | * Add your spec here. 10 | * You can mock out a whole application including requests, plugins etc. 11 | * For more information, consult the wiki. 12 | */ 13 | @RunWith(classOf[JUnitRunner]) 14 | class ApplicationSpec extends Specification { 15 | 16 | "Application" should { 17 | 18 | "send 404 on a bad request" in new WithApplication{ 19 | route(FakeRequest(GET, "/boum")) must beNone 20 | } 21 | 22 | "render the index page" in new WithApplication{ 23 | val home = route(FakeRequest(GET, "/")).get 24 | 25 | status(home) must equalTo(OK) 26 | contentType(home) must beSome.which(_ == "text/html") 27 | contentAsString(home) must contain ("Your new application is ready.") 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/kba/KBADataObjects.scala: -------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer.kba 2 | 3 | /** 4 | * @author nacho 5 | */ 6 | 7 | case class StalenessKba(cj: Int, inc: Double, dec: Double) 8 | 9 | case class DocumentKba(id: String, timestamp: Long, relevance: Int, score: Int, ci: Int, lambdas: 
Seq[StalenessKba] = Seq.empty) 10 | 11 | case class EntityKba(id: String, name: String, documents : Seq[DocumentKba] = Seq.empty) 12 | 13 | case class ClusterKba(cj: Int, cj_emb: Seq[WordKba]) 14 | 15 | case class EmbeddingKba(id: String, timestamp: Long, di: Seq[WordKba], clusters: Seq[ClusterKba]) 16 | 17 | case class WordKba(t: String, p: Int) 18 | 19 | // For non-KBA staleness visualization 20 | case class Word(w : String, c: Double) 21 | 22 | case class Cluster(id: Int, words: Seq[Word]) 23 | 24 | case class Staleness(value : Double, time : Long) 25 | 26 | case class Doc(id: String, time : Long) 27 | 28 | case class Entity(id: String, staleness: Seq[Staleness], docs: Seq[Doc], clusters: Seq[Cluster]) -------------------------------------------------------------------------------- /app/controllers/ApplicationKBA.scala: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import org.sameersingh.ervisualizer.kba.KBADB 4 | import play.api._ 5 | import play.api.mvc._ 6 | import org.sameersingh.ervisualizer.data._ 7 | import play.api.libs.json.{JsValue, Writes, Json} 8 | import play.api.libs.functional.syntax._ 9 | 10 | object ApplicationKBA extends Controller { 11 | 12 | private var _db: KBADB = null 13 | 14 | def db = _db 15 | 16 | def init() { 17 | _db = KBADB.readDB 18 | } 19 | 20 | init(); 21 | 22 | import org.sameersingh.ervisualizer.data.JsonWrites._ 23 | 24 | def index = Action { 25 | Ok(views.html.indexkba("UW TRECKBA - default")) 26 | } 27 | 28 | def entities = Action { 29 | Ok(Json.toJson(db.entities)) 30 | } 31 | 32 | def documents(entityId: String) = Action { 33 | Ok(Json.toJson(db.documents(entityId))) 34 | } 35 | 36 | def clusterWordCloud(entityId: String, clusterId: Int, timestamp: Long) = Action { 37 | Ok(Json.toJson(db.clusterWordCloud(entityId, clusterId, timestamp))) 38 | } 39 | 40 | def documentWordCloud(entityId: String, timestamp: Long) = Action { 41 | 
Ok(Json.toJson(db.documentWordCloud(entityId, timestamp))) 42 | } 43 | 44 | } -------------------------------------------------------------------------------- /public/html/summa/chinese-amb.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | UW Summa 5 | 6 | 7 | 8 | 9 | 10 |
11 |

Summa

12 | 13 | 21 | 22 |
23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Sameer Singh 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 11 | -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/data/Entity.scala: -------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer.data 2 | 3 | /** 4 | * @author sameer 5 | * @since 6/10/14. 
6 | */ 7 | case class EntityHeader(id: String, name: String, nerTag: String, popularity: Double, geo: Seq[Double] = Seq.empty) 8 | 9 | case class EntityInfo(id: String, freebaseInfo: Map[String, String]) 10 | 11 | case class EntityFreebase(id: String, types: Seq[String]) 12 | 13 | case class EntityText(id: String, provenances: Seq[Provenance]) 14 | 15 | case class TypeModelProvenances(id: String, entityType: String, provenances: Seq[Provenance]) 16 | 17 | object EntityUtils { 18 | def emptyInfo(eid: String) = EntityInfo(eid, Map.empty) 19 | 20 | def emptyText(eid: String) = EntityText(eid, Seq.empty) 21 | 22 | def emptyKBA(eid: String) = org.sameersingh.ervisualizer.kba.Entity(eid, Seq.empty, Seq.empty, Seq.empty) 23 | 24 | def emptyFreebase(eid: String) = EntityFreebase(eid, Seq.empty) 25 | 26 | def emptyProvenance(eid: String) = EntityText(eid, Seq.empty) 27 | 28 | def emptyTypeProvenance(eid: String, et: String) = TypeModelProvenances(eid, et, Seq.empty) 29 | } 30 | 31 | case class RelationHeader(sourceId: String, targetId: String, popularity: Double) 32 | 33 | case class RelationFreebase(sourceId: String, targetId: String, rels: Seq[String]) 34 | 35 | case class RelationText(sourceId: String, targetId: String, provenances: Seq[Provenance]) 36 | 37 | case class RelModelProvenances(sourceId: String, targetId: String, relType: String, provenances: Seq[Provenance], confidence: Double = 1.0) 38 | 39 | object RelationUtils { 40 | def emptyFreebase(sid: String, tid: String) = RelationFreebase(sid, tid, Seq.empty) 41 | 42 | def emptyProvenance(sid: String, tid: String) = RelationText(sid, tid, Seq.empty) 43 | 44 | def emptyRelProvenance(sid: String, tid: String, rt: String) = RelModelProvenances(sid, tid, rt, Seq.empty) 45 | } 46 | -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/nlp/ReadD2DDocs.scala: -------------------------------------------------------------------------------- 1 | package 
org.sameersingh.ervisualizer.nlp 2 | 3 | import edu.stanford.nlp.semgraph.SemanticGraph 4 | import play.api.libs.json.Json 5 | import org.sameersingh.ervisualizer.data.Document 6 | 7 | /** 8 | * @author sameer 9 | * @since 7/11/14. 10 | */ 11 | class ReadD2DDocs(val baseDir: String) { 12 | 13 | def path(baseDir: String, name: String, format: String): String = { 14 | "%s/allafrica.com_07-2013-to-05-2014_%s/%s.%s" format(baseDir, format, name, format) 15 | } 16 | 17 | def readD2DNLP(name: String) { 18 | val source = io.Source.fromFile(path(baseDir, name, "nlp"), "UTF-8") 19 | for(s <- source.getLines().drop(8)) { 20 | println(s) 21 | val sg = SemanticGraph.valueOf(s) 22 | println(sg.toFormattedString) 23 | } //.mkString("\n")//.replaceAll("\\]\\[", "]\n[") 24 | 25 | source.close() 26 | } 27 | 28 | case class D2DDoc(title: Array[String], cite: Array[String], h1: Array[String], div: Array[String]) 29 | implicit val d2dDocWrites = Json.writes[D2DDoc] 30 | implicit val d2dDocReads = Json.reads[D2DDoc] 31 | 32 | def readOriginalDoc(name: String): D2DDoc = { 33 | val source = io.Source.fromFile(path(baseDir, name, "json"), "UTF-8") 34 | val s = source.getLines().mkString("\n")//.replaceAll("\\]\\[", "]\n[") 35 | // println(s) 36 | val d = Json.fromJson[D2DDoc](Json.parse(s)).get 37 | // println(d) 38 | d 39 | } 40 | 41 | def readDoc(id: String, name: String): Document = { 42 | val d = readOriginalDoc(name) 43 | Document(id, name, d.title.mkString("___SEP___"), d.cite.mkString("___SEP___"), d.div.mkString("\n"), Seq.empty) 44 | } 45 | } 46 | 47 | object ReadD2DDocs extends ReadD2DDocs("/Users/sameer/Work/data/d2d") { 48 | def main(args: Array[String]) { 49 | readD2DNLP("Nigeria/piracy/stories/201307010505") 50 | // readOriginalDoc("Nigeria/piracy/stories/201307010505") 51 | } 52 | } -------------------------------------------------------------------------------- /public/stylesheets/mainkba.css: -------------------------------------------------------------------------------- 
1 | body { 2 | overflow-y:scroll; 3 | } 4 | 5 | text { 6 | font: 12px sans-serif; 7 | } 8 | 9 | svg { 10 | display: block; 11 | } 12 | 13 | .chart svg { 14 | height: 500px; 15 | min-width: 100px; 16 | min-height: 100px; 17 | } 18 | 19 | .center { 20 | position: absolute; 21 | top:10px; 22 | left:50%; 23 | z-index:100; 24 | } 25 | 26 | .centerContent { 27 | position: relative; 28 | left:-50%; 29 | z-index:100; 30 | } 31 | 32 | /* typeahead specific style */ 33 | .typeahead, 34 | .tt-query, 35 | .tt-hint { 36 | width: 100%; 37 | height: 40px; 38 | padding: 4px 6px; 39 | font-size: 16px; 40 | line-height: 30px; 41 | border: 2px solid #ccc; 42 | border-radius: 5px; 43 | outline: none; 44 | } 45 | 46 | .typeahead { 47 | background-color: #fff; 48 | } 49 | 50 | .twitter-typeahead { 51 | width: 250px; 52 | } 53 | 54 | .typeahead:focus { 55 | border: 2px solid #0097cf; 56 | } 57 | 58 | .tt-query { 59 | -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); 60 | -moz-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); 61 | box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); 62 | } 63 | 64 | .tt-hint { 65 | color: #999 66 | } 67 | 68 | .tt-dropdown-menu { 69 | width: 250px; 70 | margin-top: 6px; 71 | padding: 4px 0; 72 | background-color: #fff; 73 | border: 1px solid #ccc; 74 | border: 1px solid rgba(0, 0, 0, 0.2); 75 | border-radius: 5px; 76 | max-height: 150px; 77 | overflow-y: auto; 78 | } 79 | 80 | .tt-suggestion { 81 | padding: 3px 20px; 82 | font-size: 12px; 83 | line-height: 24px; 84 | } 85 | 86 | .tt-suggestion.tt-cursor { 87 | color: #fff; 88 | background-color: #0097cf; 89 | 90 | } 91 | 92 | .tt-suggestion p { 93 | margin: 0; 94 | } 95 | 96 | #entity .tt-dropdown-menu { 97 | max-height: 150px; 98 | overflow-y: auto; 99 | } 100 | 101 | .space { 102 | margin-bottom: 1.5cm; 103 | } 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/data/DBStore.scala: 
-------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer.data 2 | 3 | import org.sameersingh.ervisualizer.Logging 4 | 5 | import scala.collection.mutable 6 | import scala.collection.mutable.HashMap 7 | 8 | /** 9 | * @author sameer 10 | * @since 1/25/15. 11 | */ 12 | class DBStore(docs: DocumentStore) extends Logging { 13 | type Id = String 14 | 15 | val maxDBs = 20 16 | val dbMap = new HashMap[Id, DB] 17 | val dbQueue = new mutable.Queue[Id]() 18 | val queryMap = new HashMap[String, Id] 19 | val queryIdMap = new HashMap[Id, String] 20 | 21 | def query(string: String): (Id, DB) = { 22 | val id = queryId(string) 23 | val odb = dbMap.get(id) 24 | id -> odb.getOrElse({ 25 | val docIds = docs.query(string) 26 | logger.info("Reading " + docIds.size + " docs.") 27 | val inDB = new InMemoryDB() 28 | NLPReader.readDocs(docIds.map(id => docs(id)).iterator, inDB) 29 | NLPReader.addRelationInfo(inDB) 30 | //NLPReader.removeSingletonEntities(inDB) 31 | EntityInfoReader.read(inDB) 32 | logger.info(inDB.toString) 33 | val freeMem = (Runtime.getRuntime().maxMemory() - Runtime.getRuntime().totalMemory()) / (1024 * 1024 * 1024) 34 | logger.info("Free memory (Kbytes): " + Runtime.getRuntime().freeMemory() / 1024) 35 | logger.info("Total memory (Kbytes): " + Runtime.getRuntime().totalMemory() / 1024) 36 | logger.info("Max memory (Kbytes): " + Runtime.getRuntime().maxMemory() / 1024) 37 | logger.info("Free memory (GBs): " + freeMem + ", DBs: " + dbMap.size) 38 | if(dbMap.size >=1 && (dbMap.size >= maxDBs || freeMem < 1)) { 39 | val id = dbQueue.dequeue() 40 | logger.info("Dequeuing " + id + " for query: \"" + queryIdMap(id) + "\"") 41 | dbMap.remove(id) 42 | } 43 | dbQueue += id 44 | dbMap(id) = inDB 45 | inDB 46 | }) 47 | } 48 | 49 | def id(id: String) = { 50 | dbMap.getOrElse(id, query(queryIdMap(id))._2) 51 | } 52 | 53 | private def queryId(string: String): Id = { 54 | queryMap.getOrElseUpdate(string, { 55 | val id = 
"db" + queryMap.size 56 | queryIdMap(id) = string 57 | id 58 | }) 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /conf/reference.conf: -------------------------------------------------------------------------------- 1 | # This is the main configuration file for the application. 2 | # ~~~~~ 3 | 4 | # Secret key 5 | # ~~~~~ 6 | # The secret key is used to secure cryptographic functions. 7 | # If you deploy your application to several instances be sure to use the same key! 8 | application.secret="dv4Lf:17Zswrd?D^Q@0v`Rv?LuJe:wg_GC`rdCLhillOEYZu^a`>KEsUB7Sk[ir2" 9 | 10 | # The application languages 11 | # ~~~~~ 12 | application.langs="en" 13 | 14 | # Global object class 15 | # ~~~~~ 16 | # Define the Global object class for this application. 17 | # Default to Global in the root package. 18 | # application.global=Global 19 | 20 | # Router 21 | # ~~~~~ 22 | # Define the Router object to use for this application. 23 | # This router will be looked up first when the application is starting up, 24 | # so make sure this is the entry point. 25 | # Furthermore, it's assumed your route file is named properly. 26 | # So for an application router like `my.application.Router`, 27 | # you may need to define a router file `conf/my.application.routes`. 28 | # Default to Routes in the root package (and conf/routes) 29 | # application.router=my.application.Routes 30 | 31 | # Database configuration 32 | # ~~~~~ 33 | # You can declare as many datasources as you want. 
34 | # By convention, the default datasource is named `default` 35 | # 36 | # db.default.driver=org.h2.Driver 37 | # db.default.url="jdbc:h2:mem:play" 38 | # db.default.user=sa 39 | # db.default.password="" 40 | 41 | # Evolutions 42 | # ~~~~~ 43 | # You can disable evolutions if needed 44 | # evolutionplugin=disabled 45 | 46 | # Logger 47 | # ~~~~~ 48 | # You can also configure logback (http://logback.qos.ch/), 49 | # by providing an application-logger.xml file in the conf directory. 50 | 51 | # Root logger: 52 | logger.root=ERROR 53 | 54 | # Logger used by the framework: 55 | logger.play=INFO 56 | 57 | # Logger provided to your application: 58 | logger.application=DEBUG 59 | 60 | nlp { 61 | 62 | data { 63 | baseDir = "data/test" 64 | docsFile = "docs.json.gz" 65 | defaultDB = "president" 66 | mongo = false 67 | } 68 | 69 | # UNUSED 70 | # kba { 71 | # entitiesFile = "/Users/icano/Documents/UW/RA/summer14/data/final/viz/entities.tsv" 72 | # stalenessBaseDir = "/Users/icano/Documents/UW/RA/summer14/data/final/viz/staleness" 73 | # embeddingBaseDir = "/Users/icano/Documents/UW/RA/summer14/data/final/viz/embedding" 74 | # } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /app/views/mainkba.scala.html: -------------------------------------------------------------------------------- 1 | 2 | @(title: String) 3 | 4 | 5 | 6 | 7 | 8 | @title 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 29 | 30 | 31 | 32 | 43 | 44 |
45 |
46 | 47 |
48 |
49 |

50 |

51 | 52 |
53 |

54 |

55 | 56 |
57 | 58 | 59 | -------------------------------------------------------------------------------- /public/javascripts/kba.js: -------------------------------------------------------------------------------- 1 | /* 2 | Rendering KBA related info (staleness and word clusters) in the ER visualizer 3 | */ 4 | function renderKBA(div, kba) { 5 | console.log(kba); 6 | var divStaleness = div.append("div").attr('id', 'divStaleness'); 7 | var divClusters = div.append("div").attr('id', 'divClusters'); 8 | renderStaleness(divStaleness, kba); 9 | renderClusters(divClusters, kba); 10 | } 11 | 12 | function renderStaleness(div, kba) { 13 | var margin = {top: 5, right: 5, bottom: 5, left: 5}, 14 | width = $("#kba").width() - margin.left - margin.right, 15 | height = 100 - margin.top - margin.bottom; 16 | 17 | var x = d3.scale.linear() 18 | .range([0, width]); 19 | 20 | var y = d3.scale.linear() 21 | .range([height, 0]); 22 | 23 | var xAxis = d3.svg.axis() 24 | .scale(x) 25 | .orient("bottom"); 26 | 27 | var yAxis = d3.svg.axis() 28 | .scale(y) 29 | .orient("left"); 30 | 31 | var area = d3.svg.area() 32 | .x(function(d) { return x(d.time); }) 33 | .y0(height) 34 | .y1(function(d) { return y(d.value); }); 35 | 36 | var svg = div.append("svg") 37 | .attr("width", width + margin.left + margin.right) 38 | .attr("height", height + margin.top + margin.bottom) 39 | .append("g") 40 | .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); 41 | 42 | var data = kba.staleness; 43 | x.domain(d3.extent(kba.docs, function(d) { return d.time; })); 44 | y.domain([0, 1.0]); //d3.max(data, function(d) { return d.value; })]); 45 | 46 | svg.append("path") 47 | .datum(data) 48 | .attr("class", "area") 49 | .attr("d", area); 50 | 51 | svg.selectAll("circle") 52 | .data(kba.docs) 53 | .enter() 54 | .append("circle") 55 | .attr("cx", function(d) { return x(d.time); }) 56 | .attr("cy", y(0.0)) 57 | .attr("r", "3") 58 | .attr("fill", "#eeeeee") 59 | .attr("stroke", "#444444") 60 | 
.append("title") 61 | .text(function(d) { return d.id + "(" + new Date(d.time).toDateString() + ")"; }) 62 | } 63 | 64 | /** 65 |
66 |
67 | Basic panel example 68 |
69 |
70 | **/ 71 | function renderClusters(div, kba) { 72 | var num = kba.clusters.length; 73 | var colors = d3.scale.ordinal() 74 | .domain(d3.range(1,num+1)) 75 | .range(colorbrewer.RdBu[9]); 76 | colors = d3.scale.category10(); 77 | div.selectAll('div') 78 | .data(kba.clusters) 79 | .enter() 80 | .append("div") 81 | .attr("class", "panel panel-default") 82 | .style("color", function(d) { return colors(d.id);}) 83 | .style("margin", "5px") 84 | .style("padding", "5px") 85 | //.style("width", "20%") 86 | .style("float", "left") 87 | .selectAll('span') 88 | .data(function(d) { return d.words; }) 89 | .enter() 90 | .append("span") 91 | .html(function(w) {return w.w + '
';}) 92 | .style("font-size", function(w) {return }); 93 | 94 | //.text(function(d) { return d.words.map(function(w) { return ' '+ w.w; }); }); 95 | } -------------------------------------------------------------------------------- /conf/routes: -------------------------------------------------------------------------------- 1 | # Routes 2 | # This file defines all application routes (Higher priority routes first) 3 | # ~~~~ 4 | 5 | # Home page 6 | GET / controllers.Application.index 7 | GET /page controllers.Application.page(query: Option[String]) 8 | GET /page/db/:db controllers.Application.pageId(db: String) 9 | GET /search controllers.Application.search 10 | 11 | # Docs etc. 12 | GET /docs/doc/:doc controllers.Application.document(doc: String, db: Option[String]) 13 | GET /docs/sentence/:doc/:sid controllers.Application.sentence(doc: String, sid: Int, db: Option[String]) 14 | 15 | # Entity Stuff 16 | GET /entity/all controllers.Application.entityHeaders(db: Option[String]) 17 | GET /entity/info/:id controllers.Application.entityInfo(id: String, db: Option[String]) 18 | GET /entity/fb/:id controllers.Application.entityFreebase(id: String, db: Option[String]) 19 | GET /entity/rels/:id controllers.Application.entityRelations(id: String, db: Option[String]) 20 | GET /entity/types/:id controllers.Application.entityTypes(id: String, db: Option[String]) 21 | GET /entity/text/:id controllers.Application.entityText(id: String, db: Option[String], limit: Option[Int]) 22 | GET /entity/typeprov/:id/:etype controllers.Application.entityTypeProv(id: String, etype: String, db: Option[String], limit: Option[Int]) 23 | GET /entity/provs/:id controllers.Application.entityProvs(id: String, db: Option[String]) 24 | 25 | GET /entity/kba/:id controllers.Application.entityKBA(id: String) 26 | 27 | # Relation Stuff 28 | GET /relation/all controllers.Application.relationHeaders(db: Option[String]) 29 | GET /relation/fb/:sid/:tid controllers.Application.relationFreebase(sid: String, 
tid: String, db: Option[String]) 30 | GET /relation/types/:sid/:tid controllers.Application.relationPredictions(sid: String, tid: String, db: Option[String]) 31 | GET /relation/text/:sid/:tid controllers.Application.relationText(sid: String, tid: String, db: Option[String], limit: Option[Int]) 32 | GET /relation/typeprov/:sid/:tid/:rtype controllers.Application.relationProvenances(sid: String, tid: String, rtype: String, db: Option[String], limit: Option[Int]) 33 | GET /relation/provs/:sid/:tid controllers.Application.relationProvs(sid: String, tid: String, db: Option[String]) 34 | 35 | GET /relation/kba/:sid/:tid controllers.Application.relationKBA(sid: String, tid: String) 36 | 37 | # Map static resources from the /public folder to the /assets URL path 38 | GET /assets/*file controllers.Assets.at(path="/public", file) 39 | 40 | 41 | ### KBA stuff 42 | 43 | GET /kba controllers.ApplicationKBA.index 44 | GET /kba/entities controllers.ApplicationKBA.entities 45 | GET /kba/documents/:eid controllers.ApplicationKBA.documents(eid: String) 46 | GET /kba/wordcloud/:eid/:cid/:timestamp controllers.ApplicationKBA.clusterWordCloud(eid: String, cid: Int, timestamp: Long) 47 | GET /kba/wordcloud/:eid/:timestamp controllers.ApplicationKBA.documentWordCloud(eid: String, timestamp: Long) -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/kba/JsonWrites.scala: -------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer.kba 2 | 3 | import org.sameersingh.ervisualizer.kba._ 4 | import play.api.libs.json._ 5 | import play.api.libs.functional.syntax._ 6 | 7 | /** 8 | * Created by sameer on 7/20/14. 
9 | */
object JsonWrites {

  /** Writes a sequence of string pairs as a JSON array of two-element arrays. */
  implicit val seqStringPairWrites: Writes[Seq[(String, String)]] = new Writes[Seq[(String, String)]] {
    override def writes(o: Seq[(String, String)]): JsValue = {
      Json.toJson(o.map(p => Json.toJson(Seq(p._1, p._2))))
    }
  }

  /** Same encoding for integer pairs; not implicit, so it has to be passed or re-exposed explicitly. */
  val seqIntPairWrites: Writes[Seq[(Int, Int)]] = new Writes[Seq[(Int, Int)]] {
    override def writes(o: Seq[(Int, Int)]): JsValue = {
      Json.toJson(o.map(p => Json.toJson(Seq(p._1, p._2))))
    }
  }

  implicit val stalenessKbaWrites = Json.writes[StalenessKba]

  // Hand-written because the JSON field names are spelled out here rather than derived by the macro.
  implicit val docKbaWrites: Writes[DocumentKba] = (
    (JsPath \ "streamid").write[String] and
      (JsPath \ "timestamp").write[Long] and
      (JsPath \ "relevance").write[Int] and
      (JsPath \ "score").write[Int] and
      (JsPath \ "ci").write[Int] and
      (JsPath \ "lambdas").write[Seq[StalenessKba]]
    )(unlift(DocumentKba.unapply))

  implicit val entityKbaWrites = Json.writes[EntityKba]

  implicit val wordKbaWrites = Json.writes[WordKba]

}

object JsonReads {

  /**
   * Reads a JSON array of arrays into string pairs, taking the first two elements of each inner array.
   * An inner array with fewer than two elements yields a JsError instead of an IndexOutOfBoundsException.
   */
  implicit val seqStringPairReads: Reads[Seq[(String, String)]] = new Reads[Seq[(String, String)]] {
    override def reads(json: JsValue): JsResult[Seq[(String, String)]] = {
      Json.fromJson[Seq[Seq[String]]](json).flatMap { seqs =>
        if (seqs.forall(_.size >= 2)) JsSuccess(seqs.map(seq => seq(0) -> seq(1)))
        else JsError("expected every inner array to contain at least two elements")
      }
    }
  }

  /** Integer counterpart of seqStringPairReads, with the same length validation; not implicit. */
  val seqIntPairReads: Reads[Seq[(Int, Int)]] = new Reads[Seq[(Int, Int)]] {
    override def reads(json: JsValue): JsResult[Seq[(Int, Int)]] = {
      Json.fromJson[Seq[Seq[Int]]](json).flatMap { seqs =>
        if (seqs.forall(_.size >= 2)) JsSuccess(seqs.map(seq => seq(0) -> seq(1)))
        else JsError("expected every inner array to contain at least two elements")
      }
    }
  }

  // Declaration order matters for the macro-derived readers below: each one is a
  // plain val, so a reader must be initialised before another reader that embeds it.
  implicit val wordReads = Json.reads[Word]
  implicit val clusterReads = Json.reads[Cluster]
  implicit val stalenessReads = Json.reads[Staleness]
  implicit val documentReads = Json.reads[Doc]
  implicit val entityReads = Json.reads[Entity]

  implicit val stalenessKbaReads = Json.reads[StalenessKba]

  implicit val docKbaReads: Reads[DocumentKba] = (
    (JsPath \ "streamid").read[String] and
      (JsPath \ "timestamp").read[Long] and
      (JsPath \ "relevance").read[Int] and
      (JsPath \ "score").read[Int] and
      (JsPath \ "ci").read[Int] and
      (JsPath \ "lambdas").read[Seq[StalenessKba]]
    )(DocumentKba.apply _)

  implicit val entityKbaReads = Json.reads[EntityKba]

  implicit val wordKbaReads = Json.reads[WordKba]

  implicit val clusterKbaReads: Reads[ClusterKba] = (
    (JsPath \ "cj").read[Int] and
      (JsPath \ "cj_emb").read[Seq[WordKba]]
    )(ClusterKba.apply _)

  implicit val embeddingKbaReads: Reads[EmbeddingKba] = (
    (JsPath \ "streamid").read[String] and
      (JsPath \ "timestamp").read[Long] and
      (JsPath \ "di").read[Seq[WordKba]] and
      (JsPath \ "clusters").read[Seq[ClusterKba]]
    )(EmbeddingKba.apply _)

}
85 | -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/kba/KBAStore.scala: -------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer.kba 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import nlp_serde.FileUtil 5 | import org.sameersingh.ervisualizer.data.{FreebaseReader, EntityUtils} 6 | import org.sameersingh.ervisualizer.kba 7 | import play.api.libs.json.Json 8 | 9 | import scala.collection.mutable.HashMap 10 | import scala.util.Random 11 | 12 | /** 13 | * @author sameer 14 | * @since 1/11/15.
15 | */
trait KBAStore {

  /** KBA record stored for a single entity id. */
  def entityKBA(id: String): Entity

  /** KBA record stored for the (sid, tid) entity pair. */
  def relationKBA(sid: String, tid: String): Entity
}

/** KBAStore backed by two mutable hash maps; missing keys fall back to EntityUtils.emptyKBA. */
class InMemEntityKBA extends KBAStore {
  val _entityKBA = new HashMap[String, kba.Entity]
  val _relationKBA = new HashMap[(String, String), kba.Entity]

  override def entityKBA(id: String): kba.Entity = _entityKBA.getOrElse(id, EntityUtils.emptyKBA(id))

  // The placeholder for an unknown pair is keyed "sid|tid".
  override def relationKBA(sid: String, tid: String): kba.Entity = _relationKBA.getOrElse(sid -> tid, EntityUtils.emptyKBA(sid + "|" + tid))
}

object EntityKBAReader {

  /**
   * Builds the KBA store. Loading of the staleness file is currently commented out,
   * so the returned store is empty; the config lookup is kept so that a missing
   * "nlp.data.baseDir" setting still fails here.
   */
  def read(): KBAStore = {
    val db = new InMemEntityKBA
    val cfg = ConfigFactory.load()
    val baseDir = cfg.getString("nlp.data.baseDir") //.replaceAll(" ", "\\ ")
    //StalenessReader.readStaleness(baseDir + "/docs.staleness.json.gz", db, Some(100))
    db
  }
}


object StalenessReader {

  // Fixed seed, so the subsampling below is reproducible for a given input order.
  val random = new Random(0)

  /**
   * Fills db._entityKBA and db._relationKBA from a file holding one JSON entity per line.
   *
   * @param stalenessFile path handed to FileUtil.inputSource
   * @param db            store whose maps are updated in place
   * @param maxPoints     when defined, staleness points and docs are each randomly thinned
   *                      with probability maxPoints / size; when None they are kept as read
   */
  def readStaleness(stalenessFile: String, db: InMemEntityKBA, maxPoints: Option[Int] = None): Unit = {
    import org.sameersingh.ervisualizer.kba.JsonReads._
    println("Reading staleness")
    val dotEvery = 100
    val lineEvery = 1000
    var docIdx = 0
    // NOTE(review): the source returned by FileUtil.inputSource is never closed here;
    // its type is not visible from this file, so confirm and close it if it supports that.
    for (line <- FileUtil.inputSource(stalenessFile, true).getLines()) {
      val oe = Json.fromJson[kba.Entity](Json.parse(line)).get
      val e = maxPoints match {
        case None => oe
        case Some(limit) =>
          val stalenessSampleProb = limit.toDouble / oe.staleness.size.toDouble
          val docSampleProb = limit.toDouble / oe.docs.size.toDouble
          // Staleness is filtered before docs so the random draws are consumed in a fixed order.
          kba.Entity(oe.id, oe.staleness.filter(s => random.nextDouble() < stalenessSampleProb),
            oe.docs.filter(s => random.nextDouble() < docSampleProb), oe.clusters)
      }
      if (e.id.contains("|")) {
        // An id containing '|' denotes a relation between two Freebase ids.
        val ids = e.id.split("\\|").map(s => FreebaseReader.convertFbIdToId(s))
        assert(ids.size == 2, s"More than 2 I in id?: ${e.id}: ${ids.mkString(", ")}")
        val rid = ids(0) -> ids(1)
        // db._relationIds += rid
        db._relationKBA(rid) = e
      } else {
        val id = FreebaseReader.convertFbIdToId(e.id)
        db._entityKBA(id) = e
      }
      docIdx += 1
      if (docIdx % dotEvery == 0) print(".")
      if (docIdx % lineEvery == 0) println(": read " + docIdx + " lines.")
    }
  }

  import JsonReads._

  /** Debug entry point: parses every line of docs.staleness.json under the configured base dir and prints it. */
  def main(args: Array[String]): Unit = {
    val source = io.Source.fromFile(ConfigFactory.load().getString("nlp.data.baseDir") + "/docs.staleness.json")
    try {
      for (line <- source.getLines()) {
        val e = Json.fromJson[kba.Entity](Json.parse(line)).get
        println(e)
      }
    } finally {
      // Release the file handle even when a line fails to parse.
      source.close()
    }
  }
}
90 | 91 | -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/data/SummaTextToHTML.scala: -------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer.data 2 | 3 | import java.io.PrintWriter 4 | 5 | import com.typesafe.config.ConfigFactory 6 | import org.sameersingh.ervisualizer.freebase.MongoIO 7 | import scala.collection.mutable 8 | import play.api.libs.json.Json 9 | 10 | import scala.collection.mutable.ArrayBuffer 11 | 12 | /** 13 | * Created by sameer on 7/20/14.
14 | */ 15 | class SummaTextToHTML(text: String) { 16 | 17 | case class TreeNode(text: String, children: ArrayBuffer[TreeNode]) { 18 | def toString(prefix: Int): String = { 19 | (0 until prefix).map(i => "-").foldLeft("")(_ + _) + text + "\n" + children.map(_.toString(prefix + 1)).mkString("") 20 | } 21 | 22 | def html(prefix: Int): String = { 23 | val prefixStr = (0 until prefix + 1).map(i => "\t").foldLeft("")(_ + _) 24 | prefixStr + 25 | "" + text + "\n" + 26 | (if (children.size > 0) { 27 | prefixStr + "\n" 30 | } else "") 31 | } 32 | } 33 | 34 | def tree: TreeNode = { 35 | val root = TreeNode("", new ArrayBuffer) 36 | val maxTabs = 3 37 | val currNodes = new mutable.HashMap[Int, TreeNode] 38 | currNodes(0) = root 39 | for (line <- text.split("\n+")) { 40 | val numTabs = if (line.startsWith("\t\t\t")) 3 else if (line.startsWith("\t\t")) 2 else if (line.startsWith("\t")) 1 else 0 41 | val str = line.trim 42 | val node = TreeNode(str, new ArrayBuffer) 43 | assert(currNodes.contains(numTabs)) 44 | currNodes(numTabs).children += node 45 | currNodes(numTabs + 1) = node 46 | ((numTabs + 2) to maxTabs).map(i => currNodes.remove(i)) 47 | } 48 | root 49 | } 50 | 51 | def html: String = 52 | """ 53 | | 54 | | 55 | | UW Summa 56 | | 57 | | 58 | | 59 | | 60 | | 61 | |
62 | |

Summa

63 | |%s 64 | |
65 | | 66 | | 67 | """.stripMargin format (tree.html(0)) 68 | } 69 | 70 | object SummaTextToHTML { 71 | def main(args: Array[String]): Unit = { 72 | val baseDir = ConfigFactory.load().getString("nlp.data.baseDir") + "/summa/" 73 | val fileName = if (args.isEmpty) "janara" else args(0) 74 | val text = io.Source.fromFile(baseDir + fileName + "/summa.txt").getLines().mkString("\n") 75 | val summa = new SummaTextToHTML(text) 76 | val output = "public/html/summa/" + fileName + ".html" 77 | val writer = new PrintWriter(output) 78 | writer.println(summa.html) 79 | writer.flush() 80 | writer.close() 81 | } 82 | } -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/data/TestDocs.scala: -------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer.data 2 | 3 | import nlp_serde.annotators.{AnnotatorPipeline, Annotator, StanfordAnnotator} 4 | import nlp_serde.immutable.Relation 5 | import nlp_serde.writers.PerLineJsonWriter 6 | import nlp_serde.{Document => Doc} 7 | 8 | import scala.collection.mutable 9 | 10 | /** 11 | * @author sameer 12 | * @since 7/27/15. 13 | */ 14 | object TestDocs { 15 | 16 | // Two documents, with two sentences each 17 | val docTexts = Seq( 18 | """ 19 | | Barack Obama was born in Honolulu. 20 | | He was married to Michelle before he became the president of USA. 21 | """.stripMargin, 22 | """ 23 | | Barack Obama went to Columbia University. 24 | | It was at Columbia that Barack met his wife-to-be, Michelle. 
25 | """.stripMargin).iterator 26 | 27 | val nlpAnnotator = new StanfordAnnotator() 28 | 29 | val linker = new Annotator { 30 | override def process(doc: Doc): Doc = { 31 | for (e <- doc.entities) { 32 | e.representativeString match { 33 | case "Barack Obama" => e.freebaseIds("/m/02mjmr") = 1.0 34 | case "USA" => e.freebaseIds("/m/09c7w0") = 1.0 35 | case "Michelle" => e.freebaseIds("/m/025s5v9") = 1.0 36 | case "Columbia University" => e.freebaseIds("/m/01w5m") = 1.0 37 | case "Columbia" => e.freebaseIds("/m/01w5m") = 1.0 38 | case "Honolulu" => e.freebaseIds("/m/02hrh0_") = 1.0 39 | case _ => println("unlinked: " + e.representativeString) 40 | } 41 | } 42 | doc 43 | } 44 | } 45 | 46 | val relExtractor = new Annotator { 47 | override def process(doc: Doc): Doc = { 48 | for (s <- doc.sentences) { 49 | for(m1 <- s.mentions; 50 | e1id <- m1.entityId; 51 | e1 = doc.entity(e1id); 52 | if (!e1.freebaseIds.isEmpty); 53 | m2 <- s.mentions; 54 | e2id <- m2.entityId; 55 | e2 = doc.entity(e2id); 56 | if (!e2.freebaseIds.isEmpty); 57 | if (m1 != m2)) { 58 | val rels: mutable.Set[String] = (e1.freebaseIds.maxBy(_._2)._1, e2.freebaseIds.maxBy(_._2)._1) match { 59 | case ("/m/02mjmr", "/m/025s5v9") => mutable.Set("per:spouse") 60 | case ("/m/025s5v9","/m/02mjmr") => mutable.Set("per:spouse") 61 | case ("/m/02mjmr", "/m/01w5m") => mutable.Set("per:school_attended") 62 | case ("/m/02mjmr", "/m/02hrh0_") => mutable.Set("per:born_in") 63 | case ("/m/02mjmr", "/m/09c7w0") => mutable.Set("per:president_of", "per:lives_in") 64 | case _ => mutable.Set.empty 65 | } 66 | if(!rels.isEmpty) s.relations += { 67 | val r = new nlp_serde.Relation 68 | r.m1Id = m1.id 69 | r.m2Id = m2.id 70 | r.relations = rels 71 | r 72 | } 73 | } 74 | } 75 | doc 76 | } 77 | } 78 | 79 | val pipeline = new AnnotatorPipeline(Seq(nlpAnnotator, linker, relExtractor)) 80 | 81 | def main(args: Array[String]): Unit = { 82 | val outputFile = "data/test/docs.json.gz" 83 | val docs = docTexts.zipWithIndex.map(ti => { 84 | 
val d = new Doc() 85 | d.id = "doc" + ti._2 86 | d.text = ti._1 87 | d 88 | }) 89 | val w = new PerLineJsonWriter(true) 90 | w.write(outputFile, pipeline.process(docs)) 91 | } 92 | 93 | } 94 | -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/data/DB.scala: -------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer.data 2 | 3 | /** 4 | * @author sameer 5 | * @since 6/10/14. 6 | */ 7 | trait DB { 8 | 9 | def docIds: Seq[String] 10 | 11 | def document(docId: String): Document 12 | 13 | def sentence(docId: String, sentId: Int): Sentence = document(docId).sents(sentId) 14 | 15 | def docEntityProvenances(docId: String, sentId: Int): Seq[(String, Seq[Provenance])] 16 | 17 | def entityIds: Iterable[String] 18 | 19 | def entityHeader(id: String): EntityHeader 20 | 21 | def entityInfo(id: String): EntityInfo 22 | 23 | def entityFreebase(id: String): EntityFreebase 24 | 25 | def entityText(id: String): EntityText 26 | 27 | def entityTypePredictions(id: String): Seq[String] 28 | 29 | def entityTypeProvenances(id: String, etype: String): TypeModelProvenances 30 | 31 | def relationIds: Seq[(String, String)] 32 | 33 | def relevantRelationIds: Iterator[(String, String)] 34 | 35 | def relations(sourceId: String): Seq[(String, String)] = relationIds.filter(id => id._1 == sourceId || id._2 == sourceId) 36 | 37 | def relationHeader(sid: String, tid: String): RelationHeader 38 | 39 | def relationFreebase(sid: String, tid: String): RelationFreebase 40 | 41 | def relationText(sid: String, tid: String): RelationText 42 | 43 | def relationPredictions(sid: String, tid: String): Seq[String] 44 | 45 | def relationProvenances(sid: String, tid: String, rtype: String): RelModelProvenances 46 | 47 | override def toString: String = { 48 | val sb = new StringBuilder 49 | sb append ("------- Documents -------\n") 50 | sb append ("docIds:\t%d\t%s\n" format(docIds.size, 
docIds.take(10).mkString(","))) 51 | val sents = docIds.map(document(_).sents).flatten 52 | sb append ("sentences:\t%d\t%s\n" format(sents.size, sents.take(4).mkString("\n\t", "\n\t", "..."))) 53 | sb append ("------- Entities -------\n") 54 | sb append ("entIds:\t%d\t%s\n" format(entityIds.size, entityIds.take(10).mkString(","))) 55 | sb append ("headers:\t%s\n" format (entityIds.map(entityHeader(_)).take(4).mkString("\n\t", "\n\t", "..."))) 56 | sb append ("info:\t%s\n" format (entityIds.map(entityInfo(_)).take(4).mkString("\n\t", "\n\t", "..."))) 57 | sb append ("freebase:\t%s\n" format (entityIds.map(entityFreebase(_)).take(4).mkString("\n\t", "\n\t", "..."))) 58 | sb append ("types:\t%s\n" format (entityIds.map(entityTypePredictions(_).mkString(",")).take(10).toSet.mkString("\n\t", "\n\t", "..."))) 59 | sb append ("typeProvenaces:\t%s\n" format ( 60 | entityIds.map(id => entityTypePredictions(id).map(t => id -> t)).flatten.take(10).map(idt => entityTypeProvenances(idt._1, idt._2)).mkString("\n\t", "\n\t", "..."))) 61 | sb append ("------- Relations -------\n") 62 | sb append ("relIds:\t%d\t%s\n" format(relationIds.size, relationIds.take(10).mkString(","))) 63 | sb append ("headers:\t%s\n" format (relationIds.map(p => relationHeader(p._1, p._2)).take(4).mkString("\n\t", "\n\t", "..."))) 64 | sb append ("freebase:\t%s\n" format (relationIds.map(p => relationFreebase(p._1, p._2)).take(4).mkString("\n\t", "\n\t", "..."))) 65 | sb append ("relations:\t%s\n" format (relationIds.map(p => relationPredictions(p._1, p._2).mkString(",")).take(10).distinct.mkString("\n\t", "\n\t", "..."))) 66 | sb append ("relProvenaces:\t%s\n" format ( 67 | relationIds.map(id => relationPredictions(id._1, id._2).map(t => id -> t)).flatten.take(10).map(idt => relationProvenances(idt._1._1, idt._1._2, idt._2)).mkString("\n\t", "\n\t", "..."))) 68 | sb.toString() 69 | } 70 | } 71 | -------------------------------------------------------------------------------- 
/app/org/sameersingh/ervisualizer/kba/KBADB.scala: --------------------------------------------------------------------------------
package org.sameersingh.ervisualizer.kba

import java.io.File

import com.typesafe.config.ConfigFactory
import play.api.libs.json.Json

import scala.collection.mutable.{ArrayBuffer, HashMap}

import JsonReads._

/**
 * In-memory store for the KBA pages: the entity list, the documents of each entity,
 * and the word clouds per (entity, timestamp) and per (entity, cluster, timestamp).
 *
 * Created by nacho.
 */
class KBADB {

  val _entities = new ArrayBuffer[EntityKba]
  val _documentsPerEntity = new HashMap[String, Seq[DocumentKba]]
  val _wordCloudPerEntityPerCluster = new HashMap[String, HashMap[(Int, Long), Seq[WordKba]]]
  val _wordCloudPerEntity = new HashMap[String, HashMap[Long, Seq[WordKba]]]

  // The lookups below use plain map application, so an unknown key throws NoSuchElementException.
  def documents(entityId: String): Seq[DocumentKba] = _documentsPerEntity(entityId)
  def entities : Seq[EntityKba] = _entities
  def clusterWordCloud(entityId: String, clusterId: Int, timestamp: Long): Seq[WordKba] = _wordCloudPerEntityPerCluster(entityId)((clusterId, timestamp))
  def documentWordCloud(entityId: String, timestamp: Long): Seq[WordKba] = _wordCloudPerEntity(entityId)(timestamp)

  /**
   * Builds a fresh KBADB from the files named by the "nlp.kba.entitiesFile",
   * "nlp.kba.stalenessBaseDir" and "nlp.kba.embeddingBaseDir" settings.
   *
   * Sub-directories inside the two base directories are ignored. A base directory
   * that cannot be listed is reported and treated as empty. A JSON line that does
   * not match its reader still fails through JsResult.get.
   *
   * @return a new, populated instance; the receiver itself is not modified
   */
  def readDB: KBADB = {
    // read json files
    println("reading files for KBA")
    val db = new KBADB

    val cfg = ConfigFactory.load()

    // read entities files
    println("reading entities file")
    readEntities(cfg.getString("nlp.kba.entitiesFile"), db)
    println("read entities file")

    // read staleness files
    val stalenessBaseDir = cfg.getString("nlp.kba.stalenessBaseDir")
    println("reading staleness files")
    for (file <- regularFiles(stalenessBaseDir)) {
      println("reading file " + file.getName())
      val docs = readJsonLines[DocumentKba](file)
      val entityName = file.getName().replace(".json", "")
      db._documentsPerEntity.put(entityName, docs)
      println("read file " + file.getName())
    }
    println("read staleness files")

    // read embedding files
    val embeddingBaseDir = cfg.getString("nlp.kba.embeddingBaseDir")
    println("reading embedding files")
    for (file <- regularFiles(embeddingBaseDir)) {
      println("reading file " + file.getName())
      val embeddings = readJsonLines[EmbeddingKba](file)
      val entityName = file.getName().replace(".json", "")
      val wordCloudPerEntity = new HashMap[Long, Seq[WordKba]]
      val wordCloudPerEntityPerCluster = new HashMap[(Int, Long), Seq[WordKba]]
      for (embedding <- embeddings) {
        wordCloudPerEntity.put(embedding.timestamp, embedding.di)
        for (cluster <- embedding.clusters) {
          wordCloudPerEntityPerCluster.put((cluster.cj, embedding.timestamp), cluster.cj_emb)
        }
      }
      db._wordCloudPerEntity.put(entityName, wordCloudPerEntity)
      db._wordCloudPerEntityPerCluster.put(entityName, wordCloudPerEntityPerCluster)
      println("read file " + file.getName())
    }
    println("read embedding files")
    db
  }

  /** Appends one EntityKba per non-blank line of the tab-separated entities file (first two columns, trimmed). */
  private def readEntities(fileName: String, db: KBADB): Unit = {
    val source = io.Source.fromFile(fileName, "UTF-8")
    try {
      // Blank lines are skipped; they would otherwise fail on split(1).
      for (line <- source.getLines() if line.trim.nonEmpty) {
        val split = line.split("\\t")
        db._entities += EntityKba(split(0).trim(), split(1).trim())
      }
    } finally {
      source.close()
    }
  }

  /** Non-directory entries of dirName; empty (with a message) when the directory cannot be listed. */
  private def regularFiles(dirName: String): Seq[File] = {
    // File.listFiles returns null rather than throwing when the path is not a readable directory.
    val listing = new File(dirName).listFiles()
    if (listing == null) {
      println("cannot list directory " + dirName + ", skipping")
      Seq.empty
    } else {
      listing.toSeq.filterNot(_.isDirectory())
    }
  }

  /** Parses every line of a UTF-8 file as JSON into T, closing the file even when a line fails. */
  private def readJsonLines[T](file: File)(implicit reads: play.api.libs.json.Reads[T]): Seq[T] = {
    val source = io.Source.fromFile(file, "UTF-8")
    try {
      // Materialise before close(): getLines() is lazy.
      source.getLines().map(l => Json.fromJson[T](Json.parse(l)).get).toVector
    } finally {
      source.close()
    }
  }
}

object KBADB extends KBADB
-------------------------------------------------------------------------------- /app/views/provs.scala.html: -------------------------------------------------------------------------------- 1 | @(title: String, ids: Seq[String], db: String) 2 | 3 | 4 | 5 | 6 | 7 | @title 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 27 | 28 | 29 |
30 |
31 | 34 |

Provenance

35 |
36 |
37 |
38 |
39 |
40 |
41 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /public/stylesheets/main.css: -------------------------------------------------------------------------------- 1 | .relation { 2 | stroke: #444; 3 | stroke-width: 5px; 4 | opacity: 0.1; 5 | stroke-linecap: round; 6 | } 7 | 8 | .entity { 9 | stroke: #000; 10 | stroke-width: 1px; 11 | opacity: 0.3; 12 | } 13 | 14 | .perEnt { 15 | fill: #8ac; 16 | } 17 | 18 | .locEnt { 19 | fill: #a8c; 20 | } 21 | 22 | .orgEnt { 23 | fill: #ac8; 24 | } 25 | 26 | .miscEnt { 27 | fill: #888; 28 | } 29 | 30 | .selected { 31 | opacity: 1.0; 32 | } 33 | 34 | .hover { 35 | opacity: 1.0; 36 | } 37 | 38 | path { 39 | stroke: #444444; 40 | stroke-width: 0.5px; 41 | fill: #eeeeee; 42 | } 43 | 44 | body, html { 45 | height: 100%; 46 | } 47 | 48 | .container { 49 | position:relative; 50 | max-width:100%; 51 | width:100%; 52 | height:100%; 53 | padding:0px; 54 | } 55 | 56 | .graphWindow { 57 | /* position:absolute; 58 | width:60%; 59 | left: 0px;*/ 60 | height:100%; 61 | } 62 | 63 | .provWindow { 64 | float:right; 65 | width:20%; 66 | position:absolute; 67 | right:5px; 68 | top:5px; 69 | z-index:100; 70 | visibility: hidden; 71 | overflow:auto; 72 | max-height: 100%; 73 | /*margin-left: 60%; 74 | margin-right:0px;*/ 75 | } 76 | 77 | .kbaWindow { 78 | float:right; 79 | width:58%; 80 | position:absolute; 81 | bottom:0; 82 | z-index:100; 83 | visibility: hidden; 84 | left: 21%; 85 | overflow:auto; 86 | max-height: 270px; 87 | margin-bottom: 5px; 88 | /*margin-left: 60%; 89 | margin-right:0px;*/ 90 | } 91 | 92 | .kbaPanel { 93 | border: 0px; 94 | } 95 | 96 | #infoBoxPanel { 97 | max-height: 200px; 98 | overflow:auto; 99 | } 100 | 101 | .provListGroup { 102 | margin-bottom: 0px; 103 | } 104 | 105 | .prov-item { 106 | padding: 3px; 107 | } 108 | 109 | .provPanelBody { 110 | padding: 5px; 111 | } 112 | 113 | .docId { 114 | font-size: 8px; 115 | margin-right:10px; 116 | } 117 | 118 | .docId::before { 119 | 
content: "["; 120 | } 121 | 122 | .docId::after { 123 | content: "]"; 124 | } 125 | 126 | .canvas { 127 | background-color: #fefefe; 128 | border: 1px solid #666; 129 | height: 100%; 130 | width: 100%; 131 | position:absolute; 132 | } 133 | 134 | .overlay { 135 | fill: none; 136 | pointer-events: all; 137 | } 138 | 139 | .center { 140 | position: absolute; 141 | top:10px; 142 | left:50%; 143 | z-index:100; 144 | } 145 | 146 | .centerContent { 147 | position: relative; 148 | left:-50%; 149 | z-index:100; 150 | } 151 | 152 | .infoBox { 153 | width: 20%; 154 | float: left; 155 | position:absolute; 156 | top:5px; 157 | left:5px; 158 | visibility: hidden; 159 | overflow:auto; 160 | max-height: 100%; 161 | /* 162 | height: 300px; 163 | padding: 5px; 164 | background-color: #fff; 165 | border: 1px solid #666; 166 | border-radius: 5px; 167 | */ 168 | } 169 | 170 | .closeButton { 171 | float:right; 172 | } 173 | 174 | /* typeahead specific style */ 175 | .typeahead, 176 | .tt-query, 177 | .tt-hint { 178 | width: 100%; 179 | height: 40px; 180 | padding: 4px 6px; 181 | font-size: 16px; 182 | line-height: 30px; 183 | border: 2px solid #ccc; 184 | border-radius: 5px; 185 | outline: none; 186 | } 187 | 188 | .typeahead { 189 | background-color: #fff; 190 | } 191 | 192 | .twitter-typeahead { 193 | width: 250px; 194 | } 195 | 196 | .typeahead:focus { 197 | border: 2px solid #0097cf; 198 | } 199 | 200 | .tt-query { 201 | -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); 202 | -moz-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); 203 | box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); 204 | } 205 | 206 | .tt-hint { 207 | color: #999 208 | } 209 | 210 | .tt-dropdown-menu { 211 | width: 250px; 212 | margin-top: 6px; 213 | padding: 4px 0; 214 | background-color: #fff; 215 | border: 1px solid #ccc; 216 | border: 1px solid rgba(0, 0, 0, 0.2); 217 | border-radius: 5px; 218 | max-height: 150px; 219 | overflow-y: auto; 220 | } 221 | 222 | .tt-suggestion { 223 | padding: 3px 20px; 
224 | font-size: 12px; 225 | line-height: 24px; 226 | } 227 | 228 | .tt-suggestion.tt-cursor { 229 | color: #fff; 230 | background-color: #0097cf; 231 | 232 | } 233 | 234 | .tt-suggestion p { 235 | margin: 0; 236 | } 237 | 238 | .gist { 239 | font-size: 14px; 240 | } 241 | 242 | .wordwrap { 243 | white-space: pre-wrap; /* CSS3 */ 244 | white-space: -moz-pre-wrap; /* Firefox */ 245 | white-space: -pre-wrap; /* Opera <7 */ 246 | white-space: -o-pre-wrap; /* Opera 7 */ 247 | word-wrap: break-word; /* IE */ 248 | } -------------------------------------------------------------------------------- /app/views/main.scala.html: -------------------------------------------------------------------------------- 1 | @(title: String, db: String) 2 | 3 | 4 | 5 | 6 | 7 | @title 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 |
23 |
24 | 25 |
26 |
27 | 39 |
40 |
41 | 44 |

Provenance

45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 | 58 |

59 |
60 |
61 |
62 | 63 |
64 | 65 |
66 | 67 |
68 |
69 |
70 | 71 |
72 |
73 |
74 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/data/JsonWrites.scala: --------------------------------------------------------------------------------
package org.sameersingh.ervisualizer.data

import org.sameersingh.ervisualizer.kba._
import play.api.libs.json._
import play.api.libs.functional.syntax._

/**
 * Created by sameer on 7/20/14.
 */
object JsonWrites {

  /** Writes a sequence of string pairs as a JSON array of two-element arrays. */
  implicit val seqStringPairWrites: Writes[Seq[(String, String)]] = new Writes[Seq[(String, String)]] {
    override def writes(o: Seq[(String, String)]): JsValue = {
      Json.toJson(o.map(p => Json.toJson(Seq(p._1, p._2))))
    }
  }

  /** Same encoding for integer pairs; not implicit here, it is re-exposed locally where needed. */
  val seqIntPairWrites: Writes[Seq[(Int, Int)]] = new Writes[Seq[(Int, Int)]] {
    override def writes(o: Seq[(Int, Int)]): JsValue = {
      Json.toJson(o.map(p => Json.toJson(Seq(p._1, p._2))))
    }
  }

  implicit val provWrites = {
    // Bring the Int-pair writer into implicit scope only while the macro expands.
    implicit val seqIntPairWritesImplicit = seqIntPairWrites
    Json.writes[Provenance]
  }
  implicit val senWrites = Json.writes[Sentence]
  implicit val docWrites = Json.writes[Document]

  implicit val entityHeaderWrites = Json.writes[EntityHeader]
  implicit val entityInfoWrites = Json.writes[EntityInfo]
  implicit val entityFbWrites = Json.writes[EntityFreebase]
  implicit val entityTxtWrites = Json.writes[EntityText]
  implicit val entityTypeProvWrites = Json.writes[TypeModelProvenances]

  implicit val relationHeaderWrites = Json.writes[RelationHeader]
  implicit val relationFreebaseWrites = Json.writes[RelationFreebase]
  implicit val relationTextWrites = Json.writes[RelationText]
  implicit val relationProvWrites = Json.writes[RelModelProvenances]

  implicit val wordWrites = Json.writes[Word]
  implicit val clusterWrites = Json.writes[Cluster]
  implicit val stalenessWrites = Json.writes[Staleness]
  implicit val documentWrites = Json.writes[Doc]
  implicit val entityWrites = Json.writes[Entity]

  implicit val stalenessKbaWrites = Json.writes[StalenessKba]

  // Hand-written because the JSON field names are spelled out here rather than derived by the macro.
  implicit val docKbaWrites: Writes[DocumentKba] = (
    (JsPath \ "streamid").write[String] and
      (JsPath \ "timestamp").write[Long] and
      (JsPath \ "relevance").write[Int] and
      (JsPath \ "score").write[Int] and
      (JsPath \ "ci").write[Int] and
      (JsPath \ "lambdas").write[Seq[StalenessKba]]
    )(unlift(DocumentKba.unapply))

  implicit val entityKbaWrites = Json.writes[EntityKba]

  implicit val wordKbaWrites = Json.writes[WordKba]

}

object JsonReads {

  /**
   * Reads a JSON array of arrays into string pairs, taking the first two elements of each inner array.
   * An inner array with fewer than two elements yields a JsError instead of an IndexOutOfBoundsException.
   */
  implicit val seqStringPairReads: Reads[Seq[(String, String)]] = new Reads[Seq[(String, String)]] {
    override def reads(json: JsValue): JsResult[Seq[(String, String)]] = {
      Json.fromJson[Seq[Seq[String]]](json).flatMap { seqs =>
        if (seqs.forall(_.size >= 2)) JsSuccess(seqs.map(seq => seq(0) -> seq(1)))
        else JsError("expected every inner array to contain at least two elements")
      }
    }
  }

  /** Integer counterpart of seqStringPairReads, with the same length validation; not implicit. */
  val seqIntPairReads: Reads[Seq[(Int, Int)]] = new Reads[Seq[(Int, Int)]] {
    override def reads(json: JsValue): JsResult[Seq[(Int, Int)]] = {
      Json.fromJson[Seq[Seq[Int]]](json).flatMap { seqs =>
        if (seqs.forall(_.size >= 2)) JsSuccess(seqs.map(seq => seq(0) -> seq(1)))
        else JsError("expected every inner array to contain at least two elements")
      }
    }
  }

  implicit val provReads = {
    // Bring the Int-pair reader into implicit scope only while the macro expands.
    implicit val seqIntPairReadsImplicit = seqIntPairReads
    Json.reads[Provenance]
  }
  implicit val senReads = Json.reads[Sentence]
  implicit val docReads = Json.reads[Document]

  implicit val entityHeaderReads = Json.reads[EntityHeader]
  implicit val entityInfoReads = Json.reads[EntityInfo]
  implicit val entityFbReads = Json.reads[EntityFreebase]
  implicit val entityTxtReads = Json.reads[EntityText]
  implicit val entityTypeProvReads = Json.reads[TypeModelProvenances]

  implicit val relationHeaderReads = Json.reads[RelationHeader]
  implicit val relationFreebaseReads = Json.reads[RelationFreebase]
  implicit val relationTextReads = Json.reads[RelationText]
  implicit val relationProvReads = Json.reads[RelModelProvenances]

  implicit val wordReads = Json.reads[Word]
  implicit val clusterReads = Json.reads[Cluster]
  implicit val stalenessReads = Json.reads[Staleness]
  implicit val documentReads = Json.reads[Doc]
  implicit val entityReads = Json.reads[Entity]

  implicit val stalenessKbaReads = Json.reads[StalenessKba]

  implicit val docKbaReads: Reads[DocumentKba] = (
    (JsPath \ "streamid").read[String] and
      (JsPath \ "timestamp").read[Long] and
      (JsPath \ "relevance").read[Int] and
      (JsPath \ "score").read[Int] and
      (JsPath \ "ci").read[Int] and
      (JsPath \ "lambdas").read[Seq[StalenessKba]]
    )(DocumentKba.apply _)

  implicit val entityKbaReads = Json.reads[EntityKba]

  implicit val wordKbaReads = Json.reads[WordKba]

  implicit val clusterKbaReads: Reads[ClusterKba] = (
    (JsPath \ "cj").read[Int] and
      (JsPath \ "cj_emb").read[Seq[WordKba]]
    )(ClusterKba.apply _)

  implicit val embeddingKbaReads: Reads[EmbeddingKba] = (
    (JsPath \ "streamid").read[String] and
      (JsPath \ "timestamp").read[Long] and
      (JsPath \ "di").read[Seq[WordKba]] and
      (JsPath \ "clusters").read[Seq[ClusterKba]]
    )(EmbeddingKba.apply _)

}
126 | -------------------------------------------------------------------------------- /public/html/summa/threat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | UW Summa 5 | 6 | 7 | 8 | 9 | 10 |
11 |

Summa

12 | 13 | 82 | 83 |
84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /data/test/ent.info: -------------------------------------------------------------------------------- 1 | {"id":"m_01w5m","freebaseInfo":{"/common/topic/description":"Columbia University in the City of New York, commonly referred to as Columbia University, is an American private Ivy League research university located in the Morningside Heights neighborhood of Upper Manhattan in New York City. It is the oldest institution of higher learning in the State of New York, the fifth oldest in the United States, and one of the country's nine Colonial Colleges founded before the American Revolution. Today the university operates Columbia Global Centers overseas in Amman, Beijing, Istanbul, Paris, Mumbai, Rio de Janeiro, Santiago and Nairobi.\\nThe university was founded in 1754 as King's College by royal charter of George II of Great Britain. After the American Revolutionary War, King's College briefly became a state entity, and was renamed Columbia College in 1784. The University now operates under a 1787 charter that places the institution under a private board of trustees, and in 1896 it was further renamed Columbia University. That same year, the university's campus was moved from Madison Avenue to its current location in Morningside Heights, where it occupies more than six city blocks, or 32 acres.","/mid":"/m/01w5m","Name":"Columbia University"}} 2 | {"id":"m_025s5v9","freebaseInfo":{"/common/topic/description":"Michelle LaVaughn Robinson Obama, an American lawyer and writer, is the wife of the 44th and current President of the United States, Barack Obama, and the first African-American First Lady of the United States. Raised on the South Side of Chicago, Obama attended Princeton University and Harvard Law School before returning to Chicago to work at the law firm Sidley Austin, where she met her future husband. 
Subsequently, she worked as part of the staff of Chicago mayor Richard M. Daley, and for the University of Chicago Medical Center.\\nThroughout 2007 and 2008, she helped campaign for her husband's presidential bid. She delivered a keynote address at the 2008 Democratic National Convention and also spoke at the 2012 Democratic National Convention. She is the mother of daughters Malia and Natasha. As the wife of a Senator, and later the First Lady, she has become a fashion icon and role model for women, and an advocate for poverty awareness, nutrition, and healthy eating.","/mid":"/m/025s5v9","/common/topic/image":"/m/04s8ccw","Name":"Michelle Obama"}} 3 | {"id":"m_02mjmr","freebaseInfo":{"/common/topic/description":"Barack Hussein Obama II (/bəˈrɑːk huːˈseɪn oʊˈbɑːmə/; born August 4, 1961) is the 44th and current President of the United States. He is the first African American to hold the office. Obama served as a U.S. Senator representing the state of Illinois from January 2005 to November 2008, when he resigned following his victory in the 2008 presidential election.\\n\\nBorn in Honolulu, Hawaii, Obama is a graduate of Columbia University and Harvard Law School, where he was the president of the Harvard Law Review. He was a community organizer in Chicago before earning his law degree. He worked as a civil rights attorney in Chicago and taught constitutional law at the University of Chicago Law School from 1992 to 2004. He served three terms representing the 13th District in the Illinois Senate from 1997 to 2004.","/mid":"/m/02mjmr","/common/topic/image":"/m/059x99z","Name":"Barack Obama"}} 4 | {"id":"m_09c7w0","freebaseInfo":{"/common/topic/description":"The United States of America, commonly referred to as the United States, America, and sometimes the States, is a federal republic consisting of 50 states and a federal district. The 48 contiguous states and Washington, D.C., are in central North America between Canada and Mexico. 
The state of Alaska is the northwestern part of North America and the state of Hawaii is an archipelago in the mid-Pacific. The country also has five populated and nine unpopulated territories in the Pacific and the Caribbean. At 3.79 million square miles in total and with around 318 million people, the United States is the third or fourth-largest country by total area and third largest by population. It is one of the world's most ethnically diverse and multicultural nations, the product of large-scale immigration from many countries. The geography and climate of the United States is also extremely diverse, and it is home to a wide variety of wildlife.\\nPaleo-Indians migrated from Eurasia to what is now the U.S. mainland around 15,000 years ago, with European colonization beginning in the 16th century. The United States emerged from 13 British colonies located along the Atlantic seaboard.","/mid":"/m/09c7w0","/common/topic/image":"/m/059h_54","Name":"United States of America"}} 5 | {"id":"m_02hrh0_","freebaseInfo":{"/common/topic/description":"Honolulu is the state capital and the most populous city in the U.S. state of Hawaii. It is the county seat of the City and County of Honolulu. Hawaii is a major tourist destination and Honolulu, situated on the island of Oahu, is the main gateway to Hawaii and a major gateway into the United States. The city is also a major hub for international business, military defense, as well as famously being host to a diverse variety of east-west and Pacific culture, cuisine, and traditions.\\nHonolulu is both the westernmost and the southernmost major American city. For statistical purposes, the U.S. Census Bureau recognizes the approximate area commonly referred to as \\\"City of Honolulu\\\" as a census county division. Honolulu is a major financial center of the islands and of the Pacific Ocean. 
The population of Honolulu CCD was 390,738 at the 2010 census, while the population of the consolidated city and county was 953,207.\\nIn the Hawaiian, Honolulu means \\\"sheltered bay\\\" or \\\"place of shelter\\\"; alternatively, it means \\\"calm port\\\".","/mid":"/m/02hrh0_","/common/topic/image":"/m/03tbtzv","Name":"Honolulu"}} 6 | -------------------------------------------------------------------------------- /public/javascripts/d3/topojson.v1.min.js: -------------------------------------------------------------------------------- 1 | !function(){function t(n,t){function r(t){var r,e=n.arcs[0>t?~t:t],o=e[0];return n.transform?(r=[0,0],e.forEach(function(n){r[0]+=n[0],r[1]+=n[1]})):r=e[e.length-1],0>t?[r,o]:[o,r]}function e(n,t){for(var r in n){var e=n[r];delete t[e.start],delete e.start,delete e.end,e.forEach(function(n){o[0>n?~n:n]=1}),f.push(e)}}var o={},i={},u={},f=[],c=-1;return t.forEach(function(r,e){var o,i=n.arcs[0>r?~r:r];i.length<3&&!i[1][0]&&!i[1][1]&&(o=t[++c],t[c]=r,t[e]=o)}),t.forEach(function(n){var t,e,o=r(n),f=o[0],c=o[1];if(t=u[f])if(delete u[t.end],t.push(n),t.end=c,e=i[c]){delete i[e.start];var a=e===t?t:t.concat(e);i[a.start=t.start]=u[a.end=e.end]=a}else i[t.start]=u[t.end]=t;else if(t=i[c])if(delete i[t.start],t.unshift(n),t.start=f,e=u[f]){delete u[e.end];var s=e===t?t:e.concat(t);i[s.start=e.start]=u[s.end=t.end]=s}else i[t.start]=u[t.end]=t;else t=[n],i[t.start=f]=u[t.end=c]=t}),e(u,i),e(i,u),t.forEach(function(n){o[0>n?~n:n]||f.push([n])}),f}function r(n,r,e){function o(n){var t=0>n?~n:n;(s[t]||(s[t]=[])).push({i:n,g:a})}function i(n){n.forEach(o)}function u(n){n.forEach(i)}function f(n){"GeometryCollection"===n.type?n.geometries.forEach(f):n.type in l&&(a=n,l[n.type](n.arcs))}var c=[];if(arguments.length>1){var a,s=[],l={LineString:i,MultiLineString:u,Polygon:u,MultiPolygon:function(n){n.forEach(u)}};f(r),s.forEach(arguments.length<3?function(n){c.push(n[0].i)}:function(n){e(n[0].g,n[n.length-1].g)&&c.push(n[0].i)})}else for(var 
h=0,p=n.arcs.length;p>h;++h)c.push(h);return{type:"MultiLineString",arcs:t(n,c)}}function e(r,e){function o(n){n.forEach(function(t){t.forEach(function(t){(f[t=0>t?~t:t]||(f[t]=[])).push(n)})}),c.push(n)}function i(n){return l(u(r,{type:"Polygon",arcs:[n]}).coordinates[0])>0}var f={},c=[],a=[];return e.forEach(function(n){"Polygon"===n.type?o(n.arcs):"MultiPolygon"===n.type&&n.arcs.forEach(o)}),c.forEach(function(n){if(!n._){var t=[],r=[n];for(n._=1,a.push(t);n=r.pop();)t.push(n),n.forEach(function(n){n.forEach(function(n){f[0>n?~n:n].forEach(function(n){n._||(n._=1,r.push(n))})})})}}),c.forEach(function(n){delete n._}),{type:"MultiPolygon",arcs:a.map(function(e){var o=[];if(e.forEach(function(n){n.forEach(function(n){n.forEach(function(n){f[0>n?~n:n].length<2&&o.push(n)})})}),o=t(r,o),(n=o.length)>1)for(var u,c=i(e[0][0]),a=0;n>a;++a)if(c===i(o[a])){u=o[0],o[0]=o[a],o[a]=u;break}return o})}}function o(n,t){return"GeometryCollection"===t.type?{type:"FeatureCollection",features:t.geometries.map(function(t){return i(n,t)})}:i(n,t)}function i(n,t){var r={type:"Feature",id:t.id,properties:t.properties||{},geometry:u(n,t)};return null==t.id&&delete r.id,r}function u(n,t){function r(n,t){t.length&&t.pop();for(var r,e=s[0>n?~n:n],o=0,i=e.length;i>o;++o)t.push(r=e[o].slice()),a(r,o);0>n&&f(t,i)}function e(n){return n=n.slice(),a(n,0),n}function o(n){for(var t=[],e=0,o=n.length;o>e;++e)r(n[e],t);return t.length<2&&t.push(t[0].slice()),t}function i(n){for(var t=o(n);t.length<4;)t.push(t[0].slice());return t}function u(n){return n.map(i)}function c(n){var t=n.type;return"GeometryCollection"===t?{type:t,geometries:n.geometries.map(c)}:t in l?{type:t,coordinates:l[t](n)}:null}var a=g(n.transform),s=n.arcs,l={Point:function(n){return e(n.coordinates)},MultiPoint:function(n){return n.coordinates.map(e)},LineString:function(n){return o(n.arcs)},MultiLineString:function(n){return n.arcs.map(o)},Polygon:function(n){return u(n.arcs)},MultiPolygon:function(n){return 
n.arcs.map(u)}};return c(t)}function f(n,t){for(var r,e=n.length,o=e-t;o<--e;)r=n[o],n[o++]=n[e],n[e]=r}function c(n,t){for(var r=0,e=n.length;e>r;){var o=r+e>>>1;n[o]n&&(n=~n);var r=o[n];r?r.push(t):o[n]=[t]})}function r(n,r){n.forEach(function(n){t(n,r)})}function e(n,t){"GeometryCollection"===n.type?n.geometries.forEach(function(n){e(n,t)}):n.type in u&&u[n.type](n.arcs,t)}var o={},i=n.map(function(){return[]}),u={LineString:t,MultiLineString:r,Polygon:r,MultiPolygon:function(n,t){n.forEach(function(n){r(n,t)})}};n.forEach(e);for(var f in o)for(var a=o[f],s=a.length,l=0;s>l;++l)for(var h=l+1;s>h;++h){var p,v=a[l],g=a[h];(p=i[v])[f=c(p,g)]!==g&&p.splice(f,0,g),(p=i[g])[f=c(p,v)]!==v&&p.splice(f,0,v)}return i}function s(n,t){function r(n){i.remove(n),n[1][2]=t(n),i.push(n)}var e=g(n.transform),o=m(n.transform),i=v();return t||(t=h),n.arcs.forEach(function(n){var u,f=[],c=0;n.forEach(e);for(var a=1,s=n.length-1;s>a;++a)u=n.slice(a-1,a+2),u[1][2]=t(u),f.push(u),i.push(u);n[0][2]=n[s][2]=1/0;for(var a=0,s=f.length;s>a;++a)u=f[a],u.previous=f[a-1],u.next=f[a+1];for(;u=i.pop();){var l=u.previous,h=u.next;u[1][2]0;){var r=(t+1>>1)-1,o=e[r];if(p(n,o)>=0)break;e[o._=t]=o,e[n._=t=r]=n}}function t(n,t){for(;;){var r=t+1<<1,i=r-1,u=t,f=e[u];if(o>i&&p(e[i],f)<0&&(f=e[u=i]),o>r&&p(e[r],f)<0&&(f=e[u=r]),u===t)break;e[f._=t]=f,e[n._=t=u]=n}}var r={},e=[],o=0;return r.push=function(t){return n(e[t._=o]=t,o++),o},r.pop=function(){if(!(0>=o)){var n,r=e[0];return--o>0&&(n=e[o],t(e[n._=0]=n,0)),r}},r.remove=function(r){var i,u=r._;if(e[u]===r)return u!==--o&&(i=e[o],(p(i,r)<0?n:t)(e[i._=u]=i,u)),u},r}function g(n){if(!n)return y;var t,r,e=n.scale[0],o=n.scale[1],i=n.translate[0],u=n.translate[1];return function(n,f){f||(t=r=0),n[0]=(t+=n[0])*e+i,n[1]=(r+=n[1])*o+u}}function m(n){if(!n)return y;var t,r,e=n.scale[0],o=n.scale[1],i=n.translate[0],u=n.translate[1];return function(n,f){f||(t=r=0);var c=0|(n[0]-i)/e,a=0|(n[1]-u)/o;n[0]=c-t,n[1]=a-r,t=c,r=a}}function y(){}var 
d={version:"1.6.15",mesh:function(n){return u(n,r.apply(this,arguments))},meshArcs:r,merge:function(n){return u(n,e.apply(this,arguments))},mergeArcs:e,feature:o,neighbors:a,presimplify:s};"function"==typeof define&&define.amd?define(d):"object"==typeof module&&module.exports?module.exports=d:this.topojson=d}(); -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | er-visualizer 2 | ============= 3 | 4 | D3 and Play based visualization for entity-relation graphs, especially for NLP and information extraction 5 | 6 | # Basic Example 7 | 8 | Here we are going to show the visualization with a few entities and relations, although it can handle up to hundreds of thousands of entities and relations. 9 | 10 | The first page shows a simple search box to identify subsets of documents to visualize. 11 | 12 | ![Search page](https://github.com/sameersingh/er-visualizer/raw/master/docs/img/search.png) 13 | 14 | The visualization lays out all the extracted entities and relations onto a map as a graph. The entity nodes are sized according to their popularity (in the document collection), and colored according to their types (person, location, or organization). 15 | 16 | Clicking on an entity node brings up details from Freebase on the left, and detailed textual provenance on the right. The provenance also contains fine-grained types, if they are part of the annotations. 17 | 18 | ![Entities](https://github.com/sameersingh/er-visualizer/raw/master/docs/img/entity.png) 19 | 20 | The edges represent extracted relations, with the width proportional to the number of mentions of the relation. Clicking on a relation brings up its provenances on the right. 
21 | 22 | ![Relations](https://github.com/sameersingh/er-visualizer/raw/master/docs/img/relations.png) 23 | 24 | ## Running the Example 25 | 26 | The following are the instructions for running the basic example shown above 27 | 28 | 1. `sbt clean compile` 29 | 1. `sbt run` 30 | 1. Open [localhost:9000](http://localhost:9000/) 31 | 1. Use `obama` to visualize ~~all~~ both documents. 32 | 33 | # Input Data 34 | 35 | To visualize the documents, they need to be annotated with basic NLP (NER specifically), linked to Freebase entities, and have relations extracted on a per-sentence level. The following is the list of files that contain this information. 36 | 37 | For the files used for the visualization above, see [data/test](https://github.com/sameersingh/er-visualizer/tree/master/data/test). 38 | 39 | ## Necessary files 40 | 41 | 1. Create a directory where all the files below will go, and specify it in `application.conf` as `nlp.data.baseDir` (See `reference.conf`) 42 | 1. **Documents**: A json file (`docs.json.gz`), as described below (see **Processed Documents**), containing the processed documents with entity linking and relations. 43 | 1. **Entities**: Information about the entities from Freebase, either read from a Mongo server, or read from files `ent.info`, `ent.freebase`, and `ent.head` as prepared from Freebase below (see **Freebase Information**) 44 | 1. `wcounts.txt.gz` and `ecounts.txt.gz`: Gzipped files containing list of keywords and entities for search (generated from `docs.json.gz` using `org.sameersingh.ervisualizer.data.WordCounts`). 45 | 46 | ## Processed Documents 47 | 48 | This will describe how we generate `docs.json.gz` (file name can be modified in the configuration using `docsFile`). 49 | 50 | We will be using `nlp_serde` as the underlying document representation. 
The library contains data structures for representing most of the NLP annotations, including entity linking and relation extraction, so you can directly wrap your document annotations into those classes, and then write out a documents file using `nlp_serde.writers.PerLineJsonWriter`. See [`org.sameersingh.ervisualizer.data.TestDocs`](https://github.com/sameersingh/er-visualizer/blob/master/app/org/sameersingh/ervisualizer/data/TestDocs.scala) for example annotated documents. 51 | 52 | Or, less desirably, you can write out the JSON files directly from your code (see `data/test/docs.json.gz` for an example). 53 | 54 | ## Freebase Information 55 | 56 | Visualization needs access to Freebase information about the entities that appear in your document collection. 57 | 58 | You can either have a Mongo server running (requires a lot of memory, and might be slower), or create the relevant files yourself (configured using `nlp.data.mongo` flag). The test above uses the file mode, i.e. you don't need to run a Mongo server. 59 | 60 | ### Reading Freebase Info from Mongo 61 | 62 | 1. Download a [freebase RDF dump](http://commondatastorage.googleapis.com/freebase-public/rdf/freebase-rdf-latest.gz), for example `freebase-rdf-2014-07-06-00-00.gz`. 63 | 1. Grep the dump to create a file for each of the following relations (using something like `zcat freebase-rdf-2014-07-06-00-00.gz | grep "" | gzip > $relation.gz`): 64 | - `type.object.id` 65 | - `type.object.name` 66 | - `common.topic.image` 67 | - `common.topic.description` 68 | - `common.topic.notable_types` 69 | - `location.location.geolocation` 70 | - `location.geocode.longitude` 71 | - `location.geocode.latitude` 72 | 1. Start a Mongo server, and run `org.sameersingh.ervisualizer.freebase.LoadMongo` to populate it (change `baseDir`, `host`, and `port` if needed) 73 | 1. Run visualization with `nlp.data.mongo = true` to use the Mongo server. 
74 | 75 | ### Reading Freebase Info from Files 76 | 77 | Reading from Mongo can be inefficient, and thus it is more efficient to read this information directly from files, as we will describe here. Note that you still need Mongo to generate the files the first time around, but you don't need it after the files have been created. 78 | 79 | The files `ent.info`, `ent.freebase`, and `ent.head` are pretty simple per-line JSON files containing the entity information, corresponding to the case classes in [`Entity.scala`](https://github.com/sameersingh/er-visualizer/blob/master/app/org/sameersingh/ervisualizer/data/Entity.scala). You can use the method below to construct these files, or generate your own directly. The only constraint is that these three files are aligned, i.e. information about the same entity appears in the three files on the same line number. 80 | 81 | If you want to use Mongo to generate these files: 82 | 83 | 1. Previous steps of creating documents and setting up a Mongo server. 84 | 1. Run `org.sameersingh.ervisualizer.freebase.GenerateEntInfo` to generate the files. 85 | 1. Run visualization with `nlp.data.mongo = false`, and you can shut down the Mongo server. 86 | 87 | # Contact 88 | 89 | Please use Github issues if you have problems/questions. 
90 | -------------------------------------------------------------------------------- /app/controllers/Application.scala: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import org.sameersingh.ervisualizer.Logging 5 | import org.sameersingh.ervisualizer.kba.{EntityKBAReader, KBAStore} 6 | import play.api.mvc._ 7 | import org.sameersingh.ervisualizer.data._ 8 | import play.api.libs.json.Json 9 | 10 | import scala.collection.mutable 11 | 12 | /* Play controller serving the search/visualizer pages and the JSON endpoints for documents, entities and relations. Documents and the KBA store are loaded lazily by init(). */ object Application extends Controller with Logging { 13 | 14 | val config = ConfigFactory.load() 15 | val defaultDBName = config.getString("nlp.data.defaultDB") 16 | 17 | val _docs = new DocumentStore() 18 | val _dbStore = new DBStore(_docs) 19 | 20 | private val _db: mutable.Map[String, DB] = new mutable.HashMap[String, DB] 21 | private var _entKBA: KBAStore = null 22 | 23 | def db(id: String) = _dbStore.id(id) 24 | def dbQueryId(query: String) = _dbStore.query(query)._1 25 | 26 | def entKBA = _entKBA 27 | 28 | /* Reads the KBA store and loads the documents named by nlp.data.baseDir / nlp.data.docsFile into _docs. */ def init() { 29 | _entKBA = EntityKBAReader.read() 30 | val docDir = config.getString("nlp.data.baseDir") 31 | val docsFile = config.getString("nlp.data.docsFile") 32 | DocumentStore.readDocs(_docs, docDir, docsFile) 33 | } 34 | 35 | import org.sameersingh.ervisualizer.data.JsonWrites._ 36 | 37 | def index = search // reset(defaultDBName) 38 | 39 | def search = Action { 40 | Ok(views.html.search("UW Visualizer")) 41 | } 42 | 43 | /* Runs `query` against the DB store and renders the main page for the resulting DB id. A missing query parameter is treated as the empty string everywhere (previously query.get threw NoSuchElementException before the getOrElse below was reached). */ def page(query: Option[String]) = Action { 44 | logger.info("Query : \"" + query.getOrElse("") + "\"") 45 | if(_docs.numDocs == 0) init() 46 | val (dbId, _) = _dbStore.query(query.getOrElse("")) 47 | logger.info(" Id : " + dbId) 48 | Ok(views.html.main("UW Visualizer - " + query.getOrElse(""), dbId)) 49 | } 50 | 51 | def pageId(dbId: String) = Action { 52 | if(_docs.numDocs == 0) init() 53 | val query = _dbStore.queryIdMap(dbId) 54 | logger.info("Request Id : " + dbId + ", saved query: \"" + query + "\"") 
55 | Ok(views.html.main("UW Visualizer - " + query, dbId)) 56 | } 57 | 58 | /* NOTE(review): entKBA is only assigned in init(); this action does not call init() itself - confirm a page request always happens first. */ def entityKBA(id: String) = Action { 59 | println("eKBA: " + id) 60 | Ok(Json.toJson(entKBA.entityKBA(id))) 61 | } 62 | 63 | def relationKBA(sid: String, tid: String) = Action { 64 | /* Parenthesised like the other relation handlers: `+` and `->` share precedence, so the unparenthesised form built a tuple ("rKBA: "+sid, tid). */ println("rKBA: " + (sid -> tid)) 65 | Ok(Json.toJson(entKBA.relationKBA(sid, tid))) 66 | } 67 | 68 | def document(docId: String, dbId: Option[String]) = Action { 69 | println("doc: " + docId) 70 | //SeeOther("http://allafrica.com/stories/%s.html?viewall=1" format(docId.take(12))) 71 | Ok(Json.prettyPrint(Json.toJson(db(dbId.get).document(docId)))) 72 | } 73 | 74 | def sentence(docId: String, sid: Int, dbName: Option[String]) = Action { 75 | // println("sen: " + docId + ", " + sid) 76 | Ok(Json.toJson(db(dbName.get).document(docId).sents(sid))) 77 | } 78 | 79 | def entityHeaders(dbName: Option[String]) = Action { 80 | println("Entity Headers: " + dbName.get) 81 | Ok(Json.toJson(db(dbName.get).entityIds.map(id => db(dbName.get).entityHeader(id)).toSeq)) 82 | } 83 | 84 | def entityInfo(id: String, dbName: Option[String]) = Action { 85 | println("eInfo: " + id) 86 | Ok(Json.toJson(db(dbName.get).entityInfo(id))) 87 | } 88 | 89 | def entityFreebase(id: String, dbName: Option[String]) = Action { 90 | println("eFb: " + id) 91 | Ok(Json.toJson(db(dbName.get).entityFreebase(id))) 92 | } 93 | 94 | /* When `limit` is defined and positive only the first `limit` provenances are returned; otherwise the full entity text is returned. */ def entityText(id: String, dbName: Option[String], limit: Option[Int]) = Action { 95 | println("eTxt: " + id) 96 | if (limit.isDefined && limit.get > 0) 97 | Ok(Json.toJson(EntityText(id, db(dbName.get).entityText(id).provenances.take(limit.get)))) 98 | else Ok (Json.toJson(db(dbName.get).entityText(id))) 99 | } 100 | 101 | def entityProvs(id: String, dbName: Option[String]) = Action { 102 | println("eTxt: " + id) 103 | Ok(views.html.provs("Entity " + id, Seq(id), dbName.get)) 104 | //Ok(Json.toJson(db(dbName).entityText(id))) 105 | } 106 | 107 | def entityRelations(id: String, dbName: Option[String]) = Action { 108 | 
println("eRels: " + id) 109 | Ok(Json.toJson(db(dbName.get).relations(id))) 110 | } 111 | 112 | def entityTypes(id: String, dbName: Option[String]) = Action { 113 | println("eT: " + id + ": " + db(dbName.get).entityTypePredictions(id).mkString(", ")) 114 | Ok(Json.toJson(db(dbName.get).entityTypePredictions(id))) 115 | } 116 | 117 | def entityTypeProv(id: String, etype: String, dbName: Option[String], limit: Option[Int]) = Action { 118 | println("eTP: " + id + ", " + etype) 119 | if (limit.isDefined && limit.get > 0) 120 | Ok(Json.toJson(TypeModelProvenances(id, etype, db(dbName.get).entityTypeProvenances(id, etype).provenances.take(limit.get)))) 121 | else Ok(Json.toJson(db(dbName.get).entityTypeProvenances(id, etype))) 122 | } 123 | 124 | def relationHeaders(dbName: Option[String]) = Action { 125 | println("Relation Headers: " + dbName.get) 126 | Ok(Json.toJson(db(dbName.get).relationIds.map(id => db(dbName.get).relationHeader(id._1, id._2)).toSeq)) 127 | } 128 | 129 | def relationFreebase(sid: String, tid: String, dbName: Option[String]) = Action { 130 | println("RelFreebase: " + (sid -> tid)) 131 | Ok(Json.toJson(db(dbName.get).relationFreebase(sid, tid))) 132 | } 133 | 134 | def relationText(sid: String, tid: String, dbName: Option[String], limit: Option[Int]) = Action { 135 | println("RelText: " + (sid -> tid)) 136 | if (limit.isDefined && limit.get > 0) 137 | Ok(Json.toJson(RelationText(sid, tid, db(dbName.get).relationText(sid, tid).provenances.take(limit.get)))) 138 | else Ok(Json.toJson(db(dbName.get).relationText(sid, tid))) 139 | } 140 | 141 | def relationProvs(sid: String, tid: String, dbName: Option[String]) = Action { 142 | println("RelText: " + (sid -> tid)) 143 | Ok(views.html.provs("Relation: %s -> %s ".format(sid, tid), Seq(sid, tid), dbName.get)) 144 | //Ok(Json.toJson(db(dbName).relationText(sid, tid))) 145 | } 146 | 147 | def relationPredictions(sid: String, tid: String, dbName: Option[String]) = Action { 148 | println("RelPred: " + (sid -> 
tid)) 149 | Ok(Json.toJson(db(dbName.get).relationPredictions(sid, tid))) 150 | } 151 | 152 | def relationProvenances(sid: String, tid: String, rtype: String, dbName: Option[String], limit: Option[Int]) = Action { 153 | println("RelProv: " + (sid -> tid)) 154 | if (limit.isDefined && limit.get > 0) 155 | Ok(Json.toJson(RelModelProvenances(sid, tid, rtype, db(dbName.get).relationProvenances(sid, tid, rtype).provenances.take(limit.get)))) 156 | else Ok(Json.toJson(db(dbName.get).relationProvenances(sid, tid, rtype))) 157 | } 158 | 159 | 160 | } -------------------------------------------------------------------------------- /public/javascripts/main-kba.js: -------------------------------------------------------------------------------- 1 | 2 | var parseDate = d3.time.format('%x'); 3 | var entities = []; 4 | //var scale = 604800; // per week 5 | var scale = 86400; // day 6 | 7 | function parseData(d) { 8 | var clusters = Math.max.apply(null, _.pluck(d, "ci")) 9 | var data = []; 10 | for (i=0; i < clusters; i++) { 11 | data[i] = []; 12 | } 13 | 14 | var maxTimestamp = 0; 15 | var minTimestamp = 999999999999; 16 | 17 | d.map(function(e,i) { 18 | if (e.timestamp > maxTimestamp) { 19 | maxTimestamp = e.timestamp; 20 | } 21 | if (e.timestamp < minTimestamp) { 22 | minTimestamp = e.timestamp; 23 | } 24 | e.lambdas.map(function (c, i) { 25 | data[c.cj-1].push({x: e.timestamp, y: c.dec}); 26 | data[c.cj-1].push({x: e.timestamp, y: c.inc}); 27 | }); 28 | }); 29 | 30 | var bins = Math.round((maxTimestamp - minTimestamp) / scale); 31 | console.log(bins); 32 | 33 | xs = [] 34 | vitals = [] 35 | non_vitals = [] 36 | for (i=0; i < bins; i++) { 37 | xs.push(Math.round(minTimestamp + (scale * i))); 38 | vitals.push(0); 39 | non_vitals.push(0); 40 | } 41 | 42 | d.map(function(e,i) { 43 | var bin = Math.round((e.timestamp - minTimestamp) / scale); 44 | if (e.relevance == 2) { 45 | vitals[bin] += 1; 46 | } else { 47 | non_vitals[bin] += 1; 48 | } 49 | }); 50 | 51 | var staleness = 
data.map(function(cluster, i) { 52 | return { 53 | key: "C" + (i+1), 54 | values: cluster 55 | }; 56 | }); 57 | 58 | vitals_values = []; 59 | non_vitals_values = []; 60 | for (i=0; i < bins; i++) { 61 | vitals_values.push({x : xs[i], y: vitals[i]}); 62 | non_vitals_values.push({x: xs[i], y: non_vitals[i]}); 63 | } 64 | 65 | var relevance = [ { 66 | key: "Vital", 67 | values: vitals_values 68 | }, { 69 | key: "Non-Vital", 70 | values: non_vitals_values 71 | }]; 72 | return [staleness, relevance] 73 | } 74 | 75 | function registerEvent(src, dst) { 76 | src.dispatch.on("brush", function(evt) { 77 | //dst.brushExtent(evt.extent); 78 | //var oldTransition = dst.transitionDuration(); 79 | //dst.transitionDuration(0); 80 | //dst.dispatch.brush(); 81 | //dst.transitionDuration(oldTransition); 82 | }); 83 | } 84 | 85 | 86 | function getDocuments(e) { 87 | var entity = e.id; 88 | d3.json('/kba/documents/' + e.id, function(error, d) { 89 | if (!error) { 90 | var data = parseData(d); 91 | var relevanceChart = timeChart('#relevance', 'd', data[1]); 92 | relevanceChart.yAxis.axisLabel('number of documents').axisLabelDistance(40); 93 | //relevanceChart.lines.dispatch.on('elementClick', function(e) { 94 | //onRelevanceClick(entity, e.point.x); 95 | //}); 96 | var stalenessChart = timeChart('#staleness', ',.2f', data[0]); 97 | stalenessChart.yAxis.axisLabel('staleness').axisLabelDistance(40); 98 | //stalenessChart.interpolate("basis"); 99 | stalenessChart.lines.dispatch.on('elementClick', function(e) { 100 | onClusterClick(entity, e.series.key.charAt(1), e.point.x); 101 | }); 102 | //registerEvent(relevanceChart, stalenessChart); 103 | //registerEvent(stalenessChart, relevanceChart); 104 | } 105 | }); 106 | } 107 | 108 | function renderModal(d) { 109 | wordCloud(d); 110 | $('#wordcloud').modal(); 111 | } 112 | 113 | function onRelevanceClick(entity, timestamp) { 114 | d3.json('/kba/wordcloud/' + entity + '/' + timestamp, function(error, d) { 115 | if (!error) { 116 | 
renderModal(d); 117 | } 118 | }); 119 | } 120 | 121 | /* Requests /kba/wordcloud/<entity>/<clusterid>/<timestamp> and, when the request succeeds, shows the result in the word-cloud modal. */ function onClusterClick(entity, clusterid, timestamp) { 122 | d3.json('/kba/wordcloud/' + entity + '/' + clusterid + '/' + timestamp, function(error, d) { 123 | if (!error) { 124 | renderModal(d); 125 | } 126 | }); 127 | } 128 | 129 | /* Builds an nvd3 lineWithFocusChart inside the element selected by `id`, replacing any existing svg there, and returns the chart. `format` is passed to d3.format for both y axes. x values are multiplied by 1000 before being turned into a Date - presumably they are epoch seconds; confirm against the server data. */ function timeChart(id, format, data) { 130 | var chart = nv.models.lineWithFocusChart(); 131 | chart.xAxis.tickFormat(function(d) { 132 | return parseDate(new Date(d * 1000)); 133 | }); 134 | chart.x2Axis.tickFormat(function(d) { 135 | return parseDate(new Date(d * 1000)); 136 | }); 137 | chart.yAxis.tickFormat(d3.format(format)); 138 | chart.y2Axis.tickFormat(d3.format(format)); 139 | 140 | nv.addGraph(function() { 141 | d3.select(id + ' svg').remove(); 142 | d3.select(id).append('svg'); 143 | d3.select(id + ' svg') 144 | .datum(data) 145 | .transition().duration(500) 146 | .call(chart); 147 | nv.utils.windowResize(chart.update); 148 | return chart; 149 | }); 150 | // remove tooltips 151 | chart.tooltips(false); 152 | chart.lines.dispatch.on('elementMouseover.tooltip', null); 153 | chart.lines.dispatch.on('elementMouseout.tooltip', null); 154 | //chart.color(['#8c510a','#bf812d','#dfc27d','#f6e8c3','#c7eae5','#80cdc1','#35978f','#01665e']); 155 | return chart; 156 | } 157 | 158 | /* Attaches a Bloodhound-backed typeahead over the global `entities` list (matched on `name`) to '#entity .typeahead'; selecting or autocompleting a suggestion calls getDocuments with it. */ function initTypeahead() { 159 | var bh = new Bloodhound({ 160 | datumTokenizer: Bloodhound.tokenizers.obj.whitespace('name'), 161 | queryTokenizer: Bloodhound.tokenizers.whitespace, 162 | local: $.map(entities, function(e) { return e; }) 163 | }); 164 | bh.initialize(); 165 | $('#entity .typeahead').typeahead({ 166 | hint: true, 167 | highlight: true, 168 | minLength: 1 169 | }, 170 | { 171 | name: 'entities', 172 | displayKey: 'name', 173 | source: bh.ttAdapter() 174 | }) 175 | .on('typeahead:selected', function($e, datum){ 176 | getDocuments(datum); 177 | } 178 | ) 179 | .on('typeahead:autocompleted', function($e, datum){ 180 | $('#entity .typeahead').typeahead('close'); 181 | getDocuments(datum); 182 
| }); 183 | } 184 | 185 | /* Lays out `data` (items read as {t: text, p: value}) with d3.layout.cloud on a 300x300 canvas and draws the result into #wordcloud-body, replacing any previous svg. Font size is 10 + (p / 1000) * 19. */ function wordCloud(data) { 186 | var words = data.map(function(d) { 187 | return {text: d.t, size: 10 + (d.p / 1000) * 19}; 188 | }); 189 | var fill = d3.scale.category20(); 190 | d3.layout.cloud().size([300, 300]) 191 | .words(words) 192 | .padding(5) 193 | /* each word is rotated by either 0 or 90 degrees, chosen at random */ .rotate(function() { return ~~(Math.random() * 2) * 90; }) 194 | .font("Impact") 195 | .fontSize(function(d) { return d.size; }) 196 | .on("end", draw) 197 | .start(); 198 | 199 | function draw(words) { 200 | d3.select("#wordcloud-body svg").remove(); 201 | d3.select("#wordcloud-body").append("svg") 202 | .attr("width", 300) 203 | .attr("height", 300) 204 | .append("g") 205 | .attr("transform", "translate(150,150)") 206 | .selectAll("text") 207 | .data(words) 208 | .enter().append("text") 209 | .style("font-size", function(d) { return d.size + "px"; }) 210 | .style("font-family", "Impact") 211 | .style("fill", function(d, i) { return fill(i); }) 212 | .attr("text-anchor", "middle") 213 | .attr("transform", function(d) { 214 | return "translate(" + [d.x, d.y] + ")rotate(" + d.rotate + ")"; 215 | }) 216 | .text(function(d) { return d.text; }); 217 | } 218 | } 219 | 220 | /* Entry point: loads /kba/entities, keeps the id and name of each entry in the global `entities`, then initialises the typeahead. */ function run() { 221 | //wordCloud(); 222 | d3.json('/kba/entities', function(data) { 223 | entities = data.map(function(e, i) { 224 | return {id: e.id, name: e.name}; 225 | }); 226 | initTypeahead(); 227 | }); 228 | } -------------------------------------------------------------------------------- /public/html/summa/randy.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | UW Summa 5 | 6 | 7 | 8 | 9 | 10 |
11 |

Summa

12 | 13 |
    14 |
  • 15 | 4-15-2014 As the strategic cooperative partner of Nigeria , China will continue its firm support to the Nigerian government in safeguarding national security and regional stability . 16 |
      17 |
    • 18 | 4-15-2014 As the strategic cooperative partner of Nigeria , China will continue its firm support to the Nigerian government in safeguarding national security and regional stability . 19 |
    • 20 |
    • 21 | 5-8-2014 As good as that may sound , but whichever politician refused to hijack policies in favour of the US was made to face financial espionage or '' corruption charges '' . 22 |
    • 23 |
    • 24 | 5-14-2014 Nigerian Military Human Rights Abuses Wo n't Stop American Search Assistance . 25 |
    • 26 |
    27 |
  • 28 |
  • 29 | 5-28-2014 At least 31 security personnel have been killed following an attack on a military base in Nigeria by Boko Haram fighters , security sources and witnesses said . 30 |
      31 |
    • 32 | 5-28-2014 At least 31 security personnel have been killed following an attack on a military base in Nigeria by Boko Haram fighters , security sources and witnesses said . 33 |
        34 |
      • 35 | 5-21-2014 U.S. military service members form part of an interagency team working out of the U.S. embassy in Abuja that is helping to coordinate the search with Nigerian authorities . 36 |
      • 37 |
      • 38 | 5-22-2014 China will carry out cooperation in six major areas , namely , industrial cooperation , financial cooperation , cooperation on poverty reduction , cooperation on environmental protection , cultural and people-to-people exchanges , and cooperation on peace and security . 39 |
      • 40 |
      • 41 | 5-30-2014 The northeast of Nigeria is plagued by Boko Haram attacks and has been under a state of emergency since May 2013 . 42 |
      • 43 |
      44 |
    • 45 |
    • 46 | 7-7-2014 U.S. officials say they believe reports that more than 60 girls who were kidnapped by the Nigerian terror group Boko Haram have escaped are accurate . 47 |
        48 |
      • 49 | 5-31-2014 Violence in northeastern Nigeria no longer fits the overly simplistic early narrative of Muslims killing Christians . 50 |
      • 51 |
      • 52 | 5-31-2014 Once described as the '' home of peace '' by locals , Maiduguri - the capital of Borno state - is now better known as the epicentre of deadly attacks and abductions that have killed thousands of Nigerians in schools , churches , mosques and markets . 53 |
      • 54 |
      • 55 | 5-31-2014 Today , visitors travelling to Maiduguri by road will notice an absence of uniformed military presence on the streets of the historic town . 56 |
      • 57 |
      • 58 | 7-7-2014 U.S. officials say they believe reports that more than 60 girls who were kidnapped by the Nigerian terror group Boko Haram have escaped are accurate . 59 |
      • 60 |
      61 |
    • 62 |
    • 63 | 8-8-2014 The World Health Organization warned on Friday that the disease is now a '' public health emergency of international concern '' and called for a coordinated international response to stop and reverse the international spread of Ebola . 64 |
    • 65 |
    66 |
  • 67 |
  • 68 | 8-14-2014 The Chinese government and people will not forget that the African people always reach out helping hands to offer timely help and generous support when Chinese people met with difficulties . 69 |
      70 |
    • 71 | 8-11-2014 The Chinese president said that China is willing to support the three countries in containing the spread of Ebola . 72 |
        73 |
      • 74 | 8-11-2014 Xi said that , at this difficult time , the Chinese government and people will stand together with the governments and peoples of the three nations and are willing to offer anti-epidemic supplies to them . 75 |
      • 76 |
      • 77 | 8-14-2014 Chinese government deems it obligatory to support African countries to tackle Ebola epidemic . 78 |
      • 79 |
      • 80 | 9-8-2014 Health experts say most infectious agents would not immediately manifest or make the patient contagious . 81 |
      • 82 |
      83 |
    • 84 |
    • 85 | 9-30-2014 Since 2009 , an estimated 3,600 people have been killed in an insurgency launched by the group known as Boko Haram , which says it wants to establish an Islamic state in northeastern Nigeria . 86 |
        87 |
      • 88 | 9-30-2014 Little is known about Boko Haram and its motivations , and information about the group 's activities remains under a tight coil . 89 |
      • 90 |
      • 91 | 10-2-2014 A region or country is considered Ebola-free after 42 days without any new cases . 92 |
      • 93 |
      • 94 | 10-10-2014 On Friday he claimed to have no knowledge of ongoing negotiations . 95 |
      • 96 |
      97 |
    • 98 |
    • 99 | 10-11-2014 Cameroon 's government announced Saturday that 27 hostages presumed to have been kidnapped by Boko Haram , including 10 Chinese construction workers and the wife of a vice prime minister , had been freed . 100 |
        101 |
      • 102 | 10-11-2014 Cameroon says it does not pay ransoms in kidnapping cases , and Saturday 's brief statement provided no details on the conditions of the hostages ' release . 103 |
      • 104 |
      • 105 | 10-14-2014 Abuja , Nigeria - Six months after the armed group Boko Haram kidnapped 276 Nigerian girls from a boarding school in the northeastern town of Chibok , 219 remain in captivity after 57 escaped . 106 |
      • 107 |
      • 108 | 10-17-2014 Nigeria 's military says it has agreed a truce with Islamist militant group Boko Haram - and says the schoolgirls the group has abducted will be released . 109 |
      • 110 |
      111 |
    • 112 |
    113 |
  • 114 |
115 | 116 |
117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /public/html/summa/maiduguri.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | UW Summa 5 | 6 | 7 | 8 | 9 | 10 |
11 |

Summa

12 | 13 |
    14 |
  • 15 | 1-14-2014 Nigeria 's military on Tuesday blamed Boko Haram militants for a deadly bomb attack that killed at least 17 in a crowded market in Maiduguri , in the latest violence to hit the country 's restive north . 16 |
      17 |
    • 18 | 1-9-2014 The 7 Division of the Nigerian Army in Maiduguri on Thursday said it repelled the attack of suspected Boko Haram insurgents on Damboa Town , Damboa Local Government Area of Borno . 19 |
        20 |
      • 21 | 1-9-2014 This information is contained in a statement issued by the spokesman of the division , Col. Muhammad Dole , in Maiduguri . '' 22 |
      • 23 |
      • 24 | 1-12-2014 Sheriff had arrived in Maiduguri on Sunday after an 11-month absence . 25 |
      • 26 |
      • 27 | 1-14-2014 The Nigerian Army said it has arrested one person over Tuesday 's bomb blast in Maiduguri , Borno State . 28 |
      • 29 |
      30 |
    • 31 |
    • 32 | 1-15-2014 A former governor of Borno State , Ali Modu Sheriff , who has come under attack since Tuesday 's bomb blast in Maiduguri , has said the outlawed Boko Haram sect is not responsible for the blast . 33 |
        34 |
      • 35 | 1-15-2014 Mr. Sheriff blamed Borno State government officials and members of his All Progressives Congress , APC , for the blast that is believed to have killed at least 31 people with dozens more injured . 36 |
      • 37 |
      • 38 | 1-16-2014 Hospital sources in Maiduguri , the Borno State capital yesterday disclosed to journalists that the Tuesday bomb blast at the city 's densely populated commercial area had killed 43 persons . 39 |
      • 40 |
      • 41 | 1-17-2014 Spain has condemned last Tuesday 's attack on a market in Maiduguri , the Borno State capital , where the death toll has now risen to 43 . 42 |
      • 43 |
      44 |
    • 45 |
    • 46 | 1-22-2014 Borno State Governor , Alhaji Kashim Shettima , Tuesday reviewed the dusk to dawn curfew imposed on the state capital , Maiduguri , on December 2 , 2013 following Boko Haram attack on military and security formations . 47 |
        48 |
      • 49 | 1-22-2014 Maiduguri is seen as the outlawed sector 's spiritual base . 50 |
      • 51 |
      • 52 | 1-23-2014 Some students of University of Maiduguri from Gombe State on Tuesday survived a ghastly road accident along the Maiduguri-Biu highway . 53 |
      • 54 |
      • 55 | 1-24-2014 Some villagers in farming communities around Maiduguri said they buried 18 of their neighbours on Wednesday after gunmen suspected to be members of the Boko Haram attacked their communities . 56 |
      • 57 |
      58 |
    • 59 |
    60 |
  • 61 |
  • 62 | 2-23-2014 Countless military posts , countless attacks Investigation by our correspondents reveals that dozens of new military formations and checkpoints , manned by many troops have been established along all the roads leading to Maiduguri . 63 |
      64 |
    • 65 | 2-23-2014 Countless military posts , countless attacks Investigation by our correspondents reveals that dozens of new military formations and checkpoints , manned by many troops have been established along all the roads leading to Maiduguri . 66 |
    • 67 |
    • 68 | 2-26-2014 The Senate Committee on Defence and Army on Wednesday in Abuja urged the Chief of Army Staff to relocate temporarily to Maiduguri . 69 |
    • 70 |
    • 71 | 3-1-2014 Maiduguri -- Twin explosions , Saturday evening , rocked a football viewing centre in Ajilari ward , Jere Council area of Maiduguri , the Borno State capital killing several football fans . 72 |
    • 73 |
    74 |
  • 75 |
  • 76 | 3-11-2014 Nigerian football club Abia Warriors have asked for their weekend match against El Kanemi Warriors be moved from the northeastern city of Maiduguri , the stronghold of Islamist group Boko Haram . 77 |
      78 |
    • 79 | 3-2-2014 Expectedly , several residents of Maiduguri have become apprehensive , fearing that the city may have returned to the dark days of 2012 when there was hardly a single day without explosion . 80 |
        81 |
      • 82 | 3-2-2014 Maiduguri -- It was another tragic day in Maiduguri , Borno State capital , yesterday , after twin explosions reportedly killed about 100 people , some of them football fans . 83 |
      • 84 |
      • 85 | 3-8-2014 Maiduguri -- Since May , 2013 , there has been respite in Maiduguri , Borno State capital and the notorious epicenter of violent insurgency . 86 |
      • 87 |
      • 88 | 3-11-2014 The club cited security concerns as a justification for the proposed venue change following waves of attacks by the insurgents both in Maiduguri and in surrounding areas , but league officials have rejected the appeal . 89 |
      • 90 |
      91 |
    • 92 |
    • 93 | 3-14-2014 Fierce battle between soldiers and members of the Boko Haram sect has forced residents of Maiduguri to flee the city . 94 |
        95 |
      • 96 | 3-14-2014 Photo : http://www.premiumtimesng.com/ Premium Times Maiduguri attack Gunmen suspected to be members of the extremist Boko Haram sect have invaded Maiduguri , the Borno state capital , throwing the city into pandemonium . 97 |
      • 98 |
      • 99 | 3-15-2014 After shootings stopped , soldiers and youth vigilante group were seen combing Maiduguri for escaping terrorists . 100 |
      • 101 |
      • 102 | 3-18-2014 Members of the Boko Haram sect who were displaced from their Sambisa forest stronghold in Borno State and forced back from Maiduguri by the Nigerian Army have re-emerged in the southern part of the state forcing motorists to shun the Biu/Maiduguri highway for fear of being attacked on the road . 103 |
      • 104 |
      105 |
    • 106 |
    • 107 | 3-24-2014 Two suicide bombers driving a Volkswagen saloon car , on Monday , in Maiduguri , rammed their vehicle into a police highway patrol van killing five officers and three civilians , witnesses and security officials said . 108 |
        109 |
      • 110 | 3-21-2014 He said they were surprised to find out that the insurgents have launched attack on them despite the presence of the military detailed to protect lives and property in the area . '' 111 |
      • 112 |
      • 113 | 3-25-2014 Initially , the military had some success in tempering attacks within Maiduguri , but Boko Haram has carried out a series of daring raids in the heart of the city in recent months . 114 |
      • 115 |
      • 116 | 3-26-2014 The latest attack on Maiduguri , the Borno State capital and epicentre of the Boko Haram insurgency , coincided with a protest led by Muslim women to the National Assembly over the mindless killing of Nigerians by the terrorists . 117 |
      • 118 |
      119 |
    • 120 |
    121 |
  • 122 |
123 | 124 |
125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /public/html/summa/janara.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | UW Summa 5 | 6 | 7 | 8 | 9 | 10 |
11 |

Summa

12 | 13 |
    14 |
  • 15 | 5-8-2014 As good as that may sound , but whichever politician refused to hijack policies in favour of the US was made to face financial espionage or '' corruption charges '' . 16 |
      17 |
    • 18 | 5-8-2014 As good as that may sound , but whichever politician refused to hijack policies in favour of the US was made to face financial espionage or '' corruption charges '' . 19 |
        20 |
      • 21 | 5-8-2014 Years , later the CIA while tactically taking advantage of growing sectarian violence in Nigeria , recruited jobless Islamic extremist through Muslim and other traditional leaders offering training indirectly to the group by use of foreign based terror groups . 22 |
      • 23 |
      • 24 | 5-8-2014 Today as Nigerians are reeling from the negative effects of the insurgency that has befallen our dear country and earnestly seeking answers to what all this portends for the future , the GREENWHITE COALITION a citizen 's watchdog can reveal the true nature of this silent , undeclared war of attrition waged against Nigeria by the Government of United States of America . 25 |
      • 26 |
      27 |
    • 28 |
    • 29 | 5-17-2014 Countries neighbouring Nigeria are ready to wage war against the Nigeria-based , al-Qaeda-linked group , Boko Haram , Chad 's president says . 30 |
    • 31 |
    • 32 | 5-28-2014 At least 31 security personnel have been killed following an attack on a military base in Nigeria by Boko Haram fighters , security sources and witnesses said . 33 |
    • 34 |
    35 |
  • 36 |
  • 37 | 5-30-2014 Source 2 in Lagos claims to have heard of a '' Hosni '' through a network of associates . 38 |
      39 |
    • 40 | 5-30-2014 Source 2 in Lagos claims to have heard of a '' Hosni '' through a network of associates . 41 |
        42 |
      • 43 | 5-30-2014 The northeast of Nigeria is plagued by Boko Haram attacks and has been under a state of emergency since May 2013 . 44 |
      • 45 |
      • 46 | 5-30-2014 Nigeria 's president has said he has ordered '' total war '' against the armed group Boko Haram which last month abducted 276 schoolgirls in the northeastern state of Borno . 47 |
      • 48 |
      • 49 | 5-30-2014 Chief of Defence Staff Air Chief Marshal Alex Badeh said any potential armed rescue operation was fraught with danger as the girls could be caught in the crossfire . 50 |
      • 51 |
      52 |
    • 53 |
    • 54 | 5-31-2014 Violence in northeastern Nigeria no longer fits the overly simplistic early narrative of Muslims killing Christians . 55 |
        56 |
      • 57 | 5-31-2014 Today , visitors travelling to Maiduguri by road will notice an absence of uniformed military presence on the streets of the historic town . 58 |
      • 59 |
      • 60 | 5-31-2014 Violence in northeastern Nigeria no longer fits the overly simplistic early narrative of Muslims killing Christians . 61 |
      • 62 |
      • 63 | 5-31-2014 Once described as the '' home of peace '' by locals , Maiduguri - the capital of Borno state - is now better known as the epicentre of deadly attacks and abductions that have killed thousands of Nigerians in schools , churches , mosques and markets . 64 |
      • 65 |
      66 |
    • 67 |
    • 68 | 6-9-2014 Source 7 in Bingi reports no confirmation of the arrival of Lagos-based radical Islamists . 69 |
        70 |
      • 71 | 6-5-2014 Source 1 in Lagos has heard street talk of an impending operation to assassinate the Nigerian Prime Minister . 72 |
      • 73 |
      • 74 | 6-7-2014 Source 7 has heard rumors concerning the arrival in Bingi of potential radical Islamists from Lagos who may be part of a catastrophic plot to kill hundreds , if not thousands , of people in and around Lagos . 75 |
      • 76 |
      • 77 | 6-11-2014 Source 1 in Lagos reports that the following individuals are among the most radical members of the Khoury Habib Mosque in Lagos : Omar Assad , Hani Boutros , and Yousef Najeeb . 78 |
      • 79 |
      80 |
    • 81 |
    • 82 | 6-15-2014 Source 11 in Onitsha states that he has not noted any rise in anti-Nigerian sentiment among Onitsha 's small Moslem community . 83 |
    • 84 |
    • 85 | 6-16-2014 Source 9 in Lagos claims that two of his brother-in-law 's friends , Tawfiq Attuk and Bassam Bahran , staying in Nigeria on extended tourist visas , have expressed their support for Boko Haram activities in the Middle East . 86 |
    • 87 |
    88 |
  • 89 |
  • 90 | 6-21-2014 Source 10 in Abuja describes Al Samarah as leader of the '' virulent anti-Western faction '' among his business associates . 91 |
      92 |
    • 93 | 6-29-2014 She and several of her associates are lobbying for separate schools for boys and girls . 94 |
        95 |
      • 96 | 6-19-2014 Source 4 in Lagos reports no apparent increase in anti-Nigerian rhetoric among the members of his Young Men 's Islamic Association in the aftermath of Operation Iraqi Freedom . 97 |
      • 98 |
      • 99 | 6-22-2014 Source 16 in Benin City cites Malik Mosul as a '' dangerous subversive '' operating in the city . 100 |
      • 101 |
      • 102 | 6-29-2014 She and several of her associates are lobbying for separate schools for boys and girls . 103 |
      • 104 |
      105 |
    • 106 |
    • 107 | 7-2-2014 Source 17 in Uyo reports a political meeting having taken place between Gimmel Faruk and Dimitri Yagdanich , a Bosnian immigrant . 108 |
    • 109 |
    • 110 | 7-5-2014 Source believes this is probable evidence of influx of '' conspiracy-mongers from Lagos . '' 111 |
    • 112 |
    113 |
  • 114 |
  • 115 | 7-8-2014 Source 17 in Uyo reports that his friend Karmij Aziz claims to have been offered a job by one Ali Hakem because of his computer hacking skills . 116 |
      117 |
    • 118 | 7-10-2014 Source 1 in Lagos claims an association between Khaleed Kulloh and Djibouti Jones . 119 |
    • 120 |
    • 121 | 7-11-2014 Source 9 in Lagos claims to have seen Khaleed Kulloh and Phil Salwah together on several occasions at several mosques in the East Side . 122 |
    • 123 |
    • 124 | 7-27-2014 Samagu 's Islamic community is very small and it is unusual for an immigrant to show up and stay for several weeks without family or business ties . 125 |
    • 126 |
    • 127 | 7-28-2014 Source 18 , located in the University of Benin , reports that several of the more radical Islamic students have resigned from the university to apparently return to Saudi Arabia , yet they are still staying at their hotel in Benin City . 128 |
    • 129 |
    130 |
  • 131 |
132 | 133 |
134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /public/html/summa/iai/title/topic-6.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | UW Summa 5 | 6 | 7 | 8 | 9 | 10 |
11 |

Summa

12 | 13 |
    14 |
  • 15 | 7-2-2013 A recent UN report identified Nigeria as having the highest number of HIV infections in the world in 2012 . '' 16 |
      17 |
    • 18 | 7-2-2013 President Goodluck Jonathan has expressed his disappointment at the continued prevalence of HIV/AIDS in the country , adding that the lack of comprehensive planning to mitigate the disease needs to be addressed . 19 |
        20 |
      • 21 | 7-2-2013 The president ascribed the increase in transmission of the virus within the country to the lack of a comprehensive national plan on how to tackle the transmission and management of the disease . 22 |
      • 23 |
      • 24 | 7-2-2013 Mr. Jonathan said this at the presentation of the President 's Emergency Plan , PERP for HIV/AIDS in Nigeria for 2013-2015 , where he declared that no Nigerian citizen must be allowed to die from HIV henceforth . 25 |
      • 26 |
      • 27 | 7-2-2013 Mr. Idoko said the goal of the president 's initiative is to get the country to attain set universal access target of 80 per cent to HIV/AIDS prevention , treatment and care services . 28 |
      • 29 |
      30 |
    • 31 |
    • 32 | 7-8-2013 The Senate committee on Interior , yesterday expressed worry over the frequent jailbreak recorded in the country since the dreaded Boko Haram insurgency , saying it posed a great threat to the nation 's growth , peace and stability . 33 |
        34 |
      • 35 | 7-8-2013 The committee said they were visiting the Service to know its position on a proposal that Nigeria Prison Service should be removed from exclusive list and taken to concurrent list . 36 |
      • 37 |
      • 38 | 7-11-2013 The House of Representatives Thursday urged the federal government to take measures to address the plight of the refugees in the three states affected by the state of emergency rule . 39 |
      • 40 |
      • 41 | 7-12-2013 The resolution seeking succour for the refugees came on the heels of a motion sponsored by Hon. Abubakar Mahmud Wambai , member representing Mubi North / Mubi , South/Maiha Federal Constituency of Adamawa State . 42 |
      • 43 |
      44 |
    • 45 |
    • 46 | 7-13-2013 This is an eye focusing disorder , a condition that makes '' close objects look clear but distant objects appear blurred '' . 47 |
        48 |
      • 49 | 7-14-2013 Critical issues like fight against corruption , provision of good governance , basic infrastructure as power , roads , water , healthcare , education , credible election , etc. have all taken back seats in our governmental drive . 50 |
      • 51 |
      • 52 | 8-3-2013 Amnesty and state of emergency hardly go together although Mr President said both would be explored , that it will be a multi-tracked approach . 53 |
      • 54 |
      55 |
    • 56 |
    57 |
  • 58 |
  • 59 | 8-25-2013 From $ 11 per barrel under the military , the price of crude oil has hovered above $ 100 for the past seven years . 60 |
      61 |
    • 62 | 8-13-2013 The federal government has disclosed that it will start training about 2000 youths from the northern-east states under emergency rule as a result of Boko Haram insurgency in order to change their orientation . 63 |
        64 |
      • 65 | 8-13-2013 The programme will start with 1000 youth each from Borno and Yobe states and will extend to Adamawa state later . 66 |
      • 67 |
      • 68 | 8-14-2013 In the past two weeks , a lot has been said and written about the '' deportation '' of beggars and the destitute from Lagos State . 69 |
      • 70 |
      71 |
    • 72 |
    • 73 | 10-4-2013 In 24 hours , Nigeria shall be marking ( not celebrating ) 53 years as an independent state . 74 |
        75 |
      • 76 | 10-4-2013 The same Economist , in its Failed States Index , placed Nigeria amongst the 10 worst failed states with Somalia topping the list . 77 |
      • 78 |
      • 79 | 11-3-2013 This will ensure the general good of the nation - social , economic , political and cultural tolerance and accommodation . 80 |
      • 81 |
      82 |
    • 83 |
    84 |
  • 85 |
  • 86 | 11-7-2013 The Nigerian Senate on Thursday approved the extension of the state of emergency in three north-eastern states of Adamawa , Borno , and Yobe . 87 |
      88 |
    • 89 | 11-17-2013 Maiduguri -- The extension of the state of emergency in Adamawa , Borno and Yobe states did not come to many people as a surprise , partly due to the new wave of attacks by suspected members of the Boko Haram sect in recent weeks , especially in villages and towns around Borno and Yobe states . 90 |
        91 |
      • 92 | 11-20-2013 Nigerian Lawmakers Have Approved a Six month extension of the state of emergency in areas where troops are fighting Islamist militants . 93 |
      • 94 |
      • 95 | 12-4-2013 The Senate had recently approved President Goodluck Jonathan 's request for extension of emergency rule in Borno , Yobe and Adamawa states after a closed door meeting with security chiefs . 96 |
      • 97 |
      98 |
    • 99 |
    • 100 | 1-11-2014 INEC Chairman , Prof Attahiru Jega , who earlier declared that election would not be conducted in any state under emergency rule in 2015 , said , yesterday , that he did not foreclose election in the six states of Adamawa , Taraba , Bauchi , Yobe , Borno and Gombe . 101 |
        102 |
      • 103 | 12-28-2013 Despite the state of emergency in Yobe State , voters in the state will today go to the poll to elect new local government chairmen and councilors for the 17 local government areas of the state . 104 |
      • 105 |
      • 106 | 1-21-2014 The APC members took the decision after a six-hour meeting . 107 |
      • 108 |
      109 |
    • 110 |
    111 |
  • 112 |
  • 113 | 3-5-2014 President Goodluck Jonathan yesterday said rather than bickering , cordial rapport with the government at the centre and states should be sought to enhance rapid development in such states . 114 |
      115 |
    • 116 | 2-10-2014 While the Constitution defines an indigene of a state in terms of ancestral , nativist ' belonging ' to the state , administrative rules have tended to leave the practical definition of who is an ' indigene ' in the hands of local government officials . 117 |
        118 |
      • 119 | 2-10-2014 The more problematic states pose more challenging policy demands . 120 |
      • 121 |
      • 122 | 3-5-2014 The federal government certainly has Borno , Yobe and Adamawa , among other states , in mind while preparing the defence budget . 123 |
      • 124 |
      125 |
    • 126 |
    • 127 | 3-24-2014 Do n't ignite fire in Nasarawa state , that 's my warning . 128 |
        129 |
      • 130 | 3-24-2014 If anybody puts Nasarawa state on fire only God knows where it will go to , how far it will spread . 131 |
      • 132 |
      • 133 | 4-14-2014 The residents of the states argue that extending the emergency rule , will increase the apprehension in the area rather than lead to immediate resolution of the Boko Haram crisis . 134 |
      • 135 |
      136 |
    • 137 |
    138 |
  • 139 |
  • 140 | 5-13-2014 President Jonathan imposed in three troubled states a state of emergency . 141 |
      142 |
    • 143 | 5-15-2014 House of Representatives has approved President Goodluck Jonathan 's request for extension of the state of emergency currently in place in Adamawa , Borno and Yobe states . 144 |
        145 |
      • 146 | 5-21-2014 The Senate has endorsed the request of the President Goodluck Jonathan on the extension of the state of emergency in Adamawa , Borno and Yobe States for another six months . 147 |
      • 148 |
      • 149 | 6-6-2014 The All Progressives Congress , APC , has faulted Edwin Clark , for asking President Goodluck Jonathan to remove the democratic structures in the three states of Adamawa , Borno and Yobe states , which are under a state of emergency over the activities of the terror group , Boko Haram . 150 |
      • 151 |
      152 |
    • 153 |
    • 154 | 8-25-2014 By Chuks Okocha Elder statesman and Ijaw leader , Chief Edwin Clark yesterday declared that President Goodluck Jonathan would seek re-election in 2015 , insisting that nobody could stop him from doing so because he was constitutionally qualified . 155 |
        156 |
      • 157 | 8-25-2014 Clark further said that President Jonathan had performed creditably well . 158 |
      • 159 |
      • 160 | 9-23-2014 Already , five local government areas in the northern part of the state were excluded from the recent Permanent Voters ' Cards registration exercise . 161 |
      • 162 |
      163 |
    • 164 |
    165 |
  • 166 |
167 | 168 |
169 | 170 | 171 | 172 | -------------------------------------------------------------------------------- /public/html/summa/boko.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | UW Summa 5 | 6 | 7 | 8 | 9 | 10 |
11 |

Summa

12 | 13 |
    14 |
  • 15 | 2-2-2014 Nigeria 's Boko Haram was suspected Sunday of killing a popular Muslim cleric , his wife and child in the northern city of Zaria , Kaduna state . 16 |
      17 |
    • 18 | 2-1-2014 Governor Kashim Shettima of Borno State has called on private and corporate bodies to join the efforts of his government towards bringing succour to the victims of Boko Haram insurgents by donating their widow 's mite . 19 |
        20 |
      • 21 | 2-1-2014 He said the sufferings of the victims of Boko Haram in the state offered an opportunity for members of the society to offer assistance . 22 |
      • 23 |
      • 24 | 2-2-2014 Six of the dead victims were burnt beyond recognition . 25 |
      • 26 |
      • 27 | 2-3-2014 Speaker of the House of Representatives Aminu Waziri Tambuwal has called on the governments of Chad , Republics of Niger and Cameroon to cooperate with the Nigeria government in fighting against the Boko Haram insurgents . 28 |
      • 29 |
      30 |
    • 31 |
    • 32 | 2-4-2014 Internationally and locally it has long been known that Nigeria has been engaged in a brutal and protracted civil war which has been raging mostly in the northern part of the country in the last three years . 33 |
        34 |
      • 35 | 2-4-2014 Nigeria 's Muslims have voiced concern about an apparent increase in religious '' profiling '' after hundreds of terror-related arrests in the country 's Christian-majority south . 36 |
      • 37 |
      • 38 | 2-4-2014 Nigeria 's military on Tuesday said that a call by the country 's top military officer for a swift end to the Boko Haram insurgency had been '' taken too literally '' . 39 |
      • 40 |
      • 41 | 2-4-2014 The social and virtual interactive networks for example are fast gaining popularity around the world including the Islamic countries , so Islam feels threatened . 42 |
      • 43 |
      • 44 | 2-4-2014 The goal of the Islamic radicals is to establish a separate country in the northern part of the country where sharia legal system is the rule of law . 45 |
      • 46 |
      47 |
    • 48 |
    • 49 | 2-6-2014 Photo : http://www.vanguardngr.com/ Vanguard President Goodluck Jonathan being received by service chiefs at the presidential change of guards parade at the presidential villa . 50 |
        51 |
      • 52 | 2-6-2014 Jonathan , at the decoration of the service chiefs stressed the urgency of bringing the anti-terror war to a quick end , adding : '' None of us will sleep till Nigerians in Borno State can sleep . '' 53 |
      • 54 |
      • 55 | 2-7-2014 President Goodluck Jonathan said that he prays every day not to hand over Boko Haram insurgency to any future president of Nigeria . 56 |
      • 57 |
      • 58 | 2-8-2014 Maiduguri -- Residents who were displaced by the Boko Haram in various communities in Borno State are gradually returning to their homes following intervention by various stakeholders , the National Emergency Management Agency ( NEMA ) said yesterday . 59 |
      • 60 |
      61 |
    • 62 |
    63 |
  • 64 |
  • 65 | 2-10-2014 The counsels to the accused persons prayed the court to use its discretion to grant bail since there are no evidences that the applicants will abate justice , interfere with investigations or jump bail . 66 |
      67 |
    • 68 | 2-11-2014 Arguing the bail application of the third accused person , Abdul Mohammed submitted that by the proof of evidence , it showed that the prosecution had finished investigation and hence there was no possibility of tampering with investigation . 69 |
        70 |
      • 71 | 2-11-2014 Ocholi further submitted that going by the proof of evidence placed before the court , there was nothing linking the second accused person to the alleged crime . 72 |
      • 73 |
      • 74 | 2-11-2014 He further noted that the prosecution did not disclose the identity of the witnesses it would call and so , the third accused could not interfere with the witnesses . 75 |
      • 76 |
      • 77 | 2-11-2014 The counsels argued separately that Section 1 ( 2 ) ( b ) of the amended Terrorism Prevention Act , 2013 which prescribed capital punishment for terrorism offences , confers discretion on the judge , adding that there is no prima facie evidence against the accused persons . 78 |
      • 79 |
      80 |
    • 81 |
    • 82 | 2-12-2014 Reports said heavily armed extremists in 4X4 trucks attacked a mosque , markets and government buildings in a massive assault on Konduga village , which had witnessed attacks before . 83 |
        84 |
      • 85 | 2-12-2014 At least 39 persons were killed in the attack while several others were injured , residents and government officials said . 86 |
      • 87 |
      • 88 | 2-13-2014 Residents of Konduga and neighbouring Mailari village today said Boko Haram terrorists , who attacked Konduga village on Tuesday killing 39 have returned to launch fresh attacks . 89 |
      • 90 |
      • 91 | 2-15-2014 Heavily armed Islamist extremists in 4X4 trucks attacked a mosque , markets and government buildings in a massive assault on Konduga in the troubled state of Borno on Tuesday . 92 |
      • 93 |
      94 |
    • 95 |
    • 96 | 2-16-2014 The attack came three days after gunmen attacked the same village and killed nine soldiers in a broad day shootout . 97 |
        98 |
      • 99 | 2-16-2014 Boko Haram had carried out another attack on villagers in Doron-Baga in Kukawa local government area on the same night even though details of the incident is yet to be made public . 100 |
      • 101 |
      • 102 | 2-17-2014 The actual death toll from the latest attacks by Boko Haram gunmen on Izaghe village in Borno state , north east Nigeria , was higher than the 60 earlier reported . 103 |
      • 104 |
      • 105 | 2-18-2014 A Nigerian state governor says militant group Boko Haram is '' ; better armed and better motivated '' ; than government forces trying to stop their attacks . 106 |
      • 107 |
      108 |
    • 109 |
    110 |
  • 111 |
  • 112 | 2-20-2014 No fewer than 47 people were killed in the early Wednesday 's attack on Bama by dozens of Boko Haram gunmen , according to Lawal Tanko , the police boss , for the besieged state of Borno , in northern Nigeria . 113 |
      114 |
    • 115 | 2-19-2014 Suspected Boko Haram militants armed with explosives attacked Bama , a troubled spot in Nigeria 's northeast on Wednesday , sparking a battle with soldiers that killed a large number of insurgents , the military said . 116 |
        117 |
      • 118 | 2-19-2014 Governor Shettima after visiting President Goodluck Jonathan in Abuja in the wake of last Monday 's attack , declared that Nigeria is in a state of war and that the fight against Boko Haram is far from being won , as the insurgents seem to be more motivated than the Nigerian military . 119 |
      • 120 |
      • 121 | 2-20-2014 The Nigerian military says it is beating Boko Haram and that the recent increase in attacks signifies increased desperation among insurgents . 122 |
      • 123 |
      • 124 | 2-21-2014 Cameroon has stepped up security in the Far North Region following Nigeria 's military crackdown on Boko Haram , which has pushed back the insurgents to border regions and forced thousands of civilians to flee into Cameroon . '' 125 |
      • 126 |
      127 |
    • 128 |
    • 129 | 2-22-2014 Maiduguri -- For 14 days beginning from February 11 , the people of Borno , particularly those residing around the bushy Sambisa Forest , have seen what could be described as ' hell on earth ' following deadly attacks , bombings , killings and destruction of property . 130 |
        131 |
      • 132 | 2-22-2014 During the February 11 attacks , the insurgents invaded Konduga and killed 57 residents , burnt houses and vehicles while abducting some female students in one of the secondary schools . 133 |
      • 134 |
      • 135 | 2-23-2014 Photo : Vanguard.html Vanguard Bombing continues as citizens advocate for talks with Boko Haram Lagos -- THE United States expressed solidarity with locals in the northern parts of Nigeria following a reign of terror by the Boko Haram sect . 136 |
      • 137 |
      • 138 | 2-24-2014 The military offensive has pushed Boko Haram out of towns and cities but attacks continue in more remote , rural areas where the presence of troops is not strong . 139 |
      • 140 |
      141 |
    • 142 |
    • 143 | 2-25-2014 Suspected Boko Haram gunmen on Tuesday opened fire on secondary school students as they slept in a dormitory in Nigeria 's troubled northeastern Yobe state , the military said . 144 |
        145 |
      • 146 | 2-25-2014 Twenty nine people have been declared dead in the latest massacre of sleeping secondary school students in Buni Yadi , in the north eastern Nigerian state of Yobe , by gunmen from Boko Haram . 147 |
      • 148 |
      • 149 | 2-26-2014 Suspected Boko Haram gunmen late Wednesday killed at least 37 people in three separate attacks in northeast Nigeria in Adamawa state , including at a theological college . 150 |
      • 151 |
      • 152 | 2-27-2014 The blood letting continues in Nigeria 's North east No fewer than 32 people have been killed by suspected Boko Haram gunmen in three separate attacks in northeast Nigeria , including at a theological college , a local government official and residents said on Thursday . 153 |
      • 154 |
      155 |
    • 156 |
    157 |
  • 158 |
159 | 160 |
161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /public/javascripts/listCollapse.js: -------------------------------------------------------------------------------- 1 | /*************************************************************************************** 2 | Nested list collapsing script written by Mark Wilton-Jones - 21/11/2003 3 | Version 2.3.0 - this script takes existing HTML nested UL or OL lists, and collapses them 4 | Updated 13/02/2004 to allow links in root of expanding branch 5 | Updated 09/09/2004 to allow state to be saved 6 | Updated 07/10/2004 to allow page address links to be highlighted 7 | Updated 28/11/2004 to allow you to force expand/collapse links to use just the extraHTML 8 | Updated 23/09/2006 to add expandCollapseAll and to allow selfLink to locate custom links 9 | **************************************************************************************** 10 | 11 | Please see http://www.howtocreate.co.uk/jslibs/ for details and a demo of this script 12 | Please see http://www.howtocreate.co.uk/jslibs/termsOfUse.html for terms of use 13 | _________________________________________________________________________ 14 | 15 | You can put as many lists on the page as you like, each list may have a different format. 16 | 17 | To use: 18 | _________________________________________________________________________ 19 | 20 | Inbetween the tags, put: 21 | 22 | 23 | _________________________________________________________________________ 24 | 25 | Define the HTML. Note that to correctly nest lists, child OLs or ULs should be children of an LI element, 26 | not direct descendents of their parent OL/UL. The text used to expand the branch should be written 27 | between the
  • tag and the
      tag, and should only contain HTML that is permitted inside an 'A' 28 | element. Note; Opera 7 will lose any style attributes you define in this text - use classes instead. 29 | 30 | 55 | ________________________________________________________________________ 56 | Now you need to trigger the collapsing, using tag. If using either onload technique, you must not use 58 | any other scripts that rely on the onload event. 59 | 60 | compactMenu(theRootID,shouldAutoCollapse,extraHTML[,useMinimalLink]); 61 | oID = string: ID of root nest element, must be a UL or OL; this will not be collapsed, but any child 62 | UL/OLs will be (note, if the root nest element is a UL, all child lists should be ULs - the same is 63 | true for OLs; if the root nest element is OL, all child lists should be OLs) 64 | shouldAutoCollapse = bool: auto-collapse unused branches 65 | extraHTML = string: HTML to insert to collapsible branches - usually '± ' 66 | useMinimalLink = bool: normally the expand/collapse link will use both extraHTML and the original list 67 | item text - if the list item text is already a link, this will not be included - set this option to 68 | true to force the script to use only the extraHTML as the link, even if the rest of the list item is 69 | not a link - this option will only be respected if you also provide some extraHTML 70 | 71 | eg 1. 72 | 73 | 74 | eg 2. 75 | 78 | 79 | eg 3. 
80 | 83 | 84 | 85 | stateToFromStr(theRootID); 86 | oID = string: ID of root nest element, must be a UL or OL; returns a string representing all expanding 87 | branches - can be used with my cookie script to save state when unloading the page 88 | stateToFromStr(theRootID,stringRepresentation); 89 | oID = string: ID of root nest element, must be a UL or OL; 90 | stringRepresentation = string: string representation of expanded branches, as created above 91 | must be called _after_ collapsing the list - values can be recovered from cookies using my cookie script 92 | note: this facility will not be able to take changes in the list structure into account - use session cookies 93 | or short-term cookies to avoid longer term structure change problems 94 | 95 | selfLink(theRootID,newClass,shouldExpandBranch[,linkHref]); 96 | theRootID = string: ID of root nest element, must be a UL or OL; 97 | newClass = string: new class name to add to any existing class names 98 | shouldExpandBranch = bool: expand branches to show the first matching link 99 | linkHref = string: by default, it will try to locate links to the current page address - you can 100 | override that here by giving a specific address that it should look for. 101 | Allows you to highlight links to the current page that appear in the list 102 | must be called _after_ collapsing the list 103 | address hash and port are not included in the comparison - links containing href="#" are always ignored 104 | 105 | expandCollapseAll(theRootID,shouldExpand); 106 | theRootID = string: ID of root nest element, must be a UL or OL that has been collapsed using compactMenu 107 | shouldExpand = bool: says if it should expand all branches (true) or collapse all branches (false) 108 | Expands/collapses all branches in a collapsed list. Must not be used with auto-collapsing lists. 
109 | 110 | My cookie script is available on http://www.howtocreate.co.uk/jslibs/ 111 | 113 | ____________________________________________________________________________________________________*/ 114 | var openLists = [], oIcount = 0; 115 | function compactMenu(oID,oAutoCol,oPlMn,oMinimalLink) { 116 | if( !document.getElementsByTagName || !document.childNodes || !document.createElement ) { return; } 117 | var baseElement = document.getElementById( oID ); if( !baseElement ) { return; } 118 | compactChildren( baseElement, 0, oID, oAutoCol, oPlMn, baseElement.tagName.toUpperCase(), oMinimalLink && oPlMn ); 119 | } 120 | function compactChildren( oOb, oLev, oBsID, oCol, oPM, oT, oML ) { 121 | if( !oLev ) { oBsID = escape(oBsID); if( oCol ) { openLists[oBsID] = []; } } 122 | for( var x = 0, y = oOb.childNodes; x < y.length; x++ ) { if( y[x].tagName ) { 123 | //for each immediate LI child 124 | var theNextUL = y[x].getElementsByTagName( oT )[0]; 125 | if( theNextUL ) { 126 | //collapse the first UL/OL child 127 | theNextUL.style.display = 'none'; 128 | //create a link for expanding/collapsing 129 | var newLink = document.createElement('A'); 130 | newLink.setAttribute( 'href', '#' ); 131 | newLink.onclick = new Function( 'clickSmack(this,' + oLev + ',\'' + oBsID + '\',' + oCol + ',\'' + escape(oT) + '\');return false;' ); 132 | //wrap everything upto the child U/OL in the link 133 | if( oML ) { var theHTML = ''; } else { 134 | var theT = y[x].innerHTML.toUpperCase().indexOf('<'+oT); 135 | var theA = y[x].innerHTML.toUpperCase().indexOf('= oLevel; x-=1 ) { if( openLists[oBsID][x] ) { 150 | openLists[oBsID][x].style.display = 'none'; if( oLevel != x ) { openLists[oBsID][x] = null; } 151 | } } 152 | if( oThisOb == openLists[oBsID][oLevel] ) { openLists[oBsID][oLevel] = null; } 153 | else { oThisOb.style.display = 'block'; openLists[oBsID][oLevel] = oThisOb; } 154 | } else { oThisOb.style.display = ( oThisOb.style.display == 'block' ) ? 
'none' : 'block'; } 155 | } 156 | /* stateToFromStr: called without oFStr it returns the MWJuniqueID values, joined by ':', of every nested list (same tag name as the root) whose display is not 'none'. Called with oFStr (a ':'-separated ID string) it fires the onclick handler of the first link in the parent of each listed list that is currently hidden; lists that were already visible are still appended to the returned string. Returns '' when the DOM methods or the root element are missing, or when oFStr is passed but falsy. */ function stateToFromStr(oID,oFStr) { 157 | if( !document.getElementsByTagName || !document.childNodes || !document.createElement ) { return ''; } 158 | var baseElement = document.getElementById( oID ); if( !baseElement ) { return ''; } 159 | if( !oFStr && typeof(oFStr) != 'undefined' ) { return ''; } if( oFStr ) { oFStr = oFStr.split(':'); } 160 | for( var oStr = '', l = baseElement.getElementsByTagName(baseElement.tagName), x = 0; l[x]; x++ ) { 161 | if( oFStr && MWJisInTheArray( l[x].MWJuniqueID, oFStr ) && l[x].style.display == 'none' ) { l[x].parentNode.getElementsByTagName('a')[0].onclick(); } 162 | else if( l[x].style.display != 'none' ) { oStr += (oStr?':':'') + l[x].MWJuniqueID; } 163 | } 164 | return oStr; 165 | } 166 | /* MWJisInTheArray: linear search; true when some element of oHay loosely equals (==) oNeed. */ function MWJisInTheArray(oNeed,oHay) { for( var i = 0; i < oHay.length; i++ ) { if( oNeed == oHay[i] ) { return true; } } return false; } 167 | /* selfLink: appends oClass to the className of every link under the root element whose address (as computed by getRealAddress) equals that of the current location, or of oLink when one is given; links whose href ends in '#' are skipped. When oExpand is truthy, the ancestor lists of the first matching link only are expanded through stateToFromStr. */ function selfLink(oRootElement,oClass,oExpand,oLink) { 168 | var tmpLink; 169 | if(!document.getElementsByTagName||!document.childNodes) { return; } 170 | oRootElement = document.getElementById(oRootElement); 171 | if( oLink ) { 172 | tmpLink = document.createElement('a'); 173 | tmpLink.setAttribute('href',oLink); 174 | } 175 | for( var x = 0, y = oRootElement.getElementsByTagName('a'); y[x]; x++ ) { 176 | if( y[x].getAttribute('href') && !y[x].href.match(/#$/) && getRealAddress(y[x]) == getRealAddress(oLink?tmpLink:location) ) { 177 | y[x].className = (y[x].className?(y[x].className+' '):'') + oClass; 178 | if( oExpand ) { 179 | oExpand = false; 180 | for( var oEl = y[x].parentNode, ulStr = ''; oEl != oRootElement && oEl != document.body; oEl = oEl.parentNode ) { 181 | if( oEl.tagName && oEl.tagName == oRootElement.tagName ) { ulStr = oEl.MWJuniqueID + (ulStr?(':'+ulStr):''); } } 182 | stateToFromStr(oRootElement.id,ulStr); 183 | } } } } 184 | /* getRealAddress: builds a comparable address string from protocol, hostname, pathname and search, adding a ':' after the protocol and a leading '/' before the pathname when they are missing. */ function getRealAddress(oOb) { return oOb.protocol + ( ( oOb.protocol.indexOf( ':' ) + 1 ) ?
'' : ':' ) + oOb.hostname + ( ( typeof(oOb.pathname) == typeof(' ') && oOb.pathname.indexOf('/') != 0 ) ? '/' : '' ) + oOb.pathname + oOb.search; } 185 | /* expandCollapseAll: for every nested list (same tag name as the root) that carries an MWJuniqueID, fires the onclick handler of the first link in the list's parent whenever the list's current visibility differs from the requested one (oState truthy = expand, falsy = collapse). */ function expandCollapseAll(oElID,oState) { 186 | if(!document.getElementsByTagName||!document.childNodes) { return; } 187 | var oEl = document.getElementById(oElID); 188 | var oT = oEl.tagName; 189 | var oULs = oEl.getElementsByTagName(oT); 190 | for( var i = 0, oLnk; i < oULs.length; i++ ) { 191 | if( typeof(oULs[i].MWJuniqueID) != 'undefined' ) { 192 | oLnk = oULs[i].parentNode.getElementsByTagName( 'a' )[0]; 193 | if( oLnk && ( ( oState && oULs[i].style.display == 'none' ) || ( !oState && oULs[i].style.display != 'none' ) ) ) { 194 | oLnk.onclick(); 195 | } } } } -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/data/DocumentStore.scala: -------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer.data 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import org.sameersingh.ervisualizer.Logging 5 | import nlp_serde.FileUtil 6 | import nlp_serde.readers.PerLineJsonReader 7 | 8 | import scala.collection.mutable 9 | import scala.collection.mutable.{ArrayBuffer, HashSet, HashMap} 10 | 11 | /** 12 | * @author sameer 13 | * @since 1/25/15.
*
* In-memory collection of documents together with inverted indexes that map
* keywords, topics and entity ids to the ids of the documents mentioning them.
*/
class DocumentStore {
  type Id = String

  /** Every stored document, keyed by its id. */
  val docMap = new HashMap[Id, nlp_serde.Document]
  /** Inverted index: lower-cased lemma -> ids of documents containing it. */
  val keywordsMap = new HashMap[String, HashSet[Id]]
  /** Inverted index: lower-cased topic word -> ids of documents tagged with it. */
  val topicsMap = new HashMap[String, HashSet[Id]]
  /** Inverted index: entity id -> ids of documents mentioning the entity. */
  val entitiesMap = new HashMap[String, HashSet[Id]]

  /** Vocabulary filter: only lemmas contained here are indexed by `addKeywords`. */
  val keywords = new HashSet[String]
  /** Vocabulary filter: only entity ids contained here are indexed by `addEntities`. */
  val entities = new HashSet[String]

  def numDocs = docMap.size

  /**
   * Registers `d` under its id (an already stored document with the same id is
   * kept) and drops the per-sentence trees and per-token POS/NER annotations
   * from `d`.
   *
   * @return `d` itself
   */
  def +=(d: nlp_serde.Document): nlp_serde.Document = {
    docMap.getOrElseUpdate(d.id, d)
    // touch to instantiate maps
    d.mentions
    d.entity
    d.sentences.foreach(s => {
      s.depTree = None
      s.parseTree = None
      s.tokens.foreach(t => {
        t.pos = None
        t.ner = None
      })
    })
    d
  }

  /** Returns the document stored under `id`; throws if there is none. */
  def apply(id: Id) = docMap(id)

  /** Returns the document stored under `id`, if any. */
  def get(id: Id) = docMap.get(id)

  /** Stores `d` and indexes it under every lower-cased lemma that is part of `keywords`. */
  def addKeywords(d: nlp_serde.Document): Unit = {
    this += d
    for (s <- d.sentences; t <- s.tokens; lemma <- t.lemma; key = lemma.toLowerCase; if (keywords(key))) {
      keywordsMap.getOrElseUpdate(key, new HashSet[Id]) += d.id
    }
  }

  /**
   * Stores `doc` and indexes it under each of its entities, using the
   * highest-scoring Freebase id of the entity, provided that id is part of
   * `entities`.
   */
  def addEntities(doc: nlp_serde.Document): Unit = {
    this += doc
    for (e <- doc.entities; if (!e.freebaseIds.isEmpty); key = FreebaseReader.convertFbIdToId(e.freebaseIds.maxBy(_._2)._1); if (entities(key))) {
      entitiesMap.getOrElseUpdate(key, new HashSet[Id]) += doc.id
    }
  }

  /** Stores `d` and indexes it under the lower-cased topic `word`. */
  def addTopic(d: nlp_serde.Document, word: String): Unit = {
    this += d
    topicsMap.getOrElseUpdate(word.toLowerCase, new HashSet[Id]) += d.id
  }

  /**
   * Returns the ids of the documents matching ALL whitespace-separated terms of
   * `queryString`.
   *
   * A term of the form `topic:x` is looked up in the topic index, `ent:x` in the
   * entity index (underscores in `x` stand for spaces), anything else in the
   * keyword index. Keyword and topic terms are lower-cased because both indexes
   * are keyed by lower-cased strings. A query without any term (empty or blank)
   * matches every document.
   */
  def query(queryString: String): Iterable[Id] = {
    // Split on runs of whitespace and drop empty pieces: a leading or doubled
    // space used to produce an empty term, which matched nothing and therefore
    // emptied the whole intersection.
    val terms = queryString.split("\\s+").filter(_.nonEmpty)
    if (terms.isEmpty) return docMap.keys
    val results = new mutable.HashSet[Id]()
    var isFirstTerm = true
    for (q <- terms) {
      val ids: scala.collection.Set[Id] =
        if (q.startsWith("topic:")) {
          topicsMap.getOrElse(q.drop(6).toLowerCase, Set.empty[Id])
        } else if (q.startsWith("ent:")) {
          entitiesMap.getOrElse(q.drop(4).replaceAll("_", " "), Set.empty[Id])
        } else {
          keywordsMap.getOrElse(q.toLowerCase, Set.empty[Id])
        }
      if (isFirstTerm) {
        // copy, so that the index sets themselves are never mutated
        results ++= ids
        isFirstTerm = false
      } else {
        results.retain((i: Id) => ids(i))
      }
    }
    results
  }
}

object DocumentStore extends Logging {
  /** Words that `readWords` never returns, regardless of their count. */
  val stopWords = HashSet("a", "able", "about", "above", "according", "accordingly", "across", "actually", "after", "afterwards",
    "again", "against", "all", "allow", "allows", "almost", "alone", "along", "already", "also", "although", "always", "am",
    "among", "amongst", "an", "and", "another", "any", "anybody", "anyhow", "anyone", "anything", "anyway", "anyways",
    "anywhere", "apart", "appear", "appreciate", "appropriate", "are", "around", "as", "aside", "ask", "asking", "associated",
    "at", "available", "away", "awfully", "b", "be", "became", "because", "become", "becomes", "becoming", "been", "before",
    "beforehand", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "both",
    "brief", "but", "by", "c", "came", "can", "cannot", "cant", "cause", "causes", "certain", "certainly", "changes", "clearly",
    "co", "com", "come", "comes", "concerning", "consequently", "consider", "considering", "contain", "containing", "contains",
    "corresponding", "could", "course", "currently", "d", "definitely", "described", "despite", "did", "different", "do",
    "does", "doing", "done", "down", "downwards", "during", "e", "each", "edu", "eg", "eight", "either", "else", "elsewhere",
    "enough", "entirely", "especially", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything",
    "everywhere", "ex", "exactly", "example", "except", "f", "far", "few", "fifth", "first", "five", "followed", "following",
    "follows", "for", "former", "formerly", "forth", "four", "from", "further", "furthermore", "g", "get", "gets", "getting",
    "given", "gives", "go", "goes", "going", "gone", "got", "gotten", "greetings", "h", "had",
    "happens", "hardly", "has", "have", "having", "he", "hello", "help", "hence", "her", "here", "hereafter", "hereby", "herein",
    "hereupon", "hers", "herself", "hi", "him", "himself", "his", "hither", "hopefully", "how", "howbeit", "however", "i", "ie",
    "if", "ignored", "immediate", "in", "inasmuch", "inc", "indeed", "indicate", "indicated", "indicates", "inner", "insofar",
    "instead", "into", "inward", "is", "it", "its", "itself", "j", "just", "k", "keep", "keeps", "kept", "know", "knows", "known",
    "l", "last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "like", "liked", "likely", "little",
    "look", "looking", "looks", "ltd", "m", "mainly", "many", "may", "maybe", "me", "mean", "meanwhile", "merely", "might",
    "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "n", "name", "namely", "nd", "near", "nearly",
    "necessary", "need", "needs", "neither", "never", "nevertheless", "new", "next", "nine", "no", "nobody", "non", "none",
    "noone", "nor", "normally", "not", "nothing", "novel", "now", "nowhere", "o", "obviously", "of", "off", "often", "oh", "ok",
    "okay", "old", "on", "once", "one", "ones", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours",
    "ourselves", "out", "outside", "over", "overall", "own", "p", "particular", "particularly", "per", "perhaps", "placed",
    "please", "plus", "possible", "presumably", "probably", "provides", "q", "que", "quite", "qv", "r", "rather", "rd", "re",
    "really", "reasonably", "regarding", "regardless", "regards", "relatively", "respectively", "right", "s", "said", "same",
    "saw", "say", "saying", "says", "second", "secondly", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self",
    "selves", "sensible", "sent", "serious", "seriously", "seven", "several", "shall", "she", "should", "since", "six", "so",
    "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry",
    "specified", "specify", "specifying",
    "still", "sub", "such", "sup", "sure", "t", "take", "taken", "tell", "tends", "th", "than", "thank", "thanks", "thanx",
    "that", "thats", "the", "their", "theirs", "them", "themselves", "then", "thence", "there", "thereafter", "thereby",
    "therefore", "therein", "theres", "thereupon", "these", "they", "think", "third", "this", "thorough", "thoroughly",
    "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "took", "toward", "towards",
    "tried", "tries", "truly", "try", "trying", "twice", "two", "u", "un", "under", "unfortunately", "unless", "unlikely",
    "until", "unto", "up", "upon", "us", "use", "used", "useful", "uses", "using", "usually", "uucp", "v", "value", "various",
    "very", "via", "viz", "vs", "w", "want", "wants", "was", "way", "we", "welcome", "well", "went", "were", "what", "whatever",
    "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether",
    "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", "willing", "wish", "with", "within",
    "without", "wonder", "would", "would", "x", "y", "yes", "yet", "you", "your", "yours", "yourself", "yourselves", "z", "zero")

  /**
   * Reads `<dir>/iai/<typ>/id_topics.tsv`, a tab-separated file with one
   * `key<TAB>integer` pair per line, into an immutable map.
   *
   * The path is built with an explicit separator after `dir`, the same way
   * `readDocs` builds its paths, so `dir` no longer needs a trailing slash.
   */
  def readTopics(dir: String, typ: String): Map[String, Int] = {
    val file = dir + "/iai/" + typ + "/id_topics.tsv"
    val source = io.Source.fromFile(file)
    // close the file even if a line is malformed (it was never closed before)
    try {
      val result = new mutable.HashMap[String, Int]()
      for (l <- source.getLines()) {
        val split = l.split("\t")
        assert(split.length == 2)
        result(split(0)) = split(1).toInt
      }
      result.toMap
    } finally {
      source.close()
    }
  }

  /**
   * Reads a `word<TAB>count` file and returns, in file order, the words whose
   * count is strictly greater than `min` and that are not in `stopWords`.
   */
  def readWords(file: String, min: Int = 20): Iterable[String] = {
    val s = FileUtil.inputSource(file, true)
    // close the source on the failure path as well
    try {
      val result = new ArrayBuffer[String]()
      for (l <- s.getLines()) {
        val split = l.split("\t")
        assert(split.length == 2)
        val (w, c) = split(0) -> split(1).toInt
        if (c > min && !stopWords(w))
          result += w
      }
      result
    } finally {
      s.close()
    }
  }

/**
 * Fills `store` from the data directory `dir`: loads the keyword and entity
 * vocabularies from `wcounts.txt.gz` / `ecounts.txt.gz`, then reads the
 * documents of `docsFile` (one JSON document per line) and indexes each of
 * them by entity and by keyword. Progress is printed to standard output.
 */
def readDocs(store: DocumentStore, dir: String, docsFile: String): Unit = {
  logger.info("Reading counts")
  store.keywords ++= readWords(dir + "/wcounts.txt.gz", 0)
  store.entities ++= readWords(dir + "/ecounts.txt.gz", 0).map(mid => FreebaseReader.convertFbIdToId(mid))
  logger.info(" # words : " + store.keywords.size)
  logger.info(" # entities : " + store.entities.size)
  logger.info("Reading title topics")
  //val titleTopics = readTopics(dir, "title")
  logger.info("Reading content topics")
  //val contentTopics = readTopics(dir, "content")
  logger.info("Reading documents")
  val docsPath = dir + "/" + docsFile
  val dotEvery = 100
  val lineEvery = 1000
  var docIdx = 0
  for (doc <- new PerLineJsonReader().read(docsPath)) {
    store += doc
    // add topics
    //titleTopics.get(doc.id).foreach(t => store.addTopic(doc, "title" + t))
    //contentTopics.get(doc.id).foreach(t => store.addTopic(doc, "content" + t))
    // add entities
    store.addEntities(doc)
    // add words
    store.addKeywords(doc)
    docIdx += 1
    if (docIdx % dotEvery == 0) print(".")
    // Report the number of keywords indexed so far (keywordsMap), in line with
    // the topic and entity figures; store.keywords is the vocabulary loaded
    // above, whose size never changes inside this loop.
    if (docIdx % lineEvery == 0) println(": read " + docIdx + " docs, " + store.keywordsMap.size + " words, "
      + store.topicsMap.size + " topics, " + store.entitiesMap.size + " entities.")
  }
  logger.info("Done.")
  logger.info("Entities: " + store.entitiesMap.take(10).map(_._1).mkString(", "))
  logger.info("Words: " + store.keywordsMap.take(10).map(_._1).mkString(", "))
  logger.info("Topics: " + store.topicsMap.take(10).map(_._1).mkString(", "))
}
}

/**
 * Command-line tool that counts lemma and entity occurrences over the
 * configured document file and writes them, most frequent first, to
 * `wcounts.txt.gz` and `ecounts.txt.gz` in the configured base directory.
 */
object WordCounts {
  def main(args: Array[String]): Unit = {
    // load the configuration once instead of once per key
    val config = ConfigFactory.load()
    val baseDir = config.getString("nlp.data.baseDir")
    val docsFile = config.getString("nlp.data.docsFile")
    val docsPath = baseDir + "/" + docsFile
    val wcounts = new mutable.HashMap[String, Int]()
    val ecounts = new mutable.HashMap[String, Int]()
    val dotEvery = 100
    val lineEvery = 1000
    var docIdx = 0
    for (d <- new PerLineJsonReader().read(docsPath)) {
      for (s <- d.sentences; t <- s.tokens; lemma <- t.lemma; key = lemma.toLowerCase) {
        wcounts(key) = 1 + wcounts.getOrElse(key, 0)
      }
      // an entity is counted under its highest-scoring Freebase id
      for (e <- d.entities; if (!e.freebaseIds.isEmpty); key = e.freebaseIds.maxBy(_._2)._1) {
        ecounts(key) = 1 + ecounts.getOrElse(key, 0)
      }
      docIdx += 1
      if (docIdx % dotEvery == 0) print(".")
      if (docIdx % lineEvery == 0) println(": read " + docIdx + " docs, " + wcounts.size + " words, " + ecounts.size + " entities.")
    }
    writeCounts(baseDir + "/wcounts.txt.gz", wcounts)
    writeCounts(baseDir + "/ecounts.txt.gz", ecounts)
  }

  /** Writes `key<TAB>count` lines to `path`, highest count first, and always closes the writer. */
  private def writeCounts(path: String, counts: scala.collection.Map[String, Int]): Unit = {
    val writer = FileUtil.writer(path, true)
    try {
      for ((key, c) <- counts.toSeq.sortBy(-_._2)) {
        writer.println(key + "\t" + c)
      }
      writer.flush()
    } finally {
      writer.close()
    }
  }
}
--------------------------------------------------------------------------------
/test/org/sameersingh/ervisualizer/data/InMemoryDBTest.scala:
--------------------------------------------------------------------------------
package org.sameersingh.ervisualizer.data

import org.junit.Test
import scala.collection.mutable
import java.io.File

/**
 * @author sameer
 * @since 6/12/14.
 */
class InMemoryDBTest {

  def initDB: DB = {
    val db = new InMemoryDB
    // docs
    val doc1Id = "DOC_000"
    db._documents(doc1Id) = Document(doc1Id, "", "", "", "Obama is awesome. Yes, he is married to Michelle. He is a resident of USA, same as George W. and Michelle.",
      Seq(Sentence(doc1Id, 0, "Obama is awesome."),
        Sentence(doc1Id, 1, "Yes, he is married to Michelle."),
        Sentence(doc1Id, 2, "He is a resident of USA, same as George W.
and Michelle.")))

    // entities
    val barackId = "BarackObama"
    db._entityIds += barackId
    db._entityHeader(barackId) = EntityHeader(barackId, "Barack Obama", "PER", 1.0)
    db._entityInfo(barackId) = EntityInfo(barackId, Map("/mid" -> "/m/02mjmr",
      "/common/topic/description" ->
        "Barack Hussein Obama II is the 44th and current President of the United States, and the first African American to hold the office. Born in Honolulu, Hawaii, Obama is a graduate of Columbia University and Harvard Law School, where he served as president of the Harvard Law Review. He was a community organizer in Chicago before earning his law degree. He worked as a civil rights attorney and taught constitutional law at the University of Chicago Law School from 1992 to 2004. He served three terms representing the 13th District in the Illinois Senate from 1997 to 2004, running unsuccessfully for the United States House of Representatives in 2000. In 2004, Obama received national attention during his campaign to represent Illinois in the United States Senate with his victory in the March Democratic Party primary, his keynote address at the Democratic National Convention in July, and his election to the Senate in November. He began his presidential campaign in 2007 and, after a close primary campaign against Hillary Rodham Clinton in 2008, he won sufficient delegates in the Democratic Party primaries to receive the presidential nomination.",
      "/common/topic/image" -> "/m/02nqg_h"))
    db._entityFreebase(barackId) = EntityFreebase(barackId, Seq("US President"))
    val michelleId = "MichelleObama"
    db._entityIds += michelleId
    db._entityHeader(michelleId) = EntityHeader(michelleId, "Michelle Obama", "PER", 0.5)
    db._entityInfo(michelleId) = EntityInfo(michelleId, Map("/mid" -> "/m/025s5v9",
      "/common/topic/description" ->
        "Michelle LaVaughn Robinson Obama, an American lawyer and writer, is the wife of the 44th and current President of the United States, Barack Obama, and the first African-American First Lady of the United States. Raised on the South Side of Chicago, she is a graduate of Princeton University and Harvard Law School, and spent the early part of her legal career working at the law firm Sidley Austin, where she met her future husband. Subsequently, she worked as part of the staff of Chicago mayor Richard M. Daley, and for the University of Chicago Medical Center. Throughout 2007 and 2008, she helped campaign for her husband's presidential bid. She delivered a keynote address at the 2008 Democratic National Convention and also spoke at the 2012 Democratic National Convention. She is the mother of daughters Malia and Natasha. As the wife of a Senator, and later the First Lady, she has become a fashion icon and role model for women, and an advocate for poverty awareness, nutrition, and healthy eating.",
      "/common/topic/image" -> "/m/04s8ccw"))
    db._entityFreebase(michelleId) = EntityFreebase(michelleId, Seq("Celebrity"))
    val georgeId = "GeorgeBush"
    db._entityIds += georgeId
    db._entityHeader(georgeId) = EntityHeader(georgeId, "George W. Bush", "PER", 0.75)
    db._entityInfo(georgeId) = EntityInfo(georgeId, Map("/mid" -> "/m/09b6zr",
      "/common/topic/description" ->
        "George Walker Bush is an American politician and businessman who served as the 43rd President of the United States from 2001 to 2009, and the 46th Governor of Texas from 1995 to 2000. The eldest son of Barbara and George H. W. Bush, he was born in New Haven, Connecticut. After graduating from Yale University in 1968 and Harvard Business School in 1975, Bush worked in oil businesses. He married Laura Welch in 1977 and ran unsuccessfully for the House of Representatives shortly thereafter. He later co-owned the Texas Rangers baseball team before defeating Ann Richards in the 1994 Texas gubernatorial election. Bush was elected president in 2000 after a close and controversial election, becoming the fourth president to be elected while receiving fewer popular votes nationwide than his opponent. Bush is the second president to have been the son of a former president, the first being John Quincy Adams. He is also the brother of Jeb Bush, former Governor of Florida. Eight months into Bush's first term as president, the September 11, 2001 terrorist attacks occurred.",
      "/common/topic/image" -> "/m/02bs94j"))
    db._entityFreebase(georgeId) = EntityFreebase(georgeId, Seq("US President"))
    val usaId = "USA"
    db._entityIds += usaId
    db._entityHeader(usaId) = EntityHeader(usaId, "United State of America", "LOC", 0.8)
    db._entityInfo(usaId) = EntityInfo(usaId, Map("/mid" -> "/m/09c7w0",
      "/common/topic/description" ->
        "The United States of America, commonly referred to as the United States, America, and sometimes the States, is a federal republic consisting of 50 states and a federal district. The 48 contiguous states and Washington, D.C., are in central North America between Canada and Mexico. The state of Alaska is the northwestern part of North America and the state of Hawaii is an archipelago in the mid-Pacific. The country also has five populated and nine unpopulated territories in the Pacific and the Caribbean. At 3.71 million square miles and with around 318 million people, the United States is the world's third or fourth-largest country by total area and third-largest by population. It is one of the world's most ethnically diverse and multicultural nations, the product of large-scale immigration from many countries. The geography and climate of the United States is also extremely diverse, and it is home to a wide variety of wildlife. Paleo-Indians migrated from Eurasia to what is now the U.S. mainland around 15,000 years ago, with European colonization beginning in the 16th century. The United States emerged from 13 British colonies located along the Atlantic seaboard.",
      "/common/topic/image" -> "/m/02nbh90"))
    db._entityFreebase(usaId) = EntityFreebase(usaId, Seq("Country"))

    // entity text provenances: (start -> end) character spans of each mention
    // inside the sentence with the given index
    db._entityText(barackId) = EntityText(barackId, Seq(Provenance(doc1Id, 0, Seq(0 -> 5)), Provenance(doc1Id, 1, Seq(5 -> 7)), Provenance(doc1Id, 2, Seq(0 -> 2))))
    db._entityText(michelleId) = EntityText(michelleId, Seq(Provenance(doc1Id, 1, Seq(22 -> 30)), Provenance(doc1Id, 2, Seq(47 -> 55))))
    db._entityText(georgeId) = EntityText(georgeId, Seq(Provenance(doc1Id, 2, Seq(33 -> 42))))
    db._entityText(usaId) = EntityText(usaId, Seq(Provenance(doc1Id, 2, Seq(20 -> 23))))

    // entity type provenances
    db._entityTypePredictions(barackId) = Seq("person")
    db._entityTypeProvenances.getOrElseUpdate(barackId, new mutable.HashMap)("person") =
      TypeModelProvenances(barackId, "person", Seq(Provenance(doc1Id, 1, Seq(5 -> 7))))
    db._entityTypePredictions(michelleId) = Seq("person")
    db._entityTypeProvenances.getOrElseUpdate(michelleId, new mutable.HashMap)("person") =
      TypeModelProvenances(michelleId, "person", Seq(Provenance(doc1Id, 1, Seq(22 -> 30)), Provenance(doc1Id, 2, Seq(47 -> 55))))
    db._entityTypePredictions(georgeId) = Seq("person")
    // "George W." occupies characters 33 to 42 of sentence 2, the span also used
    // by the entity text and relation provenances (this one used to be 32 -> 41)
    db._entityTypeProvenances.getOrElseUpdate(georgeId, new mutable.HashMap)("person") =
      TypeModelProvenances(georgeId, "person", Seq(Provenance(doc1Id, 2, Seq(33 -> 42))))
    db._entityTypePredictions(usaId) = Seq("country")
    db._entityTypeProvenances.getOrElseUpdate(usaId, new mutable.HashMap)("country") =
      TypeModelProvenances(usaId, "country", Seq(Provenance(doc1Id, 2, Seq(20 -> 23))))

    // relations
    val barackMichelleId = barackId -> michelleId
    db._relationIds += barackMichelleId
    db._relationHeader(barackMichelleId) = RelationHeader(barackMichelleId._1, barackMichelleId._2, 0.25)
    db._relationFreebase(barackMichelleId) = RelationFreebase(barackMichelleId._1, barackMichelleId._2, Seq("/people/person/spouse"))
    val georgeUSAId = georgeId -> usaId
    db._relationIds += georgeUSAId
    db._relationHeader(georgeUSAId) = RelationHeader(georgeUSAId._1, georgeUSAId._2, 1.0)
    db._relationFreebase(georgeUSAId) = RelationFreebase(georgeUSAId._1, georgeUSAId._2, Seq("/location/president"))
    val barackUSAId = barackId -> usaId
    db._relationIds += barackUSAId
    db._relationHeader(barackUSAId) = RelationHeader(barackUSAId._1, barackUSAId._2, 0.75)
    db._relationFreebase(barackUSAId) = RelationFreebase(barackUSAId._1, barackUSAId._2, Seq("/location/president"))
    val michelleUSAId = michelleId -> usaId
    db._relationIds += michelleUSAId
    db._relationHeader(michelleUSAId) = RelationHeader(michelleUSAId._1, michelleUSAId._2, 0.25)
    db._relationFreebase(michelleUSAId) = RelationFreebase(michelleUSAId._1, michelleUSAId._2, Seq("/location/citizen"))

    // relations text provenances; every RelationText carries the endpoints of
    // the relation it is stored under (the George-USA and Barack-USA entries
    // used to be built with the Barack-Michelle endpoints)
    db._relationText(barackMichelleId) = RelationText(barackMichelleId._1, barackMichelleId._2, Seq(Provenance(doc1Id, 1, Seq(5 -> 7, 22 -> 30))))
    db._relationText(georgeUSAId) = RelationText(georgeUSAId._1, georgeUSAId._2, Seq(Provenance(doc1Id, 2, Seq(33 -> 42, 20 -> 23))))
    db._relationText(barackUSAId) = RelationText(barackUSAId._1, barackUSAId._2, Seq(Provenance(doc1Id, 2, Seq(0 -> 2, 20 -> 23))))
    db._relationText(michelleUSAId) = RelationText(michelleUSAId._1, michelleUSAId._2, Seq(Provenance(doc1Id, 2, Seq(47 -> 55, 20 -> 23))))

    // relations type provenances
    db._relationPredictions(barackMichelleId) = Seq("spouse")
    db._relationProvenances.getOrElseUpdate(barackMichelleId, new mutable.HashMap)("spouse") =
      RelModelProvenances(barackMichelleId._1, barackMichelleId._2, "spouse", Seq(Provenance(doc1Id, 1, Seq(5 -> 7, 22 -> 30))))

    db._relationPredictions(georgeUSAId) = Seq("citizen")
    db._relationProvenances.getOrElseUpdate(georgeUSAId, new mutable.HashMap)("citizen") =
      RelModelProvenances(georgeUSAId._1, georgeUSAId._2, "citizen", Seq(Provenance(doc1Id, 2, Seq(33 -> 42, 20 -> 23))))

    db._relationPredictions(barackUSAId) = Seq("citizen")
    db._relationProvenances.getOrElseUpdate(barackUSAId, new mutable.HashMap)("citizen") =
      RelModelProvenances(barackUSAId._1, barackUSAId._2, "citizen", Seq(Provenance(doc1Id, 2, Seq(0 -> 2, 20 -> 23))))

    db._relationPredictions(michelleUSAId) = Seq("citizen")
    db._relationProvenances.getOrElseUpdate(michelleUSAId, new mutable.HashMap)("citizen") =
      RelModelProvenances(michelleUSAId._1, michelleUSAId._2, "citizen", Seq(Provenance(doc1Id, 2, Seq(47 -> 55, 20 -> 23))))

    db
  }

  /**
   * Writes `db` into a fresh directory and returns the directory's canonical path.
   */
  def writeFiles(db: DB): String = {
    // createTempFile only reserves a unique name: the file it creates is
    // removed and a directory with the same name is created in its place
    val f = File.createTempFile("ervisualizer.data", (System.currentTimeMillis() % 1000).toString)
    if (f.exists() && f.isFile) f.delete()
    f.mkdirs()
    InMemoryDB.writeDB(f.getCanonicalPath, db)
    f.getCanonicalPath
  }

  @Test
  def testAll() {
    val db = initDB
131 | println(db) 132 | val dir = writeFiles(db) 133 | println("dir: " + dir) 134 | val ndb = InMemoryDB.readFromTSV(dir) 135 | println(ndb) 136 | } 137 | 138 | } -------------------------------------------------------------------------------- /app/org/sameersingh/ervisualizer/nlp/ReadProcessedDocs.scala: -------------------------------------------------------------------------------- 1 | package org.sameersingh.ervisualizer.nlp 2 | 3 | import com.typesafe.config.ConfigFactory 4 | import org.sameersingh.ervisualizer.data._ 5 | import scala.collection.mutable.ArrayBuffer 6 | import scala.collection.mutable 7 | import scala.Some 8 | import org.sameersingh.ervisualizer.data.Sentence 9 | import org.sameersingh.ervisualizer.data.Document 10 | import java.io.{FileOutputStream, OutputStreamWriter, PrintWriter} 11 | import org.sameersingh.ervisualizer.freebase.MongoIO 12 | import play.api.libs.json.Json 13 | 14 | /** 15 | * @author sameer 16 | * @since 7/15/14. 17 | */ 18 | /* 19 | class ReadProcessedDocs(val baseDir: String, val filelist: String) { 20 | 21 | val reader = new ReadD2DDocs(baseDir) 22 | 23 | def sentences(fid: String): Seq[String] = { 24 | val sents = io.Source.fromFile(baseDir + "/processed/%s.sent" format (fid), "UTF-8") 25 | val strings = new ArrayBuffer[String] 26 | for (s <- sents.getLines()) { 27 | strings += s 28 | } 29 | sents.close() 30 | strings 31 | } 32 | 33 | var errors = 0 34 | 35 | class Mention(val docId: String, 36 | val sentId: Int, 37 | val toks: ArrayBuffer[String] = new ArrayBuffer, 38 | var nerTag: Option[String] = None, 39 | var wiki: Option[(String, String, Int)] = None, 40 | val figerTypes: mutable.HashSet[String] = new mutable.HashSet) { 41 | override def toString: String = 42 | "%s %d:\t%s\t%s\t%s\t%s" format(docId, sentId, toks.mkString(", "), nerTag, wiki, figerTypes.mkString(", ")) 43 | 44 | def provenance(s: Sentence): Option[Provenance] = { 45 | assert(s.sentId == sentId) 46 | assert(s.docId == docId) 47 | val searchString = { 
48 | var init = toks.mkString(" ").replaceAll(" '", "'").replaceAll("\\\\/", "/") 49 | if(init.contains("Defense") && s.string.contains("Defence")) init = init.replaceAll("Defense", "Defence") 50 | if(init.contains("Defence") && s.string.contains("Defense")) init = init.replaceAll("Defence", "Defense") 51 | if(init.contains("defense") && s.string.contains("defence")) init = init.replaceAll("defense", "defence") 52 | if(init.contains("defence") && s.string.contains("defense")) init = init.replaceAll("defence", "defense") 53 | if(init.contains("rganization") && s.string.contains("rganisation")) init = init.replaceAll("rganization", "rganisation") 54 | if(init.contains("rganisation") && s.string.contains("rganization")) init = init.replaceAll("rganisation", "rganization") 55 | if(init.contains("Labour") && s.string.contains("Labor")) init = init.replaceAll("Labour", "Labor") 56 | if(init.contains("Labor") && s.string.contains("Labour")) init = init.replaceAll("Labor", "Labour") 57 | if(init.contains(". ") && !s.string.contains(init)) init = init.replaceAll("\\. 
", ".") 58 | if(init.contains(" ,") && !s.string.contains(init)) init = init.replaceAll(" ,", ",") 59 | init 60 | } 61 | val start = s.string.indexOf(searchString) 62 | if (start < 0) { 63 | errors += 1 64 | println("Cannot find {%s} in {%s}" format(searchString, s.string)) 65 | None 66 | } else { 67 | val end = start + searchString.length 68 | Some(Provenance(s.docId, s.sentId, Seq(start -> end))) 69 | } 70 | } 71 | } 72 | 73 | def readMentions(fid: String): Seq[Mention] = { 74 | val mentions = new ArrayBuffer[Mention] 75 | var currentSentId = 0 76 | var currentTokId = 0 77 | var currentMention: Mention = null 78 | 79 | def endCurrentMention() { 80 | if (currentMention != null) mentions += currentMention 81 | currentMention = null 82 | } 83 | 84 | def beginNewMention() { 85 | assert(currentMention == null) 86 | currentMention = new Mention(fid, currentSentId) 87 | } 88 | 89 | def addToCurrMention(tok: String, seg: String, figer: Seq[String], wiki: (String, String, Int), ner: String) { 90 | if (seg == "O") { 91 | endCurrentMention() 92 | return 93 | } 94 | if (seg.startsWith("B-")) { 95 | endCurrentMention() 96 | beginNewMention() 97 | } 98 | if (seg.startsWith("I-")) { 99 | assert(currentMention != null) 100 | } 101 | currentMention.toks += tok 102 | assert(currentMention.nerTag.forall(_ == ner)) 103 | if (ner != "O") currentMention.nerTag = Some(ner) 104 | assert(currentMention.wiki.forall(_ == wiki)) 105 | if (wiki._1 != "O") currentMention.wiki = Some(wiki) 106 | assert(currentMention.figerTypes.isEmpty || currentMention.figerTypes.toSet == figer.toSet, "Figer mismatch: " + currentMention.figerTypes.mkString(",") + ", new: " + figer.mkString(",")) 107 | currentMention.figerTypes ++= figer 108 | } 109 | 110 | val figerIter = io.Source.fromFile(baseDir + "/processed/%s.figer" format (fid), "UTF-8").getLines() 111 | val segsIter = io.Source.fromFile(baseDir + "/processed/%s.segment" format (fid), "UTF-8").getLines() 112 | val wikiIter = io.Source.fromFile(baseDir 
+ "/processed/%s.wiki" format (fid), "UTF-8").getLines() 113 | 114 | while (figerIter.hasNext && segsIter.hasNext && wikiIter.hasNext) { 115 | val figer = figerIter.next().split("\t") 116 | val segs = segsIter.next().split("\t") 117 | val wiki = wikiIter.next().split("\t") 118 | if (figer.length == 1) { 119 | // end of sentence 120 | assert(segs.length == 1) 121 | assert(wiki.length == 1) 122 | assert(figer(0).isEmpty) 123 | assert(segs(0).isEmpty) 124 | assert(wiki(0).isEmpty) 125 | // end of mention? 126 | endCurrentMention() 127 | currentTokId = 0 128 | currentSentId += 1 129 | } else { 130 | val tok = figer(0) 131 | assert(tok == segs(0) && tok == wiki(0)) 132 | val seg = segs(1) 133 | val ner = segs(2) 134 | val figerStr = figer(1) 135 | val figerList: Seq[String] = if (figerStr == "O") Seq.empty else figerStr.drop(2).split(",").map(_.drop(1).replaceAll("\\/", ":")).toSeq 136 | val wikiTriplet = if (wiki(1) == "O") ("O", "", 0) else (wiki(1), wiki(2), wiki(3).toInt) 137 | addToCurrMention(tok, seg, figerList, wikiTriplet, ner) 138 | currentTokId += 1 139 | } 140 | } 141 | assert(!figerIter.hasNext && !segsIter.hasNext && !wikiIter.hasNext) 142 | // end of sentence 143 | endCurrentMention() 144 | mentions 145 | } 146 | 147 | class EntityInfo { 148 | val mentions = new mutable.HashMap[String, ArrayBuffer[Mention]] 149 | val sentences = new mutable.HashMap[(String, Int), mutable.HashSet[String]] 150 | 151 | def +=(m: Mention) { 152 | val mid = m.wiki.get._1.drop(1).replaceFirst("/", "_") 153 | mentions.getOrElseUpdate(mid, new ArrayBuffer) += m 154 | sentences.getOrElseUpdate(m.docId -> m.sentId, new mutable.HashSet) += mid 155 | } 156 | } 157 | 158 | def assimilateMentions(mentions: Seq[Mention], einfo: EntityInfo) { 159 | for (m <- mentions) { 160 | if (m.wiki.isDefined && m.wiki.get._1 != "null") { 161 | einfo += m 162 | } 163 | } 164 | } 165 | 166 | def entityToDB(mid: String, mentions: Seq[Mention], db: InMemoryDB, maxMentions: Double) { 167 | // TODO get 
types from mongo, and then filter mentions according to that, then continue if mentions.isNotEmpty 168 | db._entityIds += mid 169 | // header 170 | val name = mentions.head.wiki.get._2 171 | val nerTag = mentions.map(_.nerTag.get).groupBy(x => x).map(p => p._1 -> p._2.size).toSeq.sortBy(-_._2).head._1 172 | // normalize popularity? 173 | db._entityHeader(mid) = EntityHeader(mid, name, nerTag, mentions.size / maxMentions) 174 | // info 175 | // empty for now, filled in later 176 | db._entityInfo(mid) = EntityInfo(mid, Map.empty) 177 | // freebase 178 | // empty for now, filled in later 179 | db._entityFreebase(mid) = EntityFreebase(mid, Seq.empty) 180 | // text provenances 181 | val provenances = mentions.map(m => m.provenance(db.document(m.docId).sents(m.sentId))) 182 | val distinctProvenances = provenances.flatten.distinct 183 | db._entityText(mid) = EntityText(mid, distinctProvenances) 184 | for(p <- distinctProvenances) { 185 | assert(p.tokPos.length == 1) 186 | val map = db._docEntityProvenances.getOrElseUpdate(p.docId -> p.sentId, new mutable.HashMap) 187 | map(mid) = map.getOrElse(mid, Seq.empty) ++ Seq(p) 188 | } 189 | // figer provenances 190 | val figerTypes = new mutable.LinkedHashSet[String] 191 | mentions.foreach(figerTypes ++= _.figerTypes) 192 | db._entityTypePredictions(mid) = figerTypes.toSeq 193 | db._entityTypeProvenances.getOrElseUpdate(mid, new mutable.HashMap) ++= mentions 194 | .zip(provenances) 195 | .map({ 196 | case (m, p) => { 197 | m.figerTypes.map(ft => ft ->(m, p)).toSeq 198 | } 199 | }).flatten 200 | .groupBy(_._1) 201 | .map({ 202 | case (s, v) => s -> v.map(_._2) 203 | }).map({ 204 | case (s, mps) => s -> TypeModelProvenances(mid, s, mps.map(_._2).flatten.distinct) 205 | }) 206 | } 207 | 208 | def readDoc(fid: String, path: String, db: InMemoryDB, einfo: EntityInfo) { 209 | //println("--- doc: " + fid + " ---") 210 | val origName = "Nigeria/%s/stories/%s" format(path, fid) 211 | // read document 212 | val ddoc = reader.readDoc(fid, 
origName) 213 | // read sentences 214 | val sentStrings = sentences(fid) 215 | val doc = Document(fid, origName, ddoc.title, ddoc.cite, ddoc.text, sentStrings.zipWithIndex.map(si => Sentence(fid, si._2, si._1))) 216 | db._documents(doc.docId) = doc 217 | // read mentions 218 | val mentions = readMentions(fid) 219 | assimilateMentions(mentions, einfo) 220 | } 221 | 222 | def readAllDocs: (DB, EntityInfo) = { 223 | val db = new InMemoryDB 224 | val einfo = new EntityInfo 225 | val fileList = io.Source.fromFile(baseDir + "/" + filelist, "UTF-8") 226 | var numRead = 0 227 | for (line <- fileList.getLines()) { 228 | val split = line.split("\t") 229 | val fid = split(0).dropRight(4) 230 | val path = split(1) 231 | readDoc(fid, path, db, einfo) 232 | numRead += 1 233 | if (numRead % 79 == 0) print(".") 234 | } 235 | println() 236 | fileList.close() 237 | println(einfo.mentions.size + ": " + einfo.mentions.toSeq.sortBy(-_._2.size).take(10).map({ 238 | case (k, v) => k + "(" + v.size + ")" 239 | }).mkString(",")) 240 | // add entities to DB 241 | val maxMentions = einfo.mentions.map(_._2.size).max.toDouble 242 | for ((mid, ms) <- einfo.mentions) { 243 | entityToDB(mid, ms, db, maxMentions) 244 | } 245 | println("Num of errors: " + errors) 246 | (db, einfo) 247 | } 248 | } 249 | 250 | 251 | object ReadProcessedDocs extends App { 252 | val baseDir = ConfigFactory.load().getString("nlp.data.baseDir") 253 | val filelist = ConfigFactory.load().getString("nlp.data.filelist") 254 | val reader = new ReadProcessedDocs(baseDir, filelist) 255 | val db = reader.readAllDocs 256 | /* 257 | println("Writing mids") 258 | val entityIdWriter = new PrintWriter(baseDir + "/d2d.mids") 259 | for(mid <- db._1.entityIds) { 260 | entityIdWriter.println(mid) 261 | } 262 | entityIdWriter.flush() 263 | entityIdWriter.close() 264 | */ 265 | println("Starting Mongo") 266 | val mongo = new MongoIO("localhost", 27017) 267 | mongo.updateDB(db._1.asInstanceOf[InMemoryDB]) 268 | println("Writing Mongo") 269 
// Word cloud layout by Jason Davies, http://www.jasondavies.com/word-cloud/
// Algorithm due to Jonathan Feinberg, http://static.mrfeinberg.com/bv_ch03.pdf
(function() {
  // Creates a word-cloud layout object. Configuration is done through the
  // chained accessors defined below (words, size, font, rotate, ...); the
  // layout reports progress through "word" and "end" events, exposed to
  // callers via the rebound `on` method.
  function cloud() {
    var size = [256, 256],
        text = cloudText,
        font = cloudFont,
        fontSize = cloudFontSize,
        fontStyle = cloudFontNormal,
        fontWeight = cloudFontNormal,
        rotate = cloudRotate,
        padding = cloudPadding,
        spiral = archimedeanSpiral,
        words = [],
        timeInterval = Infinity,
        event = d3.dispatch("word", "end"),
        timer = null,
        cloud = {};

    // Starts (or restarts) the layout: evaluates every accessor for every
    // word, orders the words by descending size and places them one by one.
    // Placement runs in slices of at most `timeInterval` milliseconds.
    // Returns the layout object for chaining.
    cloud.start = function() {
      // One 32-bit mask word per 32 horizontal pixels of the layout area.
      var board = zeroArray((size[0] >> 5) * size[1]),
          bounds = null,
          n = words.length,
          i = -1,
          tags = [],
          data = words.map(function(d, i) {
            d.text = text.call(this, d, i);
            d.font = font.call(this, d, i);
            d.style = fontStyle.call(this, d, i);
            d.weight = fontWeight.call(this, d, i);
            d.rotate = rotate.call(this, d, i);
            d.size = ~~fontSize.call(this, d, i);
            d.padding = padding.call(this, d, i);
            return d;
          }).sort(function(a, b) { return b.size - a.size; });

      if (timer) clearInterval(timer);
      timer = setInterval(step, 0);
      step();

      return cloud;

      // Places as many of the remaining words as fit in one time slice.
      // Emits "word" for each placed word and "end" once all were tried.
      function step() {
        var start = +new Date,
            d;
        while (+new Date - start < timeInterval && ++i < n && timer) {
          d = data[i];
          // Random starting point in the middle half of each dimension.
          d.x = (size[0] * (Math.random() + .5)) >> 1;
          d.y = (size[1] * (Math.random() + .5)) >> 1;
          cloudSprite(d, data, i);
          if (d.hasText && place(board, d, bounds)) {
            tags.push(d);
            event.word(d);
            if (bounds) cloudBounds(bounds, d);
            else bounds = [{x: d.x + d.x0, y: d.y + d.y0}, {x: d.x + d.x1, y: d.y + d.y1}];
            // Temporary hack
            // (reported coordinates become relative to the centre of the area)
            d.x -= size[0] >> 1;
            d.y -= size[1] >> 1;
          }
        }
        if (i >= n) {
          cloud.stop();
          event.end(tags, bounds);
        }
      }
    };

    // Cancels any pending placement work. Returns the layout object.
    cloud.stop = function() {
      if (timer) {
        clearInterval(timer);
        timer = null;
      }
      return cloud;
    };

    // Gets or sets the time-slice length; null/undefined means "no limit".
    cloud.timeInterval = function(x) {
      if (!arguments.length) return timeInterval;
      timeInterval = x == null ? Infinity : x;
      return cloud;
    };

    // Walks the configured spiral outwards from the tag's starting point and
    // commits the tag to the first position that lies inside the layout area
    // and does not collide with anything already on the board.
    // Returns true when the tag was placed (its sprite is then merged into
    // `board` and removed from the tag), false when the search ran out.
    function place(board, tag, bounds) {
      var startX = tag.x,
          startY = tag.y,
          maxDelta = Math.sqrt(size[0] * size[0] + size[1] * size[1]),
          s = spiral(size),
          dt = Math.random() < .5 ? 1 : -1,
          t = -dt,
          dxdy,
          dx,
          dy;

      while (dxdy = s(t += dt)) {
        dx = ~~dxdy[0];
        dy = ~~dxdy[1];

        // Compare magnitudes: the offsets are negative for much of the
        // spiral, and a signed comparison would let the search continue
        // through positions that can no longer fit.
        if (Math.min(Math.abs(dx), Math.abs(dy)) >= maxDelta) break;

        tag.x = startX + dx;
        tag.y = startY + dy;

        if (tag.x + tag.x0 < 0 || tag.y + tag.y0 < 0 ||
            tag.x + tag.x1 > size[0] || tag.y + tag.y1 > size[1]) continue;
        // TODO only check for collisions within current bounds.
        if (!bounds || !cloudCollide(tag, board, size[0])) {
          if (!bounds || collideRects(tag, bounds)) {
            var sprite = tag.sprite,
                w = tag.width >> 5,
                sw = size[0] >> 5,
                lx = tag.x - (w << 4),
                sx = lx & 0x7f,
                msx = 32 - sx,
                h = tag.y1 - tag.y0,
                x = (tag.y + tag.y0) * sw + (lx >> 5),
                last;
            // OR the tag's sprite rows into the board, shifted to its column.
            for (var j = 0; j < h; j++) {
              last = 0;
              for (var i = 0; i <= w; i++) {
                board[x + i] |= (last << msx) | (i < w ? (last = sprite[j * w + i]) >>> sx : 0);
              }
              x += sw;
            }
            delete tag.sprite;
            return true;
          }
        }
      }
      return false;
    }

    // Gets or sets the array of word objects to lay out.
    cloud.words = function(x) {
      if (!arguments.length) return words;
      words = x;
      return cloud;
    };

    // Gets or sets the layout area as [width, height] (coerced to numbers).
    cloud.size = function(x) {
      if (!arguments.length) return size;
      size = [+x[0], +x[1]];
      return cloud;
    };

    // The accessors below accept either a constant or a function(d, i);
    // d3.functor wraps constants into functions.
    cloud.font = function(x) {
      if (!arguments.length) return font;
      font = d3.functor(x);
      return cloud;
    };

    cloud.fontStyle = function(x) {
      if (!arguments.length) return fontStyle;
      fontStyle = d3.functor(x);
      return cloud;
    };

    cloud.fontWeight = function(x) {
      if (!arguments.length) return fontWeight;
      fontWeight = d3.functor(x);
      return cloud;
    };

    cloud.rotate = function(x) {
      if (!arguments.length) return rotate;
      rotate = d3.functor(x);
      return cloud;
    };

    cloud.text = function(x) {
      if (!arguments.length) return text;
      text = d3.functor(x);
      return cloud;
    };

    // Gets or sets the spiral: either the name of a built-in spiral
    // ("archimedean", "rectangular") or a custom spiral factory function.
    cloud.spiral = function(x) {
      if (!arguments.length) return spiral;
      spiral = spirals[x + ""] || x;
      return cloud;
    };

    cloud.fontSize = function(x) {
      if (!arguments.length) return fontSize;
      fontSize = d3.functor(x);
      return cloud;
    };

    cloud.padding = function(x) {
      if (!arguments.length) return padding;
      padding = d3.functor(x);
      return cloud;
    };

    return d3.rebind(cloud, event, "on");
  }

  // Default accessor: the word's own `text` property.
  function cloudText(d) {
    return d.text;
  }

  // Default accessor: font family.
  function cloudFont() {
    return "serif";
  }

  // Default accessor shared by font style and font weight.
  function cloudFontNormal() {
    return "normal";
  }

  // Default accessor: font size is the square root of the word's `value`.
  function cloudFontSize(d) {
    return Math.sqrt(d.value);
  }

  // Default accessor: a random multiple of 30 degrees between -90 and 60.
  function cloudRotate() {
    return (~~(Math.random() * 6) - 3) * 30;
  }

  // Default accessor: padding of 1 around each word.
  function cloudPadding() {
    return 1;
  }

  // Fetches a monochrome sprite bitmap for the specified text.
  // Load in batches for speed.
  //
  // Starting at data[di], draws as many words as fit onto the shared scratch
  // canvas, then reads the pixels back once and converts each drawn word into
  // a bit mask (d.sprite) with 32 pixels per array element. Also sets the
  // word's width/height and its extent (x0, y0, x1, y1) relative to its centre.
  // Does nothing if `d` already has a sprite.
  function cloudSprite(d, data, di) {
    if (d.sprite) return;
    c.clearRect(0, 0, (cw << 5) / ratio, ch / ratio);
    var x = 0,
        y = 0,
        maxh = 0,
        n = data.length;
    --di;
    while (++di < n) {
      d = data[di];
      c.save();
      c.font = d.style + " " + d.weight + " " + ~~((d.size + 1) / ratio) + "px " + d.font;
      var w = c.measureText(d.text + "m").width * ratio,
          h = d.size << 1;
      if (d.rotate) {
        // Bounding box of the rotated text; width is rounded up to 32 pixels.
        var sr = Math.sin(d.rotate * cloudRadians),
            cr = Math.cos(d.rotate * cloudRadians),
            wcr = w * cr,
            wsr = w * sr,
            hcr = h * cr,
            hsr = h * sr;
        w = (Math.max(Math.abs(wcr + hsr), Math.abs(wcr - hsr)) + 0x1f) >> 5 << 5;
        h = ~~Math.max(Math.abs(wsr + hcr), Math.abs(wsr - hcr));
      } else {
        w = (w + 0x1f) >> 5 << 5;
      }
      if (h > maxh) maxh = h;
      if (x + w >= (cw << 5)) {
        // Row of the scratch canvas is full: wrap to the next row.
        x = 0;
        y += maxh;
        maxh = 0;
      }
      if (y + h >= ch) {
        // Scratch canvas is full; remaining words wait for a later batch.
        // Pop the state pushed by c.save() above so the canvas state stack
        // does not grow by one entry per full batch.
        c.restore();
        break;
      }
      c.translate((x + (w >> 1)) / ratio, (y + (h >> 1)) / ratio);
      if (d.rotate) c.rotate(d.rotate * cloudRadians);
      c.fillText(d.text, 0, 0);
      if (d.padding) c.lineWidth = 2 * d.padding, c.strokeText(d.text, 0, 0);
      c.restore();
      d.width = w;
      d.height = h;
      d.xoff = x;
      d.yoff = y;
      d.x1 = w >> 1;
      d.y1 = h >> 1;
      d.x0 = -d.x1;
      d.y0 = -d.y1;
      d.hasText = true;
      x += w;
    }
    var pixels = c.getImageData(0, 0, (cw << 5) / ratio, ch / ratio).data,
        sprite = [];
    while (--di >= 0) {
      d = data[di];
      if (!d.hasText) continue;
      var w = d.width,
          w32 = w >> 5,
          h = d.y1 - d.y0;
      // Zero the buffer
      for (var i = 0; i < h * w32; i++) sprite[i] = 0;
      x = d.xoff;
      if (x == null) return;
      y = d.yoff;
      var seen = 0,
          seenRow = -1;
      for (var j = 0; j < h; j++) {
        for (var i = 0; i < w; i++) {
          var k = w32 * j + (i >> 5),
              m = pixels[((y + j) * (cw << 5) + (x + i)) << 2] ? 1 << (31 - (i % 32)) : 0;
          sprite[k] |= m;
          seen |= m;
        }
        if (seen) seenRow = j;
        else {
          // Nothing drawn yet: trim this empty leading row from the sprite.
          d.y0++;
          h--;
          j--;
          y++;
        }
      }
      d.y1 = d.y0 + seenRow;
      d.sprite = sprite.slice(0, (d.y1 - d.y0) * w32);
    }
  }

  // Use mask-based collision detection.
  // Returns true if any set bit of the tag's sprite, shifted to the tag's
  // current position, overlaps a set bit of `board`. `sw` is the board width
  // in pixels.
  function cloudCollide(tag, board, sw) {
    sw >>= 5;
    var sprite = tag.sprite,
        w = tag.width >> 5,
        lx = tag.x - (w << 4),
        sx = lx & 0x7f,
        msx = 32 - sx,
        h = tag.y1 - tag.y0,
        x = (tag.y + tag.y0) * sw + (lx >> 5),
        last;
    for (var j = 0; j < h; j++) {
      last = 0;
      for (var i = 0; i <= w; i++) {
        if (((last << msx) | (i < w ? (last = sprite[j * w + i]) >>> sx : 0))
            & board[x + i]) return true;
      }
      x += sw;
    }
    return false;
  }

  // Grows `bounds` ([topLeft, bottomRight]) in place so it also covers `d`.
  function cloudBounds(bounds, d) {
    var b0 = bounds[0],
        b1 = bounds[1];
    if (d.x + d.x0 < b0.x) b0.x = d.x + d.x0;
    if (d.y + d.y0 < b0.y) b0.y = d.y + d.y0;
    if (d.x + d.x1 > b1.x) b1.x = d.x + d.x1;
    if (d.y + d.y1 > b1.y) b1.y = d.y + d.y1;
  }

  // Returns true if tag `a`'s extent overlaps the rectangle `b`
  // ([topLeft, bottomRight]).
  function collideRects(a, b) {
    return a.x + a.x1 > b[0].x && a.x + a.x0 < b[1].x && a.y + a.y1 > b[0].y && a.y + a.y0 < b[1].y;
  }

  // Spiral factory: returns a function mapping step t to an [dx, dy] offset
  // on an Archimedean spiral, stretched horizontally by the aspect ratio.
  function archimedeanSpiral(size) {
    var e = size[0] / size[1];
    return function(t) {
      return [e * (t *= .1) * Math.cos(t), t * Math.sin(t)];
    };
  }

  // Spiral factory: returns a stateful function that moves one step
  // right/down/left/up per call, with the horizontal step scaled by the
  // aspect ratio, and returns the accumulated [x, y] offset.
  function rectangularSpiral(size) {
    var dy = 4,
        dx = dy * size[0] / size[1],
        x = 0,
        y = 0;
    return function(t) {
      var sign = t < 0 ? -1 : 1;
      // See triangular numbers: T_n = n * (n + 1) / 2.
      switch ((Math.sqrt(1 + 4 * sign * t) - sign) & 3) {
        case 0: x += dx; break;
        case 1: y += dy; break;
        case 2: x -= dx; break;
        default: y -= dy; break;
      }
      return [x, y];
    };
  }

  // TODO reuse arrays?
  // Returns a new array of n zeros.
  function zeroArray(n) {
    var a = [],
        i = -1;
    while (++i < n) a[i] = 0;
    return a;
  }

  // Shared scratch canvas used by cloudSprite: (cw << 5) x ch pixels.
  var cloudRadians = Math.PI / 180,
      cw = 1 << 11 >> 5,
      ch = 1 << 11,
      canvas,
      ratio = 1;

  if (typeof document !== "undefined") {
    canvas = document.createElement("canvas");
    canvas.width = 1;
    canvas.height = 1;
    // Number of backing pixels returned for a 1x1 read, square-rooted;
    // presumably this detects high-density canvases - TODO confirm.
    ratio = Math.sqrt(canvas.getContext("2d").getImageData(0, 0, 1, 1).data.length >> 2);
    canvas.width = (cw << 5) / ratio;
    canvas.height = ch / ratio;
  } else {
    // Attempt to use node-canvas.
    // NOTE(review): relies on a global `Canvas` constructor being defined.
    canvas = new Canvas(cw << 5, ch);
  }

  var c = canvas.getContext("2d"),
      spirals = {
        archimedean: archimedeanSpiral,
        rectangular: rectangularSpiral
      };
  c.fillStyle = c.strokeStyle = "red";
  c.textAlign = "center";

  // Export as a CommonJS module when available, otherwise as d3.layout.cloud.
  if (typeof module === "object" && module.exports) module.exports = cloud;
  else (d3.layout || (d3.layout = {})).cloud = cloud;
})();
      11 |

      Summa

      12 |
        13 |
      • 14 | Abuja Bomb Topic: Articles identified by IAI related to "abuja bombing" 15 |
      • 16 |
      • 17 | Boko Haram Threat: Manually selected articles identified by AFRL Analyst 18 |
      • 19 |
      • 20 | Maiduguri: Articles with "Maiduguri" from 01/2014 to 03/2014 21 |
      • 22 |
      • 23 | Boko Haram: Articles with "Boko Haram" from 02/2014 24 |
      • 25 |
      • 26 | Kidnappings: Articles with "Chibok" from 2014 27 |
      • 28 |
      • 29 | China: Summary of "China" articles from 2014 30 |
      • 31 | 32 | 33 | 34 |
      35 | 36 |

      Topic Summaries

      37 |
        38 | 39 | 40 | 41 |
      • 42 | Topic 1: million, drugs, drug, ndlea, arrests, customs, cocaine, arrested, airport, liberia 43 |
      • 44 | 45 | 46 | 47 |
      • 48 | Topic 3: bomb, blast, abuja, kano, attack, explosion, suicide, killed, bombing, dead 49 |
      • 50 |
      • 51 | Topic 4: haram, boko, jonathan, northern, leaders, insurgency, nigerians, urges, end, stop 52 |
      • 53 | 54 | 55 | 56 |
      • 57 | Topic 6: emergency, rule, state, extension, borno, yobe, adamawa, jonathan, senate, north-east 58 |
      • 59 |
      • 60 | Topic 7: boko, haram, military, insurgency, chief, terrorism, army, security, nigerian, police 61 |
      • 62 |
      • 63 | Topic 8: kill, gunmen, borno, attack, killed, insurgents, village, yobe, attacks, fresh 64 |
      • 65 |
      • 66 | Topic 9: day, jonathan, democracy, president, chibok, visit, soyinka, children's, centenary, celebration 67 |
      • 68 |
      • 69 | Topic 10: military, borno, army, maiduguri, jtf, boko, haram, attack, civilian, terrorists 70 |
      • 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 |
      • 90 | Topic 17: north, east, insurgency, insecurity, youths, jonathan, south, youth, warns, igbo 91 |
      • 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 |
      • 102 | Topic 21: security, yobe, students, schools, police, lagos, killings, protest, killing, govt 103 |
      • 104 | 105 | 106 | 107 |
      • 108 | Topic 23: chibok, girls, jonathan, schoolgirls, back, abducted, parents, abduction, bringbackourgirls, bring 109 |
      • 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 |
      191 | 192 | 193 | 194 | --------------------------------------------------------------------------------