├── models └── .gitignore ├── project ├── build.properties └── plugins.sbt ├── slides ├── images │ ├── types.png │ └── ner_pipeline.png └── finagle-essentials.asc ├── .travis.yml ├── .gitignore ├── src ├── main │ ├── thrift │ │ └── recognizer.thrift │ └── scala │ │ └── com │ │ └── twitter │ │ └── finagle │ │ └── examples │ │ └── names │ │ ├── NameServer.scala │ │ ├── NameResult.scala │ │ ├── thriftscala │ │ ├── NaiveNameRecognizerService.scala │ │ └── SafeNameRecognizerService.scala │ │ ├── ModelLoader.scala │ │ └── NameRecognizer.scala └── test │ └── scala │ └── com │ └── twitter │ └── finagle │ └── example │ └── names │ └── NameRecognizerTest.scala ├── download-models.sh ├── sbt ├── README.md └── LICENSE /models/.gitignore: -------------------------------------------------------------------------------- 1 | *.bin 2 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.9 2 | 3 | -------------------------------------------------------------------------------- /slides/images/types.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/finagle/finagle-example-name-finder/HEAD/slides/images/types.png -------------------------------------------------------------------------------- /slides/images/ner_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/finagle/finagle-example-name-finder/HEAD/slides/images/ner_pipeline.png -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: scala 3 | 4 | before_script: 5 | - ./download-models.sh 6 | - travis_retry sbt update 7 | 8 | script: sbt test 9 | 
-------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | resolvers += "Twitter's Repository" at "https://maven.twttr.com/" 2 | 3 | addSbtPlugin("com.twitter" %% "scrooge-sbt-plugin" % "4.1.0") 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | finagle-native/lib/finaglenative.jar 2 | target/ 3 | dist/ 4 | project/boot/ 5 | project/plugins/project/ 6 | project/plugins/src_managed/ 7 | *.log 8 | *.tmproj 9 | lib_managed/ 10 | *.swp 11 | *.iml 12 | out/ 13 | .ensime 14 | *~ 15 | *# 16 | .#* 17 | .ivyjars 18 | .idea 19 | .DS_Store 20 | sbt-launch-0.7.5.jar 21 | sbt-launch.jar 22 | slides/finagle-essentials.html 23 | -------------------------------------------------------------------------------- /src/main/thrift/recognizer.thrift: -------------------------------------------------------------------------------- 1 | namespace java com.twitter.finagle.examples.names.thriftjava 2 | #@namespace scala com.twitter.finagle.examples.names.thriftscala 3 | 4 | struct NameRecognizerResult { 5 | 1: list<string> persons; 6 | 2: list<string> locations; 7 | 3: list<string> organizations; 8 | } 9 | 10 | exception NameRecognizerException { 11 | 1: string description; 12 | } 13 | 14 | service NameRecognizerService { 15 | NameRecognizerResult findNames(1: string lang, 2: string document) 16 | throws(1: NameRecognizerException ex) 17 | } 18 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/finagle/examples/names/NameServer.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.finagle.examples.names 2 | 3 | import com.twitter.finagle.Thrift 4 | import com.twitter.finagle.examples.names.thriftscala._ 5 | import 
com.twitter.server.TwitterServer 6 | import com.twitter.util.Await 7 | /** Entry point that serves the name recognizer over Thrift on localhost:9090, created with the "en" models, 4 threads and 4 recognizers. */ 8 | object NamesServer extends TwitterServer { 9 | val service = SafeNameRecognizerService.create(Seq("en"), 4, 4) 10 | /** Blocks until the service has been built, binds it, registers the listener to be closed on exit, then waits on the server. */ 11 | def main(): Unit = { 12 | val server = Thrift.serveIface("localhost:9090", Await.result(service)) 13 | 14 | onExit { 15 | server.close() 16 | } 17 | 18 | Await.ready(server) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /download-models.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # Download English-language models. 4 | wget -P models/ -nd http://opennlp.sourceforge.net/models-1.5/en-ner-location.bin 5 | wget -P models/ -nd http://opennlp.sourceforge.net/models-1.5/en-ner-organization.bin 6 | wget -P models/ -nd http://opennlp.sourceforge.net/models-1.5/en-ner-person.bin 7 | wget -P models/ -nd http://opennlp.sourceforge.net/models-1.5/en-sent.bin 8 | wget -P models/ -nd http://opennlp.sourceforge.net/models-1.5/en-token.bin 9 | 10 | # Download Spanish-language models. 11 | wget -P models/ -nd http://opennlp.sourceforge.net/models-1.5/es-ner-location.bin 12 | wget -P models/ -nd http://opennlp.sourceforge.net/models-1.5/es-ner-organization.bin 13 | wget -P models/ -nd http://opennlp.sourceforge.net/models-1.5/es-ner-person.bin 14 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/finagle/examples/names/NameResult.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.finagle.examples.names 2 | 3 | /** 4 | * Represents the result of running name recognition on a text. 
5 | */ 6 | case class NameResult(persons: Seq[String], locations: Seq[String], organizations: Seq[String]) { 7 | lazy val personCounts: Map[String, Int] = countOccurrences(persons) 8 | lazy val locationCounts: Map[String, Int] = countOccurrences(locations) 9 | lazy val organizationCounts: Map[String, Int] = countOccurrences(organizations) 10 | 11 | protected def countOccurrences(names: Seq[String]): Map[String, Int] = { 12 | names.groupBy(identity) map { 13 | case (name, occurrences) => name -> occurrences.size 14 | } 15 | } 16 | 17 | /** 18 | * Combine with another set of results. 19 | */ 20 | def ++(other: NameResult): NameResult = { 21 | NameResult( 22 | persons ++ other.persons, 23 | locations ++ other.locations, 24 | organizations ++ other.organizations) 25 | } 26 | } 27 | 28 | object NameResult { 29 | val Empty = NameResult(Seq.empty, Seq.empty, Seq.empty) 30 | 31 | /** 32 | * We often want to combine partial results as we process a body of text. 33 | */ 34 | def sum(results: Seq[NameResult]) = results.foldLeft(Empty) { 35 | case (acc, result) => acc ++ result 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /sbt: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sbtver=0.13.8 4 | sbtjar=sbt-launch.jar 5 | sbtsha128=57d0f04f4b48b11ef7e764f4cea58dee4e806ffd 6 | 7 | sbtrepo=http://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch 8 | 9 | if [ ! -f $sbtjar ]; then 10 | echo "downloading $sbtjar" 1>&2 11 | if ! curl -L --silent --fail --remote-name $sbtrepo/$sbtver/$sbtjar; then 12 | exit 1 13 | fi 14 | fi 15 | 16 | checksum=`openssl dgst -sha1 $sbtjar | awk '{ print $2 }'` 17 | if [ "$checksum" != $sbtsha128 ]; then 18 | echo "bad $sbtjar. delete $sbtjar and run $0 again." 19 | exit 1 20 | fi 21 | 22 | [ -f ~/.sbtconfig ] && . 
~/.sbtconfig 23 | 24 | java -ea \ 25 | $SBT_OPTS \ 26 | $JAVA_OPTS \ 27 | -Djava.net.preferIPv4Stack=true \ 28 | -XX:+AggressiveOpts \ 29 | -XX:+UseParNewGC \ 30 | -XX:+UseConcMarkSweepGC \ 31 | -XX:+CMSParallelRemarkEnabled \ 32 | -XX:+CMSClassUnloadingEnabled \ 33 | -XX:ReservedCodeCacheSize=128m \ 34 | -XX:MaxPermSize=1024m \ 35 | -XX:SurvivorRatio=128 \ 36 | -XX:MaxTenuringThreshold=0 \ 37 | -Xss8M \ 38 | -Xms512M \ 39 | -Xmx2G \ 40 | -server \ 41 | -jar $sbtjar "$@" 42 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/finagle/example/names/NameRecognizerTest.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.finagle.examples.names 2 | 3 | import com.twitter.util.{Return, Try} 4 | import org.scalatest.{BeforeAndAfter, FunSuite} 5 | 6 | class NameRecognizerTest extends FunSuite { 7 | val recognizer: Try[NameRecognizer] = NameRecognizer.create("en") 8 | 9 | test("NameRecognizer should load successfully") { 10 | assert(recognizer.isReturn) 11 | } 12 | 13 | test("NameRecognizer.findNames should find names in a document") { 14 | val document = """ 15 | John Adams found his residence abroad rather irksome and unpleasant, and 16 | he longed to return to his happy home. But his services as a diplomatist 17 | were needed in England. 18 | """ 19 | 20 | val result = recognizer.map(_.findNames(document)) 21 | 22 | val expected = NameResult(Seq("John Adams"), Seq("England"), Seq.empty) 23 | 24 | assert(result === Return(expected)) 25 | } 26 | 27 | test("NameRecognizer.findNamesInSentence should find names in a sentence") { 28 | val sentence = """ 29 | He led the Assembly, as Henry Clay afterwards led the Senate, and Canning 30 | led the House of Commons, by that inspired logic which few could resist. 
31 | """ 32 | 33 | val result = recognizer.map(_.findNamesInSentence(sentence)) 34 | 35 | val expected = NameResult( 36 | Seq("Henry Clay", "Canning"), 37 | Seq.empty, 38 | Seq("Assembly", "Senate", "House of Commons")) 39 | 40 | assert(result === Return(expected)) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/finagle/examples/names/thriftscala/NaiveNameRecognizerService.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.finagle.examples.names.thriftscala 2 | 3 | import com.twitter.finagle.examples.names.NameRecognizer 4 | import com.twitter.util.{Future, Return, Throw, Try} 5 | import scala.collection.mutable 6 | 7 | /** 8 | * A naive service definition that implements the trait defined by Scrooge. 9 | * 10 | * There are several serious problems with this implementation! First of all, 11 | * it's not thread-safe: for each language we're using a single NameRecognizer 12 | * instance (which maintains internal state during processing). It will also 13 | * block a Finagle thread while attempting to read models for an unknown 14 | * language from disk. 
15 | */ 16 | class NaiveNameRecognizerService(recognizers: Map[String, NameRecognizer]) 17 | extends NameRecognizerService[Future] { 18 | 19 | def getRecognizer(lang: String): Future[NameRecognizer] = 20 | Future { 21 | recognizers.get(lang) 22 | } flatMap { 23 | case Some(rec) => Future.value(rec) 24 | case None => Future.const(NameRecognizer.create(lang)) 25 | } 26 | 27 | def findNames(lang: String, document: String): Future[NameRecognizerResult] = 28 | getRecognizer(lang) map { recognizer => 29 | val result = recognizer.findNames(document) 30 | 31 | new NameRecognizerResult { 32 | val persons = result.persons 33 | val locations = result.locations 34 | val organizations = result.organizations 35 | } 36 | } 37 | } 38 | 39 | object NaiveNameRecognizerService { 40 | /** 41 | * A simple constructor that synchronously creates a service with an initial 42 | * set of language models, encapsulating errors in a Try. 43 | */ 44 | def create(langs: Seq[String]): Try[NameRecognizerService[Future]] = { 45 | val recognizersByName: Seq[Try[(String, NameRecognizer)]] = langs map { lang => 46 | NameRecognizer.create(lang) map { recognizer => 47 | lang -> recognizer 48 | } 49 | } 50 | 51 | Try.collect(recognizersByName) map { recognizers => 52 | new NaiveNameRecognizerService(recognizers.toMap) 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/finagle/examples/names/thriftscala/SafeNameRecognizerService.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.finagle.examples.names.thriftscala 2 | 3 | import com.twitter.concurrent.{AsyncQueue, NamedPoolThreadFactory} 4 | import com.twitter.finagle.examples.names.NameRecognizer 5 | import com.twitter.util.{Future, FuturePool, NonFatal} 6 | import java.util.concurrent.Executors 7 | 8 | /** 9 | * A simple service implementation that implements the trait defined by Scrooge. 
10 | * 11 | * Each service owns a queue of recognizers for a set of languages. For each 12 | * request, if we have a queue for the given language, we grab a recognizer from 13 | * that queue (and return it when processing is finished). If the language is 14 | * unknown, we try to load the models for it in a future pool (in order to avoid 15 | * blocking the Finagle thread). 16 | */ 17 | class SafeNameRecognizerService( 18 | recognizers: Map[String, AsyncQueue[NameRecognizer]], 19 | futurePool: FuturePool) 20 | extends NameRecognizerService[Future] { 21 | /** Builds a recognizer for `lang` inside the future pool; the `Try` from `NameRecognizer.create` is lifted into the returned future. */ 22 | def loadRecognizer(lang: String): Future[NameRecognizer] = 23 | futurePool { 24 | Future.const(NameRecognizer.create(lang)) 25 | }.flatten 26 | /** Polls the queue registered for `lang` when there is one; otherwise falls back to `loadRecognizer`. */ 27 | def getRecognizer(lang: String): Future[NameRecognizer] = 28 | Future { 29 | recognizers.get(lang) 30 | } flatMap { 31 | case Some(queue) => queue.poll() 32 | case None => loadRecognizer(lang) 33 | } 34 | /** Finds names in `document` and copies them into a Thrift result. The recognizer is offered back to the queue for `lang` when processing finishes; a recognizer loaded on demand has no queue in `recognizers`, so it is not retained. */ 35 | def findNames(lang: String, document: String): Future[NameRecognizerResult] = 36 | getRecognizer(lang) flatMap { recognizer => 37 | Future { 38 | val result = recognizer.findNames(document) 39 | 40 | new NameRecognizerResult { 41 | val persons = result.persons 42 | val locations = result.locations 43 | val organizations = result.organizations 44 | } 45 | } ensure { 46 | recognizers.get(lang) foreach { queue => 47 | queue.offer(recognizer) 48 | } 49 | } 50 | } 51 | } 52 | 53 | object SafeNameRecognizerService { 54 | val futurePoolName = "NameRecognizerServiceFuturePool" 55 | 56 | /** 57 | * An asynchronous constructor that creates a `NameRecognizerService` with a 58 | * future pool backed by an `ExecutorService` for blocking operations and with 59 | * pools of recognizers for a given set of languages. 
60 | */ 61 | def create( 62 | langs: Seq[String], 63 | numThreads: Int, 64 | numRecognizers: Int): Future[NameRecognizerService[Future]] = { 65 | 66 | Future.collect { 67 | langs map { lang => 68 | Future.const { 69 | NameRecognizer.create(lang, numRecognizers) map { recognizers => 70 | val queue = new AsyncQueue[NameRecognizer] 71 | 72 | recognizers foreach { recognizer => queue.offer(recognizer) } 73 | 74 | lang -> queue 75 | } 76 | } 77 | } 78 | } map { recognizers => 79 | val futurePool = FuturePool( 80 | Executors.newFixedThreadPool( 81 | numThreads, 82 | new NamedPoolThreadFactory(futurePoolName, makeDaemons = true) 83 | ) 84 | ) 85 | 86 | new SafeNameRecognizerService(recognizers.toMap, futurePool) 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/finagle/examples/names/ModelLoader.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.finagle.examples.names 2 | 3 | import com.twitter.logging.Logger 4 | import com.twitter.util.{Return, Throw, Try} 5 | import java.io.{File, FileInputStream, FileNotFoundException, InputStream} 6 | import opennlp.tools.namefind.{NameFinderME, TokenNameFinder, TokenNameFinderModel} 7 | import opennlp.tools.sentdetect.{SentenceDetector, SentenceDetectorME, SentenceModel} 8 | import opennlp.tools.tokenize.{Tokenizer, TokenizerME, TokenizerModel} 9 | 10 | /** 11 | * A helper class that allows us to keep most of the gritty details about how 12 | * our models are deserialized out of the definition of 13 | * [[com.twitter.finagle.examples.names.NameRecognizer]]. 14 | * 15 | * Note that for some languages we may not have model files for sentence 16 | * boundary detection or tokenization; in these cases we fall back to the 17 | * English-language models. 
18 | */ 19 | class ModelLoader(baseDir: File) { 20 | private val log = Logger.get(getClass) 21 | 22 | /** 23 | * A utility method that uses a provided function to do something with an 24 | * input stream that it is guaranteed to close. 25 | */ 26 | private[this] def loadModel[M](file: File)(f: InputStream => M): Try[M] = 27 | Try { 28 | new FileInputStream(file) 29 | } flatMap { stream => 30 | Try { 31 | f(stream) 32 | } ensure { 33 | stream.close() 34 | } 35 | } 36 | 37 | protected def loadSentenceDetectorModel(file: File): Try[SentenceModel] = 38 | loadModel[SentenceModel](file) { stream => new SentenceModel(stream) } 39 | 40 | protected def loadTokenizerModel(file: File): Try[TokenizerModel] = 41 | loadModel[TokenizerModel](file) { stream => new TokenizerModel(stream) } 42 | 43 | protected def loadNameFinderModel(file: File): Try[TokenNameFinderModel] = 44 | loadModel[TokenNameFinderModel](file) { stream => new TokenNameFinderModel(stream) } 45 | 46 | protected def createSentenceDetector(model: SentenceModel): SentenceDetector = 47 | new SentenceDetectorME(model) 48 | 49 | protected def createTokenizer(model: TokenizerModel): Tokenizer = 50 | new TokenizerME(model) 51 | 52 | protected def createNameFinder(model: TokenNameFinderModel): TokenNameFinder = 53 | new NameFinderME(model) 54 | 55 | protected def defaultSentenceDetectorModel(lang: String): File = { 56 | val langModel = new File(baseDir, s"$lang-sent.bin") 57 | 58 | if (!langModel.exists || !langModel.isFile) { 59 | log.info(s"$langModel does not exist for language $lang; using English model.") 60 | new File(baseDir, "en-sent.bin") 61 | } else { 62 | langModel 63 | } 64 | } 65 | 66 | protected def defaultTokenizerModel(lang: String): File = { 67 | val langModel = new File(baseDir, s"$lang-token.bin") 68 | 69 | if (!langModel.exists || !langModel.isFile) { 70 | log.info(s"$langModel does not exist for language $lang; using English model.") 71 | new File(baseDir, "en-token.bin") 72 | } else { 73 | langModel 
74 | } 75 | } 76 | 77 | protected def defaultPersonalNameModel(lang: String): File = { 78 | new File(baseDir, s"$lang-ner-person.bin") 79 | } 80 | 81 | protected def defaultLocationNameModel(lang: String): File = { 82 | new File(baseDir, s"$lang-ner-location.bin") 83 | } 84 | 85 | protected def defaultOrganizationNameModel(lang: String): File = { 86 | new File(baseDir, s"$lang-ner-organization.bin") 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/finagle/examples/names/NameRecognizer.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.finagle.examples.names 2 | 3 | import com.twitter.finagle.examples.names.thriftscala.NameRecognizerException 4 | import com.twitter.util.{Throw, Try} 5 | import java.io.File 6 | import opennlp.tools.namefind.TokenNameFinder 7 | import opennlp.tools.sentdetect.SentenceDetector 8 | import opennlp.tools.tokenize.{Tokenizer, TokenizerME} 9 | import opennlp.tools.util.Span 10 | 11 | /** 12 | * Processes text to extract names of people, places, and organizations. Note 13 | * that this class and its underlying OpenNLP processing tools are not 14 | * thread-safe. 15 | */ 16 | class NameRecognizer( 17 | val lang: String, 18 | sentenceDetector: SentenceDetector, 19 | tokenizer: Tokenizer, 20 | personalNameFinder: TokenNameFinder, 21 | locationNameFinder: TokenNameFinder, 22 | organizationNameFinder: TokenNameFinder) { 23 | 24 | /** 25 | * The default interface to the recognizer; finds names in a document and then 26 | * clears adaptive data that was gathered during the processing. 
27 | */ 28 | def findNames(document: String): NameResult = { 29 | val sentences = sentenceDetector.sentDetect(document) 30 | val tokenized = sentences map { sentence => tokenizer.tokenize(sentence) } 31 | val results = tokenized map { tokens => findNamesInTokens(tokens) } 32 | val result = NameResult.sum(results) 33 | 34 | clearAfterDocument() 35 | 36 | result 37 | } 38 | 39 | /** 40 | * In some cases the user may wish to process a single sentence out of 41 | * context and clear adaptive data immediately. 42 | */ 43 | def findNamesInSentence(sentence: String): NameResult = { 44 | val tokenized = tokenizer.tokenize(sentence) 45 | val result = findNamesInTokens(tokenized) 46 | 47 | clearAfterDocument() 48 | 49 | result 50 | } 51 | 52 | protected def clearAfterDocument(): Unit = { 53 | personalNameFinder.clearAdaptiveData() 54 | locationNameFinder.clearAdaptiveData() 55 | organizationNameFinder.clearAdaptiveData() 56 | } 57 | 58 | protected def findNamesInTokens(tokens: Array[String]): NameResult = { 59 | val personalNames = identifyNames(personalNameFinder, tokens) 60 | val locationNames = identifyNames(locationNameFinder, tokens) 61 | val organizationNames = identifyNames(organizationNameFinder, tokens) 62 | 63 | NameResult(personalNames, locationNames, organizationNames) 64 | } 65 | 66 | protected def identifyNames(finder: TokenNameFinder, tokens: Array[String]): Seq[String] = { 67 | Span.spansToStrings(finder.find(tokens), tokens) 68 | } 69 | } 70 | 71 | object NameRecognizer extends ModelLoader(new File("models")) { 72 | /** 73 | * Creates a specified number of identical recognizers given a language 74 | * identifier and paths to the OpenNLP models. 
75 | */ 76 | def create( 77 | lang: String, 78 | count: Int, 79 | sentenceDetectorFile: File, 80 | tokenizerFile: File, 81 | personalNameFile: File, 82 | locationNameFile: File, 83 | organizationNameFile: File): Try[Seq[NameRecognizer]] = { 84 | 85 | for { 86 | sentenceDetectorModel <- loadSentenceDetectorModel(sentenceDetectorFile) 87 | tokenizerModel <- loadTokenizerModel(tokenizerFile) 88 | personalNameFinderModel <- loadNameFinderModel(personalNameFile) 89 | locationNameFinderModel <- loadNameFinderModel(locationNameFile) 90 | organizationNameFinderModel <- loadNameFinderModel(organizationNameFile) 91 | } yield { 92 | Seq.fill(count) { 93 | new NameRecognizer( 94 | lang, 95 | createSentenceDetector(sentenceDetectorModel), 96 | createTokenizer(tokenizerModel), 97 | createNameFinder(personalNameFinderModel), 98 | createNameFinder(locationNameFinderModel), 99 | createNameFinder(organizationNameFinderModel)) 100 | } 101 | } 102 | } rescue { 103 | case ex: Throwable => Throw( 104 | NameRecognizerException(s"Unable to load models for language $lang") 105 | ) 106 | } 107 | 108 | /** 109 | * Creates a specified number of identical recognizers given a language 110 | * identifier (using the default paths to the OpenNLP models). 111 | */ 112 | def create(lang: String, count: Int): Try[Seq[NameRecognizer]] = 113 | create( 114 | lang, 115 | count, 116 | defaultSentenceDetectorModel(lang), 117 | defaultTokenizerModel(lang), 118 | defaultPersonalNameModel(lang), 119 | defaultLocationNameModel(lang), 120 | defaultOrganizationNameModel(lang)) 121 | 122 | /** 123 | * Creates a recognizer given a language identifier (using the default paths 124 | * to the OpenNLP models). 
125 | */ 126 | def create(lang: String): Try[NameRecognizer] = 127 | create(lang, 1) map { recognizers => recognizers.head } 128 | } 129 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Finagle name finder 2 | 3 | [![Build status](https://img.shields.io/travis/finagle/finagle-example-name-finder/master.svg)](http://travis-ci.org/finagle/finagle-example-name-finder) 4 | 5 | This project is a demonstration of how you can use Finagle to build a service 6 | that will identify names of people, places, and organizations in any text you 7 | throw at it. It's primarily intended as a pedagogical example, and it's used in 8 | a "Finagle Essentials" course taught by 9 | [Twitter University](https://twitter.com/university) 10 | (you can view the slides for the course 11 | [here](https://finagle.github.io/finagle-example-name-finder), and the source 12 | for the slides is included 13 | [in this repository](https://github.com/finagle/finagle-example-name-finder/tree/master/slides)). 14 | 15 | The project includes a small Scala wrapper for the named entity recognition API 16 | provided by [OpenNLP](https://opennlp.apache.org/) (a Java library for natural 17 | language processing), together with a couple of implementations (one good and 18 | one bad) of Finagle [Thrift](https://thrift.apache.org/) services that expose 19 | the functionality provided by the wrapper. 20 | 21 | The following quick start guide describes how to get started with the project in 22 | an SBT console, and the API documentation is available on 23 | [this repository's GitHub Pages site](https://finagle.github.io/finagle-example-name-finder/docs). 
24 | Please get in touch via [@finagle](https://twitter.com/finagle) 25 | or the [Finaglers mailing list](https://groups.google.com/d/forum/finaglers) 26 | if you have any questions about the code here, and we're always happy to see 27 | pull requests with additional examples or other improvements! 28 | 29 | Quick start 30 | ----------- 31 | 32 | You'll need to download the OpenNLP model files before you can run the project 33 | tests or examples: 34 | 35 | ``` 36 | sh ./download-models.sh 37 | ``` 38 | 39 | Now when you run `./sbt console` from the project root, [Scrooge][1] will 40 | generate our Thrift service and client traits, and then it'll 41 | compile them along with the rest of our code and start a Scala console. Paste 42 | the following lines to start a server running locally on port 9090: 43 | 44 | ``` scala 45 | import com.twitter.finagle.Thrift 46 | import com.twitter.finagle.examples.names.thriftscala._ 47 | 48 | val server = SafeNameRecognizerService.create(Seq("en"), 4, 4) map { service => 49 | Thrift.serveIface("localhost:9090", service) 50 | } onSuccess { _ => 51 | println("Server started successfully") 52 | } onFailure { ex => 53 | println("Could not start the server: " + ex) 54 | } 55 | ``` 56 | 57 | Now you can create a client to speak to the server: 58 | 59 | ``` scala 60 | import com.twitter.finagle.Thrift 61 | import com.twitter.finagle.examples.names.thriftscala._ 62 | 63 | val client = 64 | Thrift.newIface[NameRecognizerService.FutureIface]("localhost:9090") 65 | 66 | val doc = """ 67 | An anomaly which often struck me in the character of my friend Sherlock Holmes 68 | was that, although in his methods of thought he was the neatest and most 69 | methodical of mankind, and although also he affected a certain quiet primness of 70 | dress, he was none the less in his personal habits one of the most untidy men 71 | that ever drove a fellow-lodger to distraction. Not that I am in the least 72 | conventional in that respect myself. 
The rough-and-tumble work in Afghanistan, 73 | coming on the top of a natural Bohemianism of disposition, has made me rather 74 | more lax than befits a medical man. 75 | """ 76 | 77 | client.findNames("en", doc) onSuccess { response => 78 | println("People: " + response.persons.mkString(", ")) 79 | println("Places: " + response.locations.mkString(", ")) 80 | } onFailure { ex => 81 | println("Something bad happened: " + ex.getMessage) 82 | } 83 | ``` 84 | 85 | This will print the following: 86 | 87 | ``` 88 | People: Sherlock Holmes 89 | Places: Afghanistan 90 | ``` 91 | 92 | As we'd expect. We can also attempt to find names in a Spanish document, since 93 | while we didn't preload the Spanish models when we created our service, we did 94 | download them, so the service will be able to load them if asked: 95 | 96 | ``` scala 97 | val esDoc = """ 98 | Alrededor de 1902 fue el primero en aplicar una descarga eléctrica en un tubo 99 | sellado y con gas neón con la idea de crear una lámpara. Inspirado en parte por 100 | la invención de Daniel McFarlan Moore, la lámpara de Moore, Claude inventó la 101 | lámpara de neón mediante la descarga eléctrica de un gas inerte comprobando que 102 | el brillo era considerable. 103 | """ 104 | 105 | client.findNames("es", esDoc) onSuccess { response => 106 | println("People: " + response.persons.mkString(", ")) 107 | println("Places: " + response.locations.mkString(", ")) 108 | } onFailure { ex => 109 | println("Something bad happened: " + ex.getMessage) 110 | } 111 | ``` 112 | 113 | [1]: https://twitter.github.io/scrooge/ 114 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /slides/finagle-essentials.asc: -------------------------------------------------------------------------------- 1 | = Finagle Essentials 2 | 3 | link:https://twitter.com/travisbrown[@travisbrown] 4 | 5 | 6 | == Introduction 7 | 8 | * What we're doing 9 | * How we'll do it 10 | * Getting set up 11 | 12 | === Goals 13 | 14 | * Learn to think in terms of Finagle's core abstractions 15 | * Develop a deeper understanding of link:https://twitter.github.io/util/docs/#com.twitter.util.Try[+Try+] and link:https://twitter.github.io/util/docs/#com.twitter.util.Future[+Future+] 16 | * Learn how to write a Thrift IDL and generate bindings with link:http://twitter.github.io/scrooge/[Scrooge] 17 | * Understand the challenges of working with blocking code in the context of Finagle 18 | 19 | === Approach 20 | 21 | * This is a hands-on lab! 
22 | * Some slides, lots of code, some whiteboard 23 | * Follow along in the Scala REPL 24 | * Interrupt with questions at any time 25 | 26 | === Side note 27 | 28 | * This slide deck is built with link:https://github.com/twitter/cdk[CDK] 29 | * Press +t+ for a table of contents or +h+ for more options 30 | * Visit link:https://github.com/finagle/finagle-example-name-finder/blob/master/slides/finagle-essentials.asc[the GitHub repository] to view or edit the source 31 | 32 | === Getting started 33 | 34 | What you need 35 | 36 | * Git 37 | * JDK (7+) 38 | 39 | Cloning the project 40 | 41 | [source,bash] 42 | ---- 43 | git clone https://github.com/finagle/finagle-example-name-finder.git 44 | ---- 45 | 46 | Downloading the models (more about this soon) 47 | 48 | [source,bash] 49 | ---- 50 | cd finagle-example-name-finder 51 | sh download-models.sh 52 | ---- 53 | 54 | === Useful links 55 | 56 | * link:https://github.com/finagle/finagle-example-name-finder[This project] 57 | * link:https://twitter.github.io/util/docs/index.html#package[Twitter Util API docs] 58 | * link:https://twitter.github.io/finagle/docs/index.html#package[Finagle API docs] 59 | * link:https://twitter.github.io/finagle/guide/Quickstart.html[Finagle quickstart] 60 | 61 | === Build system 62 | 63 | Note: we'll use link:http://www.scala-sbt.org/[SBT] today 64 | 65 | * This is mostly irrelevant 66 | * This isn't a course about build tools 67 | * You could also use link:http://pantsbuild.github.io/[Pants] 68 | 69 | === What's a REPL? 70 | 71 | * REPL == "Read Eval Print Loop" 72 | * SBT runner is included here, and you can start it with +./sbt+ 73 | * From the SBT console, +console+ will open a REPL 74 | * That's (almost) all you need to know about SBT 75 | 76 | === What should it look like? 77 | 78 | [source,bash] 79 | ---- 80 | travis@sidmouth finagle-example-name-finder(master)$ ./sbt 81 | [info] ... 82 | > console 83 | [info] Starting scala interpreter... 
84 | [info] 85 | Welcome to Scala version 2.10.4 (Java HotSpot(TM) 64-Bit Server VM, Java 1.7.0_67). 86 | Type in expressions to have them evaluated. 87 | Type :help for more information. 88 | 89 | scala> import com.twitter.util.Try 90 | import com.twitter.util.Try 91 | ---- 92 | 93 | == Named-entity recognition 94 | 95 | * What is it? 96 | * What is link:https://opennlp.apache.org/[OpenNLP]? 97 | 98 | === Our example project 99 | 100 | * Start with a Java library that finds names in text 101 | * Wrap it in an idiomatic Scala API 102 | * Expose its functionality as a Finagle service 103 | 104 | === What is named-entity recognition? 105 | 106 | Kinds of "names" commonly recognized 107 | 108 | * [red]#People# 109 | * [blue]#Places# 110 | * [fuchsia]#Organizations# 111 | * [green]#Monetary values# 112 | 113 | === Example input 114 | 115 | On account of the bequest of the late Ezekiah Hopkins, of Lebanon, Pennsylvania, U. S. A., there is now another vacancy open which entitles a member of the League to a salary of £4 a week for purely nominal services. 116 | 117 | === Example output 118 | 119 | On account of the bequest of the late [red]#Ezekiah Hopkins#, of [blue]#Lebanon, Pennsylvania, U. S. A.#, there is now another vacancy open which entitles a member of the [fuchsia]#League# to a salary of [green]#£4# a week for purely nominal services. 120 | 121 | === Pipeline 122 | 123 | image::images/ner_pipeline.png[Named entity recognition pipeline] 124 | 125 | === OpenNLP 126 | 127 | * Java link:https://opennlp.apache.org/[Natural language processing library] providing NER 128 | * Not as accurate out of the box as e.g. link:http://nlp.stanford.edu/software/index.shtml[Stanford NLP], but... 
129 | * Apache License, version 2.0 130 | * Easy to train new models for specific domains 131 | 132 | === Example usage 133 | 134 | [source,scala] 135 | ---- 136 | val sentDetector = new SentenceDetectorME(new SentenceModel(sdStream)) 137 | val tokenizer = new TokenizerME(new TokenizerModel(tokStream)) 138 | val finder = new NameFinderME(new TokenNameFinderModel(nfStream)) 139 | 140 | val sentences = sentDetector.sentDetect(document) 141 | val tokenized = sentences map { s => tokenizer.tokenize(s) } 142 | val nameSpans: Seq[String] = tokenized map { tokens => 143 | Span.spansToStrings(finder.find(tokens), tokens) 144 | } 145 | 146 | finder.clearAdaptiveData() 147 | ---- 148 | 149 | === Limitations of the API in this context 150 | 151 | * Lots of methods throw exceptions 152 | * Processing is synchronous 153 | * Not thread-safe 154 | 155 | [source,scala] 156 | ---- 157 | java.lang.IllegalArgumentException: The span [268..276) is outside 158 | the given text which has length 155! 159 | ---- 160 | 161 | === Goal for our example project 162 | 163 | * Handle errors gracefully 164 | * Scale to take advantage of multiple processors 165 | * Scale to take advantage of multiple machines 166 | 167 | == Writing a Scala wrapper 168 | 169 | * Modeling the possibility of failure with types 170 | 171 | === Handling errors with exceptions 172 | 173 | [source,scala] 174 | ---- 175 | def parseAndIncrement(input: String): Int = input.toInt + 1 176 | ---- 177 | 178 | === Modeling failure as a value 179 | 180 | [source,scala] 181 | ---- 182 | def parseAndIncrement(input: String): Try[Int] = 183 | Try { input.toInt } map { i => i + 1 } 184 | ---- 185 | 186 | === Chaining computations that may fail 187 | 188 | [source,scala] 189 | ---- 190 | def safeDivide(n: Int, d: Int): Try[Int] = Try { n / d } 191 | 192 | val good = for { 193 | n <- parseAndIncrement("5") 194 | d <- parseAndIncrement("1") 195 | result <- safeDivide(n, d) 196 | } yield result 197 | 198 | val bad1 = for { 199 | n <- 
parseAndIncrement("5") 200 | d <- parseAndIncrement("-1") 201 | result <- safeDivide(n, d) 202 | } yield result 203 | 204 | val bad2 = for { 205 | n <- parseAndIncrement("v") 206 | d <- parseAndIncrement("1") 207 | result <- safeDivide(n, d) 208 | } yield result 209 | ---- 210 | 211 | === Desugaring 212 | 213 | [source,scala] 214 | ---- 215 | val good = for { 216 | n <- parseAndIncrement("5") 217 | d <- parseAndIncrement("1") 218 | result <- safeDivide(n, d) 219 | } yield result 220 | 221 | val sugarFreeGood = parseAndIncrement("5").flatMap { n => 222 | parseAndIncrement("1").flatMap { d => 223 | safeDivide(n, d) 224 | } 225 | } 226 | ---- 227 | 228 | === Other methods 229 | 230 | [source,scala] 231 | ---- 232 | val tries = Seq("1", "2", "3").map(parseAndIncrement) 233 | 234 | Try.collect(tries) 235 | 236 | bad2.getOrElse(0) 237 | 238 | bad2.rescue { 239 | case t: NumberFormatException => com.twitter.util.Return(0) 240 | } 241 | ---- 242 | 243 | === Relationship to +Option+ and +Either+ 244 | 245 | * +Option+: container of one or zero elements 246 | * +Either+: one of two types of things 247 | * +Try+: container of one element or an exception 248 | 249 | === +Try+ in the Scala standard library 250 | 251 | * Semantically (almost) identical 252 | * Some of the names are different 253 | 254 | === Testing with ScalaTest 255 | 256 | * Using +FunSuite+, +assert+, and +===+ 257 | 258 | == Introduction to Finagle 259 | 260 | * Futures 261 | * Services 262 | * Servers 263 | * Clients 264 | * Filters 265 | 266 | === Futures 267 | 268 | Like +Try+, but with an extra state 269 | 270 | * Not yet completed 271 | * Failed 272 | * Successfully completed (or "satisfied") 273 | 274 | === Futures, try, etc. 275 | 276 | image::images/types.png[Types] 277 | 278 | === Future combinators 279 | 280 | Like +Try+, can be combined using +map+, +flatMap+, +handle+, +rescue+, etc. 
281 | 282 | Also allows registration of callbacks: 283 | 284 | * +onSuccess(f: A => Unit)+ 285 | * +onFailure(ex: Throwable => Unit)+ 286 | 287 | More about when and where futures run later 288 | 289 | === Services 290 | 291 | A service is a function 292 | 293 | [source,scala] 294 | ---- 295 | class Service[-Req, +Rep] extends (Req => Future[Rep]) 296 | ---- 297 | 298 | * +Try+ models failure as a value 299 | * +Future+ models both failure and delay as a value 300 | 301 | === What services aren't 302 | 303 | * The service API doesn't know anything about the network 304 | 305 | [source,scala] 306 | ---- 307 | import com.twitter.finagle.Service 308 | import com.twitter.util.Future 309 | 310 | val parserService = new Service[String, Int] { 311 | def apply(request: String) = Future(request.toInt) 312 | } 313 | ---- 314 | 315 | === Servers 316 | 317 | Servers make services available on the network over a protocol 318 | 319 | [source,scala] 320 | ---- 321 | import com.twitter.finagle.Http 322 | 323 | val myHttpService: Service[HttpRequest, HttpResponse] = ???
324 | 325 | val server = Http.serve(":8080", myHttpService) 326 | ---- 327 | 328 | * +Http+ is a +Server[HttpRequest, HttpResponse]+ 329 | * +server+ is a +ListeningServer+ 330 | 331 | === Clients 332 | 333 | The term "client" is overloaded 334 | 335 | * The link:https://twitter.github.io/finagle/docs/#com.twitter.finagle.Client[+Client+] interface creates "clients" (in the second sense) for a specific protocol 336 | * A "materialized client" is a link:https://twitter.github.io/finagle/docs/#com.twitter.finagle.ServiceFactory[+ServiceFactory+] 337 | * In some cases the +Services+ created by a +ServiceFactory+ are called "clients" 338 | * Instances of Scrooge's +ThriftService+ are often called "clients" 339 | 340 | === Example: HTTP client 341 | 342 | A client in our third sense 343 | 344 | [source,scala] 345 | ---- 346 | val client: Service[HttpRequest, HttpResponse] = 347 | Http.newService("www.google.com:80") 348 | ---- 349 | 350 | === Filters 351 | 352 | Filters have a complicated-looking type: 353 | 354 | [source,scala] 355 | ---- 356 | class Filter[-ReqIn, +RepOut, +ReqOut, -RepIn] 357 | extends (ReqIn, Service[ReqOut, RepIn]) => Future[RepOut] 358 | ---- 359 | 360 | Filters are actually relatively simple: they're just service transformers 361 | 362 | === Timeout filter 363 | 364 | link:https://twitter.github.io/finagle/docs/#com.twitter.finagle.service.TimeoutFilter[+TimeoutFilter+] is an example of a link:https://twitter.github.io/finagle/docs/com/twitter/finagle/SimpleFilter.html[+SimpleFilter+] (doesn't change types) 365 | 366 | [source,scala] 367 | ---- 368 | import com.twitter.conversions.time._ 369 | import com.twitter.finagle.util.DefaultTimer 370 | import com.twitter.finagle.service.TimeoutFilter 371 | 372 | val myTimeoutFilter = 373 | new TimeoutFilter[String, Int](1.second, DefaultTimer.twitter) 374 | ---- 375 | 376 | === Using a timeout filter 377 | 378 | [source,scala] 379 | ---- 380 | import com.twitter.util.FuturePool 381 | 382 | val 
slowParserService = new Service[String, Int] { 383 | def apply(request: String) = FuturePool.unboundedPool { 384 | Thread.sleep(5000); request.toInt 385 | } 386 | } 387 | 388 | val myService = myTimeoutFilter andThen slowParserService 389 | ---- 390 | 391 | === Protocols 392 | 393 | Finagle is designed to make it possible to define many components in a protocol-agnostic fashion 394 | 395 | * We'll be building servers and clients that speak the Thrift protocol today 396 | * The Finagle link:http://twitter.github.io/finagle/guide/Quickstart.html[Quickstart] gives an HTTP example 397 | * Other supported protocols include Redis, Protobuf, MySQL, SMTP, ZooKeeper, etc. 398 | * See link:https://github.com/finagle/finagle-serial[finagle-serial] for an example with Mux as the session-layer protocol 399 | * See link:https://github.com/finagle/finagle-smtp[finagle-smtp] for an example of a custom protocol 400 | 401 | == Thrift and Scrooge 402 | 403 | * The link:https://thrift.apache.org/docs/idl[Thrift interface description language] allows us to define data types and service interfaces 404 | * Bindings for specific languages are created using code generation tools 405 | * We'll be using Twitter's link:http://twitter.github.io/scrooge/[Scrooge] via an SBT plugin 406 | 407 | === Where's the code? 408 | 409 | * Generated automatically when we compile with +sbt compile+ 410 | * Lives in +target/scala-2.10/src_managed/main+ 411 | 412 | === Implementing the Scrooge interfaces 413 | 414 | We need to define a method implementation for every function in our service 415 | 416 | == More about futures 417 | 418 | * When and where do they run? 419 | * How can we control that? 420 | * How are they different from the futures in the standard library? 421 | 422 | === Pop quiz 423 | 424 | When do these return?
425 | 426 | [source,scala] 427 | ---- 428 | import com.twitter.util.Future 429 | 430 | val f1 = Future { Thread.sleep(5000) } 431 | val f2 = Future { 0 }.map { _ => Thread.sleep(5000) } 432 | val f3 = Future.value(Thread.sleep(5000)) 433 | val f4 = for { 434 | a <- Future { 1 } 435 | b <- Future { Thread.sleep(5000); 2 } 436 | } yield a + b 437 | ---- 438 | 439 | === Extra credit 440 | 441 | When do these return? 442 | 443 | [source,scala] 444 | ---- 445 | import scala.concurrent.Future 446 | import scala.concurrent.ExecutionContext.Implicits.global 447 | 448 | val f1 = Future { Thread.sleep(5000) } 449 | val f2 = Future { 0 }.map { _ => Thread.sleep(5000) } 450 | val f3 = Future.successful(Thread.sleep(5000)) 451 | val f4 = for { 452 | a <- Future { 1 } 453 | b <- Future { Thread.sleep(5000); 2 } 454 | } yield a + b 455 | ---- 456 | 457 | === From the quickstart 458 | 459 | [source,scala] 460 | ---- 461 | val service = new Service[HttpRequest, HttpResponse] { 462 | def apply(req: HttpRequest): Future[HttpResponse] = 463 | Future.value(new DefaultHttpResponse( 464 | req.getProtocolVersion, HttpResponseStatus.OK)) 465 | } 466 | ---- 467 | 468 | We need to be careful with I/O: the argument to +Future.value+ is evaluated synchronously on the calling Finagle thread, so any blocking work inside it blocks that thread 469 | 470 | 471 | === Rule of thumb (courtesy of Moses Nakamura) 472 | 473 | * Never await the result of another Finagle request 474 | * 90% blocking, 10% busy: put it on another thread 475 | * 10% blocking, 90% busy: put it on another thread 476 | * 0% blocking, 100% busy, very uneven workload: another thread 477 | * 0% blocking, 100% busy, even workload: probably okay 478 | 479 | == Future pools 480 | 481 | [source,scala] 482 | ---- 483 | import com.twitter.util.FuturePool 484 | 485 | val pool = FuturePool.unboundedPool 486 | 487 | val f1 = pool { Thread.sleep(5000); 0 } 488 | val f2 = pool { 0 }.flatMap { i => pool { expensiveOp(i) }} 489 | ---- 490 | 491 | === Sequencing computations === 492 | 493 | [source,scala] 494 | ---- 495 | for { 496 | foo <-
getFoo() 497 | bar <- getBar(foo) 498 | baz <- getBaz(foo) // Don't do this! 499 | } yield (bar, baz) 500 | ---- 501 | 502 | What's wrong here? 503 | 504 | === One solution === 505 | 506 | [source,scala] 507 | ---- 508 | for { 509 | foo <- getFoo() 510 | bar = getBar(foo) 511 | baz = getBaz(foo) 512 | barValue <- bar 513 | bazValue <- baz 514 | } yield (barValue, bazValue) 515 | ---- 516 | 517 | === A better solution === 518 | 519 | [source,scala] 520 | ---- 521 | for { 522 | foo <- getFoo() 523 | pair <- getBar(foo).join(getBaz(foo)) 524 | } yield pair 525 | ---- 526 | 527 | "Applicative" sequencing 528 | 529 | === More applicative sequencing === 530 | 531 | [source,scala] 532 | ---- 533 | Future.collect(futures: Seq[Future[Int]]) 534 | ---- 535 | 536 | == Putting it all together 537 | 538 | * A bad solution 539 | * A better solution 540 | 541 | === A bad solution 542 | 543 | What's wrong with +NaiveNameRecognizerService+? 544 | 545 | (Hint: there's more than one thing) 546 | 547 | === A better solution 548 | 549 | * Use a future pool to keep Finagle threads free when we're doing I/O 550 | * Use resource pools to control access to our non-thread-safe objects 551 | 552 | == twitter-server 553 | 554 | link:https://twitter.github.io/twitter-server/index.html[twitter-server]: an alternative +App+ 555 | 556 | * Flags for configuration 557 | * Logging and metrics 558 | * Admin HTTP interface 559 | * Lifecycle management endpoints for e.g. Mesos's job manager 560 | 561 | === Using twitter-server 562 | 563 | [source,bash] 564 | ---- 565 | ./sbt run 566 | ---- 567 | 568 | Then visit the link:http://localhost:9990/admin[admin page], 569 | link:http://localhost:9990/admin/metrics.json?pretty=true[metrics], etc. on port 9990 570 | 571 | --------------------------------------------------------------------------------