├── doc ├── schema.png ├── flockdb-layout.png ├── find-and-delete.png └── blog.md ├── project ├── release.properties ├── build.properties ├── plugins │ └── Plugins.scala └── build │ └── FlockDBProject.scala ├── .gitignore ├── .ensime ├── src ├── main │ ├── scala │ │ └── com │ │ │ └── twitter │ │ │ └── flockdb │ │ │ ├── JobSchedulable.scala │ │ │ ├── conversions │ │ │ └── Numeric.scala │ │ │ ├── Page.scala │ │ │ ├── EdgeQuery.scala │ │ │ ├── SelectQuery.scala │ │ │ ├── Priority.scala │ │ │ ├── QueryTerm.scala │ │ │ ├── operations │ │ │ ├── ExecuteOperation.scala │ │ │ ├── ExecuteOperations.scala │ │ │ ├── ExecuteOperationType.scala │ │ │ ├── SelectOperation.scala │ │ │ └── SelectOperationType.scala │ │ │ ├── Cursor.scala │ │ │ ├── Main.scala │ │ │ ├── Direction.scala │ │ │ ├── State.scala │ │ │ ├── UuidGenerator.scala │ │ │ ├── ForwardingManager.scala │ │ │ ├── config │ │ │ └── FlockDB.scala │ │ │ ├── queries │ │ │ ├── SimpleQuery.scala │ │ │ ├── UnionQuery.scala │ │ │ ├── WhereInQuery.scala │ │ │ ├── Query.scala │ │ │ ├── DifferenceQuery.scala │ │ │ ├── IntersectionQuery.scala │ │ │ ├── ExecuteCompiler.scala │ │ │ └── SelectCompiler.scala │ │ │ ├── Metadata.scala │ │ │ ├── Edge.scala │ │ │ ├── shards │ │ │ ├── Shard.scala │ │ │ ├── ReadWriteShardAdapter.scala │ │ │ └── Optimism.scala │ │ │ ├── StatsCollectingQuery.scala │ │ │ ├── jobs │ │ │ ├── Legacy.scala │ │ │ ├── multi │ │ │ │ └── Multi.scala │ │ │ ├── single │ │ │ │ └── Single.scala │ │ │ └── Copy.scala │ │ │ ├── Select.scala │ │ │ ├── ResultWindow.scala │ │ │ └── EdgesService.scala │ └── thrift │ │ └── Flockdb.thrift ├── test │ └── scala │ │ └── com │ │ └── twitter │ │ └── flockdb │ │ ├── ConfigValidationSpec.scala │ │ ├── unit │ │ ├── EdgeSpec.scala │ │ ├── UnionQuerySpec.scala │ │ ├── SeqQuery.scala │ │ ├── IntersectionQuerySpec.scala │ │ ├── DifferenceQuerySpec.scala │ │ ├── WhereInQuerySpec.scala │ │ ├── SimpleQuerySpec.scala │ │ ├── EdgesSpec.scala │ │ ├── LegacyJobParserSpec.scala │ │ ├── JobSpec.scala 
│ │ └── SelectCompilerSpec.scala │ │ ├── integration │ │ ├── FlockFixRegressionSpec.scala │ │ ├── IntersectionSpec.scala │ │ ├── SelectCompilerSpec.scala │ │ ├── OptimisticLockRegressionSpec.scala │ │ ├── BlackHoleLockingRegressionSpec.scala │ │ └── CopySpec.scala │ │ └── ConfiguredSpecification.scala └── scripts │ ├── setup-env.sh │ ├── mkshards.rb │ └── start.sh ├── LICENSE ├── TODO ├── config ├── development.scala ├── test.scala └── production.scala └── README.markdown /doc/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twitter-archive/flockdb/HEAD/doc/schema.png -------------------------------------------------------------------------------- /doc/flockdb-layout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twitter-archive/flockdb/HEAD/doc/flockdb-layout.png -------------------------------------------------------------------------------- /doc/find-and-delete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twitter-archive/flockdb/HEAD/doc/find-and-delete.png -------------------------------------------------------------------------------- /project/release.properties: -------------------------------------------------------------------------------- 1 | #Automatically generated by ReleaseManagement 2 | #Mon Apr 02 14:57:48 PDT 2012 3 | version=1.8.15 4 | sha1=d005f33e04350fbc74d0dd5ddee4214c2f973e4a 5 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | #Project properties 2 | #Mon Apr 02 14:57:48 PDT 2012 3 | project.organization=com.twitter 4 | project.name=flockdb 5 | sbt.version=0.7.4 6 | project.version=1.8.16-SNAPSHOT 7 | build.scala.versions=2.8.1 8 | project.initialize=false 9 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist/* 2 | *.log 3 | flock.ipr 4 | flock.iws 5 | target/ 6 | .DS_Store 7 | kestrel/* 8 | *.hprof.txt 9 | lib_managed/ 10 | src_managed/ 11 | project/boot/ 12 | project/plugins/project/ 13 | ignore/ 14 | flockdb.tmproj 15 | *.iml 16 | .idea/ 17 | -------------------------------------------------------------------------------- /.ensime: -------------------------------------------------------------------------------- 1 | ;; This config was generated using ensime-config-gen. Feel free to customize its contents manually. 2 | 3 | ( 4 | 5 | :project-package "com.twitter.flockdb" 6 | 7 | :use-sbt t 8 | 9 | :sources ("target/gen-java") 10 | 11 | :compile-jars ("lib_managed") 12 | 13 | ) 14 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/JobSchedulable.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.flockdb 2 | 3 | import com.twitter.gizzard.scheduler._ 4 | 5 | trait JobSchedulable { 6 | def schedule(tableId: Int, forwardingManager: ForwardingManager, scheduler: PrioritizingJobScheduler, priority: Int) 7 | } 8 | -------------------------------------------------------------------------------- /project/plugins/Plugins.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | 3 | class Plugins(info: ProjectInfo) extends PluginDefinition(info) { 4 | val twttrRepo = "twitter.com" at "http://maven.twttr.com" 5 | 6 | val standardProject = "com.twitter" % "standard-project" % "0.12.6" 7 | val scrooge = "com.twitter" % "sbt-scrooge" % "2.3.1" 8 | } 9 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/conversions/Numeric.scala: 
-------------------------------------------------------------------------------- 1 | package com.twitter.flockdb.conversions 2 | 3 | object Numeric { 4 | class RichAnyVal(wrapped: AnyVal) { 5 | def toLong = { 6 | wrapped match { 7 | case i: Int => i.toLong 8 | case n: Long => n 9 | } 10 | } 11 | 12 | def toInt = { 13 | wrapped match { 14 | case i: Int => i 15 | case n: Long => n.toInt 16 | } 17 | } 18 | } 19 | 20 | implicit def anyValToRichAnyVal(v: AnyVal) = new RichAnyVal(v) 21 | } 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2011 Twitter, Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/Page.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | case class Page(count: Int, cursor: Cursor) 20 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/EdgeQuery.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | case class EdgeQuery(term: QueryTerm, page: Page) 20 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/ConfigValidationSpec.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.flockdb 2 | 3 | import org.specs.Specification 4 | import com.twitter.util.Eval 5 | import java.io.File 6 | import com.twitter.flockdb 7 | 8 | 9 | object ConfigValidationSpec extends Specification { 10 | "Configuration Validation" should { 11 | "production.scala" >> { 12 | val config = Eval[flockdb.config.FlockDB](new File("config/production.scala")) 13 | config mustNot beNull 14 | } 15 | "development.scala" >> { 16 | val config = Eval[flockdb.config.FlockDB](new File("config/development.scala")) 17 | config mustNot beNull 18 | } 19 | 20 | "test.scala" >> { 21 | val config = Eval[flockdb.config.FlockDB](new File("config/test.scala")) 22 | config mustNot beNull 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/SelectQuery.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | import operations.SelectOperation 20 | 21 | case class SelectQuery(operations: Seq[SelectOperation], page: Page) 22 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/Priority.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | 20 | object Priority extends Enumeration { 21 | val Low = Value(1) 22 | val Medium = Value(2) 23 | val High = Value(3) 24 | } 25 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/QueryTerm.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | 20 | case class QueryTerm(sourceId: Long, graphId: Int, isForward: Boolean, 21 | destinationIds: Option[Seq[Long]], var states: Seq[State]) 22 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/operations/ExecuteOperation.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package operations 19 | 20 | 21 | case class ExecuteOperation(operationType: ExecuteOperationType.Value, term: QueryTerm, 22 | position: Option[Long]) 23 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/operations/ExecuteOperations.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package operations 19 | 20 | 21 | case class ExecuteOperations(operations: Seq[ExecuteOperation], executeAt: Option[Int], 22 | priority: Priority.Value) 23 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | TODO: 2 | 3 | move com.twitter.service.flock.Reset into gizzard 4 | move com.twitter.results into gizzard or its own package. 5 | move ByteSwapper into gizzard 6 | merge in Gizzard's no_reflection branch. 7 | move StatsCollectingQuery where?? 8 | create a `main` file by copying glock 9 | create simple exceptionWrappingProxy using new gizzard niceness interface (cf Rowz) 10 | rename Edges.scala to Flockdb.scala 11 | figure out where Flock.thrift goes 12 | move State into com.twitter.flockdb; make a copy for Groups; they're only identical on accident. not the same thing. 13 | copy the production.conf config from glock (but be careful with passwords!!!!) 14 | rename Edges.thrift Flock.thrift 15 | figure out how to package conf file?? 16 | 17 | then: 18 | make sure it works in development mode 19 | use flocker.rb to create a bunch of shards 20 | use the ruby gem to insert a bunch of data and make a bunch of queries. 21 | write readme's. 22 | write blog post. 
-------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/operations/ExecuteOperationType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb.operations 18 | 19 | 20 | object ExecuteOperationType extends Enumeration { 21 | val Add = Value(1) 22 | val Remove = Value(2) 23 | val Archive = Value(3) 24 | val Negate = Value(4) 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/operations/SelectOperation.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package operations 19 | 20 | 21 | case class SelectOperation(operationType: SelectOperationType.Value, term: Option[QueryTerm]) { 22 | override def clone() = SelectOperation(operationType, term) 23 | } 24 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/operations/SelectOperationType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb.operations 18 | 19 | 20 | object SelectOperationType extends Enumeration { 21 | val SimpleQuery = Value(1) 22 | val Intersection = Value(2) 23 | val Union = Value(3) 24 | val Difference = Value(4) 25 | } 26 | -------------------------------------------------------------------------------- /project/build/FlockDBProject.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import Process._ 3 | import com.twitter.sbt._ 4 | 5 | class FlockDBProject(info: ProjectInfo) extends StandardLibraryProject(info) 6 | with CompileThriftScrooge 7 | with DefaultRepos 8 | with SubversionPublisher { 9 | 10 | override def filterScalaJars = false 11 | val scalaTools = "org.scala-lang" % "scala-compiler" % "2.8.1" 12 | 13 | val gizzard = "com.twitter" % "gizzard" % "3.0.13" withSources() 14 | val scrooge = "com.twitter" % "scrooge-runtime" % "1.0.3" withSources() 15 | 16 | val asm = "asm" % "asm" % "1.5.3" % "test" 17 | val cglib = "cglib" % "cglib" % "2.2" % "test" 18 | val hamcrest = "org.hamcrest" % "hamcrest-all" % "1.1" % "test" 19 | val jmock = "org.jmock" % "jmock" % "2.4.0" % "test" 20 | val objenesis = "org.objenesis" % "objenesis" % "1.1" % "test" 21 | val specs = "org.scala-tools.testing" % "specs_2.8.1" % "1.6.6" % "test" 22 | 23 | override def subversionRepository = Some("https://svn.twitter.biz/maven-public/") 24 | } 25 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/Cursor.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | object Cursor { 20 | def cursorZip(seq: Seq[Long]) = for (i <- seq) yield (i, Cursor(i)) 21 | 22 | val End = new Cursor(0) 23 | val Start = new Cursor(-1) 24 | } 25 | 26 | case class Cursor(position: Long) extends Ordered[Cursor] { 27 | def compare(that: Cursor) = position.compare(that.position) 28 | def reverse = new Cursor(-position) 29 | def magnitude = new Cursor(math.abs(position)) 30 | } 31 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/Main.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.flockdb 2 | 3 | import com.twitter.util.Eval 4 | import com.twitter.logging.Logger 5 | import com.twitter.ostrich.admin.{Service, ServiceTracker, RuntimeEnvironment, AdminHttpService} 6 | import java.io.File 7 | 8 | import com.twitter.flockdb.config.{FlockDB => FlockDBConfig} 9 | 10 | object Main { 11 | val log = Logger.get 12 | 13 | var adminServer: Option[AdminHttpService] = None 14 | 15 | def main(args: Array[String]) { 16 | try { 17 | log.info("Starting FlockDB.") 18 | 19 | val eval = new Eval 20 | val config = eval[FlockDBConfig](args.map(new File(_)): _*) 21 | val runtime = new RuntimeEnvironment(this) 22 | 23 | Logger.configure(config.loggers) 24 | adminServer = config.adminConfig()(runtime) 25 | 26 | val service = new FlockDB(config) 27 | 28 | ServiceTracker.register(service) 29 | service.start() 30 | 31 | } catch { 32 | case e => { 33 | 
log.fatal(e, "Exception in initialization: ", e.getMessage) 34 | log.fatal(e.getStackTrace.toString) 35 | System.exit(1) 36 | } 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/Direction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | abstract sealed class Direction(val id: Int) { 20 | val opposite: Direction 21 | } 22 | 23 | object Direction { 24 | def apply(id: Int) = id match { 25 | case Forward.id => Forward 26 | case Backward.id => Backward 27 | } 28 | 29 | def apply(isForward: Boolean) = if (isForward) Forward else Backward 30 | 31 | case object Forward extends Direction(0) { 32 | val opposite = Direction.Backward 33 | } 34 | 35 | case object Backward extends Direction(1) { 36 | val opposite = Direction.Forward 37 | } 38 | 39 | val All = List(Forward, Backward) 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/State.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | abstract class State(val id: Int, val name: String, val ordinal: Int) extends Ordered[State] { 20 | def max(other: State) = if (this > other) this else other 21 | def compare(s: State) = ordinal.compare(s.ordinal) 22 | } 23 | 24 | object State { 25 | def apply(id: Int) = id match { 26 | case Normal.id => Normal 27 | case Removed.id => Removed 28 | case Archived.id => Archived 29 | case Negative.id => Negative 30 | } 31 | 32 | case object Normal extends State(0, "Normal", 0) 33 | case object Negative extends State(3, "Negative", 1) 34 | case object Removed extends State(1, "Removed", 3) 35 | case object Archived extends State(2, "Archived", 2) 36 | } 37 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/unit/EdgeSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package unit 19 | 20 | import com.twitter.util.Time 21 | import org.specs.mock.{ClassMocker, JMocker} 22 | import jobs.single._ 23 | 24 | object EdgeSpec extends ConfiguredSpecification with JMocker with ClassMocker { 25 | val now = Time.fromSeconds(124) 26 | val source = 1 27 | val dest = 2 28 | val pos = 0 29 | val graph = 5 30 | val count = 0 31 | val forwardingManager = mock[ForwardingManager] 32 | 33 | "Edge" should { 34 | "becomes correct job" in { 35 | val edge = new Edge(source, dest, pos, now, count, State.Normal) 36 | edge.toJob(graph, forwardingManager) mustEqual new Single(source, graph, dest, pos, State.Normal, now, forwardingManager, OrderedUuidGenerator) 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/UuidGenerator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | import java.util.Random 20 | 21 | trait UuidGenerator extends (Long => Long) { 22 | def apply(updatedAt: Long): Long 23 | def unapply(uuid: Long): Option[Long] 24 | } 25 | 26 | object OrderedUuidGenerator extends UuidGenerator { 27 | private val randomGenerator = new Random 28 | // 64 bits - 20 leaves 44 bits of milliseconds, or over 500 years. 29 | private val unusedBits = 20 30 | private val randomMask = (1 << unusedBits) - 1 31 | 32 | def apply(updatedAt: Long) = { 33 | (updatedAt << unusedBits) | (randomGenerator.nextInt() & randomMask) 34 | } 35 | 36 | def unapply(uuid: Long) = { 37 | Some(uuid >> unusedBits) 38 | } 39 | } 40 | 41 | object IdentityUuidGenerator extends UuidGenerator { 42 | def apply(updatedAt: Long) = updatedAt 43 | 44 | def unapply(uuid: Long) = Some(uuid) 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/ForwardingManager.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
package com.twitter.flockdb

import com.twitter.gizzard.nameserver.MultiForwarder
import com.twitter.gizzard.shards.{RoutingNode, ShardException}
import com.twitter.flockdb.shards.{Shard, ReadWriteShardAdapter}


/**
 * Resolves which shard owns the edges for a (sourceId, graphId, direction)
 * triple. Backward edges live under the negated graph id (see translate).
 */
class ForwardingManager(val forwarder: MultiForwarder[Shard]) {
  @throws(classOf[ShardException])
  def find(sourceId: Long, graphId: Int, direction: Direction): Shard = {
    new ReadWriteShardAdapter(findNode(sourceId, graphId, direction))
  }

  // Raw routing node lookup; find() wraps the result in a ReadWriteShardAdapter.
  @throws(classOf[ShardException])
  def findNode(sourceId: Long, graphId: Int, direction: Direction) = {
    forwarder.find(translate(graphId, direction), sourceId)
  }

  // Backward direction is encoded as the negative graph id in the forwarding table.
  private def translate(graphId: Int, direction: Direction) = {
    if (direction == Direction.Backward) -1 * graphId else graphId
  }
}

package com.twitter.flockdb.config

import com.twitter.gizzard.config._
import com.twitter.ostrich.admin.config.AdminServiceConfig
import com.twitter.querulous.config.{Connection, AsyncQueryEvaluator}
import com.twitter.util.TimeConversions._
import com.twitter.flockdb.queries.QueryTree
import com.twitter.flockdb.queries


// Thrift server settings for the edges service.
class FlockDBServer {
  var name = "flockdb_edges"
  var port = 7915
  var maxConcurrentRequests = 10000
}

// Tunables shared by intersection/difference queries, doubling as a factory
// so configuration builds the query nodes with these settings applied.
trait IntersectionQuery {
  var intersectionTimeout = 100.millis
  // Assumed fraction of a page surviving intersection — tuning knob; TODO confirm.
  var averageIntersectionProportion = 0.1
  var intersectionPageSizeMax = 4000

  def intersect(query1: QueryTree, query2: QueryTree) = new queries.IntersectionQuery(query1, query2, averageIntersectionProportion, intersectionPageSizeMax, intersectionTimeout)
  def difference(query1: QueryTree, query2: QueryTree) = new queries.DifferenceQuery(query1, query2, averageIntersectionProportion, intersectionPageSizeMax, intersectionTimeout)
}

// Top-level FlockDB configuration: server settings, query tuning, and the
// database evaluators the concrete config must supply.
trait FlockDB extends GizzardServer {
  var server = new FlockDBServer

  var intersectionQuery: IntersectionQuery = new IntersectionQuery { }
  var aggregateJobsPageSize = 500

  def databaseConnection: Connection

  def edgesQueryEvaluator: AsyncQueryEvaluator
  def lowLatencyQueryEvaluator: AsyncQueryEvaluator
  def materializingQueryEvaluator: AsyncQueryEvaluator

  def adminConfig: AdminServiceConfig
}
package com.twitter.flockdb
package unit

import org.specs.mock.JMocker

// Exercises UnionQuery over two in-memory SeqQuery fixtures.
class UnionQuerySpec extends ConfiguredSpecification with JMocker {
  "UnionQuery" should {
    val query1 = new queries.SeqQuery(List(1,2,3,4,5,6,7,8,9,10))
    val query2 = new queries.SeqQuery(List(1,2,3,4,11))

    // The union's estimate is the max of the two sides' sizes.
    "sizeEstimate" in {
      val unionQuery = new queries.UnionQuery(query1, query2)
      unionQuery.sizeEstimate()() mustEqual 10
    }

    // Ids present in either side survive the filter.
    "selectWhereIn" in {
      val unionQuery = new queries.UnionQuery(query1, query2)
      unionQuery.selectWhereIn(List(1, 2, 3, 12))().toList mustEqual List(1, 2, 3)
    }

    // Paging merges both sides, walking down from the cursor position.
    "selectPage" in {
      val unionQuery = new queries.UnionQuery(query1, query2)
      unionQuery.selectPage(10, Cursor(9))().toTuple mustEqual (List(8,7,6,5,4,3,2,1), Cursor.End, Cursor(-8))
    }
  }
}
package com.twitter.flockdb
package queries

import scala.util.Sorting
import com.twitter.gizzard.thrift.conversions.Sequences._
import com.twitter.util.Future

/**
 * Test fixture: a query node backed by an in-memory sequence, held in
 * descending order to mimic how shards return destination ids.
 */
class SeqQuery(s: Seq[Long]) extends SimpleQueryNode {
  val seq = descending(s)

  def sizeEstimate = Future(seq.size)

  // Multiset intersection with the given page, returned in descending order.
  def selectWhereIn(i: Seq[Long]) = {
    val common = seq.toList intersect i.toList
    Future(descending(common).toList)
  }

  protected def selectPage(count: Int, cursor: Cursor) = selectPageByDestinationId(count, cursor)

  def selectPageByDestinationId(count: Int, cursor: Cursor) = {
    // Start serves everything; End serves nothing; otherwise resume at the cursor.
    val remaining: Seq[Long] =
      if (cursor == Cursor.Start) seq
      else if (cursor == Cursor.End) Seq()
      else seq.filter(_ <= cursor.position)

    Future(new ResultWindow(Cursor.cursorZip(remaining), count, cursor))
  }

  // Stable sort, largest id first.
  private def descending(xs: Seq[Long]) = Sorting.stableSort(xs, (a: Long, b: Long) => b < a)

  override def toString =
    ""
}
package com.twitter.flockdb
package unit

import org.specs.mock.JMocker

// Exercises IntersectionQuery built via the configured factory.
// Declared as a class (was `object`) for consistency with the sibling specs
// (UnionQuerySpec, DifferenceQuerySpec, WhereInQuerySpec), which all extend
// ConfiguredSpecification as classes.
class IntersectionQuerySpec extends ConfiguredSpecification with JMocker {
  "IntersectionQuery" should {
    val query1 = new queries.SeqQuery(List(1,2,3,4,5,6,7,8,9,10))
    val query2 = new queries.SeqQuery(List(1,2,3,4,11))
    val queryConfig = config.intersectionQuery

    // Estimate = smaller side's size scaled by the configured proportion.
    "sizeEstimate" in {
      val intersectionQuery = queryConfig.intersect(query1, query2)
      intersectionQuery.sizeEstimate()() mustEqual (5 * queryConfig.averageIntersectionProportion).toInt
    }

    // Only ids present in both sides survive, descending.
    "selectWhereIn" in {
      val intersectionQuery = queryConfig.intersect(query1, query2)
      intersectionQuery.selectWhereIn(List(1, 2, 12, 13))() mustEqual List(2, 1)
    }

    "selectPage" in {
      val intersectionQuery = queryConfig.intersect(query1, query2)
      intersectionQuery.selectPage(5, Cursor.Start)().toTuple mustEqual (List(4, 3, 2, 1), Cursor.End, Cursor.End)
    }
  }
}
package com.twitter.flockdb
package queries

import shards.Shard
import com.twitter.gizzard.Stats

/**
 * Leaf query node that reads a single source id's edges directly from one
 * shard, restricted to the given states.
 */
class SimpleQuery(shard: Shard, sourceId: Long, states: Seq[State]) extends SimpleQueryNode {
  // Consistency fix: record the Stats note inside time { }, matching the other
  // three operations below, so the note and the timing cover the same call.
  def sizeEstimate() = time {
    Stats.transaction.record("Selecting counts from "+shard)
    shard.count(sourceId, states)
  }

  // Filters `page` down to ids this source actually has edges to.
  def selectWhereIn(page: Seq[Long]) = time {
    Stats.transaction.record("Intersecting "+page.size+" ids from "+shard)
    shard.intersect(sourceId, states, page)
  }

  def selectPageByDestinationId(count: Int, cursor: Cursor) = time {
    Stats.transaction.record("Selecting "+count+" destinationIds from "+shard)
    shard.selectByDestinationId(sourceId, states, count, cursor)
  }

  def selectPage(count: Int, cursor: Cursor) = time {
    Stats.transaction.record("Selecting "+count+" edges from "+shard)
    shard.selectByPosition(sourceId, states, count, cursor)
  }

  override def toString = {
    ""
  }
}
package com.twitter.flockdb
package queries

import scala.util.Sorting

/**
 * Union of two query subtrees: pages are merged result windows, membership
 * filters keep ids present in either side.
 */
class UnionQuery(query1: QueryTree, query2: QueryTree) extends ComplexQueryNode(query1, query2) {
  // Lower bound on the union's size: at least as large as the bigger side.
  def sizeEstimate() = getSizeEstimates() map { case (count1, count2) => count1 max count2 }

  def selectPage(count: Int, cursor: Cursor) = selectPageByDestinationId(count, cursor)

  // Fetch both sides in parallel, then merge the result windows.
  def selectPageByDestinationId(count: Int, cursor: Cursor) = time {
    val f1 = query1.selectPageByDestinationId(count, cursor)
    val f2 = query2.selectPageByDestinationId(count, cursor)

    for (result1 <- f1; result2 <- f2) yield result1.merge(result2)
  }

  // Dedups the two sides' hits and returns them stably sorted.
  // (Removed an unused private merge(Seq, Seq) helper that duplicated this.)
  def selectWhereIn(page: Seq[Long]) = time {
    val f1 = query1.selectWhereIn(page)
    val f2 = query2.selectWhereIn(page)

    for (page1 <- f1; page2 <- f2) yield {
      Sorting.stableSort((page1 ++ page2).toSet.toSeq)
    }
  }

  override def toString =
    ""
}
package com.twitter.flockdb
package unit

import org.specs.mock.JMocker

// Exercises DifferenceQuery (query1 minus query2) over in-memory SeqQuery fixtures.
class DifferenceQuerySpec extends ConfiguredSpecification with JMocker {
  "DifferenceQuery" should {
    val query1 = new queries.SeqQuery(List(1,2,3,4,5,6,7,8,9,10,11,12))
    val query2 = new queries.SeqQuery(List(3,4,7,11))
    val queryConfig = config.intersectionQuery

    // The difference's estimate is simply query1's size (an upper bound).
    "sizeEstimate" in {
      val differenceQuery = queryConfig.difference(query1, query2)
      differenceQuery.sizeEstimate()() mustEqual 12
    }

    // Ids in query1 but not query2 survive, descending.
    "selectWhereIn" in {
      val differenceQuery = queryConfig.difference(query1, query2)
      differenceQuery.selectWhereIn(List(1, 2, 3, 4, 5, 11, 12, 13))().toList mustEqual List(12,5,2,1)
    }

    // Paging re-fetches until a full page or the end of query1 is reached.
    "selectPage" in {
      val differenceQuery = queryConfig.difference(query1, query2)

      differenceQuery.selectPage(5, Cursor.Start)().toTuple mustEqual (List(12,10,9,8,6), Cursor(6), Cursor.End)
      differenceQuery.selectPage(10, Cursor(12L))().toTuple mustEqual (List(10,9,8,6,5,2,1), Cursor.End, Cursor(-10))
      differenceQuery.selectPage(10, Cursor.Start)().toTuple mustEqual (List(12,10,9,8,6,5,2,1), Cursor.End, Cursor.End)
    }
  }
}
package com.twitter.flockdb
package queries

import com.twitter.util.{Duration, Future}
import com.twitter.gizzard.Stats
import shards.Shard

/**
 * Query node restricted to a fixed candidate set of destination ids; every
 * select reduces to a shard intersection against that candidate set.
 */
class WhereInQuery(shard: Shard, sourceId: Long, states: Seq[State], destinationIds: Seq[Long]) extends SimpleQueryNode {

  // The candidate list bounds the result size.
  def sizeEstimate() = Future(destinationIds.size)

  def selectWhereIn(page: Seq[Long]) = time {
    val common = (destinationIds.toSet & page.toSet).toSeq
    Stats.transaction.record("Intersecting "+common.size+" ids from "+shard)
    shard.intersect(sourceId, states, common)
  }

  def selectPageByDestinationId(count: Int, cursor: Cursor) = time {
    Stats.transaction.record("Selecting "+ count +" edges from an intersection of "+ destinationIds.size +" ids")
    shard.intersect(sourceId, states, destinationIds) map { matched =>
      Stats.transaction.record("Selected "+ matched.size +" rows.")
      new ResultWindow(matched map { id => (id, Cursor(id)) }, count, cursor)
    }
  }

  def selectPage(count: Int, cursor: Cursor) = selectPageByDestinationId(count, cursor)

  override def toString = {
    ""
  }
}
package com.twitter.flockdb
package queries

import com.twitter.util.{Time, Duration, Future}

/**
 * Read-side query interface: size estimation, membership filtering, and
 * cursor-based paging over destination ids.
 */
trait Query {
  def sizeEstimate(): Future[Int]
  // Filters `page` down to the ids this query contains.
  def selectWhereIn(page: Seq[Long]): Future[Seq[Long]]
  def select(page: Page) = selectPage(page.count, page.cursor)
  def selectPageByDestinationId(count: Int, cursor: Cursor): Future[ResultWindow[Long]]

  protected def selectPage(count: Int, cursor: Cursor): Future[ResultWindow[Long]]
}

// Mixin recording how long the wrapped Future took to complete.
trait Timed {
  var duration: Option[Duration] = None

  protected def time[A](f: => Future[A]): Future[A] = {
    val start = Time.now
    f map { rv => duration = Some(Time.now - start); rv }
  }
}

// A node of the compiled query plan; complexity/depth guide plan limits.
sealed abstract class QueryTree extends Query with Timed {
  def getComplexity(): Int
  def getDepth(): Int
}

// NOTE(review): `abstract case class` is deprecated in later Scala 2 releases;
// consider plain abstract classes with explicit extractors if the compiler is upgraded.
abstract case class ComplexQueryNode(left: QueryTree, right: QueryTree) extends QueryTree {
  val complexity = (left.getComplexity() + right.getComplexity()) + 1
  val depth = (left.getDepth() max right.getDepth) + 1
  def getComplexity(): Int = complexity
  def getDepth(): Int = depth

  // Kicks off both children's estimates before joining, so they run in parallel.
  def getSizeEstimates() = {
    val f1 = left.sizeEstimate
    val f2 = right.sizeEstimate
    for (count1 <- f1; count2 <- f2) yield (count1, count2)
  }

  // Returns (smaller, larger) by estimated size, so callers can drive work
  // from the cheaper side.
  def orderQueries() = {
    getSizeEstimates() map { case (count1, count2) =>
      if (count1 < count2) {
        (left, right)
      } else {
        (right, left)
      }
    }
  }

}

// Leaf node: contributes nothing to plan complexity or depth.
abstract case class SimpleQueryNode() extends QueryTree {
  def getComplexity(): Int = 0
  def getDepth(): Int = 0
}
package com.twitter.flockdb
package unit

import com.twitter.util.Future
import org.specs.mock.JMocker
import shards.Shard

// Exercises WhereInQuery against a mocked shard.
class WhereInQuerySpec extends ConfiguredSpecification with JMocker {
  "WhereInQuery" should {
    var shard: Shard = null
    val sourceId = 900
    val destinationIds = List(55L, 60L, 65L, 70L, 75L, 80L, 85L)

    doBefore {
      shard = mock[Shard]
    }

    // Estimate is just the candidate-list length.
    "sizeEstimate" in {
      val whereInQuery = new queries.WhereInQuery(shard, sourceId, List(State.Normal), destinationIds)
      whereInQuery.sizeEstimate()() mustEqual destinationIds.size
    }

    // Only the overlap of the page and the candidate list reaches the shard.
    "selectWhereIn" in {
      val page = List(65L, 63L, 60L)
      expect {
        one(shard).intersect(sourceId, List(State.Normal), List(60L, 65L)) willReturn Future(List(60L))
      }
      val whereInQuery = new queries.WhereInQuery(shard, sourceId, List(State.Normal), destinationIds)
      whereInQuery.selectWhereIn(page)().toList mustEqual List(60L)
    }

    // Paging windows the shard's intersection result around the cursor.
    "selectPage" in {
      expect {
        allowing(shard).intersect(sourceId, List(State.Normal), destinationIds) willReturn Future(List(85L, 75L, 65L, 55L))
      }

      val whereInQuery = new queries.WhereInQuery(shard, sourceId, List(State.Normal), destinationIds)

      whereInQuery.selectPage(10, Cursor(90L))().toTuple mustEqual (List(85L, 75L, 65L, 55L), Cursor.End, Cursor.End)
      whereInQuery.selectPage(10, Cursor(75L))().toTuple mustEqual (List(65L, 55L), Cursor.End, Cursor(-65L))
      whereInQuery.selectPage(2, Cursor(-65L))().toTuple mustEqual (List(85L, 75L), Cursor(75L), Cursor.End)
    }
  }
}
package com.twitter.flockdb

import com.twitter.util.Time
import com.twitter.gizzard.scheduler._
import jobs.multi._

object Metadata {
  def apply(sourceId: Long, state: State, count: Int, updatedAt: Time) = new Metadata(sourceId, state, count, updatedAt)
  def apply(sourceId: Long, state: State, updatedAt: Time) = new Metadata(sourceId, state, updatedAt)
  // Sentinel with maximal sourceId; note its updatedAt is epoch, not max —
  // presumably only the sourceId matters for the comparisons it's used in; TODO confirm.
  val Max = Metadata(Long.MaxValue, State.Normal, Time.fromSeconds(0))
}

/**
 * Per-source-id bookkeeping row: state, edge count, and last-update time.
 * Ordered by (updatedAt, state) so the freshest write wins on merge.
 */
case class Metadata(sourceId: Long, state: State, count: Int, updatedAtSeconds: Int) extends Ordered[Metadata] {

  def this(sourceId: Long, state: State, count: Int, updatedAt: Time) =
    this(sourceId, state, count, updatedAt.inSeconds)

  def this(sourceId: Long, state: State, updatedAt: Time) =
    this(sourceId, state, 0, updatedAt.inSeconds)

  val updatedAt = Time.fromSeconds(updatedAtSeconds)


  // Freshness ordering: updatedAt first, state breaks ties.
  def compare(other: Metadata) = {
    val out = updatedAt.compare(other.updatedAt)
    if (out == 0) {
      state.compare(other.state)
    } else {
      out
    }
  }

  def max(other: Metadata) = if (this > other) this else other

  // Enqueue a Multi job that re-applies this row's state across the table.
  // NOTE(review): the job itself is built with Priority.Medium and page size
  // 500 regardless of the `priority` argument, which only selects the queue
  // it's put on — verify this asymmetry is intended.
  def schedule(
    tableId: Int,
    forwardingManager: ForwardingManager,
    scheduler: PrioritizingJobScheduler,
    priority: Int
  ) = {
    val job = new Multi(
      sourceId,
      tableId,
      (if (tableId > 0) Direction.Forward else Direction.Backward),
      state,
      updatedAt,
      Priority.Medium,
      500,
      forwardingManager,
      scheduler
    )

    scheduler.put(priority, job)
  }

  // Raw compare result on sourceId (not normalized to -1/0/1, unlike Edge.similar).
  def similar(other: Metadata) = {
    sourceId.compare(other.sourceId)
  }
}
package com.twitter.flockdb

import com.twitter.util.Time
import com.twitter.flockdb.jobs.single._
import com.twitter.gizzard.scheduler.{PrioritizingJobScheduler, JsonJob}

/** Companion: Time-based convenience constructor plus a sentinel Max edge. */
object Edge {
  def apply(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time, count: Int, state: State) =
    new Edge(sourceId, destinationId, position, updatedAt, count, state)
  val Max = Edge(Long.MaxValue, Long.MaxValue, Long.MaxValue, 0, 0, State.Normal)
}

/**
 * A single directed edge. Ordered by (updatedAt, state) so that the freshest
 * copy wins when two versions of the same edge are merged.
 */
case class Edge(sourceId: Long, destinationId: Long, position: Long, updatedAtSeconds: Int, count: Int,
                state: State) extends Ordered[Edge] {

  def this(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time, count: Int, state: State) =
    this(sourceId, destinationId, position, updatedAt.inSeconds, count, state)

  val updatedAt = Time.fromSeconds(updatedAtSeconds)

  // Enqueue the single-edge job onto the given priority queue.
  def schedule(tableId: Int, forwardingManager: ForwardingManager, scheduler: PrioritizingJobScheduler, priority: Int) = {
    scheduler.put(priority, toJob(tableId, forwardingManager))
  }

  // Build the Single job that re-applies this edge to the table.
  def toJob(tableId: Int, forwardingManager: ForwardingManager) = {
    new Single(
      sourceId,
      tableId,
      destinationId,
      OrderedUuidGenerator.unapply(position).get,
      state,
      updatedAt,
      forwardingManager,
      OrderedUuidGenerator
    )
  }

  // Identity ordering: source id first, then destination id; returns -1, 0, or 1.
  def similar(other: Edge) = {
    val bySource = sourceId.compare(other.sourceId)
    if (bySource < 0) -1
    else if (bySource > 0) 1
    else destinationId.compare(other.destinationId)
  }

  // Freshness ordering: updatedAt first, state breaks ties.
  def compare(other: Edge) = {
    val byTime = updatedAt.compare(other.updatedAt)
    if (byTime != 0) byTime else state.compare(other.state)
  }

  // On a tie this returns `other`, matching the original's `if (this > other)` form.
  def max(other: Edge) = if (this > other) this else other
}
package com.twitter.flockdb
package integration

import com.twitter.gizzard.scheduler.{JsonJob, PrioritizingJobScheduler}
import com.twitter.gizzard.shards.ShardInfo
import com.twitter.util.Time
import com.twitter.conversions.time._
import org.specs.mock.{ClassMocker, JMocker}
import jobs.multi.Multi
import shards.{Shard, SqlShard}

// Regression test: running the Multi ("flock fix") job must restore archived
// edges without disturbing the ordering of the already-normal edges.
class FlockFixRegressionSpec extends IntegrationSpecification {
  val alice = 1L
  val FOLLOWS = 1
  val pageSize = 100

  // Fetches alice's outbound normal edges as a single page.
  def alicesFollowings() = {
    val term = QueryTerm(alice, FOLLOWS, true, None, List(State.Normal))
    val query = EdgeQuery(term, Page(pageSize, Cursor.Start))
    val resultsList = flockService.selectEdges(List(query))()
    resultsList.size mustEqual 1
    resultsList(0).toList
  }

  "select results" should {
    "be in order and still in order after unarchive" in {
      reset(config) // I don't know why this isn't working in doBefore

      // Even ids are added normal; odd ids are archived.
      for(i <- 0 until 10) {
        if (i % 2 == 0) {
          execute(Select(alice, FOLLOWS, i).add)
        } else {
          execute(Select(alice, FOLLOWS, i).archive)
        }
        Thread.sleep(1000) // prevent same-millisecond collision
      }

      flock.jobScheduler.size must eventually(be(0)) // Make sure adds get applied. I can't wait for Time.asOf()

      alicesFollowings().size must eventually(be_==(5))
      alicesFollowings().toList.map(_.destinationId) mustEqual List(8,6,4,2,0)

      Thread.sleep(1000)

      // Unarchive everything via the Multi job, then expect all 10 edges back, ordered.
      val job = new Multi(alice, FOLLOWS, Direction.Forward, State.Normal, Time.now, Priority.High, pageSize, flock.forwardingManager, flock.jobScheduler)
      job()

      alicesFollowings().size must eventually(be(10))

      alicesFollowings().toList.map(_.destinationId) mustEqual List(9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
    }
  }

}

// -----------------------------------------------------------------------------
// /src/scripts/setup-env.sh
// -----------------------------------------------------------------------------
#!/bin/bash
#
# Copyright 2010 Twitter, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

VERSION="@VERSION@"

# Reject Java 1.5 (NOTE(review): this greps the whole version banner for "1.5",
# which also prints matches and could false-positive on other fields — consider grep -q).
if java -version 2>&1 |grep "1\.5"; then
  echo "Java must be at least 1.6"
  exit 1
fi

if [ "x$DB_USERNAME" = "x" ]; then
  echo "Please set DB_USERNAME and/or DB_PASSWORD."
  exit 1
fi

if gizzmo --help > /dev/null; then
  gizzmo="gizzmo -H localhost -P 7920"
else
  echo "Make sure you have gizzmo available on your path."
  echo "Find it here: http://github.com/twitter/gizzmo"
  exit 1
fi

# Build the mysql invocation once, with or without a password flag.
MYSQL_COMMAND=$(if [ "x$DB_PASSWORD" = "x" ]; then
  echo "mysql -u$DB_USERNAME"
else
  echo "mysql -u$DB_USERNAME -p$DB_PASSWORD"
fi)

function exec_sql {
  echo $1 | $MYSQL_COMMAND
}

echo "Killing any running flockdb..."
curl http://localhost:9990/shutdown >/dev/null 2>/dev/null
sleep 3

echo "Launching flockdb..."
exec_sql "DROP DATABASE IF EXISTS flockdb_development"
exec_sql "CREATE DATABASE IF NOT EXISTS flockdb_development"

JAVA_OPTS="-Xms256m -Xmx256m -XX:NewSize=64m -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -server"
java $JAVA_OPTS -jar ./dist/flockdb/flockdb-${VERSION}.jar config/development.scala &
sleep 10

# Create forward/backward shards for graphs 1..15 and register their forwardings.
echo "Creating shards..."
i=1
while [ $i -le 15 ]; do
  /bin/echo -n "$i "
  exec_sql "DROP TABLE IF EXISTS edges_development.forward_${i}_0000_edges"
  exec_sql "DROP TABLE IF EXISTS edges_development.forward_${i}_0000_metadata"
  exec_sql "DROP TABLE IF EXISTS edges_development.backward_${i}_0000_edges"
  exec_sql "DROP TABLE IF EXISTS edges_development.backward_${i}_0000_metadata"
  forward_shard=$($gizzmo create -s "INT UNSIGNED" -d "INT UNSIGNED" "com.twitter.flockdb.SqlShard" "localhost/forward_${i}_0000")
  backward_shard=$($gizzmo create -s "INT UNSIGNED" -d "INT UNSIGNED" "com.twitter.flockdb.SqlShard" "localhost/backward_${i}_0000")
  $gizzmo addforwarding -- $i 0 $forward_shard
  $gizzmo addforwarding -- -$i 0 $backward_shard
  i=$((i + 1))
done
echo
$gizzmo -f reload
echo "Done."
package com.twitter.flockdb
package integration

import com.twitter.querulous.evaluator.QueryEvaluatorFactory

// Integration coverage for intersection paging, run under both a tiny and a
// huge internal page size so both fetch paths get exercised.
object IntersectionSpec extends IntegrationSpecification {

  val FOLLOWS = 1

  val alice = 1L
  val bob = 2L
  val carl = 3L
  val darcy = 4L
  var queryEvaluatorFactories: List[QueryEvaluatorFactory] = null


  // Pages through the intersection of two users' followings.
  def intersectionOf(user1: Long, user2: Long, page: Page) = {
    select(Select(user1, FOLLOWS, ()) intersect Select(user2, FOLLOWS, ()), page)
  }

  // Shared example body, run once per intersectionPageSizeMax setting.
  def intersectAlot = {
    "intersection_for" in {
      "pagination" in {
        reset(config)
        execute(Select(alice, FOLLOWS, bob).add)
        execute(Select(alice, FOLLOWS, carl).add)
        execute(Select(alice, FOLLOWS, darcy).add)
        execute(Select(carl, FOLLOWS, bob).add)
        execute(Select(carl, FOLLOWS, darcy).add)

        flockService.contains(carl, FOLLOWS, darcy)() must eventually(beTrue)

        intersectionOf(alice, carl, new Page(1, Cursor.Start)) mustEqual ((List(darcy), Cursor(darcy), Cursor.End))
        intersectionOf(alice, carl, new Page(1, Cursor(darcy))) mustEqual ((List(bob), Cursor.End, Cursor(-bob)))
        intersectionOf(alice, carl, new Page(2, Cursor.Start)) mustEqual ((List(darcy, bob), Cursor.End, Cursor.End))
      }

      "one list is empty" in {
        reset(config)
        for (i <- 1 until 11) execute(Select(alice, FOLLOWS, i).add)
        count(Select(alice, FOLLOWS, ())) must eventually(be_==(10))

        intersectionOf(alice, carl, new Page(10, Cursor.Start)) mustEqual (Nil, Cursor.End, Cursor.End)
      }
    }
  }

  "Intersection" should {
    "with a large intersection" >> {
      // Page size 1 forces many internal fetch iterations.
      config.intersectionQuery.intersectionPageSizeMax = 1

      intersectAlot
    }

    "with a small intersection" >> {
      // Effectively unbounded page size: everything in one fetch.
      config.intersectionQuery.intersectionPageSizeMax = Integer.MAX_VALUE - 1

      intersectAlot
    }
  }
}
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package queries 19 | 20 | import com.twitter.util.{Duration, Future} 21 | 22 | class DifferenceQuery(query1: QueryTree, query2: QueryTree, averageIntersectionProportion: Double, 23 | intersectionPageSizeMax: Int, intersectionTimeout: Duration) 24 | extends ComplexQueryNode(query1, query2) { 25 | def sizeEstimate = query1.sizeEstimate 26 | 27 | def selectPage(count: Int, cursor: Cursor) = selectPageByDestinationId(count, cursor) 28 | 29 | def selectPageByDestinationId(count: Int, cursor: Cursor) = time { 30 | val guessedPageSize = (count + count * averageIntersectionProportion).toInt 31 | val internalPageSize = guessedPageSize min intersectionPageSizeMax 32 | val timeout = intersectionTimeout.inMillis 33 | val startTime = System.currentTimeMillis 34 | 35 | def loop(currCursor: Cursor): Future[ResultWindow[Long]] = { 36 | pageDifference(internalPageSize, count, currCursor) flatMap { resultWindow => 37 | if (resultWindow.page.size < count && 38 | resultWindow.continueCursor != Cursor.End && 39 | System.currentTimeMillis - startTime < timeout) { 40 | loop(resultWindow.continueCursor) map { resultWindow ++ _ } 41 | } else { 42 | Future(resultWindow) 43 | } 44 | } 45 | } 46 | 47 | loop(cursor) 48 | } 49 | 50 | def selectWhereIn(page: Seq[Long]) = time { 51 | for { 52 | results <- query1.selectWhereIn(page) 53 | rejects <- query2.selectWhereIn(results) 54 | } yield { 55 | val rejectsSet = rejects.toSet 56 | results.filterNot { rejectsSet.contains(_) } // use the Set: Seq.contains would be O(n) per element 57 | } 58 | } 59 | 60 | private def pageDifference(internalPageSize: Int, count: Int, cursor: Cursor) = { 61 | for { 62 | results <- query1.selectPageByDestinationId(internalPageSize, cursor) 63 | rejects <- query2.selectWhereIn(results.view) 64 | } yield results.diff(rejects, count) 65 | } 66 | 67 | override def toString = 68 | "" 69 | } 70 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/shards/Shard.scala:
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package shards 19 | 20 | import com.twitter.util.{Future, Time} 21 | 22 | trait Shard { 23 | def get(sourceId: Long, destinationId: Long): Future[Option[Edge]] 24 | def getMetadata(sourceId: Long): Future[Option[Metadata]] 25 | def getMetadataForWrite(sourceId: Long): Future[Option[Metadata]] 26 | 27 | def count(sourceId: Long, states: Seq[State]): Future[Int] 28 | 29 | def selectAll(cursor: (Cursor, Cursor), count: Int): Future[(Seq[Edge], (Cursor, Cursor))] 30 | def selectAllMetadata(cursor: Cursor, count: Int): Future[(Seq[Metadata], Cursor)] 31 | def selectIncludingArchived(sourceId: Long, count: Int, cursor: Cursor): Future[ResultWindow[Long]] 32 | def selectByDestinationId(sourceId: Long, states: Seq[State], count: Int, cursor: Cursor): Future[ResultWindow[Long]] 33 | def selectByPosition(sourceId: Long, states: Seq[State], count: Int, cursor: Cursor): Future[ResultWindow[Long]] 34 | def selectEdges(sourceId: Long, states: Seq[State], count: Int, cursor: Cursor): Future[ResultWindow[Edge]] 35 | 36 | def writeCopies(edge: Seq[Edge]): Future[Unit] 37 | def updateMetadata(metadata: Metadata): Future[Unit] 38 | def writeMetadata(metadata: Metadata): Future[Unit] 39 | def writeMetadatas(metadata: 
Seq[Metadata]): Future[Unit] 40 | 41 | def bulkUnsafeInsertEdges(edge: Seq[Edge]): Future[Unit] 42 | def bulkUnsafeInsertMetadata(edge: Seq[Metadata]): Future[Unit] 43 | 44 | def archive(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time): Future[Unit] 45 | def archive(sourceId: Long, updatedAt: Time): Future[Unit] 46 | 47 | def remove(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time): Future[Unit] 48 | def remove(sourceId: Long, updatedAt: Time): Future[Unit] 49 | 50 | def add(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time): Future[Unit] 51 | def add(sourceId: Long, updatedAt: Time): Future[Unit] 52 | 53 | def negate(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time): Future[Unit] 54 | def negate(sourceId: Long, updatedAt: Time): Future[Unit] 55 | 56 | def intersect(sourceId: Long, states: Seq[State], destinationIds: Seq[Long]): Future[Seq[Long]] 57 | def intersectEdges(sourceId: Long, states: Seq[State], destinationIds: Seq[Long]): Future[Seq[Edge]] 58 | } 59 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/StatsCollectingQuery.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.flockdb 2 | 3 | import com.twitter.gizzard.Stats 4 | import com.twitter.querulous.database.{Database, DatabaseFactory, DatabaseProxy} 5 | import com.twitter.querulous.query.{Query, QueryFactory, QueryClass, QueryProxy} 6 | import com.twitter.util.{Time, Duration} 7 | import java.sql.Connection 8 | 9 | class TransactionStatsCollectingQueryFactory(queryFactory: QueryFactory) 10 | extends QueryFactory { 11 | 12 | def apply(connection: Connection, queryClass: QueryClass, query: String, params: Any*) = { 13 | new TransactionStatsCollectingQuery(queryFactory(connection, queryClass, query, params: _*), queryClass, query) 14 | } 15 | } 16 | 17 | class TransactionStatsCollectingQuery(query: 
Query, queryClass: QueryClass, queryString: String) extends QueryProxy(query) { 18 | override def delegate[A](f: => A) = { 19 | Stats.transaction.record("Executing "+queryClass.name+" query: "+queryString) 20 | val start = Time.now 21 | try { 22 | val rv = f 23 | val duration = Time.now - start 24 | Stats.transaction.record("Query duration: "+duration.inMillis) 25 | rv 26 | } catch { 27 | case e => 28 | Stats.transaction.record("Failure executing query: "+e) 29 | val duration = Time.now - start 30 | Stats.transaction.record("Query duration: "+duration.inMillis) 31 | throw e 32 | } 33 | } 34 | } 35 | 36 | class TransactionStatsCollectingDatabaseFactory(databaseFactory: DatabaseFactory) extends DatabaseFactory { 37 | def apply(dbhosts: List[String], dbname: String, username: String, password: String, urlOptions: Map[String, String], driverName: String) = { 38 | new TransactionStatsCollectingDatabase(databaseFactory(dbhosts, dbname, username, password, urlOptions, driverName), dbhosts) 39 | } 40 | } 41 | 42 | class TransactionStatsCollectingDatabase(val database: Database, dbhosts: List[String]) extends DatabaseProxy { 43 | override def open(): Connection = { 44 | Stats.transaction.record("Opening a connection to: "+dbhosts.mkString(",")) 45 | val start = Time.now 46 | try { 47 | val rv = database.open() 48 | val duration = Time.now-start 49 | Stats.transaction.record("Open duration: "+duration.inMillis) 50 | rv 51 | } catch { 52 | case e => 53 | Stats.transaction.record("Failure opening a connection: "+e) 54 | val duration = Time.now-start 55 | Stats.transaction.record("Open duration: "+duration.inMillis) 56 | throw e 57 | } 58 | } 59 | 60 | override def close(connection: Connection) = { 61 | Stats.transaction.record("Closing connection to: "+dbhosts.mkString(",")) 62 | val start = Time.now 63 | try { 64 | val rv = database.close(connection) 65 | val duration = Time.now - start 66 | Stats.transaction.record("Close duration: "+duration.inMillis) 67 | rv 68 | } catch { 
69 | case e => 70 | Stats.transaction.record("Failure closing a connection: "+e) 71 | val duration = Time.now-start 72 | Stats.transaction.record("Close duration: "+duration.inMillis) 73 | throw e 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/scripts/mkshards.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # 3 | # Copyright 2010 Twitter, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); you may 6 | # not use this file except in compliance with the License. You may obtain 7 | # a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | $:.push(File.dirname($0)) 18 | require 'optparse' 19 | require 'yaml' 20 | 21 | options = { 22 | :config_filename => ENV['HOME'] + "/.shards.yml", 23 | :count => 500, 24 | } 25 | 26 | $stderr.puts "WARNING: This script is deprecated. Use 'gizzmo create-table' instead." 
27 | 28 | parser = OptionParser.new do |opts| 29 | opts.banner = "Usage: #{$0} [options] " 30 | opts.separator "Example: #{$0} -f shards.yml 11" 31 | 32 | opts.on("-f", "--config=FILENAME", "load shard database config (default: #{options[:config_filename]})") do |filename| 33 | options[:config_filename] = filename 34 | end 35 | opts.on("-n", "--count=N", "create N bins (default: #{options[:count]})") do |count| 36 | options[:count] = count.to_i 37 | end 38 | end 39 | 40 | parser.parse!(ARGV) 41 | 42 | if ARGV.size < 1 43 | puts 44 | puts parser 45 | puts 46 | exit 1 47 | end 48 | 49 | config = YAML.load_file(options[:config_filename]) rescue {} 50 | 51 | app_host, app_port = (config['app_host'] || 'localhost').split(':') 52 | app_port ||= 7920 53 | 54 | namespace = config['namespace'] || nil 55 | db_trees = Array(config['databases'] || 'localhost') 56 | graph_id = ARGV[0].to_i 57 | 58 | gizzmo = lambda do |cmd| 59 | `gizzmo --host=#{app_host} --port=#{app_port} #{cmd}` 60 | end 61 | 62 | 63 | print "Creating bins" 64 | STDOUT.flush 65 | options[:count].times do |i| 66 | table_name = [ namespace, "edges_#{graph_id}_%04d" % i ].compact.join("_") 67 | hosts = Array(db_trees[i % db_trees.size]) 68 | lower_bound = (1 << 60) / options[:count] * i 69 | types = "-s 'INT UNSIGNED' -d 'INT UNSIGNED'" 70 | 71 | [ "forward", "backward" ].each do |direction| 72 | gizzmo.call "create com.twitter.gizzard.shards.ReplicatingShard localhost/#{table_name}_#{direction}_replicating" 73 | 74 | distinct = 1 75 | hosts.each do |host| 76 | host, weight = host.split(':') 77 | weight ||= 1 78 | gizzmo.call "create #{types} com.twitter.flockdb.SqlShard #{host}/#{table_name}_#{direction}_#{distinct}" 79 | gizzmo.call "addlink localhost/#{table_name}_#{direction}_replicating #{host}/#{table_name}_#{direction}_#{distinct} #{weight}" 80 | distinct += 1 81 | end 82 | end 83 | 84 | gizzmo.call "addforwarding -- #{graph_id} #{lower_bound} localhost/#{table_name}_forward_replicating" 85 | gizzmo.call 
"addforwarding -- -#{graph_id} #{lower_bound} localhost/#{table_name}_backward_replicating" 86 | 87 | print "." 88 | print "#{i+1}" if (i + 1) % 100 == 0 89 | STDOUT.flush 90 | end 91 | puts "Done." 92 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/integration/SelectCompilerSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package integration 19 | 20 | import com.twitter.util.Time 21 | import com.twitter.conversions.time._ 22 | import org.specs.mock.{ClassMocker, JMocker} 23 | 24 | object SelectCompilerSpec extends IntegrationSpecification with JMocker with ClassMocker { 25 | "SelectCompiler integration" should { 26 | val FOLLOWS = 1 27 | 28 | val alice = 1L 29 | val bob = 2L 30 | val carl = 3L 31 | val darcy = 4L 32 | 33 | def setup1() { 34 | execute(Select(alice, FOLLOWS, bob).add) 35 | execute(Select(alice, FOLLOWS, carl).add) 36 | execute(Select(alice, FOLLOWS, darcy).add) 37 | execute(Select(carl, FOLLOWS, bob).add) 38 | execute(Select(carl, FOLLOWS, darcy).add) 39 | 40 | flockService.contains(carl, FOLLOWS, darcy)() must eventually(beTrue) 41 | } 42 | 43 | def setup2() { 44 | for (i <- 1 until 11) execute(Select(alice, FOLLOWS, i).add) 45 | for (i <- 1 until 7) execute(Select(bob, FOLLOWS, i * 2).add) 46 | 47 | count(Select(alice, FOLLOWS, ())) must eventually(be_==(10)) 48 | count(Select(bob, FOLLOWS, ())) must eventually(be_==(6)) 49 | } 50 | 51 | "pagination" in { 52 | reset(config) 53 | setup1() 54 | 55 | val program = Select(alice, FOLLOWS, ()) intersect Select(carl, FOLLOWS, ()) 56 | 57 | select(program, Page(1, Cursor.Start)) mustEqual ((List(darcy), Cursor(darcy), Cursor.End)) 58 | select(program, new Page(1, Cursor(darcy))) mustEqual ((List(bob), Cursor.End, Cursor(-bob))) 59 | select(program, Page(2, Cursor.Start)) mustEqual ((List(darcy, bob), Cursor.End, Cursor.End)) 60 | } 61 | 62 | "one list is empty" in { 63 | reset(config) 64 | setup2() 65 | 66 | val program = Select(alice, FOLLOWS, ()) intersect Select(carl, FOLLOWS, ()) 67 | 68 | select(program, new Page(10, Cursor.Start)) mustEqual ((List(), Cursor.End, Cursor.End)) 69 | } 70 | 71 | "difference" in { 72 | reset(config) 73 | setup2() 74 | 75 | val program = Select(alice, FOLLOWS, ()) difference Select(bob, FOLLOWS, ()) 76 | 77 | select(program, new Page(10, 
Cursor.Start)) mustEqual ((List(9,7,5,3,1), Cursor.End, Cursor.End)) 78 | select(program, new Page(2, Cursor.Start)) mustEqual ((List(9,7), Cursor(7), Cursor.End)) 79 | select(program, new Page(2, Cursor(7))) mustEqual ((List(5,3), Cursor(3), Cursor(-5))) 80 | select(program, new Page(2, Cursor(3))) mustEqual ((List(1), Cursor.End, Cursor(-1))) 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/unit/SimpleQuerySpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package unit 19 | 20 | import com.twitter.util.Future 21 | import org.specs.mock.JMocker 22 | import shards.Shard 23 | 24 | object SimpleQuerySpec extends ConfiguredSpecification with JMocker { 25 | "SimpleQuery" should { 26 | var shard: Shard = null 27 | var simpleQuery: queries.SimpleQuery = null 28 | val sourceId = 900 29 | 30 | doBefore { 31 | shard = mock[Shard] 32 | } 33 | 34 | "sizeEstimate" in { 35 | "when the state is normal" >> { 36 | expect { 37 | one(shard).count(sourceId, List(State.Normal)) willReturn Future(10) 38 | } 39 | simpleQuery = new queries.SimpleQuery(shard, sourceId, List(State.Normal)) 40 | simpleQuery.sizeEstimate()() mustEqual 10 41 | } 42 | 43 | "when the state is abnormal" >> { 44 | expect { 45 | one(shard).count(sourceId, List(State.Removed)) willReturn Future(10) 46 | } 47 | simpleQuery = new queries.SimpleQuery(shard, sourceId, List(State.Removed)) 48 | simpleQuery.sizeEstimate()() mustEqual 10 49 | } 50 | } 51 | 52 | "selectWhereIn" in { 53 | val page = List(1L, 2L, 3L, 4L) 54 | expect { 55 | one(shard).intersect(sourceId, List(State.Normal), page) willReturn Future(List(1L, 2L)) 56 | } 57 | simpleQuery = new queries.SimpleQuery(shard, sourceId, List(State.Normal)) 58 | simpleQuery.selectWhereIn(page)().toList mustEqual List(1L, 2L) 59 | } 60 | 61 | "selectPage" in { 62 | var edges = List[Long](101L, 103L, 104L, 107L, 108L) 63 | val cursor = Cursor(102L) 64 | val count = 5 65 | expect { 66 | allowing(shard).selectByPosition(sourceId, List(State.Normal), count, cursor) willReturn Future(new ResultWindow(Cursor.cursorZip(edges), Cursor.End, Cursor.End, count, cursor)) 67 | } 68 | simpleQuery = new queries.SimpleQuery(shard, sourceId, List(State.Normal)) 69 | simpleQuery.selectPage(count, cursor)().toTuple mustEqual (edges, Cursor.End, Cursor.End) 70 | } 71 | 72 | "selectPageByDestinationId" in { 73 | val edges = List[Long](101L, 103L, 104L, 107L, 108L) 74 | val cursor = Cursor(102L) 
75 | val count = 5 76 | expect { 77 | allowing(shard).selectByDestinationId(sourceId, List(State.Normal), count, cursor) willReturn Future(new ResultWindow(Cursor.cursorZip(edges), Cursor.End, Cursor.End, count, cursor)) 78 | } 79 | simpleQuery = new queries.SimpleQuery(shard, sourceId, List(State.Normal)) 80 | simpleQuery.selectPageByDestinationId(count, cursor)().toTuple mustEqual (edges, Cursor.End, Cursor.End) 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/queries/IntersectionQuery.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package queries 19 | 20 | import com.twitter.util.{Duration, Future} 21 | import com.twitter.gizzard.Stats 22 | 23 | class IntersectionQuery(query1: QueryTree, query2: QueryTree, averageIntersectionProportion: Double, intersectionPageSizeMax: Int, intersectionTimeout: Duration) extends ComplexQueryNode(query1, query2) { 24 | def sizeEstimate() = { 25 | getSizeEstimates() map { case (count1, count2) => 26 | ((count1 min count2) * averageIntersectionProportion).toInt 27 | } 28 | } 29 | 30 | def selectPage(count: Int, cursor: Cursor) = selectPageByDestinationId(count, cursor) 31 | 32 | def selectPageByDestinationId(count: Int, cursor: Cursor) = time { 33 | getSizeEstimates() flatMap { case (count1, count2) => 34 | if (count1 == 0 || count2 == 0) { 35 | Future(new ResultWindow(List[(Long,Cursor)](), count, cursor)) 36 | } else { 37 | val guessedPageSize = (count / averageIntersectionProportion).toInt 38 | val internalPageSize = guessedPageSize min intersectionPageSizeMax.toInt 39 | val timeout = intersectionTimeout.inMillis 40 | val startTime = System.currentTimeMillis 41 | 42 | def loop(smaller: Query, larger: Query, currCursor: Cursor): Future[ResultWindow[Long]] = { 43 | pageIntersection(smaller, larger, internalPageSize, count, currCursor) flatMap { resultWindow => 44 | if (resultWindow.page.size < count && 45 | resultWindow.continueCursor != Cursor.End && 46 | System.currentTimeMillis - startTime < timeout) { 47 | loop(smaller, larger, resultWindow.continueCursor) map { resultWindow ++ _ } 48 | } else { 49 | Future(resultWindow) 50 | } 51 | } 52 | } 53 | 54 | orderQueries() flatMap { case (smaller, larger) => loop(smaller, larger, cursor) } 55 | } 56 | } 57 | } 58 | 59 | def selectWhereIn(page: Seq[Long]) = time { 60 | orderQueries() flatMap { case (smaller, larger) => 61 | smaller.selectWhereIn(page) flatMap { larger.selectWhereIn(_) } 62 | } 63 | } 64 | 65 | private def pageIntersection(smallerQuery: Query, 
largerQuery: Query, internalPageSize: Int, count: Int, cursor: Cursor) = { 66 | for { 67 | results <- smallerQuery.selectPageByDestinationId(internalPageSize, cursor) 68 | whereIn <- largerQuery.selectWhereIn(results.view) 69 | } yield { 70 | new ResultWindow(Cursor.cursorZip(whereIn), results.nextCursor, results.prevCursor, count, cursor) 71 | } 72 | } 73 | 74 | override def toString = 75 | "" 76 | } 77 | -------------------------------------------------------------------------------- /src/scripts/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # flockdb init.d script. 4 | # 5 | 6 | APP_NAME="flock" 7 | ADMIN_PORT="9990" 8 | VERSION="@VERSION@" 9 | APP_HOME="/usr/local/$APP_NAME/current" 10 | DAEMON="/usr/local/bin/daemon" 11 | 12 | JAR_NAME="flockdb-$VERSION.jar" 13 | STAGE="production" 14 | 15 | HEAP_OPTS="-Xmx4096m -Xms4096m -XX:NewSize=768m" 16 | GC_OPTS="-verbosegc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+UseConcMarkSweepGC -XX:+UseParNewGC" 17 | DEBUG_OPTS="-XX:ErrorFile=/var/log/$APP_NAME/java_error%p.log" 18 | JAVA_OPTS="-server -Dstage=$STAGE $GC_OPTS $HEAP_OPTS $DEBUG_OPTS" 19 | 20 | pidfile="/var/run/$APP_NAME/$APP_NAME.pid" 21 | daemon_pidfile="/var/run/$APP_NAME/$APP_NAME-daemon.pid" 22 | daemon_args="--name $APP_NAME --pidfile $daemon_pidfile --core --chdir /" 23 | daemon_start_args="--stdout=/var/log/$APP_NAME/stdout --stderr=/var/log/$APP_NAME/error" 24 | 25 | 26 | function running() { 27 | $DAEMON $daemon_args --running 28 | } 29 | 30 | function find_java() { 31 | if [ ! -z "$JAVA_HOME" ]; then 32 | return 33 | fi 34 | for dir in /opt/jdk /System/Library/Frameworks/JavaVM.framework/Versions/CurrentJDK/Home /usr/java/default; do 35 | if [ -x $dir/bin/java ]; then 36 | JAVA_HOME=$dir 37 | break 38 | fi 39 | done 40 | } 41 | 42 | find_java 43 | 44 | 45 | case "$1" in 46 | start) 47 | echo -n "Starting $APP_NAME... " 48 | 49 | if [ ! 
-r $APP_HOME/$JAR_NAME ]; then 50 | echo "FAIL" 51 | echo "*** $APP_NAME jar missing: $APP_HOME/$JAR_NAME - not starting" 52 | exit 1 53 | fi 54 | if [ ! -x $JAVA_HOME/bin/java ]; then 55 | echo "FAIL" 56 | echo "*** $JAVA_HOME/bin/java doesn't exist -- check JAVA_HOME?" 57 | exit 1 58 | fi 59 | if running; then 60 | echo "already running." 61 | exit 0 62 | fi 63 | 64 | ulimit -n 32768 || echo -n " (no ulimit)" 65 | ulimit -c unlimited || echo -n " (no coredump)" 66 | $DAEMON $daemon_args $daemon_start_args -- sh -c "echo "'$$'" > $pidfile; exec ${JAVA_HOME}/bin/java ${JAVA_OPTS} -jar ${APP_HOME}/${JAR_NAME}" 67 | tries=0 68 | while ! running; do 69 | tries=$((tries + 1)) 70 | if [ $tries -ge 5 ]; then 71 | echo "FAIL" 72 | exit 1 73 | fi 74 | sleep 1 75 | done 76 | echo "done." 77 | ;; 78 | 79 | stop) 80 | echo -n "Stopping $APP_NAME... " 81 | if ! running; then 82 | echo "wasn't running." 83 | exit 0 84 | fi 85 | 86 | curl -s http://localhost:${ADMIN_PORT}/shutdown.txt > /dev/null 87 | tries=0 88 | while running; do 89 | tries=$((tries + 1)) 90 | if [ $tries -ge 15 ]; then 91 | echo "FAILED SOFT SHUTDOWN, TRYING HARDER" 92 | if [ -f $pidfile ]; then 93 | kill $(cat $pidfile) 94 | else 95 | echo "CAN'T FIND PID, TRY KILL MANUALLY" 96 | exit 1 97 | fi 98 | hardtries=0 99 | while running; do 100 | hardtries=$((hardtries + 1)) 101 | if [ $hardtries -ge 5 ]; then 102 | echo "FAILED HARD SHUTDOWN, TRY KILL -9 MANUALLY" 103 | exit 1 104 | fi 105 | sleep 1 106 | done 107 | fi 108 | sleep 1 109 | done 110 | echo "done." 111 | ;; 112 | 113 | status) 114 | if running; then 115 | echo "$APP_NAME is running." 116 | else 117 | echo "$APP_NAME is NOT running." 
118 | fi 119 | ;; 120 | 121 | restart) 122 | $0 stop 123 | sleep 2 124 | $0 start 125 | ;; 126 | 127 | *) 128 | echo "Usage: /etc/init.d/${APP_NAME}.sh {start|stop|restart|status}" 129 | exit 1 130 | ;; 131 | esac 132 | 133 | exit 0 134 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/queries/ExecuteCompiler.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package queries 19 | 20 | import scala.collection.mutable 21 | import com.twitter.gizzard.Stats 22 | import com.twitter.gizzard.scheduler.{JsonJob, JsonNestedJob, PrioritizingJobScheduler} 23 | import com.twitter.gizzard.shards.ShardException 24 | import com.twitter.gizzard.thrift.conversions.Sequences._ 25 | import com.twitter.util.Time 26 | import com.twitter.util.TimeConversions._ 27 | import jobs.single.Single 28 | import jobs.multi.Multi 29 | import operations.{ExecuteOperations, ExecuteOperationType} 30 | 31 | 32 | class ExecuteCompiler(scheduler: PrioritizingJobScheduler, forwardingManager: ForwardingManager, aggregateJobPageSize: Int) { 33 | @throws(classOf[ShardException]) 34 | def apply(program: ExecuteOperations) { 35 | val now = Time.now 36 | val operations = program.operations 37 | val results = new mutable.ArrayBuffer[JsonJob] 38 | if (operations.size == 0) throw new InvalidQueryException("You must have at least one operation") 39 | 40 | for (op <- operations) { 41 | val term = op.term 42 | val time = program.executeAt.map(Time.fromSeconds).getOrElse(Time.now) 43 | val position = op.position.getOrElse(Time.now.inMillis) 44 | 45 | // force an exception for nonexistent graphs 46 | forwardingManager.find(0, term.graphId, Direction.Forward) 47 | 48 | val state = op.operationType match { 49 | case ExecuteOperationType.Add => State.Normal 50 | case ExecuteOperationType.Remove => State.Removed 51 | case ExecuteOperationType.Archive => State.Archived 52 | case ExecuteOperationType.Negate => State.Negative 53 | case n => throw new InvalidQueryException("Unknown operation " + n) 54 | } 55 | 56 | results ++= processDestinations(term) { (sourceId, destinationId) => 57 | new Single( 58 | sourceId, 59 | term.graphId, 60 | destinationId, 61 | position, 62 | state, 63 | time, 64 | null, 65 | null 66 | ) 67 | } { 68 | new Multi( 69 | term.sourceId, 70 | term.graphId, 71 | Direction(term.isForward), 72 | state, 73 | 
time, 74 | program.priority, 75 | aggregateJobPageSize, 76 | null, 77 | null 78 | ) 79 | } 80 | } 81 | 82 | Stats.transaction.set("job", results.map { _.toJson }.mkString(", ")) 83 | scheduler.put(program.priority.id, new JsonNestedJob(results)) 84 | } 85 | 86 | private def processDestinations(term: QueryTerm)(handleItemInCollection: (Long, Long) => JsonJob)(noDestinations: JsonJob) = { 87 | if (term.destinationIds.isDefined) { 88 | for (d <- term.destinationIds.get) yield { 89 | val (sourceId, destinationId) = if (term.isForward) { 90 | (term.sourceId, d) 91 | } else { 92 | (d, term.sourceId) 93 | } 94 | handleItemInCollection(sourceId, destinationId) 95 | } 96 | } else { 97 | List(noDestinations) 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /config/development.scala: -------------------------------------------------------------------------------- 1 | import scala.collection.JavaConversions._ 2 | import com.twitter.flockdb.config._ 3 | import com.twitter.gizzard.config._ 4 | import com.twitter.querulous.config._ 5 | import com.twitter.querulous.StatsCollector 6 | import com.twitter.conversions.time._ 7 | import com.twitter.conversions.storage._ 8 | import com.twitter.flockdb.shards.QueryClass 9 | import com.twitter.flockdb.Priority 10 | import com.twitter.ostrich.admin.config.AdminServiceConfig 11 | import com.twitter.logging.Level 12 | import com.twitter.logging.config._ 13 | 14 | trait Credentials extends Connection { 15 | val env = System.getenv().toMap 16 | val username = env.get("DB_USERNAME").getOrElse("root") 17 | val password = env.get("DB_PASSWORD").getOrElse("") 18 | } 19 | 20 | class ProductionQueryEvaluator extends AsyncQueryEvaluator { 21 | override var workPoolSize = 40 22 | database.memoize = true 23 | database.pool = new ThrottledPoolingDatabase { 24 | size = workPoolSize 25 | openTimeout = 100.millis 26 | } 27 | 28 | query.timeouts = Map( 29 | QueryClass.Select -> 
QueryTimeout(1.second), 30 | QueryClass.Execute -> QueryTimeout(1.second), 31 | QueryClass.SelectCopy -> QueryTimeout(15.seconds), 32 | QueryClass.SelectModify -> QueryTimeout(3.seconds), 33 | QueryClass.SelectSingle -> QueryTimeout(1.second), 34 | QueryClass.SelectIntersection -> QueryTimeout(1.second), 35 | QueryClass.SelectIntersectionSmall -> QueryTimeout(1.second), 36 | QueryClass.SelectMetadata -> QueryTimeout(1.second) 37 | ) 38 | } 39 | 40 | class ProductionNameServerReplica(host: String) extends Mysql { 41 | val connection = new Connection with Credentials { 42 | val hostnames = Seq(host) 43 | val database = "flockdb_development" 44 | } 45 | 46 | queryEvaluator = new QueryEvaluator { 47 | database.memoize = true 48 | database.pool = new ThrottledPoolingDatabase { 49 | size = 1 50 | openTimeout = 1.second 51 | } 52 | } 53 | } 54 | 55 | new FlockDB { 56 | aggregateJobsPageSize = 500 57 | 58 | mappingFunction = ByteSwapper 59 | jobRelay = NoJobRelay 60 | nameServerReplicas = Seq(new ProductionNameServerReplica("localhost")) 61 | jobInjector.timeout = 100.millis 62 | jobInjector.idleTimeout = 60.seconds 63 | jobInjector.threadPool.minThreads = 30 64 | 65 | val databaseConnection = new Credentials { 66 | val hostnames = Seq("localhost") 67 | val database = "edges_development" 68 | urlOptions = Map("rewriteBatchedStatements" -> "true") 69 | } 70 | 71 | val edgesQueryEvaluator = new ProductionQueryEvaluator 72 | 73 | val lowLatencyQueryEvaluator = edgesQueryEvaluator 74 | 75 | val materializingQueryEvaluator = new ProductionQueryEvaluator { 76 | workPoolSize = 1 77 | database.pool = new ThrottledPoolingDatabase { 78 | size = workPoolSize 79 | openTimeout = 1.second 80 | } 81 | } 82 | 83 | class DevelopmentScheduler(val name: String) extends Scheduler { 84 | override val jobQueueName = name + "_jobs" 85 | val schedulerType = new KestrelScheduler { 86 | val queuePath = "." 
87 | } 88 | 89 | errorLimit = 100 90 | errorRetryDelay = 15.minutes 91 | errorStrobeInterval = 1.second 92 | perFlushItemLimit = 100 93 | jitterRate = 0 94 | } 95 | 96 | val jobQueues = Map( 97 | Priority.High.id -> new DevelopmentScheduler("edges") { threads = 32 }, 98 | Priority.Medium.id -> new DevelopmentScheduler("copy") { threads = 12; errorRetryDelay = 60.seconds }, 99 | Priority.Low.id -> new DevelopmentScheduler("edges_slow") { threads = 2 } 100 | ) 101 | 102 | val adminConfig = new AdminServiceConfig { 103 | httpPort = Some(9990) 104 | } 105 | 106 | loggers = List(new LoggerConfig { 107 | level = Some(Level.INFO) 108 | handlers = List( 109 | new ThrottledHandlerConfig { 110 | duration = 60.seconds 111 | maxToDisplay = 10 112 | handler = new FileHandlerConfig { 113 | filename = "development.log" 114 | roll = Policy.Hourly 115 | } 116 | } 117 | ) 118 | }) 119 | } 120 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/unit/EdgesSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package unit 19 | 20 | import com.twitter.gizzard.scheduler._ 21 | import com.twitter.gizzard.shards.ShardInfo 22 | import com.twitter.util.{Future, Time} 23 | import com.twitter.conversions.time._ 24 | import org.specs.mock.{ClassMocker, JMocker} 25 | import jobs.single.Single 26 | import shards.Shard 27 | import State._ 28 | import com.twitter.flockdb.operations._ 29 | 30 | 31 | object EdgesSpec extends ConfiguredSpecification with JMocker with ClassMocker { 32 | "Edges" should { 33 | val FOLLOWS = 1 34 | 35 | val bob = 1L 36 | val mary = 2L 37 | 38 | val nestedJob = capturingParam[JsonNestedJob] 39 | val uuidGenerator = mock[UuidGenerator] 40 | val forwardingManager = mock[ForwardingManager] 41 | val shard = mock[Shard] 42 | val scheduler = mock[PrioritizingJobScheduler] 43 | val flock = new EdgesService(forwardingManager, scheduler, config.intersectionQuery, config.aggregateJobsPageSize) 44 | 45 | def toExecuteOperations(e: Execute) = ExecuteOperations(e.toOperations, None, Priority.High) 46 | 47 | "add" in { 48 | Time.withCurrentTimeFrozen { time => 49 | val job = new Single(bob, FOLLOWS, mary, Time.now.inMillis, State.Normal, Time.now, null, null) 50 | expect { 51 | one(forwardingManager).find(0, FOLLOWS, Direction.Forward) 52 | one(scheduler).put(will(beEqual(Priority.High.id)), nestedJob.capture) 53 | } 54 | flock.execute(toExecuteOperations(Select(bob, FOLLOWS, mary).add))() 55 | jsonMatching(List(job), nestedJob.captured.jobs) 56 | } 57 | } 58 | 59 | "add_at" in { 60 | Time.withCurrentTimeFrozen { time => 61 | val job = new Single(bob, FOLLOWS, mary, Time.now.inMillis, State.Normal, Time.now, null, null) 62 | expect { 63 | one(forwardingManager).find(0, FOLLOWS, Direction.Forward) 64 | one(scheduler).put(will(beEqual(Priority.High.id)), nestedJob.capture) 65 | } 66 | flock.execute(toExecuteOperations(Select(bob, FOLLOWS, mary).addAt(Time.now)))() 67 | jsonMatching(List(job), nestedJob.captured.jobs) 68 | 
} 69 | } 70 | 71 | "remove" in { 72 | Time.withCurrentTimeFrozen { time => 73 | val job = new Single(bob, FOLLOWS, mary, Time.now.inMillis, State.Removed, Time.now, null, null) 74 | expect { 75 | one(forwardingManager).find(0, FOLLOWS, Direction.Forward) 76 | one(scheduler).put(will(beEqual(Priority.High.id)), nestedJob.capture) 77 | } 78 | flock.execute(toExecuteOperations(Select(bob, FOLLOWS, mary).remove))() 79 | jsonMatching(List(job), nestedJob.captured.jobs) 80 | } 81 | } 82 | 83 | "remove_at" in { 84 | Time.withCurrentTimeFrozen { time => 85 | val job = new Single(bob, FOLLOWS, mary, Time.now.inMillis, State.Removed, Time.now, null, null) 86 | expect { 87 | one(forwardingManager).find(0, FOLLOWS, Direction.Forward) 88 | one(scheduler).put(will(beEqual(Priority.High.id)), nestedJob.capture) 89 | } 90 | flock.execute(toExecuteOperations(Select(bob, FOLLOWS, mary).removeAt(Time.now)))() 91 | jsonMatching(List(job), nestedJob.captured.jobs) 92 | } 93 | } 94 | 95 | "contains" in { 96 | Time.withCurrentTimeFrozen { time => 97 | expect { 98 | one(forwardingManager).find(bob, FOLLOWS, Direction.Forward) willReturn shard 99 | one(shard).get(bob, mary) willReturn Future(Some(new Edge(bob, mary, 0, Time.now, 0, State.Normal))) 100 | } 101 | flock.contains(bob, FOLLOWS, mary)() must beTrue 102 | } 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/queries/SelectCompiler.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain
 * a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.twitter.flockdb
package queries

import scala.collection.mutable
import com.twitter.gizzard.Stats
import operations.{SelectOperation, SelectOperationType}
import thrift.FlockException


class InvalidQueryException(reason: String) extends FlockException(reason)

/**
 * Compiles a postfix (reverse polish) program of SelectOperations into an
 * executable Query tree, recording per-graph stats and the query plan in the
 * current Stats transaction.
 */
class SelectCompiler(forwardingManager: ForwardingManager, intersectionConfig: config.IntersectionQuery) {

  // Fold step: tracks how many operands would be on the evaluation stack.
  // A literal pushes one; each binary operator consumes two and pushes one.
  // Throws InvalidQueryException as soon as an operator lacks two operands.
  private def validateProgram(acc: Int, op: SelectOperation) = op.operationType match {
    case SelectOperationType.SimpleQuery => acc + 1
    case SelectOperationType.Intersection =>
      if (acc < 2) throw new InvalidQueryException("Need two sub-queries to do an intersection")
      acc - 1
    case SelectOperationType.Union =>
      if (acc < 2) throw new InvalidQueryException("Need two sub-queries to do a union")
      acc - 1
    case SelectOperationType.Difference =>
      if (acc < 2) throw new InvalidQueryException("Need two sub-queries to do a difference")
      acc - 1
    case n => throw new InvalidQueryException("Unknown operation " + n)
  }

  def apply(program: Seq[SelectOperation]): Query = {

    // program is a list representation of a compound query in reverse polish (postfix) notation
    // with one literal (SimpleQuery) and three binary operators (Intersection, Union, Difference)
    // left fold over list to ensure that a valid parsing exists
    val items = program.foldLeft(0)(validateProgram)
    // FIX: "instaed" typo in the error message corrected to "instead".
    if (items != 1) throw new InvalidQueryException("Left " + items + " items on the stack instead of 1")

    // FIX: stack is never reassigned, so declare it as a val rather than a var.
    val stack = new mutable.Stack[QueryTree]
    val graphIds = new mutable.HashSet[String]
    for (op <- program) op.operationType match {
      case SelectOperationType.SimpleQuery =>
        val term = op.term.get

        // denote n for a backwards query
        graphIds += (if (term.isForward) "" else "n") + term.graphId

        val shard = forwardingManager.find(term.sourceId, term.graphId, Direction(term.isForward))
        val states = if (term.states.isEmpty) List(State.Normal) else term.states
        val query = if (term.destinationIds.isDefined) {
          new WhereInQuery(shard, term.sourceId, states, term.destinationIds.get)
        } else {
          new SimpleQuery(shard, term.sourceId, states)
        }
        stack.push(query)
      case SelectOperationType.Intersection =>
        // Intersection and union are commutative, so pop order doesn't matter here.
        stack.push(intersectionConfig.intersect(stack.pop, stack.pop))
      case SelectOperationType.Union =>
        stack.push(new UnionQuery(stack.pop, stack.pop))
      case SelectOperationType.Difference =>
        // Difference is NOT commutative: the right operand is on top of the stack.
        val rightSide = stack.pop
        val leftSide = stack.pop
        stack.push(intersectionConfig.difference(leftSide, rightSide))
    }
    val rv = stack.pop

    // complexity == 0 indicates only a single literal (no binary operators) -- program is length 1
    val complexity = rv.getComplexity()
    val name = if (complexity > 0) {
      "select-complex-" + complexity
    } else {
      "select" + (rv match {
        case query: WhereInQuery => if (program.head.term.get.destinationIds.get.size == 1) "-single" else "-simple"
        case query: SimpleQuery => if (program.head.term.get.states.size > 1) "-multistate" else ""
      })
    }

    // collect stats per graph
    for (graphId <- graphIds) {
      Stats.incr(name + "-graph_" + graphId + "-count");
    }

    Stats.transaction.record("Query Plan: " + rv.toString)
    Stats.transaction.name = name
    rv
  }
}
-------------------------------------------------------------------------------- /config/test.scala: -------------------------------------------------------------------------------- 1 | import com.twitter.flockdb.config._ 2 | import com.twitter.gizzard.config._ 3 | import com.twitter.gizzard.TransactionalStatsProvider 4 | import com.twitter.querulous.config._ 5 | import com.twitter.querulous.database.DatabaseFactory 6 | import com.twitter.querulous.query.QueryFactory 7 | import com.twitter.querulous.StatsCollector 8 | import com.twitter.conversions.time._ 9 | import com.twitter.conversions.storage._ 10 | import com.twitter.flockdb.shards.QueryClass 11 | import com.twitter.flockdb.{MemoizedQueryEvaluators, Priority} 12 | import com.twitter.ostrich.admin.config.AdminServiceConfig 13 | import com.twitter.logging.{Level, Logger} 14 | import com.twitter.logging.config.{FileHandlerConfig, LoggerConfig} 15 | 16 | 17 | trait Credentials extends Connection { 18 | import scala.collection.JavaConversions._ 19 | val env = System.getenv().toMap 20 | val username = env.get("DB_USERNAME").getOrElse("root") 21 | val password = env.get("DB_PASSWORD").getOrElse("") 22 | urlOptions = Map("connectTimeout" -> "0") 23 | } 24 | 25 | class TestQueryEvaluator(label: String) extends AsyncQueryEvaluator { 26 | query.debug = { s => Logger.get("query").debug(s) } 27 | override var workPoolSize = 2 28 | singletonFactory = true 29 | database.memoize = true 30 | database.pool = new ThrottledPoolingDatabase { 31 | size = workPoolSize 32 | openTimeout = 5.seconds 33 | } 34 | 35 | query.timeouts = Map( 36 | QueryClass.Select -> QueryTimeout(5.seconds), 37 | QueryClass.SelectModify -> QueryTimeout(5.seconds), 38 | QueryClass.SelectCopy -> QueryTimeout(15.seconds), 39 | QueryClass.Execute -> QueryTimeout(5.seconds), 40 | QueryClass.SelectSingle -> QueryTimeout(5.seconds), 41 | QueryClass.SelectIntersection -> QueryTimeout(5.seconds), 42 | QueryClass.SelectIntersectionSmall -> QueryTimeout(5.seconds), 
43 | QueryClass.SelectMetadata -> QueryTimeout(5.seconds) 44 | ) 45 | } 46 | 47 | class NameserverQueryEvaluator extends QueryEvaluator { 48 | singletonFactory = true 49 | database.memoize = true 50 | database.pool = new ThrottledPoolingDatabase { 51 | size = 1 52 | openTimeout = 5.seconds 53 | } 54 | } 55 | 56 | new FlockDB { 57 | mappingFunction = Identity 58 | jobRelay = NoJobRelay 59 | 60 | nameServerReplicas = Seq(new Mysql { 61 | queryEvaluator = new NameserverQueryEvaluator 62 | 63 | val connection = new Connection with Credentials { 64 | val hostnames = Seq("localhost") 65 | val database = "flock_edges_test" 66 | } 67 | }) 68 | 69 | jobInjector.timeout = 100.milliseconds 70 | jobInjector.idleTimeout = 60.seconds 71 | jobInjector.threadPool.minThreads = 30 72 | 73 | // Database Connectivity 74 | 75 | val databaseConnection = new Credentials { 76 | val hostnames = Seq("localhost") 77 | val database = "edges_test" 78 | } 79 | 80 | val edgesQueryEvaluator = new TestQueryEvaluator("edges") 81 | val lowLatencyQueryEvaluator = edgesQueryEvaluator 82 | val materializingQueryEvaluator = edgesQueryEvaluator 83 | 84 | // schedulers 85 | 86 | class TestScheduler(val name: String) extends Scheduler { 87 | jobQueueName = name + "_jobs" 88 | 89 | val schedulerType = new KestrelScheduler { 90 | path = "/tmp" 91 | keepJournal = false 92 | maxMemorySize = 36.megabytes 93 | } 94 | 95 | threads = 2 96 | errorLimit = 25 97 | errorRetryDelay = 900.seconds 98 | errorStrobeInterval = 30.seconds 99 | perFlushItemLimit = 1000 100 | jitterRate = 0.0f 101 | } 102 | 103 | val jobQueues = Map( 104 | Priority.High.id -> new TestScheduler("edges"), 105 | Priority.Medium.id -> new TestScheduler("copy"), 106 | Priority.Low.id -> new TestScheduler("edges_slow") 107 | ) 108 | 109 | 110 | // Admin/Logging 111 | 112 | val adminConfig = new AdminServiceConfig { 113 | httpPort = Some(9990) 114 | } 115 | 116 | loggers = List(new LoggerConfig { 117 | level = Some(Level.INFO) 118 | handlers = 
List(new FileHandlerConfig { filename = "test.log" }) 119 | }) 120 | 121 | queryStats.consumers = Seq(new AuditingTransactionalStatsConsumer { 122 | names = Set("execute") 123 | override def apply() = { new com.twitter.gizzard.AuditingTransactionalStatsConsumer(new com.twitter.gizzard.LoggingTransactionalStatsConsumer("audit_log") { 124 | def transactionToString(t: TransactionalStatsProvider) = { t.get("job").asInstanceOf[String] } 125 | }, names)}}) 126 | } 127 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/jobs/Legacy.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb.jobs 18 | 19 | import com.twitter.logging.Logger 20 | import com.twitter.util.Time 21 | import com.twitter.gizzard.scheduler._ 22 | import com.twitter.gizzard.shards._ 23 | import com.twitter.flockdb.{State, ForwardingManager, Cursor, UuidGenerator, Direction, Priority} 24 | import com.twitter.flockdb.conversions.Numeric._ 25 | import com.twitter.flockdb.jobs.single.Single 26 | import com.twitter.flockdb.jobs.multi.Multi 27 | 28 | 29 | // Legacy parsers for old format jobs without state 30 | // XXX: remove once we're off of the old format, or factor out common code with above. 
31 | 32 | object LegacySingleJobParser { 33 | def Add(forwardingManager: ForwardingManager, uuidGenerator: UuidGenerator) = { 34 | new LegacySingleJobParser(forwardingManager, uuidGenerator, State.Normal) 35 | } 36 | 37 | def Negate(forwardingManager: ForwardingManager, uuidGenerator: UuidGenerator) = { 38 | new LegacySingleJobParser(forwardingManager, uuidGenerator, State.Negative) 39 | } 40 | 41 | def Archive(forwardingManager: ForwardingManager, uuidGenerator: UuidGenerator) = { 42 | new LegacySingleJobParser(forwardingManager, uuidGenerator, State.Archived) 43 | } 44 | 45 | def Remove(forwardingManager: ForwardingManager, uuidGenerator: UuidGenerator) = { 46 | new LegacySingleJobParser(forwardingManager, uuidGenerator, State.Removed) 47 | } 48 | } 49 | 50 | object LegacyMultiJobParser { 51 | def Archive( 52 | forwardingManager: ForwardingManager, 53 | scheduler: PrioritizingJobScheduler, 54 | aggregateJobPageSize: Int 55 | ) = { 56 | new LegacyMultiJobParser(forwardingManager, scheduler, aggregateJobPageSize, State.Archived) 57 | } 58 | 59 | def Unarchive( 60 | forwardingManager: ForwardingManager, 61 | scheduler: PrioritizingJobScheduler, 62 | aggregateJobPageSize: Int 63 | ) = { 64 | new LegacyMultiJobParser(forwardingManager, scheduler, aggregateJobPageSize, State.Normal) 65 | } 66 | 67 | def RemoveAll( 68 | forwardingManager: ForwardingManager, 69 | scheduler: PrioritizingJobScheduler, 70 | aggregateJobPageSize: Int 71 | ) = { 72 | new LegacyMultiJobParser(forwardingManager, scheduler, aggregateJobPageSize, State.Removed) 73 | } 74 | 75 | def Negate( 76 | forwardingManager: ForwardingManager, 77 | scheduler: PrioritizingJobScheduler, 78 | aggregateJobPageSize: Int 79 | ) = { 80 | new LegacyMultiJobParser(forwardingManager, scheduler, aggregateJobPageSize, State.Negative) 81 | } 82 | } 83 | 84 | class LegacySingleJobParser( 85 | forwardingManager: ForwardingManager, 86 | uuidGenerator: UuidGenerator, 87 | state: State) 88 | extends JsonJobParser { 89 | 90 | 
def log = Logger.get 91 | 92 | def apply(attributes: Map[String, Any]): JsonJob = { 93 | val casted = attributes.asInstanceOf[Map[String, AnyVal]] 94 | 95 | new Single( 96 | casted("source_id").toLong, 97 | casted("graph_id").toInt, 98 | casted("destination_id").toLong, 99 | casted("position").toLong, 100 | state, // ONLY DIFFERENCE FROM SingleJobParser 101 | Time.fromSeconds(casted("updated_at").toInt), 102 | forwardingManager, 103 | uuidGenerator 104 | ) 105 | } 106 | } 107 | 108 | class LegacyMultiJobParser( 109 | forwardingManager: ForwardingManager, 110 | scheduler: PrioritizingJobScheduler, 111 | aggregateJobPageSize: Int, 112 | state: State) 113 | extends JsonJobParser { 114 | 115 | def apply(attributes: Map[String, Any]): JsonJob = { 116 | val casted = attributes.asInstanceOf[Map[String, AnyVal]] 117 | 118 | new Multi( 119 | casted("source_id").toLong, 120 | casted("graph_id").toInt, 121 | Direction(casted("direction").toInt), 122 | state, 123 | Time.fromSeconds(casted("updated_at").toInt), 124 | Priority(casted.get("priority").map(_.toInt).getOrElse(Priority.Low.id)), 125 | aggregateJobPageSize, 126 | forwardingManager, 127 | scheduler 128 | ) 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /config/production.scala: -------------------------------------------------------------------------------- 1 | import com.twitter.flockdb.config._ 2 | import com.twitter.gizzard.config._ 3 | import com.twitter.querulous.config._ 4 | import com.twitter.querulous.StatsCollector 5 | import com.twitter.conversions.time._ 6 | import com.twitter.conversions.storage._ 7 | import com.twitter.flockdb.shards.QueryClass 8 | import com.twitter.flockdb.Priority 9 | import com.twitter.ostrich.admin.config.AdminServiceConfig 10 | import com.twitter.logging.Level 11 | import com.twitter.logging.config._ 12 | 13 | trait Credentials extends Connection { 14 | val username = "root" 15 | val password = "" 16 | } 17 | 18 | class 
ProductionQueryEvaluator extends AsyncQueryEvaluator { 19 | override var workPoolSize = 40 20 | database.memoize = true 21 | database.pool = new ThrottledPoolingDatabase { 22 | size = workPoolSize 23 | openTimeout = 100.millis 24 | } 25 | 26 | query.timeouts = Map( 27 | QueryClass.Select -> QueryTimeout(1.second), 28 | QueryClass.Execute -> QueryTimeout(1.second), 29 | QueryClass.SelectCopy -> QueryTimeout(15.seconds), 30 | QueryClass.SelectModify -> QueryTimeout(3.seconds), 31 | QueryClass.SelectSingle -> QueryTimeout(1.second), 32 | QueryClass.SelectIntersection -> QueryTimeout(1.second), 33 | QueryClass.SelectIntersectionSmall -> QueryTimeout(1.second), 34 | QueryClass.SelectMetadata -> QueryTimeout(1.second) 35 | ) 36 | } 37 | 38 | class ProductionNameServerReplica(host: String) extends Mysql { 39 | val connection = new Connection with Credentials { 40 | val hostnames = Seq(host) 41 | val database = "flock_edges_production" 42 | } 43 | 44 | queryEvaluator = new QueryEvaluator { 45 | database.memoize = true 46 | database.pool = new ThrottledPoolingDatabase { 47 | size = 1 48 | openTimeout = 1.second 49 | } 50 | } 51 | } 52 | 53 | new FlockDB { 54 | mappingFunction = ByteSwapper 55 | jobRelay = NoJobRelay 56 | 57 | nameServerReplicas = Seq( 58 | new ProductionNameServerReplica("flockdb001.twitter.com"), 59 | new ProductionNameServerReplica("flockdb002.twitter.com") 60 | ) 61 | 62 | jobInjector.timeout = 100.millis 63 | jobInjector.idleTimeout = 60.seconds 64 | jobInjector.threadPool.minThreads = 30 65 | 66 | val databaseConnection = new Credentials { 67 | val hostnames = Seq("localhost") 68 | val database = "edges" 69 | urlOptions = Map("rewriteBatchedStatements" -> "true") 70 | } 71 | 72 | val edgesQueryEvaluator = new ProductionQueryEvaluator 73 | val lowLatencyQueryEvaluator = new ProductionQueryEvaluator 74 | 75 | val materializingQueryEvaluator = new ProductionQueryEvaluator { 76 | workPoolSize = 1 77 | database.pool = new ThrottledPoolingDatabase { 78 | 
size = workPoolSize 79 | openTimeout = 1.second 80 | } 81 | } 82 | 83 | class ProductionScheduler(val name: String) extends Scheduler { 84 | jobQueueName = name + "_jobs" 85 | 86 | val schedulerType = new KestrelScheduler { 87 | path = "/var/spool/kestrel" 88 | maxMemorySize = 36.megabytes 89 | } 90 | 91 | errorLimit = 100 92 | errorRetryDelay = 15.minutes 93 | errorStrobeInterval = 1.second 94 | perFlushItemLimit = 100 95 | jitterRate = 0 96 | } 97 | 98 | val jobQueues = Map( 99 | Priority.High.id -> new ProductionScheduler("edges") { threads = 32 }, 100 | Priority.Medium.id -> new ProductionScheduler("copy") { threads = 12; errorRetryDelay = 60.seconds }, 101 | Priority.Low.id -> new ProductionScheduler("edges_slow") { threads = 2 } 102 | ) 103 | 104 | val adminConfig = new AdminServiceConfig { 105 | httpPort = Some(9990) 106 | } 107 | 108 | loggers = List( 109 | new LoggerConfig { 110 | level = Some(Level.INFO) 111 | handlers = List( 112 | new ThrottledHandlerConfig { 113 | duration = 60.seconds 114 | maxToDisplay = 10 115 | handler = new FileHandlerConfig { 116 | filename = "/var/log/flock/production.log" 117 | roll = Policy.Hourly 118 | } 119 | }) 120 | }, 121 | new LoggerConfig { 122 | node = "stats" 123 | useParents = false 124 | level = Some(Level.INFO) 125 | handlers = List(new ScribeHandlerConfig { 126 | category = "flock-stats" 127 | }) 128 | }, 129 | new LoggerConfig { 130 | node = "bad_jobs" 131 | useParents = false 132 | level = Some(Level.INFO) 133 | handlers = List(new FileHandlerConfig { 134 | roll = Policy.Never 135 | filename = "/var/log/flock/bad_jobs.log" 136 | }) 137 | }) 138 | } 139 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/integration/OptimisticLockRegressionSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package integration 19 | 20 | import org.specs.mock.{ClassMocker, JMocker} 21 | import org.specs.util.{Duration => SpecsDuration} 22 | import org.specs.matcher.Matcher 23 | import com.twitter.gizzard.scheduler.{JsonJob, PrioritizingJobScheduler} 24 | import com.twitter.gizzard.shards._ 25 | import com.twitter.gizzard.nameserver.NameServer 26 | import com.twitter.util.Time 27 | import com.twitter.conversions.time._ 28 | import com.twitter.flockdb.operations._ 29 | import jobs.single._ 30 | import shards.{Shard, SqlShard} 31 | 32 | 33 | class OptimisticLockRegressionSpec extends IntegrationSpecification() { 34 | val FOLLOWS = 1 35 | val alice = 1 36 | 37 | val MIN = 3 38 | val MAX = 100 39 | val errorLimit = 5 40 | 41 | "Inserting conflicting items" should { 42 | "recover via the optimistic lock" in { 43 | reset(config) 44 | 45 | val scheduler = flock.jobScheduler(Priority.High.id) 46 | val errors = scheduler.errorQueue 47 | 48 | // No thrift api for this, so this is the best I know how to do. 
49 | scheduler.put(new Single(1, FOLLOWS, 5106, 123456, State.Normal, Time.now, flock.forwardingManager, OrderedUuidGenerator)) 50 | 51 | execute(Select(1, FOLLOWS, ()).archive) 52 | 53 | playNormalJobs() 54 | 55 | var found = false 56 | while (errors.size > 0) { 57 | val job = errors.get.get.job 58 | if (job.errorMessage.indexOf("lost optimistic lock") > 0) { 59 | found = true 60 | } 61 | job() 62 | } 63 | playScheduledJobs() 64 | 65 | found mustEqual true 66 | 67 | flockService.get(1, FOLLOWS, 5106)().state must eventually(be_==(State.Archived)) 68 | } 69 | 70 | 71 | "still work even if we spam a ton of operations" in { 72 | // println("gogo") 73 | reset(config) 74 | 75 | val scheduler = flock.jobScheduler(Priority.High.id) 76 | val errors = scheduler.errorQueue 77 | 78 | // println("spamming edges") 79 | for(i <- 1 to 500) { 80 | (i % 2) match { 81 | case 0 => execute(Select(1, FOLLOWS, i).add) 82 | case 1 => execute(Select(1, FOLLOWS, i).archive) 83 | } 84 | } 85 | 86 | // println("spamming removes") 87 | for(i <- 1 to 50) { 88 | execute(Select((), FOLLOWS, i * 10).remove) 89 | } 90 | 91 | // println("spamming bulks") 92 | for(i <- 1 to 10) { 93 | (i % 2) match { 94 | case 0 => execute(Select(1, FOLLOWS, ()).add) 95 | case 1 => execute(Select(1, FOLLOWS, ()).archive) 96 | } 97 | } 98 | 99 | // println("final state") 100 | execute(Select(1, FOLLOWS, ()).archive) 101 | 102 | // println("draining") 103 | playNormalJobs() 104 | 105 | while (errors.size > 0) { 106 | // println("looping through the error queue") 107 | val job = errors.get.get.job 108 | try { 109 | job() 110 | } catch { 111 | case e => { 112 | job.errorCount += 1 113 | job.errorMessage = e.toString 114 | if (job.errorCount > errorLimit) { 115 | throw new RuntimeException("too many bad jobs") 116 | } else { 117 | errors.put(job) 118 | } 119 | } 120 | } 121 | 122 | playNormalJobs() 123 | } 124 | 125 | Thread.sleep(1000) 126 | 127 | val selectArchived = SimpleSelect( 128 | SelectOperation( 129 | 
SelectOperationType.SimpleQuery, 130 | Some(QueryTerm(alice, FOLLOWS, true, None, List(State.Archived))) 131 | ) 132 | ) 133 | 134 | count(selectArchived) must eventually(be_==(450)) 135 | count(Select(1, FOLLOWS, ())) mustEqual 0 136 | 137 | for(i <- 1 to 500) { 138 | (i % 10) match { 139 | case 0 => () 140 | case _ => flockService.get(1, FOLLOWS, i)().state mustEqual State.Archived 141 | } 142 | } 143 | } 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/Select.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | import com.twitter.conversions.time._ 20 | import com.twitter.util.Time 21 | import com.twitter.flockdb.operations.SelectOperationType._ 22 | import com.twitter.flockdb.operations._ 23 | 24 | 25 | object Select { 26 | def apply(sourceId: Unit, graphId: Int, destinationId: Long) = { 27 | new SimpleSelect(new SelectOperation(SimpleQuery, Some(new QueryTerm(destinationId, graphId, false, None, List(State.Normal))))) 28 | } 29 | 30 | def apply(sourceId: Long, graphId: Int, destinationId: Unit) = { 31 | new SimpleSelect(new SelectOperation(SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal))))) 32 | } 33 | 34 | def apply(sourceId: Long, graphId: Int, destinationId: Long) = { 35 | new SimpleSelect(new SelectOperation(SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, Some(List[Long](destinationId)), List(State.Normal))))) 36 | } 37 | 38 | def apply(sourceId: Long, graphId: Int, destinationIds: Seq[Long]) = { 39 | new SimpleSelect(new SelectOperation(SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, Some(destinationIds), List(State.Normal))))) 40 | } 41 | 42 | def apply(sourceIds: Seq[Long], graphId: Int, destinationId: Long) = { 43 | new SimpleSelect(new SelectOperation(SimpleQuery, Some(new QueryTerm(destinationId, graphId, false, Some(sourceIds), List(State.Normal))))) 44 | } 45 | } 46 | 47 | trait Select { 48 | def toList: List[SelectOperation] 49 | def intersect(that: Select): Select = new CompoundSelect(Intersection, this, that) 50 | def difference(that: Select): Select = new CompoundSelect(Difference, this, that) 51 | } 52 | 53 | trait Execute { 54 | def toOperations: List[ExecuteOperation] 55 | def at(time: Time): Execute 56 | def +(execute: Execute): Execute 57 | } 58 | 59 | // FIXME this is infinity-select not null :) 60 | object NullSelect extends Select { 61 | override def intersect(that: Select) = that 62 | def toList = { throw new Exception("Not Applicable") } 
63 | } 64 | 65 | case class SimpleSelect(operation: SelectOperation) extends Select { 66 | def toList = List(operation) 67 | 68 | def addAt(at: Time) = execute(ExecuteOperationType.Add, at) 69 | def add = addAt(Time.now) 70 | def archiveAt(at: Time) = execute(ExecuteOperationType.Archive, at) 71 | def archive = archiveAt(Time.now) 72 | def removeAt(at: Time) = execute(ExecuteOperationType.Remove, at) 73 | def remove = removeAt(Time.now) 74 | def negateAt(at: Time) = execute(ExecuteOperationType.Negate, at) 75 | def negate = negateAt(Time.now) 76 | private def execute(executeOperationType: ExecuteOperationType.Value, at: Time) = 77 | new SimpleExecute(new ExecuteOperation(executeOperationType, operation.term.get, 78 | Some(Time.now.inMillis)), at) 79 | 80 | def negative = { 81 | val negativeOperation = operation.clone 82 | negativeOperation.term.get.states = List(State.Negative) 83 | new SimpleSelect(negativeOperation) 84 | } 85 | 86 | def states(states: State*) = { 87 | val statefulOperation = operation.clone 88 | statefulOperation.term.get.states = states 89 | new SimpleSelect(statefulOperation) 90 | } 91 | } 92 | 93 | case class CompoundSelect(operation: SelectOperationType.Value, operand1: Select, operand2: Select) extends Select { 94 | def toList = operand1.toList ++ operand2.toList ++ List(new SelectOperation(operation, None)) 95 | } 96 | 97 | case class SimpleExecute(operation: ExecuteOperation, at: Time) extends Execute { 98 | def toOperations = List(operation) 99 | def at(time: Time) = new SimpleExecute(operation, time) 100 | def +(execute: Execute) = new CompoundExecute(this, execute, at, Priority.High) 101 | } 102 | 103 | case class CompoundExecute(operand1: Execute, operand2: Execute, at: Time, priority: Priority.Value) extends Execute { 104 | def toOperations = operand1.toOperations ++ operand2.toOperations 105 | 106 | def +(execute: Execute) = new CompoundExecute(this, execute, at, priority) 107 | def withPriority(priority: Priority.Value) = new 
CompoundExecute(operand1, operand2, at, priority) 108 | def at(time: Time) = new CompoundExecute(operand1, operand2, time, priority) 109 | } 110 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # STATUS 2 | 3 | Twitter is no longer maintaining this project or responding to issues or PRs. 4 | 5 | # FlockDB 6 | 7 | FlockDB is a distributed graph database for storing adjancency lists, with 8 | goals of supporting: 9 | 10 | - a high rate of add/update/remove operations 11 | - potientially complex set arithmetic queries 12 | - paging through query result sets containing millions of entries 13 | - ability to "archive" and later restore archived edges 14 | - horizontal scaling including replication 15 | - online data migration 16 | 17 | Non-goals include: 18 | 19 | - multi-hop queries (or graph-walking queries) 20 | - automatic shard migrations 21 | 22 | FlockDB is much simpler than other graph databases such as neo4j because it 23 | tries to solve fewer problems. It scales horizontally and is designed for 24 | on-line, low-latency, high throughput environments such as web-sites. 25 | 26 | Twitter uses FlockDB to store social graphs (who follows whom, who blocks 27 | whom) and secondary indices. As of April 2010, the Twitter FlockDB cluster 28 | stores 13+ billion edges and sustains peak traffic of 20k writes/second and 29 | 100k reads/second. 30 | 31 | 32 | # It does what? 33 | 34 | If, for example, you're storing a social graph (user A follows user B), and 35 | it's not necessarily symmetrical (A can follow B without B following A), then 36 | FlockDB can store that relationship as an edge: node A points to node B. It 37 | stores this edge with a sort position, and in both directions, so that it can 38 | answer the question "Who follows A?" as well as "Whom is A following?" 39 | 40 | This is called a directed graph.
(Technically, FlockDB stores the adjacency 41 | lists of a directed graph.) Each edge has a 64-bit source ID, a 64-bit 42 | destination ID, a state (normal, removed, archived), and a 32-bit position 43 | used for sorting. The edges are stored in both a forward and backward 44 | direction, meaning that an edge can be queried based on either the source or 45 | destination ID. 46 | 47 | For example, if node 134 points to node 90, and its sort position is 5, then 48 | there are two rows written into the backing store: 49 | 50 | forward: 134 -> 90 at position 5 51 | backward: 90 <- 134 at position 5 52 | 53 | If you're storing a social graph, the graph might be called "following", and 54 | you might use the current time as the position, so that a listing of followers 55 | is in recency order. In that case, if user 134 is Nick, and user 90 is Robey, 56 | then FlockDB can store: 57 | 58 | forward: Nick follows Robey at 9:54 today 59 | backward: Robey is followed by Nick at 9:54 today 60 | 61 | The (source, destination) must be unique: only one edge can point from node A 62 | to node B, but the position and state may be modified at any time. Position is 63 | used only for sorting the results of queries, and state is used to mark edges 64 | that have been removed or archived (placed into cold sleep). 65 | 66 | 67 | # Building 68 | 69 | In theory, building is as simple as 70 | 71 | $ sbt clean update package-dist 72 | 73 | but there are some pre-requisites. You need: 74 | 75 | - java 1.6 76 | - sbt 0.7.4 77 | - thrift 0.5.0 78 | 79 | If you haven't used sbt before, this page has a quick setup: 80 | [http://code.google.com/p/simple-build-tool/wiki/Setup](http://code.google.com/p/simple-build-tool/wiki/Setup). 
81 | My `~/bin/sbt` looks like this: 82 | 83 | #!/bin/bash 84 | java -server -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=256m -Xmx1024m -jar `dirname $0`/sbt-launch-0.7.4.jar "$@" 85 | 86 | Apache Thrift 0.5.0 is pre-requisite for building java stubs of the thrift 87 | IDL. It can't be installed via jar, so you'll need to install it separately 88 | before you build. It can be found on the apache thrift site: 89 | [http://thrift.apache.org/](http://thrift.apache.org/). 90 | You can find the download for 0.5.0 here: 91 | [http://archive.apache.org/dist/incubator/thrift/0.5.0-incubating/](http://archive.apache.org/dist/incubator/thrift/0.5.0-incubating/). 92 | 93 | In addition, the tests require a local mysql instance to be running, and for 94 | `DB_USERNAME` and `DB_PASSWORD` env vars to contain login info for it. You can 95 | skip the tests if you want (but you should feel a pang of guilt): 96 | 97 | $ NO_TESTS=1 sbt package-dist 98 | 99 | 100 | # Running 101 | 102 | Check out 103 | [the demo](http://github.com/twitter/flockdb/blob/master/doc/demo.markdown) 104 | for instructions on how to start up a local development instance of FlockDB. 105 | It also shows how to add edges, query them, etc. 106 | 107 | 108 | # Community 109 | 110 | - Twitter: #flockdb 111 | - IRC: #twinfra on freenode (irc.freenode.net) 112 | - Mailing list: [subscribe](http://groups.google.com/group/flockdb) 113 | 114 | 115 | # Contributors 116 | 117 | - Nick Kallen @nk 118 | - Robey Pointer @robey 119 | - John Kalucki @jkalucki 120 | - Ed Ceaser @asdf 121 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/ResultWindow.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | import scala.util.Sorting 20 | import com.twitter.util.Time 21 | import com.twitter.util.TimeConversions._ 22 | 23 | 24 | case class ResultWindowRow[T](id: T, cursor: Cursor) extends Ordered[ResultWindowRow[T]] { 25 | def compare(that: ResultWindowRow[T]) = that.cursor.compare(cursor) 26 | } 27 | 28 | class ResultWindowRows[T](data: Seq[ResultWindowRow[T]]) extends Seq[ResultWindowRow[T]] { 29 | def length = data.length 30 | def apply(i: Int) = data(i) 31 | def iterator = data.iterator 32 | } 33 | 34 | class ResultWindow[T](val data: ResultWindowRows[T], val inNextCursor: Cursor, val inPrevCursor: Cursor, val count: Int, val cursor: Cursor) extends Seq[T] { 35 | def this(data: Seq[(T, Cursor)], inNextCursor: Cursor, inPrevCursor: Cursor, count: Int, cursor: Cursor) = 36 | this(new ResultWindowRows(data.map { datum => ResultWindowRow(datum._1, datum._2) }), inNextCursor, inPrevCursor, count, cursor) 37 | def this(data: Seq[(T, Cursor)], count: Int, cursor: Cursor) = 38 | this(data, Cursor.End, Cursor.End, count, cursor) 39 | def this() = 40 | this(List[(T, Cursor)](), 0, Cursor.End) 41 | 42 | var page: Seq[ResultWindowRow[T]] = data 43 | var nextChanged, prevChanged = false 44 | if (cursor < Cursor.Start) { 45 | page = data.takeWhile(_.cursor > cursor.magnitude) 46 | nextChanged = page.size < data.size 47 | prevChanged = page.size > count 48 | page = page.drop(page.size - count) 49 | } else if (cursor == Cursor.Start) { 50 | nextChanged = page.size > count 51 | page = 
page.take(count) 52 | } else { 53 | page = data.dropWhile(_.cursor >= cursor) 54 | nextChanged = page.size > count 55 | prevChanged = page.size < data.size 56 | page = page.take(count) 57 | } 58 | val nextCursor = if (nextChanged && !page.isEmpty) page(page.size - 1).cursor else inNextCursor 59 | val prevCursor = if (prevChanged && !page.isEmpty) page(0).cursor.reverse else inPrevCursor 60 | 61 | def ++(other: ResultWindow[T]) = concat(other) 62 | 63 | def concat(other: ResultWindow[T], newCount: Int = count) = { 64 | if (cursor < Cursor.Start) { 65 | new ResultWindow(new ResultWindowRows(other.page ++ page), nextCursor, other.prevCursor, newCount, cursor) 66 | } else { 67 | new ResultWindow(new ResultWindowRows(page ++ other.page), other.nextCursor, prevCursor, newCount, cursor) 68 | } 69 | } 70 | 71 | 72 | def merge(other: ResultWindow[T]) = { 73 | val newPage = Sorting.stableSort((Set((page ++ other.page): _*)).toSeq) 74 | val newNextCursor = if (nextCursor == Cursor.End && other.nextCursor == Cursor.End) Cursor.End else newPage(newPage.size - 1).cursor 75 | val newPrevCursor = if (prevCursor == Cursor.End && other.prevCursor == Cursor.End) Cursor.End else newPage(0).cursor.reverse 76 | new ResultWindow(new ResultWindowRows(newPage), newNextCursor, newPrevCursor, count, cursor) 77 | } 78 | 79 | def --(values: Seq[T]) = diff(values) 80 | 81 | def diff(values: Seq[T], newCount: Int = count) = { 82 | val rejects = Set(values: _*) 83 | val newPage = page.filter { row => !rejects.contains(row.id) } 84 | val newNextCursor = if (nextCursor == Cursor.End || newPage.size == 0) Cursor.End else newPage(newPage.size - 1).cursor 85 | val newPrevCursor = if (prevCursor == Cursor.End || newPage.size == 0) Cursor.End else newPage(0).cursor.reverse 86 | new ResultWindow(new ResultWindowRows(newPage), newNextCursor, newPrevCursor, newCount, cursor) 87 | } 88 | 89 | def length = page.length 90 | def apply(i: Int) = page(i).id 91 | def iterator = page.view.map(_.id).iterator 92 | 
def continueCursor = if (cursor < Cursor.Start) prevCursor else nextCursor 93 | override def headOption = page.headOption.map { _.id } 94 | 95 | override def toString = (iterator.toList, nextCursor, prevCursor, count, cursor).toString 96 | 97 | override def equals(that: Any) = that match { 98 | case that: ResultWindow[_] => iterator.toList == that.iterator.toList && nextCursor == that.nextCursor && prevCursor == that.prevCursor && cursor == that.cursor 99 | case _ => false 100 | } 101 | 102 | // convenience method that makes for easier matching in tests 103 | def toTuple = (iterator.toList, nextCursor, prevCursor) 104 | } 105 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/jobs/multi/Multi.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb.jobs.multi 18 | 19 | import com.twitter.gizzard.scheduler._ 20 | import com.twitter.gizzard.shards.ShardBlackHoleException 21 | import com.twitter.util.Time 22 | import com.twitter.util.TimeConversions._ 23 | import com.twitter.flockdb.{State, ForwardingManager, Cursor, Priority, Direction} 24 | import com.twitter.flockdb.conversions.Numeric._ 25 | import com.twitter.flockdb.shards.Shard 26 | import com.twitter.flockdb.jobs.single.Single 27 | 28 | // TODO: Make this async. 29 | class MultiJobParser( 30 | forwardingManager: ForwardingManager, 31 | scheduler: PrioritizingJobScheduler, 32 | aggregateJobPageSize: Int) 33 | extends JsonJobParser { 34 | 35 | def apply(attributes: Map[String, Any]): JsonJob = { 36 | val casted = attributes.asInstanceOf[Map[String, AnyVal]] 37 | 38 | new Multi( 39 | casted("source_id").toLong, 40 | casted("graph_id").toInt, 41 | Direction(casted("direction").toInt), 42 | State(casted("state").toInt), 43 | Time.fromSeconds(casted("updated_at").toInt), 44 | Priority(casted.get("priority").map(_.toInt).getOrElse(Priority.Low.id)), 45 | aggregateJobPageSize, 46 | casted.get("cursor").map( c => Cursor(c.toLong)).getOrElse(Cursor.Start), 47 | forwardingManager, 48 | scheduler 49 | ) 50 | } 51 | } 52 | 53 | class Multi( 54 | sourceId: Long, 55 | graphId: Int, 56 | direction: Direction, 57 | preferredState: State, 58 | updatedAt: Time, 59 | priority: Priority.Value, 60 | aggregateJobPageSize: Int, 61 | var cursor: Cursor, 62 | forwardingManager: ForwardingManager, 63 | scheduler: PrioritizingJobScheduler) 64 | extends JsonJob { 65 | 66 | def this( 67 | sourceId: Long, 68 | graphId: Int, 69 | direction: Direction, 70 | preferredState: State, 71 | updatedAt: Time, 72 | priority: Priority.Value, 73 | aggregateJobPageSize: Int, 74 | forwardingManager: ForwardingManager, 75 | scheduler: PrioritizingJobScheduler 76 | ) = { 77 | this( 78 | sourceId, 79 | graphId, 80 | direction, 81 | preferredState, 82 | 
updatedAt, 83 | priority, 84 | aggregateJobPageSize, 85 | Cursor.Start, 86 | forwardingManager, 87 | scheduler 88 | ) 89 | } 90 | 91 | def toMap = Map( 92 | "source_id" -> sourceId, 93 | "updated_at" -> updatedAt.inSeconds, 94 | "graph_id" -> graphId, 95 | "direction" -> direction.id, 96 | "priority" -> priority.id, 97 | "state" -> preferredState.id, 98 | "cursor" -> cursor.position 99 | ) 100 | 101 | def apply() { 102 | val forwardShard = forwardingManager.find(sourceId, graphId, direction) 103 | 104 | if (cursor == Cursor.Start) try { 105 | updateMetadata(forwardShard, preferredState) 106 | } catch { 107 | case e: ShardBlackHoleException => return 108 | } 109 | 110 | while (cursor != Cursor.End) { 111 | val resultWindow = forwardShard.selectIncludingArchived(sourceId, aggregateJobPageSize, cursor)() 112 | 113 | val chunkOfTasks = resultWindow.map { destinationId => 114 | val (a, b) = if (direction == Direction.Backward) (destinationId, sourceId) else (sourceId, destinationId) 115 | singleEdgeJob(a, graphId, b, preferredState) 116 | } 117 | 118 | scheduler.put(priority.id, new JsonNestedJob(chunkOfTasks)) 119 | 120 | // "commit" the current iteration by saving the next cursor. 121 | // if the job blows up in the next round, it will be re-serialized 122 | // with this cursor. 123 | cursor = resultWindow.nextCursor 124 | } 125 | } 126 | 127 | // XXX: since this job gets immediately serialized, pass null for forwardingManager and uuidGenerator. 
128 | protected def singleEdgeJob(sourceId: Long, graphId: Int, destinationId: Long, state: State) = { 129 | new Single(sourceId, graphId, destinationId, updatedAt.inMillis, state, updatedAt, null, null) 130 | } 131 | 132 | protected def updateMetadata(shard: Shard, state: State) = state match { 133 | case State.Normal => shard.add(sourceId, updatedAt)() 134 | case State.Removed => shard.remove(sourceId, updatedAt)() 135 | case State.Archived => shard.archive(sourceId, updatedAt)() 136 | case State.Negative => shard.negate(sourceId, updatedAt)() 137 | } 138 | 139 | override def equals(o: Any) = o match { 140 | case o: Multi => this.toMap == o.toMap 141 | case _ => false 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/ConfiguredSpecification.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | import java.io.File 20 | import org.specs.Specification 21 | import com.twitter.gizzard.shards.{Busy, ShardId, ShardInfo} 22 | import com.twitter.gizzard.nameserver.Forwarding 23 | import com.twitter.gizzard.scheduler._ 24 | import com.twitter.gizzard.test.NameServerDatabase 25 | import com.twitter.util.{Eval, Time} 26 | import com.twitter.querulous.evaluator.QueryEvaluatorFactory 27 | import com.twitter.logging.Logger 28 | import scala.collection.mutable 29 | import com.twitter.flockdb 30 | import com.twitter.flockdb.operations._ 31 | 32 | object MemoizedQueryEvaluators { 33 | val evaluators = mutable.Map[String,QueryEvaluatorFactory]() 34 | } 35 | 36 | object Config { 37 | val config = { 38 | val c = Eval[flockdb.config.FlockDB](new File("config/test.scala")) 39 | try { 40 | c.loggers.foreach { _() } 41 | c 42 | } catch { 43 | case e: Exception => { 44 | e.printStackTrace() 45 | throw e 46 | } 47 | } 48 | } 49 | } 50 | 51 | abstract class ConfiguredSpecification extends Specification { 52 | val config = Config.config 53 | 54 | def jsonMatching(list1: Iterable[JsonJob], list2: Iterable[JsonJob]) = { 55 | list1 must eventually(verify(l1 => { l1.map(_.toJson).sameElements(list2.map(_.toJson))})) 56 | } 57 | } 58 | 59 | abstract class IntegrationSpecification extends ConfiguredSpecification with NameServerDatabase { 60 | lazy val flock = { 61 | val f = new FlockDB(config) 62 | f.jobScheduler.start() 63 | f 64 | } 65 | 66 | lazy val flockService = flock.flockService 67 | 68 | def execute(e: Execute, t: Option[Time] = None) = { 69 | flockService.execute(ExecuteOperations(e.toOperations, t map { _.inSeconds }, Priority.High))() 70 | } 71 | 72 | def count(s: Select) = { 73 | flockService.count(Seq(s.toList))().head 74 | } 75 | 76 | def select(s: Select, page: Page) = { 77 | flockService.select(SelectQuery(s.toList, page))().toTuple 78 | } 79 | 80 | def reset(config: flockdb.config.FlockDB) { reset(config, 1) } 81 | 
82 | def reset(config: flockdb.config.FlockDB, count: Int) { 83 | materialize(config) 84 | flock.nameServer.reload() 85 | 86 | val rootQueryEvaluator = config.edgesQueryEvaluator()(config.databaseConnection.withoutDatabase) 87 | //rootQueryEvaluator.execute("DROP DATABASE IF EXISTS " + config.databaseConnection.database) 88 | val queryEvaluator = config.edgesQueryEvaluator()(config.databaseConnection) 89 | 90 | for (graph <- (1 until 10)) { 91 | Seq("forward", "backward").foreach { direction => 92 | val tableId = if (direction == "forward") graph else graph * -1 93 | val replicatingShardId = ShardId("localhost", "replicating_" + direction + "_" + graph) 94 | flock.shardManager.createAndMaterializeShard( 95 | ShardInfo(replicatingShardId, "com.twitter.gizzard.shards.ReplicatingShard", "", "", Busy.Normal) 96 | ) 97 | flock.shardManager.setForwarding(Forwarding(tableId, 0, replicatingShardId)) 98 | 99 | for (sqlShardId <- (1 to count)) { 100 | val shardId = ShardId("localhost", direction + "_" + sqlShardId + "_" + graph) 101 | 102 | flock.shardManager.createAndMaterializeShard(ShardInfo(shardId, 103 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal)) 104 | flock.shardManager.addLink(replicatingShardId, shardId, 1) 105 | 106 | queryEvaluator.execute("DELETE FROM " + shardId.tablePrefix + "_edges")() 107 | queryEvaluator.execute("DELETE FROM " + shardId.tablePrefix + "_metadata")() 108 | } 109 | } 110 | } 111 | 112 | flock.nameServer.reload() 113 | } 114 | 115 | def playScheduledJobs() { 116 | Thread.sleep(100) 117 | val s = flock.jobScheduler 118 | while (s.size > 0 || s.errorSize > 0 || s.activeThreads > 0) { 119 | s.retryErrors() 120 | 121 | Thread.sleep(50) 122 | } 123 | } 124 | 125 | def playNormalJobs() { 126 | Thread.sleep(100) 127 | val s = flock.jobScheduler 128 | while (s.size > 0 || s.activeThreads > 0) { 129 | Thread.sleep(50) 130 | } 131 | } 132 | 133 | def reset(config: flockdb.config.FlockDB, db: String) { 134 | try { 135 | 
evaluator(config).execute("DROP DATABASE IF EXISTS " + db) 136 | } catch { 137 | case e => 138 | e.printStackTrace() 139 | throw e 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/jobs/single/Single.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb.jobs.single 18 | 19 | import com.twitter.logging.Logger 20 | import com.twitter.util.{Time, Return, Throw} 21 | import com.twitter.gizzard.scheduler._ 22 | import com.twitter.gizzard.shards._ 23 | import com.twitter.conversions.time._ 24 | import com.twitter.flockdb.{State, ForwardingManager, Cursor, UuidGenerator, Direction} 25 | import com.twitter.flockdb.conversions.Numeric._ 26 | import com.twitter.flockdb.shards.Shard 27 | import com.twitter.flockdb.shards.LockingNodeSet._ 28 | 29 | 30 | // TODO: Make this async. 
31 | class SingleJobParser( 32 | forwardingManager: ForwardingManager, 33 | uuidGenerator: UuidGenerator) 34 | extends JsonJobParser { 35 | 36 | def log = Logger.get 37 | 38 | def apply(attributes: Map[String, Any]): JsonJob = { 39 | val writeSuccesses = try { 40 | attributes.get("write_successes") map { 41 | _.asInstanceOf[Seq[Seq[String]]] map { case Seq(h, tp) => ShardId(h, tp) } 42 | } getOrElse Nil 43 | } catch { 44 | case e => { 45 | log.warning("Error parsing write successes. falling back to non-memoization", e) 46 | Nil 47 | } 48 | } 49 | 50 | val casted = attributes.asInstanceOf[Map[String, AnyVal]] 51 | 52 | new Single( 53 | casted("source_id").toLong, 54 | casted("graph_id").toInt, 55 | casted("destination_id").toLong, 56 | casted("position").toLong, 57 | State(casted("state").toInt), 58 | Time.fromSeconds(casted("updated_at").toInt), 59 | forwardingManager, 60 | uuidGenerator, 61 | writeSuccesses.toList 62 | ) 63 | } 64 | } 65 | 66 | class Single( 67 | sourceId: Long, 68 | graphId: Int, 69 | destinationId: Long, 70 | position: Long, 71 | preferredState: State, 72 | updatedAt: Time, 73 | forwardingManager: ForwardingManager, 74 | uuidGenerator: UuidGenerator, 75 | var successes: List[ShardId] = Nil) 76 | extends JsonJob { 77 | 78 | def toMap = { 79 | val base = Map( 80 | "source_id" -> sourceId, 81 | "graph_id" -> graphId, 82 | "destination_id" -> destinationId, 83 | "position" -> position, 84 | "state" -> preferredState.id, 85 | "updated_at" -> updatedAt.inSeconds 86 | ) 87 | 88 | if (successes.isEmpty) { 89 | base 90 | } else { 91 | base + ("write_successes" -> (successes map { case ShardId(h, tp) => Seq(h, tp) })) 92 | } 93 | } 94 | 95 | def apply() = { 96 | val forward = forwardingManager.findNode(sourceId, graphId, Direction.Forward).write 97 | val backward = forwardingManager.findNode(destinationId, graphId, Direction.Backward).write 98 | val uuid = uuidGenerator(position) 99 | 100 | var currSuccesses: List[ShardId] = Nil 101 | var currErrs: 
List[Throwable] = Nil 102 | 103 | forward.optimistically(sourceId) { left => 104 | backward.optimistically(destinationId) { right => 105 | val state = left max right max preferredState 106 | val forwardResults = writeToShard(forward, sourceId, destinationId, uuid, state) 107 | val backwardResults = writeToShard(backward, destinationId, sourceId, uuid, state) 108 | 109 | List(forwardResults, backwardResults) foreach { 110 | _ foreach { 111 | case Return(id) => currSuccesses = id :: currSuccesses 112 | case Throw(e) => currErrs = e :: currErrs 113 | } 114 | } 115 | } 116 | } 117 | 118 | // add successful writes here, since we are only successful if an optimistic lock exception is not raised. 119 | successes = successes ++ currSuccesses 120 | 121 | currErrs.headOption foreach { e => throw e } 122 | } 123 | 124 | def writeToShard(shards: NodeSet[Shard], sourceId: Long, destinationId: Long, uuid: Long, state: State) = { 125 | shards.skip(successes) all { (shardId, shard) => 126 | state match { 127 | case State.Normal => shard.add(sourceId, destinationId, uuid, updatedAt)() 128 | case State.Removed => shard.remove(sourceId, destinationId, uuid, updatedAt)() 129 | case State.Archived => shard.archive(sourceId, destinationId, uuid, updatedAt)() 130 | case State.Negative => shard.negate(sourceId, destinationId, uuid, updatedAt)() 131 | } 132 | 133 | shardId 134 | } 135 | } 136 | 137 | override def equals(o: Any) = o match { 138 | case o: Single => this.toMap == o.toMap 139 | case _ => false 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/shards/ReadWriteShardAdapter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package shards 19 | 20 | import com.twitter.gizzard.shards.RoutingNode 21 | import com.twitter.util.Future 22 | import com.twitter.util.Time 23 | 24 | class ReadWriteShardAdapter(shard: RoutingNode[Shard]) extends Shard { 25 | def selectIncludingArchived(sourceId: Long, count: Int, cursor: Cursor) = shard.read.futureAny { _.selectIncludingArchived(sourceId, count, cursor) } 26 | def intersect(sourceId: Long, states: Seq[State], destinationIds: Seq[Long]) = shard.read.futureAny { _.intersect(sourceId, states, destinationIds) } 27 | def intersectEdges(sourceId: Long, states: Seq[State], destinationIds: Seq[Long]) = shard.read.futureAny { _.intersectEdges(sourceId, states, destinationIds) } 28 | def getMetadata(sourceId: Long) = shard.read.futureAny { _.getMetadata(sourceId) } 29 | def getMetadataForWrite(sourceId: Long) = shard.read.futureAny { _.getMetadataForWrite(sourceId) } 30 | def selectByDestinationId(sourceId: Long, states: Seq[State], count: Int, cursor: Cursor) = shard.read.futureAny { _.selectByDestinationId(sourceId, states, count, cursor) } 31 | def selectByPosition(sourceId: Long, states: Seq[State], count: Int, cursor: Cursor) = shard.read.futureAny { _.selectByPosition(sourceId, states, count, cursor) } 32 | def selectEdges(sourceId: Long, states: Seq[State], count: Int, cursor: Cursor) = shard.read.futureAny { _.selectEdges(sourceId, states, count, cursor) } 33 | def selectAll(cursor: (Cursor, Cursor), count: Int) = shard.read.futureAny { _.selectAll(cursor, 
count) } 34 | def selectAllMetadata(cursor: Cursor, count: Int) = shard.read.futureAny { _.selectAllMetadata(cursor, count) } 35 | def get(sourceId: Long, destinationId: Long) = shard.read.futureAny { _.get(sourceId, destinationId) } 36 | def count(sourceId: Long, states: Seq[State]) = shard.read.futureAny { _.count(sourceId, states) } 37 | 38 | def bulkUnsafeInsertEdges(edges: Seq[Edge]) = Future.join(shard.write.fmap { _.bulkUnsafeInsertEdges(edges) }) 39 | def bulkUnsafeInsertMetadata(metadata: Seq[Metadata]) = Future.join(shard.write.fmap { _.bulkUnsafeInsertMetadata(metadata) }) 40 | 41 | def writeCopies(edges: Seq[Edge]) = Future.join(shard.write.fmap { _.writeCopies(edges) }) 42 | def writeMetadata(metadata: Metadata) = Future.join(shard.write.fmap { _.writeMetadata(metadata) }) 43 | def writeMetadatas(metadata: Seq[Metadata]) = Future.join(shard.write.fmap { _.writeMetadatas(metadata) }) 44 | def updateMetadata(metadata: Metadata) = Future.join(shard.write.fmap { _.updateMetadata(metadata) }) 45 | def remove(sourceId: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.remove(sourceId, updatedAt) }) 46 | def remove(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.remove(sourceId, destinationId, position, updatedAt) }) 47 | def add(sourceId: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.add(sourceId, updatedAt) }) 48 | def add(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.add(sourceId, destinationId, position, updatedAt) }) 49 | def negate(sourceId: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.negate(sourceId, updatedAt) }) 50 | def negate(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.negate(sourceId, destinationId, position, updatedAt) }) 51 | def archive(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time) = Future.join(shard.write.fmap { 
_.archive(sourceId, destinationId, position, updatedAt) }) 52 | def archive(sourceId: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.archive(sourceId, updatedAt) }) 53 | } 54 |
-------------------------------------------------------------------------------- /src/main/thrift/Flockdb.thrift: --------------------------------------------------------------------------------
/*
 * Copyright 2010 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may obtain
 * a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

namespace java com.twitter.flockdb.thrift
namespace rb Flock.Edges

exception FlockException {
  1: string description
}

struct Results {
  # byte-packed list of i64, little-endian:
  1: binary ids
  2: i64 next_cursor
  3: i64 prev_cursor
}

# Set Cursor = -1 when requesting the first Page. Cursor = 0 indicates the end of the result set.
struct Page {
  1: i32 count
  2: i64 cursor
}

struct Metadata {
  1: i64 source_id
  2: i32 state_id
  3: i32 count
  4: i32 updated_at
}

struct Edge {
  1: i64 source_id
  2: i64 destination_id
  3: i64 position
  4: i32 updated_at
  5: i32 count
  6: i32 state_id
}

enum SelectOperationType {
  SimpleQuery = 1
  Intersection = 2
  Union = 3
  Difference = 4
}

# Add and Negate set an edge positive or negative, which are both "normal" states. You can use them
# to track 2 different "colors" of edge. Often, negative means private.
# Archive will change positive/negative edges to archived.
# Remove will change any edge to removed.
enum ExecuteOperationType {
  Add = 1
  Remove = 2
  Archive = 3
  Negate = 4
}

enum EdgeState {
  Positive = 0
  Negative = 3
  Removed = 1
  Archived = 2
}

# Basic FlockDB query term.
# Terms can query a specific edge in either direction, or a one-to-many edge in either direction.
# Wildcard queries can be specified by leaving `destination_ids` empty.
# Only edges matching the set of given `state_ids` are included.
#
# Examples:
#   (30, 2, true, [40], [Positive])
#     -- in graph 2, is there an edge from 30 -> 40?
#   (30, 1, false, [40, 50, 60], [Positive])
#     -- in graph 1, which of (40 -> 30, 50 -> 30, 60 -> 30) exist?
#   (30, 3, false, [], [Removed, Archived])
#     -- in graph 3, which edges point to -> 30, and are either removed or archived?
struct QueryTerm {
  1: i64 source_id
  2: i32 graph_id
  3: bool is_forward
  # byte-packed list of i64, little-endian. if not present, it means "all":
  4: optional binary destination_ids
  # NOTE(review): extraction stripped the generic parameters from every container in this
  # file; element types below are restored from usage (state ids are i32 enum ids).
  5: optional list<i32> state_ids
}

struct SelectOperation {
  1: SelectOperationType operation_type
  2: optional QueryTerm term
}

enum Priority {
  Low = 1
  Medium = 2
  High = 3
}

struct ExecuteOperation {
  1: ExecuteOperationType operation_type
  2: QueryTerm term
  3: optional i64 position
}

struct ExecuteOperations {
  1: list<ExecuteOperation> operations
  2: optional i32 execute_at
  3: Priority priority
}

struct SelectQuery {
  1: list<SelectOperation> operations
  2: Page page
}

struct EdgeQuery {
  1: QueryTerm term
  2: Page page
}

struct EdgeResults {
  1: list<Edge> edges
  2: i64 next_cursor
  3: i64 prev_cursor
}

service FlockDB {
  # return true if the edge exists.
  bool contains(1: i64 source_id, 2: i32 graph_id, 3: i64 destination_id) throws(1: FlockException ex)

  # return all data about an edge if it exists (otherwise, throw an exception).
  Edge get(1: i64 source_id, 2: i32 graph_id, 3: i64 destination_id) throws(1: FlockException ex)

  # return all data about a node if it exists (otherwise, throw an exception).
  Metadata get_metadata(1: i64 source_id, 2: i32 graph_id) throws(1: FlockException ex)

  # return true if the node exists.
  bool contains_metadata(1: i64 source_id, 2: i32 graph_id) throws(1: FlockException ex)

  # perform a list of queries in parallel. each query may be paged, and may be compound.
  list<Results> select2(1: list<SelectQuery> queries) throws(1: FlockException ex)

  # perform a list of queries in parallel, and return the counts of results.
  # if the queries are compound, the counts will be estimates.
  binary count2(1: list<list<SelectOperation>> queries) throws(1: FlockException ex)

  # perform a list of simple queries and return the results as full Edge objects.
  # compound queries are not supported.
  list<EdgeResults> select_edges(1: list<EdgeQuery> queries) throws(1: FlockException ex)

  void execute(1: ExecuteOperations operations) throws(1: FlockException ex)

  # deprecated:
  i32 count(1: list<SelectOperation> operations) throws(1: FlockException ex)
  Results select(1: list<SelectOperation> operations, 2: Page page) throws(1: FlockException ex)
}
-------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/integration/BlackHoleLockingRegressionSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package integration 19 | 20 | import com.twitter.gizzard.scheduler.{JsonJob, PrioritizingJobScheduler} 21 | import com.twitter.gizzard.shards.{ShardInfo, ShardId, Busy} 22 | import com.twitter.gizzard.nameserver.Forwarding 23 | import com.twitter.util.Time 24 | import com.twitter.conversions.time._ 25 | import org.specs.mock.{ClassMocker, JMocker} 26 | import com.twitter.flockdb 27 | import com.twitter.flockdb.config.{FlockDB => FlockDBConfig} 28 | import shards.{Shard, SqlShard} 29 | 30 | 31 | class BlackHoleLockingRegressionSpec extends IntegrationSpecification { 32 | override def reset(config: FlockDBConfig, name: String) { 33 | materialize(config) 34 | flock.nameServer.reload() 35 | 36 | val queryEvaluator = config.edgesQueryEvaluator()(config.databaseConnection) 37 | 38 | for (graph <- (1 until 10)) { 39 | Seq("forward", "backward").foreach { direction => 40 | val tableId = if (direction == "forward") graph else graph * -1 41 | if (direction == "forward") { 42 | val shardId1 = ShardId("localhost", direction + "_" + graph + "_a") 43 | val shardId2 = ShardId("localhost", direction + "_" + graph + "_b") 44 | val replicatingShardId = ShardId("localhost", "replicating_" + direction + "_" + graph) 45 | 46 | flock.shardManager.createAndMaterializeShard(ShardInfo(shardId1, 47 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal)) 48 | flock.shardManager.createAndMaterializeShard(ShardInfo(shardId2, 49 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal)) 50 | flock.shardManager.createAndMaterializeShard(ShardInfo(replicatingShardId, 51 | "ReplicatingShard", "", "", Busy.Normal)) 52 | flock.shardManager.addLink(replicatingShardId, shardId1, 1) 53 | flock.shardManager.addLink(replicatingShardId, shardId2, 1) 54 | flock.shardManager.setForwarding(Forwarding(tableId, 0, replicatingShardId)) 55 | 56 | queryEvaluator.execute("DELETE FROM " + direction + "_" + graph + 
"_a_edges")() 57 | queryEvaluator.execute("DELETE FROM " + direction + "_" + graph + "_a_metadata")() 58 | queryEvaluator.execute("DELETE FROM " + direction + "_" + graph + "_b_edges")() 59 | queryEvaluator.execute("DELETE FROM " + direction + "_" + graph + "_b_metadata")() 60 | } else { 61 | val shardId1 = ShardId("localhost", direction + "_" + graph + "_replicating") 62 | val shardId2 = ShardId("localhost", direction + "_" + graph + "_a") 63 | val shardId3 = ShardId("localhost", direction + "_" + graph + "_b") 64 | flock.shardManager.createAndMaterializeShard(ShardInfo(shardId1, "ReplicatingShard", "", "", Busy.Normal)) 65 | flock.shardManager.createAndMaterializeShard(ShardInfo(shardId2, name, "", "", Busy.Normal)) 66 | flock.shardManager.createAndMaterializeShard(ShardInfo(shardId3, 67 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal)) 68 | 69 | flock.shardManager.addLink(shardId1, shardId2, 1) 70 | flock.shardManager.addLink(shardId2, shardId3, 1) 71 | flock.shardManager.setForwarding(Forwarding(tableId, 0, shardId1)) 72 | } 73 | } 74 | } 75 | 76 | flock.nameServer.reload() 77 | } 78 | 79 | val alice = 1L 80 | val FOLLOWS = 1 81 | val pageSize = 100 82 | 83 | def alicesFollowings() = { 84 | val term = QueryTerm(alice, FOLLOWS, true, None, List(State.Normal)) 85 | val query = EdgeQuery(term, new Page(pageSize, Cursor.Start)) 86 | val resultsList = flockService.selectEdges(List(query))() 87 | resultsList.size mustEqual 1 88 | resultsList(0).toList 89 | } 90 | 91 | "select results" should { 92 | "black hole" in { 93 | reset(config, "com.twitter.gizzard.shards.BlackHoleShard") // I don't know why this isn't working in doBefore 94 | 95 | for(i <- 0 until 10) { 96 | execute(Select(alice, FOLLOWS, i).add) 97 | } 98 | 99 | alicesFollowings.size must eventually(be(10)) 100 | } 101 | } 102 | 103 | "select results" should { 104 | "read-only" in { 105 | reset(config, "com.twitter.gizzard.shards.ReadOnlyShard") // I don't know why this isn't 
working in doBefore 106 | 107 | for(i <- 0 until 10) { 108 | execute(Select(alice, FOLLOWS, i).add) 109 | } 110 | 111 | val scheduler = flock.jobScheduler(flockdb.Priority.High.id) 112 | val errors = scheduler.errorQueue 113 | errors.size must eventually(be(10)) 114 | } 115 | } 116 | 117 | "select results" should { 118 | "write-only" in { 119 | reset(config, "com.twitter.gizzard.shards.WriteOnlyShard") // I don't know why this isn't working in doBefore 120 | 121 | for(i <- 0 until 10) { 122 | execute(Select(alice, FOLLOWS, i).add) 123 | } 124 | 125 | val scheduler = flock.jobScheduler(flockdb.Priority.High.id) 126 | val errors = scheduler.errorQueue 127 | alicesFollowings.size must eventually(be(10)) 128 | } 129 | } 130 | 131 | } 132 | 133 |
-------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/shards/Optimism.scala: --------------------------------------------------------------------------------
package com.twitter.flockdb
package shards

import com.twitter.util.{Time, Try, Return, Throw}
import com.twitter.logging.Logger
import com.twitter.gizzard.shards.{ShardException, NodeSet}

// Thrown when the dominant node state observed after the write differs from the one
// observed before it (see OptimisticStateMonitor.optimistically below).
class OptimisticLockException(message: String) extends ShardException(message)

/**
 * This handles a new optimistic lock of one direction of an edge. In order to lock an
 * entire edge, you should nest two of these--one for each direction. This lock is *only*
 * responsible for maintaining color/state consistency.
 *
 * Inconsistencies between replicas are handled by consensus. The most recent and therefore
 * highest priority row wins. It helps to visualize each replication set as a unified state
 * of the union of all rows, taking the highest priority rows where conflicts happen. If two
 * rows are different, and we play an operation, that operation can't make things worse, because
 * it will move the timeline forwards.
 *
 * This lock does a read across every available replica. If any read fails, the entire
 * lock fails (i.e. reenqueues happen). However, the writes will still proceed first. We
 * want to propagate writes, as they're "better" than the old state, while still not
 * neccessarily being the final state.
 *
 * There would be two ways to make things worse:
 * 1. regress the timeline (by writing an old operation, which is impossible under the contract
 *    of the shards themselves)
 * 2. write a color that doesn't represent the current state of the consensus at some time at or
 *    after the row is written. The key case to worry about here is when the consensus looks like
 *    Seq(old_row, Offline(new_row)). In this case, the non-unanimous consensus would write the
 *    wrong color. We require that the consensus be unanimous for this reason.
 *
 * The current iteration of the optimism branch is (perhaps overly) conservative by maintaining
 * a lock over all replicas. We could choose to go a couple different ways from here:
 *
 * 1. Hold the course. This works.
 * 2. Only lock over (any) one server of each replication set (so two total--one forwards, one
 *    backwards). This would reduce the number of reads. If our data is (a) eventually-but-not-
 *    yet-consistent and not just corrupted, and (b) two copies of a node have different states,
 *    then the job to update the state of the node to the newer state is still active. If we write
 *    the wrong edge color based upon the old node, it is true that there is a multi job in some
 *    presently existing queue that will fix the edge.
 * 3. Do read repair. When we do the optimistic lock read over all replicas, we're already grabbing
 *    all of the information we need to repair the graphs. Instead of just throwing an exception,
 *    or ignoring the inconsistency, we could enqueue the appropriate multi job to perform the repair.
 *    My chief concern would be that we could issue a storm of multi jobs.
 *
 * Eventually, we will probably choose (2) or (3), depending on how we weigh the tradeoff between
 * repairing consistency and raw performance. However, we should stick to (1) for now, and evaluate
 * the other options soon.
 *
 * Also, in the short term, it's worth understanding (2), so that you can realize that adding
 * replicas doesn't screw things up.
 */
// TODO: Make this async.
trait OptimisticStateMonitor {

  // One Try per replica read: Throw on read failure, Return(None) when the replica has
  // no metadata row for sourceId.
  def getMetadatas(sourceId: Long): Seq[Try[Option[Metadata]]]

  // implementation

  private val log = Logger.get(getClass.getName)

  /**
   * Runs `f` with the dominant state observed before the write, then re-reads and throws
   * OptimisticLockException if the dominant state changed underneath us. Read failures are
   * rethrown — but only *after* `f` has run, so the write itself still propagates
   * (the caller is expected to reenqueue on the rethrown exception).
   */
  def optimistically(sourceId: Long)(f: State => Unit) = {
    try {
      log.debug("Optimistic Lock: starting optimistic lock for " + sourceId)

      val (beforeStateOpt, beforeEx) = getDominantState(sourceId)
      val beforeState = beforeStateOpt.getOrElse(State.Normal)

      // If *no* replica returned a state, we can't even pick a best-effort state; fail now.
      if (beforeStateOpt.isEmpty) beforeEx.foreach(throw _)

      f(beforeState)

      // We didn't do this immediately if we got a result from one shard, because we still want to propagate writes with best effort.
      // We should reenqueue if the optimistic lock only covers a subset of the intended targets.
      beforeEx.foreach(throw _)

      val (afterStateOpt, afterEx) = getDominantState(sourceId)
      val afterState = afterStateOpt.getOrElse(State.Normal)

      afterEx.foreach(throw _)

      if(beforeState != afterState) {
        val msg = "Optimistic Lock: lost optimistic lock for " + sourceId + ": was " + beforeState +", now " + afterState

        log.debug(msg)
        throw new OptimisticLockException(msg)
      }

      log.debug("Optimistic Lock: successful optimistic lock for " + sourceId)

    } catch {
      // Log-and-rethrow only; the lock never swallows failures.
      case e => {
        log.debug("Optimistic Lock: exception in optimistic lock for " + sourceId + ": " + e.getMessage)
        throw e
      }
    }
  }

  /**
   * Reads every replica and returns (dominant state if any replica had metadata,
   * first read failure if any). The dominant row is chosen via Metadata's `max`
   * (highest priority row wins, per the class comment above).
   */
  def getDominantState(sourceId: Long) = {
    // The default metadata
    var winning: Option[Metadata] = None
    var exceptions: List[Throwable] = Nil

    getMetadatas(sourceId).foreach {
      case Throw(ex) => exceptions = ex :: exceptions
      case Return(Some(metadata)) => winning = winning.map(_ max metadata).orElse(Some(metadata))
      case Return(None) => ()
    }

    (winning.map(_.state), exceptions.headOption)
  }
}

object LockingNodeSet {
  // Pimp a NodeSet[Shard] into an OptimisticStateMonitor.
  implicit def asLockingNodeSet(n: NodeSet[Shard]) = new LockingNodeSet(n)
}

// Monitor over a gizzard replica set: reads (and blocks on) metadata from every replica.
class LockingNodeSet(node: NodeSet[Shard]) extends OptimisticStateMonitor {
  def getMetadatas(id: Long) = node.all { _.getMetadataForWrite(id)() }
}
-------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/unit/LegacyJobParserSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License.
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb.unit 18 | 19 | import com.twitter.util.Time 20 | import com.twitter.gizzard.scheduler.JsonCodec 21 | import com.twitter.flockdb.ConfiguredSpecification 22 | import com.twitter.flockdb.{Direction, State, Priority} 23 | import com.twitter.flockdb.jobs.single.Single 24 | import com.twitter.flockdb.jobs.multi.Multi 25 | import com.twitter.flockdb.jobs._ 26 | 27 | 28 | class LegacyJobParserSpec extends ConfiguredSpecification { 29 | 30 | val updatedAt = Time.fromSeconds(1111) 31 | val codec = new JsonCodec(_ => ()) 32 | 33 | codec += ("com.twitter.flockdb.jobs.single.Add".r, LegacySingleJobParser.Add(null, null)) 34 | codec += ("com.twitter.flockdb.jobs.single.Remove".r, LegacySingleJobParser.Remove(null, null)) 35 | codec += ("com.twitter.flockdb.jobs.single.Negate".r, LegacySingleJobParser.Negate(null, null)) 36 | codec += ("com.twitter.flockdb.jobs.single.Archive".r, LegacySingleJobParser.Archive(null, null)) 37 | codec += ("com.twitter.flockdb.jobs.multi.Archive".r, LegacyMultiJobParser.Archive(null, null, 500)) 38 | codec += ("com.twitter.flockdb.jobs.multi.Unarchive".r, LegacyMultiJobParser.Unarchive(null, null, 500)) 39 | codec += ("com.twitter.flockdb.jobs.multi.RemoveAll".r, LegacyMultiJobParser.RemoveAll(null, null, 500)) 40 | codec += ("com.twitter.flockdb.jobs.multi.Negate".r, LegacyMultiJobParser.Negate(null, null, 500)) 41 | 42 | "LegacySingleJobParser" should { 43 | "correctly generate a new style job from an old serialized Add job" in { 44 | val map = Map( 45 | 
"com.twitter.flockdb.jobs.single.Add" -> Map( 46 | "source_id" -> 22, 47 | "graph_id" -> 1, 48 | "destination_id" -> 11, 49 | "position" -> 1111, 50 | "updated_at" -> 1111 51 | ) 52 | ) 53 | 54 | codec.inflate(map) mustEqual new Single(22, 1, 11, 1111, State.Normal, updatedAt, null, null) 55 | } 56 | 57 | "correctly generate a new style job from an old serialized Remove job" in { 58 | val map = Map( 59 | "com.twitter.flockdb.jobs.single.Remove" -> Map( 60 | "source_id" -> 22, 61 | "graph_id" -> 1, 62 | "destination_id" -> 11, 63 | "position" -> 1111, 64 | "updated_at" -> 1111 65 | ) 66 | ) 67 | 68 | codec.inflate(map) mustEqual new Single(22, 1, 11, 1111, State.Removed, updatedAt, null, null) 69 | } 70 | 71 | "correctly generate a new style job from an old serialized Negate job" in { 72 | val map = Map( 73 | "com.twitter.flockdb.jobs.single.Negate" -> Map( 74 | "source_id" -> 22, 75 | "graph_id" -> 1, 76 | "destination_id" -> 11, 77 | "position" -> 1111, 78 | "updated_at" -> 1111 79 | ) 80 | ) 81 | 82 | codec.inflate(map) mustEqual new Single(22, 1, 11, 1111, State.Negative, updatedAt, null, null) 83 | } 84 | 85 | "correctly generate a new style job from an old serialized Archive job" in { 86 | val map = Map( 87 | "com.twitter.flockdb.jobs.single.Archive" -> Map( 88 | "source_id" -> 22, 89 | "graph_id" -> 1, 90 | "destination_id" -> 11, 91 | "position" -> 1111, 92 | "updated_at" -> 1111 93 | ) 94 | ) 95 | 96 | codec.inflate(map) mustEqual new Single(22, 1, 11, 1111, State.Archived, updatedAt, null, null) 97 | } 98 | } 99 | 100 | "LegacyMultiJobParser" should { 101 | "correctly generate a new style job from an old serialized Archive job" in { 102 | val map = Map( 103 | "com.twitter.flockdb.jobs.multi.Archive" -> Map( 104 | "source_id" -> 22, 105 | "graph_id" -> 1, 106 | "direction" -> 0, 107 | "updated_at" -> 1111, 108 | "priority" -> 1 109 | ) 110 | ) 111 | 112 | val job = new Multi(22, 1, Direction.Forward, State.Archived, updatedAt, Priority.Low, 500, null, null) 
113 | 114 | codec.inflate(map) mustEqual job 115 | } 116 | 117 | "correctly generate a new style job from an old serialized Unarchive job" in { 118 | val map = Map( 119 | "com.twitter.flockdb.jobs.multi.Unarchive" -> Map( 120 | "source_id" -> 22, 121 | "graph_id" -> 1, 122 | "direction" -> 0, 123 | "updated_at" -> 1111, 124 | "priority" -> 1 125 | ) 126 | ) 127 | 128 | val job = new Multi(22, 1, Direction.Forward, State.Normal, updatedAt, Priority.Low, 500, null, null) 129 | 130 | codec.inflate(map) mustEqual job 131 | } 132 | 133 | "correctly generate a new style job from an old serialized RemoveAll job" in { 134 | val map = Map( 135 | "com.twitter.flockdb.jobs.multi.RemoveAll" -> Map( 136 | "source_id" -> 22, 137 | "graph_id" -> 1, 138 | "direction" -> 0, 139 | "updated_at" -> 1111, 140 | "priority" -> 1 141 | ) 142 | ) 143 | 144 | val job = new Multi(22, 1, Direction.Forward, State.Removed, updatedAt, Priority.Low, 500, null, null) 145 | 146 | codec.inflate(map) mustEqual job 147 | } 148 | 149 | "correctly generate a new style job from an old serialized Negate job" in { 150 | val map = Map( 151 | "com.twitter.flockdb.jobs.multi.Negate" -> Map( 152 | "source_id" -> 22, 153 | "graph_id" -> 1, 154 | "direction" -> 0, 155 | "updated_at" -> 1111, 156 | "priority" -> 1 157 | ) 158 | ) 159 | 160 | val job = new Multi(22, 1, Direction.Forward, State.Negative, updatedAt, Priority.Low, 500, null, null) 161 | 162 | codec.inflate(map) mustEqual job 163 | } 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/unit/JobSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package unit 19 | 20 | import scala.collection.mutable 21 | import com.twitter.gizzard.scheduler.{JsonJob, PrioritizingJobScheduler} 22 | import com.twitter.gizzard.shards._ 23 | import com.twitter.util.{Future, Time} 24 | import com.twitter.util.TimeConversions._ 25 | import org.specs.mock.{ClassMocker, JMocker} 26 | import com.twitter.flockdb 27 | import flockdb.Direction._ 28 | import flockdb.State._ 29 | import shards.{Shard, SqlShard, ReadWriteShardAdapter, OptimisticLockException} 30 | import jobs.single.Single 31 | import jobs.multi.Multi 32 | 33 | class JobSpec extends ConfiguredSpecification with JMocker with ClassMocker { 34 | val FOLLOWS = 1 35 | 36 | val bob = 1L 37 | val mary = 23L 38 | val carl = 42L 39 | val jane = 56L 40 | val darcy = 62L 41 | 42 | val uuidGenerator = IdentityUuidGenerator 43 | val forwardingManager = mock[ForwardingManager] 44 | val mocks = (0 to 3) map { _ => mock[Shard] } 45 | 46 | // allow the readwrite shard adapter to implement optimistically 47 | val shards = mocks map { m => LeafRoutingNode(m) } 48 | val scheduler = mock[PrioritizingJobScheduler] 49 | 50 | def test( 51 | desc: String, 52 | jobState: State, 53 | bobBefore: State, 54 | maryBefore: State, 55 | bobAfter: State, 56 | maryAfter: State, 57 | applied: State, 58 | f: jobs.single.Single => Unit 59 | ) = { 60 | desc in { 61 | Time.withCurrentTimeFrozen { time => 62 | val job = new Single(bob, FOLLOWS, mary, 1, jobState, Time.now, forwardingManager, uuidGenerator) 63 | 64 | expect 
{ 65 | allowing(forwardingManager).findNode(bob, FOLLOWS, Forward) willReturn shards(0) 66 | allowing(forwardingManager).findNode(mary, FOLLOWS, Backward) willReturn shards(1) 67 | 68 | // Before 69 | one(mocks(0)).getMetadataForWrite(bob) willReturn Future(Some(new Metadata(bob, bobBefore, 1, Time.now - 1.second))) 70 | one(mocks(1)).getMetadataForWrite(mary) willReturn Future(Some(new Metadata(mary, maryBefore, 1, Time.now - 1.second))) 71 | 72 | // After 73 | allowing(mocks(0)).getMetadataForWrite(bob) willReturn Future(Some(new Metadata(mary, bobAfter, 1, Time.now))) 74 | allowing(mocks(1)).getMetadataForWrite(mary) willReturn Future(Some(new Metadata(mary, maryAfter, 1, Time.now))) 75 | 76 | // Results 77 | applied match { 78 | case Normal => { 79 | one(mocks(0)).add(bob, mary, 1, Time.now)() 80 | one(mocks(1)).add(mary, bob, 1, Time.now)() 81 | } 82 | case Archived => { 83 | one(mocks(0)).archive(bob, mary, 1, Time.now)() 84 | one(mocks(1)).archive(mary, bob, 1, Time.now)() 85 | } 86 | case Removed => { 87 | one(mocks(0)).remove(bob, mary, 1, Time.now)() 88 | one(mocks(1)).remove(mary, bob, 1, Time.now)() 89 | } 90 | } 91 | } 92 | 93 | f(job) 94 | } 95 | } 96 | } 97 | 98 | "Single" should { 99 | "toJson" in { 100 | Time.withCurrentTimeFrozen { time => 101 | val job = new Single(bob, FOLLOWS, mary, 1, State.Normal, Time.now, forwardingManager, uuidGenerator) 102 | val json = job.toJson 103 | json mustMatch "Single" 104 | json mustMatch "\"source_id\":" + bob 105 | json mustMatch "\"graph_id\":" + FOLLOWS 106 | json mustMatch "\"destination_id\":" + mary 107 | json mustMatch "\"state\":" 108 | json mustMatch "\"updated_at\":" + Time.now.inSeconds 109 | } 110 | } 111 | 112 | "toJson with successes" in { 113 | Time.withCurrentTimeFrozen { time => 114 | val job = new Single(bob, FOLLOWS, mary, 1, State.Normal, Time.now, forwardingManager, uuidGenerator, List(ShardId("host", "prefix"))) 115 | val json = job.toJson 116 | json mustMatch "Single" 117 | json mustMatch 
"\"source_id\":" + bob 118 | json mustMatch "\"graph_id\":" + FOLLOWS 119 | json mustMatch "\"destination_id\":" + mary 120 | json mustMatch "\"updated_at\":" + Time.now.inSeconds 121 | json must include("\"write_successes\":[[\"host\",\"prefix\"]]") 122 | } 123 | } 124 | } 125 | 126 | "Add" should { 127 | // Input Before After Resulting 128 | // Job Bob Mary Bob Mary Job 129 | test("normal add", Normal, Normal, Normal, Normal, Normal, Normal, _.apply) 130 | test("lost lock add", Normal, Normal, Normal, Normal, Archived, Normal, _.apply must throwA[OptimisticLockException]) 131 | test("when bob archived", Normal, Archived, Normal, Archived, Normal, Archived, _.apply) 132 | test("when mary archived", Normal, Normal, Archived, Normal, Archived, Archived, _.apply) 133 | } 134 | 135 | "Remove" should { 136 | // Input Before After Resulting 137 | // Job Bob Mary Bob Mary Job 138 | test("normal remove", Removed, Normal, Normal, Normal, Normal, Removed, _.apply) 139 | } 140 | 141 | "Archive" should { 142 | // Input Before After Resulting 143 | // Job Bob Mary Bob Mary Job 144 | test("normal archive", Archived, Normal, Normal, Normal, Normal, Archived, _.apply) 145 | test("archive removed", Archived, Normal, Removed, Normal, Removed, Removed, _.apply) 146 | test("archive removed", Archived, Removed, Normal, Removed, Normal, Removed, _.apply) 147 | } 148 | 149 | 150 | "Multi" should { 151 | "toJson" in { 152 | Time.withCurrentTimeFrozen { time => 153 | val job = new Multi(bob, FOLLOWS, Direction.Forward, State.Normal, Time.now, Priority.Low, 500, null, null) 154 | val json = job.toJson 155 | json mustMatch "Multi" 156 | json mustMatch "\"source_id\":" + bob 157 | json mustMatch "\"updated_at\":" + Time.now.inSeconds 158 | json mustMatch "\"graph_id\":" + FOLLOWS 159 | json mustMatch "\"direction\":" + Direction.Forward.id 160 | json mustMatch "\"priority\":" + Priority.Low.id 161 | json mustMatch "\"state\":" + State.Normal.id 162 | json mustMatch "\"cursor\":" + 
Cursor.Start.position 163 | } 164 | } 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/integration/CopySpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package integration 19 | 20 | import scala.collection.JavaConversions._ 21 | import scala.collection.mutable 22 | import com.twitter.gizzard.thrift.conversions.ShardInfo._ 23 | import com.twitter.gizzard.scheduler.{JsonJob, PrioritizingJobScheduler} 24 | import com.twitter.gizzard.thrift.conversions.Sequences._ 25 | import com.twitter.gizzard.shards.{ShardInfo, ShardId, Busy, RoutingNode} 26 | import com.twitter.gizzard.nameserver.Forwarding 27 | import com.twitter.util.Time 28 | import com.twitter.util.TimeConversions._ 29 | import org.specs.util.{Duration => SpecsDuration} 30 | import org.specs.mock.{ClassMocker, JMocker} 31 | import com.twitter.flockdb 32 | import com.twitter.flockdb.{Edge, Metadata} 33 | import com.twitter.flockdb.config.{FlockDB => FlockDBConfig} 34 | import shards.{Shard, SqlShard} 35 | import thrift._ 36 | 37 | class CopySpec extends IntegrationSpecification { 38 | "Copy" should { 39 | val sourceShardId = ShardId("localhost", "copy_test1") 40 | val destinationShardId = 
ShardId("localhost", "copy_test2") 41 | val shard3Id = ShardId("localhost", "copy_test3") 42 | val sourceShardInfo = new ShardInfo(sourceShardId, 43 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal) 44 | val destinationShardInfo = new ShardInfo(destinationShardId, 45 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal) 46 | val shard3Info = new ShardInfo(shard3Id, 47 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal) 48 | val time = Time.now 49 | 50 | doBefore { 51 | val queryEvaluator = config.edgesQueryEvaluator()(config.databaseConnection) 52 | 53 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test1_edges")() 54 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test1_metadata")() 55 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test2_edges")() 56 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test2_metadata")() 57 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test3_edges")() 58 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test3_metadata")() 59 | flock.nameServer.reload() 60 | flock.shardManager.createAndMaterializeShard(sourceShardInfo) 61 | flock.shardManager.createAndMaterializeShard(destinationShardInfo) 62 | flock.shardManager.createAndMaterializeShard(shard3Info) 63 | flock.shardManager.setForwarding(new Forwarding(0, Long.MinValue, sourceShardInfo.id)) 64 | 65 | } 66 | 67 | doAfter { 68 | val queryEvaluator = config.edgesQueryEvaluator()(config.databaseConnection) 69 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test1_edges")() 70 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test1_metadata")() 71 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test2_edges")() 72 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test2_metadata")() 73 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test3_edges")() 74 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test3_metadata")() 75 | } 76 | 77 | 78 | def writeEdges(shard: 
RoutingNode[Shard], num: Int, start: Int, step: Int, outdated: Boolean, state: State = State.Normal) { 79 | val edges = for (id <- start to num by step) yield { 80 | Edge(1L, id.toLong, id.toLong, (if (outdated) time-1.seconds else time), 0, state) 81 | } 82 | 83 | shard.write.foreach { _.writeCopies(edges)() } 84 | } 85 | 86 | def getEdges(shard: RoutingNode[Shard], num: Int) { 87 | shard.read.any { _.count(1L, Seq(State.Normal))() } mustEqual num 88 | } 89 | 90 | def validateEdges(shards: Seq[RoutingNode[Shard]], num: Int) { 91 | playScheduledJobs() 92 | 93 | val shardsEdges = shards map { _.read.any { _.selectAll((Cursor.Start, Cursor.Start), 2*num)()._1 } } 94 | shardsEdges.foreach { _.length mustEqual num } 95 | 96 | for (idx <- 0 until num) { 97 | val head :: others = shardsEdges 98 | 99 | others foreach { edges => 100 | head zip edges foreach { case (a, b) => 101 | a mustEqual b 102 | b.updatedAt.inSeconds mustEqual time.inSeconds 103 | } 104 | } 105 | } 106 | } 107 | 108 | "do nothing on equivalent shards" in { 109 | val numData = 100 110 | val shard1 = flock.nameServer.findShardById[Shard](sourceShardId) 111 | val shard2 = flock.nameServer.findShardById[Shard](destinationShardId) 112 | writeEdges(shard1, numData, 1, 1, false) 113 | writeEdges(shard2, numData, 1, 1, false) 114 | 115 | flock.managerServer.copy_shard(Seq(sourceShardInfo.toThrift.id, destinationShardInfo.toThrift.id)) 116 | 117 | validateEdges(Seq(shard1, shard2), numData) 118 | } 119 | 120 | "copy" in { 121 | val numData = 100 122 | val shard1 = flock.nameServer.findShardById[Shard](sourceShardId) 123 | val shard2 = flock.nameServer.findShardById[Shard](destinationShardId) 124 | writeEdges(shard1, numData, 1, 1, false) 125 | 126 | flock.managerServer.copy_shard(Seq(sourceShardInfo.toThrift.id, destinationShardInfo.toThrift.id)) 127 | 128 | validateEdges(Seq(shard1, shard2), numData) 129 | } 130 | 131 | "repair by merging" in { 132 | val numData = 100 133 | val shard1 = 
flock.nameServer.findShardById[Shard](sourceShardId) 134 | val shard2 = flock.nameServer.findShardById[Shard](destinationShardId) 135 | writeEdges(shard1, numData, 1, 2, false) 136 | writeEdges(shard2, numData, 2, 2, false) 137 | 138 | flock.managerServer.copy_shard(Seq(sourceShardInfo.toThrift.id, destinationShardInfo.toThrift.id)) 139 | 140 | validateEdges(Seq(shard1, shard2), numData) 141 | } 142 | 143 | "repair and fill out of date" in { 144 | val numData = 100 145 | 146 | val shard1 = flock.nameServer.findShardById[Shard](sourceShardId) 147 | val shard2 = flock.nameServer.findShardById[Shard](destinationShardId) 148 | val shard3 = flock.nameServer.findShardById[Shard](shard3Id) 149 | 150 | writeEdges(shard1, numData, 1, 2, false) 151 | writeEdges(shard2, numData/2, 2, 2, false) 152 | writeEdges(shard2, numData/2, 1, 2, true) 153 | writeEdges(shard2, numData, numData/2, 1, false) 154 | writeEdges(shard3, numData, 1, 3, true, State.Archived) 155 | 156 | shard1.write.foreach { _.writeMetadata(Metadata(1L, State.Normal, time))() } 157 | shard2.write.foreach { _.writeMetadata(Metadata(1L, State.Normal, time))() } 158 | shard3.write.foreach { _.writeMetadata(Metadata(1L, State.Archived, (time - 1.seconds)) )() } 159 | 160 | flock.managerServer.copy_shard(Seq(sourceShardInfo.toThrift.id, destinationShardInfo.toThrift.id, shard3Info.toThrift.id)) 161 | validateEdges(Seq(shard1, shard2, shard3), numData) 162 | } 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/EdgesService.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | import com.twitter.logging.Logger 20 | import com.twitter.gizzard.Stats 21 | import com.twitter.gizzard.nameserver.{NameServer, NonExistentShard, InvalidShard} 22 | import com.twitter.gizzard.scheduler.{CopyJobFactory, JsonJob, PrioritizingJobScheduler} 23 | import com.twitter.gizzard.shards._ 24 | import com.twitter.flockdb.operations.{ExecuteOperations, SelectOperation} 25 | import com.twitter.flockdb.queries._ 26 | import com.twitter.flockdb.thrift.FlockException 27 | import com.twitter.util.Future 28 | 29 | class EdgesService( 30 | forwardingManager: ForwardingManager, 31 | schedule: PrioritizingJobScheduler, 32 | intersectionQueryConfig: config.IntersectionQuery, 33 | aggregateJobsPageSize: Int) { 34 | 35 | // TODO: Make serverName configurable. 
36 | private val serverName = "edges" 37 | private val log = Logger.get(getClass.getName) 38 | private val exceptionLog = Logger.get("exception") 39 | private val selectCompiler = new SelectCompiler(forwardingManager, intersectionQueryConfig) 40 | private val executeCompiler = new ExecuteCompiler(schedule, forwardingManager, aggregateJobsPageSize) 41 | 42 | def containsMetadata(sourceId: Long, graphId: Int): Future[Boolean] = { 43 | wrapRPC("contains_metadata") { 44 | val name = "contains-metadata" 45 | Stats.transaction.name = name 46 | Stats.incr(name + "-graph_" + graphId + "-count") 47 | forwardingManager.find(sourceId, graphId, Direction.Forward).getMetadata(sourceId) map { _.isDefined } 48 | } 49 | } 50 | 51 | def contains(sourceId: Long, graphId: Int, destinationId: Long): Future[Boolean] = { 52 | wrapRPC("contains") { 53 | val name = "contains" 54 | Stats.transaction.name = name 55 | Stats.incr(name + "-graph_" + graphId + "-count") 56 | forwardingManager.find(sourceId, graphId, Direction.Forward).get(sourceId, destinationId) map { 57 | _ map { edge => edge.state == State.Normal || edge.state == State.Negative } getOrElse false 58 | } 59 | } 60 | } 61 | 62 | def get(sourceId: Long, graphId: Int, destinationId: Long): Future[Edge] = { 63 | wrapRPC("get") { 64 | val name = "get" 65 | Stats.transaction.name = name 66 | Stats.incr(name + "-graph_" + graphId + "-count") 67 | forwardingManager.find(sourceId, graphId, Direction.Forward).get(sourceId, destinationId) flatMap { 68 | case Some(edge) => Future(edge) 69 | case _ => Future.exception(new FlockException("Record not found: (%d, %d, %d)".format(sourceId, graphId, destinationId))) 70 | } 71 | } 72 | } 73 | 74 | def getMetadata(sourceId: Long, graphId: Int): Future[Metadata] = { 75 | wrapRPC("get_metadata") { 76 | val name = "get-metadata" 77 | Stats.transaction.name = name 78 | Stats.incr(name + "-graph_" + graphId + "-count") 79 | forwardingManager.find(sourceId, graphId, 
Direction.Forward).getMetadata(sourceId) flatMap { 80 | case Some(metadata) => Future(metadata) 81 | case _ => Future.exception(new FlockException("Record not found: (%d, %d)".format(sourceId, graphId))) 82 | } 83 | } 84 | } 85 | 86 | def select(query: SelectQuery): Future[ResultWindow[Long]] = select(List(query)) map { _.head } 87 | 88 | def select(queries: Seq[SelectQuery]): Future[Seq[ResultWindow[Long]]] = { 89 | wrapRPC("select") { 90 | Future.collect(queries map { query => 91 | val queryTree = selectCompiler(query.operations) 92 | queryTree.select(query.page) onSuccess { _ => 93 | Stats.transaction.record(queryTree.toString) 94 | } rescue { 95 | case e: ShardBlackHoleException => 96 | Future.exception(new FlockException("Shard is blackholed: " + e)) 97 | } 98 | }) 99 | } 100 | } 101 | 102 | def selectEdges(queries: Seq[EdgeQuery]): Future[Seq[ResultWindow[Edge]]] = { 103 | wrapRPC("select_edges") { 104 | Future.collect(queries map { query => 105 | val term = query.term 106 | Stats.incr("select-edge-graph_" + (if (term.isForward) "" else "n") + term.graphId + "-count") 107 | val shard = forwardingManager.find(term.sourceId, term.graphId, Direction(term.isForward)) 108 | val states = if (term.states.isEmpty) List(State.Normal) else term.states 109 | 110 | if (term.destinationIds.isDefined) { 111 | shard.intersectEdges(term.sourceId, states, term.destinationIds.get) map { results => 112 | new ResultWindow(results.map { edge => (edge, Cursor(edge.destinationId)) }, query.page.count, query.page.cursor) 113 | } 114 | } else { 115 | shard.selectEdges(term.sourceId, states, query.page.count, query.page.cursor) 116 | } 117 | }) 118 | } 119 | } 120 | 121 | def execute(operations: ExecuteOperations): Future[Unit] = { 122 | wrapRPC("execute") { 123 | Stats.transaction.name = "execute" 124 | // TODO: This results in a kestrel enqueue, which can block on disk I/O. Consider moving this work 125 | // to a separate threadpool. 
126 | executeCompiler(operations) 127 | Future.Unit 128 | } 129 | } 130 | 131 | def count(queries: Seq[Seq[SelectOperation]]): Future[Seq[Int]] = { 132 | wrapRPC("count") { 133 | Future.collect(queries map { query => 134 | val queryTree = selectCompiler(query) 135 | queryTree.sizeEstimate onSuccess { _ => 136 | Stats.transaction.record(queryTree.toString) 137 | } 138 | }) 139 | } 140 | } 141 | 142 | private[this] def logAndWrapException(rpcName: String, e: Throwable) = { 143 | val endpoint = serverName +"/"+ rpcName 144 | e match { 145 | case e: NonExistentShard => 146 | Stats.incr("nonexistent_shard_error_count") 147 | log.error(e, "Nonexistent shard: %s", e) 148 | case e: InvalidShard => 149 | Stats.incr("invalid_shard_error_count") 150 | log.error(e, "Invalid shard: %s", e) 151 | case e: FlockException => 152 | Stats.incr("normal_error_count_" + endpoint) 153 | case e: ShardDatabaseTimeoutException => 154 | Stats.incr("timeout_count_" + endpoint) 155 | case e: ShardTimeoutException => 156 | Stats.incr("timeout_count_" + endpoint) 157 | case e: ShardOfflineException => 158 | Stats.incr("offline_count_" + endpoint) 159 | case _ => 160 | Stats.incr("internal_error_count_" + endpoint) 161 | exceptionLog.error(e, "Unhandled error in EdgesService", e) 162 | log.error("Unhandled error in EdgesService: " + e.toString) 163 | } 164 | 165 | e match { 166 | case e: FlockException => Future.exception(e) 167 | case _ => Future.exception(new FlockException("%s: %s".format(e.getClass.getName, e.getMessage))) 168 | } 169 | } 170 | 171 | private[this] def timeFuture[T](label: String)(f: => Future[T]) = { 172 | Stats.timeFutureMillis(serverName +"/"+ label)(f) 173 | } 174 | 175 | private[this] def wrapRPC[T](rpcName: String)(f: => Future[T]) = timeFuture(rpcName) { 176 | val rv = try { 177 | f 178 | } catch { 179 | case e => Future.exception(e) 180 | } 181 | 182 | rv respond { _ => 183 | Stats.incr(serverName +"/"+ rpcName +"_count") 184 | } onSuccess { _ => 185 | 
Stats.incr(serverName +"/"+ rpcName +"_success_count") 186 | } rescue { 187 | case e => logAndWrapException(rpcName, e) 188 | } 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/unit/SelectCompilerSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package unit 19 | 20 | import scala.collection.mutable 21 | import com.twitter.util.{Future, Time} 22 | import com.twitter.util.TimeConversions._ 23 | import org.specs.mock.{ClassMocker, JMocker} 24 | import com.twitter.flockdb 25 | import com.twitter.flockdb.{Page => FlockPage} 26 | import queries.{SelectCompiler, InvalidQueryException} 27 | import operations.{SelectOperation, SelectOperationType} 28 | import shards.Shard 29 | import thrift.{Page, Results} 30 | 31 | 32 | object SelectCompilerSpec extends ConfiguredSpecification with JMocker with ClassMocker { 33 | "SelectCompiler" should { 34 | var forwardingManager: ForwardingManager = null 35 | var shard: Shard = null 36 | var shard2: Shard = null 37 | var selectCompiler: SelectCompiler = null 38 | val sourceId = 900 39 | val graphId = 5 40 | val states = new mutable.ArrayBuffer[State] { 41 | override def equals(that: Any) = that match { 42 | case that: Seq[_] => this.toList == that.toList 43 | case that => false 44 | } 45 | } 46 | states += State.Normal 47 | 48 | doBefore { 49 | forwardingManager = mock[ForwardingManager] 50 | shard = mock[Shard] 51 | shard2 = mock[Shard] 52 | selectCompiler = new SelectCompiler(forwardingManager, new flockdb.config.IntersectionQuery { averageIntersectionProportion = 1.0 }) 53 | } 54 | 55 | "execute a simple wildcard query" in { 56 | "when the state is given" >> { 57 | expect { 58 | one(forwardingManager).find(sourceId, graphId, Direction.Forward) willReturn shard 59 | one(shard).count(sourceId, states) willReturn Future(23) 60 | } 61 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: Nil 62 | val query = selectCompiler(program) 63 | query.getClass.getName mustMatch "SimpleQuery" 64 | query.sizeEstimate()() mustEqual 23 65 | } 66 | } 67 | 68 | "should throw" in { 69 | "on an empty query" in { 70 | val program = Nil 71 | 
selectCompiler(program) must throwA[InvalidQueryException] 72 | } 73 | 74 | "on a malformed binary operation query" in { 75 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: 76 | new SelectOperation(SelectOperationType.Intersection, None) :: Nil 77 | selectCompiler(program) must throwA[InvalidQueryException] 78 | } 79 | 80 | "on a malformed dual-literal query" in { 81 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: 82 | new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: Nil 83 | selectCompiler(program) must throwA[InvalidQueryException] 84 | } 85 | } 86 | 87 | 88 | "execute a simple list query" in { 89 | expect { 90 | one(forwardingManager).find(sourceId, graphId, Direction.Forward) willReturn shard 91 | } 92 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, Some(List[Long](12, 13)), List(State.Normal)))) :: Nil 93 | val query = selectCompiler(program) 94 | query.getClass.getName mustMatch "WhereInQuery" 95 | query.sizeEstimate()() mustEqual 2 96 | } 97 | 98 | "execute a compound query" in { 99 | expect { 100 | one(forwardingManager).find(sourceId, graphId, Direction.Forward) willReturn shard 101 | one(forwardingManager).find(sourceId, graphId, Direction.Backward) willReturn shard 102 | one(shard).count(sourceId, states) willReturn Future(23) 103 | } 104 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: 105 | new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, false, Some(List[Long](12, 13)), List(State.Normal)))) :: 106 | new SelectOperation(SelectOperationType.Intersection, None) :: Nil 107 | val query = 
selectCompiler(program) 108 | query.getClass.getName mustMatch "IntersectionQuery" 109 | query.sizeEstimate()() mustEqual 2 110 | } 111 | 112 | "execute a nested compound query" in { 113 | expect { 114 | one(forwardingManager).find(sourceId, graphId, Direction.Forward) willReturn shard 115 | one(forwardingManager).find(sourceId, graphId, Direction.Backward) willReturn shard 116 | one(forwardingManager).find(sourceId + 1, graphId, Direction.Forward) willReturn shard2 117 | one(shard).count(sourceId, states) willReturn Future(23) 118 | one(shard2).count(sourceId + 1, states) willReturn Future(25) 119 | } 120 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: 121 | new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, false, Some(List[Long](12, 13)), List(State.Normal)))) :: 122 | new SelectOperation(SelectOperationType.Intersection, None) :: 123 | new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId + 1, graphId, true, None, List(State.Normal)))) :: 124 | new SelectOperation(SelectOperationType.Union, None) :: Nil 125 | val query = selectCompiler(program) 126 | query.getClass.getName mustMatch "UnionQuery" 127 | query.sizeEstimate()() mustEqual 25 128 | } 129 | 130 | "execute a difference query in the right order" in { 131 | expect { 132 | one(forwardingManager).find(sourceId, graphId, Direction.Forward) willReturn shard 133 | one(forwardingManager).find(sourceId + 1, graphId, Direction.Forward) willReturn shard2 134 | one(shard).count(sourceId, states) willReturn Future(10) 135 | allowing(shard2).count(sourceId + 1, states) willReturn Future(2) 136 | } 137 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: 138 | new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId + 1, graphId, true, None, 
List(State.Normal)))) :: 139 | new SelectOperation(SelectOperationType.Difference, None) :: Nil 140 | val query = selectCompiler(program) 141 | query.getClass.getName mustMatch "DifferenceQuery" 142 | query.sizeEstimate()() mustEqual 10 143 | } 144 | 145 | 146 | "time a simple list query" in { 147 | expect { 148 | one(forwardingManager).find(sourceId, graphId, Direction.Forward) willReturn shard 149 | one(shard).intersect(sourceId, List(State.Normal), List[Long](12, 13)) willReturn Future(List[Long](12,13)) 150 | } 151 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, Some(List[Long](12, 13)), List(State.Normal)))) :: Nil 152 | val queryTree = selectCompiler(program) 153 | queryTree.toString mustEqual "" 154 | val rv = queryTree.select(FlockPage(0,Cursor(0)))() 155 | queryTree.toString mustMatch "time" 156 | } 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/jobs/Copy.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package jobs 19 | 20 | import com.twitter.gizzard.scheduler._ 21 | import com.twitter.gizzard.shards.{ShardId, RoutingNode} 22 | import com.twitter.gizzard.nameserver.NameServer 23 | import com.twitter.gizzard.Stats 24 | import com.twitter.util.TimeConversions._ 25 | import conversions.Numeric._ 26 | import shards.{Shard, ReadWriteShardAdapter} 27 | import scala.collection.mutable 28 | 29 | 30 | object Copy { 31 | type CopyCursor = (Cursor, Cursor) 32 | 33 | val START = (Cursor.Start, Cursor.Start) 34 | val END = (Cursor.End, Cursor.End) 35 | val COUNT = 10000 36 | } 37 | 38 | class CopyFactory(nameServer: NameServer, scheduler: JobScheduler) 39 | extends CopyJobFactory[Shard] { 40 | def apply(shardIds: Seq[ShardId]) = 41 | new MetadataCopy(shardIds, MetadataCopy.START, Copy.COUNT, 42 | nameServer, scheduler) 43 | } 44 | 45 | class CopyParser(nameServer: NameServer, scheduler: JobScheduler) 46 | extends CopyJobParser[Shard] { 47 | def deserialize(attributes: Map[String, Any], shardIds: Seq[ShardId], count: Int) = { 48 | val cursor = (Cursor(attributes("cursor1").asInstanceOf[AnyVal].toLong), 49 | Cursor(attributes("cursor2").asInstanceOf[AnyVal].toLong)) 50 | new Copy(shardIds, cursor, count, nameServer, scheduler) 51 | } 52 | 53 | 54 | } 55 | 56 | case class CopyState( 57 | var pos: Int, 58 | items: Seq[Edge], 59 | cursor: Copy.CopyCursor, 60 | total: Int, 61 | val diffs: mutable.ArrayBuffer[Edge] 62 | ) 63 | 64 | /** Given a seq of shards, compares them and copies the most up to date data between them */ 65 | class Copy(shardIds: Seq[ShardId], var cursor: Copy.CopyCursor, 66 | count: Int, nameServer: NameServer, scheduler: JobScheduler) 67 | extends CopyJob[Shard](shardIds, count, nameServer, scheduler) { 68 | 69 | def copyPage(nodes: Seq[RoutingNode[Shard]], count: Int) = { 70 | val shards = nodes map { new ReadWriteShardAdapter(_) } 71 | 72 | var cursors = Seq(cursor) 73 | 74 | while (!cursors.isEmpty) { 75 | 
cursor = cursors.min 76 | val shardStates = shards map { shard => 77 | val (edges, nextCursor) = shard.selectAll(cursor, count)() 78 | shard -> CopyState(0, edges, nextCursor, edges.size, mutable.ArrayBuffer[Edge]()) 79 | } toMap 80 | 81 | /* 82 | * Loop through the edges we got and add diffs to each. Stop when we either run out of edges to process or we get through 83 | * one shard's batch of edges but haven't reached its END. Stopping in the latter case saves cycles that we'll 84 | * repeat on the next iteration anyway and potentially saves us from recording useless diffs. 85 | */ 86 | while (shardStates.find { case (shard, state) => state.pos < state.total }.isDefined && shardStates.find { case (shard, state) => state.pos >= state.total && state.cursor != Copy.END}.isEmpty ) { 87 | val edges = shardStates.map { case (shard, state) => 88 | val edge = if (state.pos < state.total) state.items(state.pos) else Edge.Max 89 | (shard, edge) 90 | } 91 | 92 | val (minShard, minEdge) = edges.foldLeft(edges.head) { case (min, pair) => 93 | val minEdge = min._2 94 | val pairEdge = pair._2 95 | 96 | if (pairEdge.similar(minEdge) < 0) pair else min 97 | } 98 | 99 | val sameEdges = edges.filter { case (shard, edge) => edge.similar(minEdge) == 0 } 100 | 101 | val (bestShard, bestEdge) = sameEdges.foldLeft((minShard, minEdge)) { case (newest, pair) => 102 | if (pair._2.updatedAt > newest._2.updatedAt) pair else newest 103 | } 104 | edges.foreach { case (shard, edge) => 105 | if (bestEdge.similar(edge) < 0) { 106 | shardStates(shard).diffs += bestEdge 107 | } else if (bestEdge.similar(edge) == 0) { 108 | if (bestEdge.updatedAt > edge.updatedAt) { 109 | shardStates(shard).diffs += bestEdge 110 | } 111 | shardStates(shard).pos += 1 112 | } 113 | } 114 | } 115 | 116 | shardStates.foreach { case (shard, state) => 117 | shard.writeCopies(state.diffs)() 118 | Stats.incr("edges-copy", state.diffs.size) 119 | state.diffs.clear 120 | } 121 | 122 | cursors = shardStates.toSeq.map { case 
(shard, state) => state.cursor}.filterNot{ _ == Copy.END } 123 | } 124 | 125 | None 126 | } 127 | 128 | def serialize = Map("cursor1" -> cursor._1.position, "cursor2" -> cursor._2.position) 129 | } 130 | 131 | object MetadataCopy { 132 | type CopyCursor = Cursor 133 | val START = Cursor.Start 134 | val END = Cursor.End 135 | } 136 | 137 | class MetadataCopyParser(nameServer: NameServer, scheduler: JobScheduler) 138 | extends CopyJobParser[Shard] { 139 | def deserialize(attributes: Map[String, Any], shardIds: Seq[ShardId], count: Int) = { 140 | val cursor = Cursor(attributes("cursor").asInstanceOf[AnyVal].toLong) 141 | new MetadataCopy(shardIds, cursor, count, nameServer, scheduler) 142 | } 143 | } 144 | 145 | case class MetadataCopyState( 146 | var pos: Int, 147 | items: Seq[Metadata], 148 | cursor: MetadataCopy.CopyCursor, 149 | total: Int, 150 | val diffs: mutable.ArrayBuffer[Metadata] 151 | ) 152 | 153 | class MetadataCopy(shardIds: Seq[ShardId], var cursor: MetadataCopy.CopyCursor, 154 | count: Int, nameServer: NameServer, scheduler: JobScheduler) 155 | extends CopyJob[Shard](shardIds, count, nameServer, scheduler) { 156 | 157 | def copyPage(nodes: Seq[RoutingNode[Shard]], count: Int) = { 158 | val shards = nodes.map { new ReadWriteShardAdapter(_) } 159 | 160 | var cursors = Seq(cursor) 161 | 162 | while(!cursors.isEmpty) { 163 | cursor = cursors.min 164 | 165 | val shardStates = Map(shards.map { shard => 166 | val (items, nextCursor) = shard.selectAllMetadata(cursor, count)() 167 | (shard, MetadataCopyState(0, items, nextCursor, items.size, mutable.ArrayBuffer[Metadata]())) 168 | }: _*) 169 | 170 | while (shardStates.find{case (shard, state) => state.pos < state.total}.isDefined && shardStates.find{case (shard, state) => state.pos >= state.total && state.cursor != MetadataCopy.END}.isEmpty) { 171 | val items = shardStates.map { case (shard, state) => 172 | val item = if (state.pos < state.total) state.items(state.pos) else Metadata.Max 173 | (shard, item) 174 
| } 175 | 176 | val (minShard, minItem) = items.foldLeft(items.head) { case (min, pair) => 177 | val minItem = min._2 178 | val pairItem = pair._2 179 | 180 | if (pairItem.similar(minItem) < 0) pair else min 181 | } 182 | 183 | val sameItems = items.filter { case (shard, item) => item.similar(minItem) == 0 } 184 | 185 | val (bestShard, bestItem) = sameItems.foldLeft((minShard, minItem)) { case (newest, pair) => 186 | if (pair._2.updatedAt > newest._2.updatedAt) pair else newest } 187 | 188 | items.foreach { case (shard, item) => 189 | if (bestItem.similar(item) < 0) { 190 | shardStates(shard).diffs += bestItem 191 | } else if (bestItem.similar(item) == 0) { 192 | if (bestItem.updatedAt > item.updatedAt) { 193 | shardStates(shard).diffs += bestItem 194 | } 195 | shardStates(shard).pos += 1 196 | } 197 | } 198 | } 199 | 200 | shardStates.foreach { case (shard, state) => 201 | shard.writeMetadatas(state.diffs)() 202 | Stats.incr("edges-copy", state.diffs.size) 203 | state.diffs.clear 204 | } 205 | 206 | cursors = shardStates.toSeq.map { case (shard, state) => state.cursor }.filterNot{ _ == MetadataCopy.END } 207 | } 208 | 209 | Some(new Copy(shardIds, Copy.START, Copy.COUNT, nameServer, scheduler)) 210 | 211 | } 212 | 213 | def serialize = Map("cursor" -> cursor.position) 214 | } 215 | -------------------------------------------------------------------------------- /doc/blog.md: -------------------------------------------------------------------------------- 1 | 2 | # Introducing FlockDB 3 | 4 | Twitter stores many graphs of relationships between people: who you're following, who's following 5 | you, who you receive phone notifications from, and so on. 6 | 7 | Some of the features of these graphs have been challenging to store in scalable ways as we've grown. 8 | For example, instead of requiring each friendship to be requested and confirmed, you can build 9 | one-way relationships by just following other people. 
There's also no limit to how many people are 10 | allowed to follow you, so some people have millions of followers (like @aplusk), while others have 11 | only a few. 12 | 13 | To deliver a tweet, we need to be able to look up someone's followers and page through them rapidly. 14 | But we also need to handle heavy write traffic, as followers are added or removed, or spammers are 15 | caught and put on ice. And for some operations, like delivering a @mention, we need to do set 16 | arithmetic like "who's following both of these users?" These features are difficult to implement in a traditional relational database. 17 | 18 | ## A valiant effort 19 | 20 | We went through several storage layers in the early days, including abusive use of relational tables 21 | and key-value storage of denormalized lists. They were either good at handling write operations or 22 | good at paging through giant result sets, but never good at both. 23 | 24 | A little over a year ago, we could see that we needed to try something new. Our goals were: 25 | 26 | - Write the simplest possible thing that could work. 27 | 28 | - Use off-the-shelf MySQL as the storage engine, because we understand its behavior -- in normal use 29 | as well as under extreme load and unusual failure conditions. Give it enough memory to keep 30 | everything in cache. 31 | 32 | - Allow for horizontal partitioning so we can add more database hardware as the corpus grows. 33 | 34 | - Allow write operations to arrive out of order or be processed more than once. (Allow failures to 35 | result in redundant work rather than lost work.) 36 | 37 | FlockDB was the result. We finished migrating to it about 9 months ago and never looked back. 38 | 39 | ## A valiant-er effort 40 | 41 | FlockDB is a database that stores graph data, but it isn't a database optimized for graph-traversal 42 | operations. 
Instead, it's optimized for very large [adjacency 43 | lists](http://en.wikipedia.org/wiki/Adjacency_list), fast reads and writes, and page-able set 44 | arithmetic queries. 45 | 46 | It stores graphs as sets of edges between nodes identified by 64-bit integers. For a social graph, 47 | these node IDs will be user IDs, but in a graph storing "favorite" tweets, the destination may be a 48 | tweet ID. Each edge is also marked with a 64-bit position, used for sorting. (Twitter puts a 49 | timestamp here for the "following" graph, so that your follower list is displayed latest-first.) 50 | 51 | ![schema](schema.png) 52 | 53 | When an edge is "deleted", the row isn't actually deleted from MySQL; it's just marked as being in 54 | the deleted state, which has the effect of moving the primary key (a compound key of the source ID, 55 | state, and position). Similarly, users who delete their account can have their edges put into an 56 | archived state, allowing them to be restored later. We keep only a compound primary key and a 57 | secondary index for each row, and answer all queries from a single index. This kind of schema 58 | optimization allows MySQL to shine and gives us predictable performance. 59 | 60 | A complex query like "What's the intersection of people I follow and people who are following 61 | President Obama?" can be answered quickly by decomposing it into single-user queries ("Who is 62 | following President Obama?"). Data is partitioned by node, so these queries can each be answered by 63 | a single partition, using an indexed range query. Similarly, paging through long result sets is done 64 | by using the position field as a cursor, rather than using `LIMIT/OFFSET`, so any page of results 65 | for a query is indexed and is equally fast. 66 | 67 | Write operations are [idempotent](http://en.wikipedia.org/wiki/Idempotence) and 68 | [commutative](http://en.wikipedia.org/wiki/Commutative), based on the time they enter the system. 
We 69 | can process operations out of order and end up with the same result, so we can paper over temporary 70 | network and hardware failures, or even replay lost data from minutes or hours ago. This was 71 | especially helpful during the initial roll-out. 72 | 73 | Commutative writes also simplify the process of bringing up new partitions. A new partition can 74 | receive write traffic immediately, and receive a dump of data from the old partitions slowly in the 75 | background. Once the dump is over, the partition is immediately "live" and ready to receive reads. 76 | 77 | The app servers (affectionately called "flapps") are written in Scala, are stateless, and are 78 | horizontally scalable. We can add more as query load increases, independent of the databases. They 79 | expose a very small thrift API to clients, though we've written [a Ruby 80 | client](http://github.com/twitter/flockdb-client) with a much richer interface. 81 | 82 | ![it's in the cloud](flockdb-layout.png) 83 | 84 | We use [the Gizzard library](http://github.com/twitter/gizzard) to handle the partitioning layer. A 85 | forwarding layer maps ranges of source IDs to physical databases, and replication is handled by 86 | building a tree of such tables under the same forwarding address. Write operations are acknowledged 87 | after being journalled locally, so that disruptions in database availability or performance are 88 | decoupled from website response times. 89 | 90 | Each edge is actually stored twice: once in the "forward" direction (indexed and partitioned by the 91 | source ID) and once in the "backward" direction (indexed and partitioned by the destination ID). 92 | That way a query like "Who follows me?" is just as efficient as "Who do I follow?", and the answer 93 | to each query can be found entirely on a single partition. 94 | 95 | The end result is a cluster of commodity servers that we can expand as needed. 
Over the winter, we 96 | added 50% database capacity without anyone noticing. We currently store over **13 billion edges** 97 | and sustain peak traffic of **20k writes/second** and **100k reads/second**. 98 | 99 | ## Lessons learned 100 | 101 | Some helpful patterns fell out of our experience, even though they weren't goals originally: 102 | 103 | - **Use aggressive timeouts to cut off the long tail.** 104 | 105 | You can't ever shake out all the unfairness in the system, so some requests will take an 106 | unreasonably long time to finish -- way over the 99.9th percentile. If there are multiple 107 | stateless app servers, you can just cut a client loose when it has passed a "reasonable" amount of 108 | time, and let it try its luck with a different app server. 109 | 110 | - **Make every case an error case.** 111 | 112 | Or, to put it another way, use the same code path for errors as you use in normal operation. Don't 113 | create rarely-tested modules that only kick in during emergencies, when you're least likely to 114 | feel like trying new things. 115 | 116 | We queue all write operations locally (using [Kestrel](http://github.com/robey/kestrel) as a 117 | library), and any that fail are thrown into a separate error queue. This error queue is 118 | periodically flushed back into the write queue, so that retries use the same code path as the 119 | initial attempt. 120 | 121 | - **Do nothing automatically at first.** 122 | 123 | Provide lots of gauges and levers, and automate with scripts once patterns emerge. FlockDB 124 | measures the latency distribution of each query type across each service (MySQL, Kestrel, Thrift) 125 | so we can tune timeouts, and reports counts of each operation so we can see when a client library 126 | suddenly doubles its query load (or we need to add more hardware). 127 | 128 | Write operations that cycle through the error queue too many times are dumped into a log for 129 | manual inspection. 
If it turns out to be a bug, we can fix it, and re-inject the job. If it's a 130 | client error, we have a good bug report. 131 | 132 | ## Check it out 133 | 134 | The source is in github: [http://github.com/twitter/flockdb](http://github.com/twitter/flockdb) 135 | 136 | In particular, check out the demo to get a feel for the kind of data that can be stored and what you 137 | can do with it: 138 | [http://github.com/twitter/flockdb/blob/master/doc/demo.markdown](http://github.com/twitter/flockdb/blob/master/doc/demo.markdown) 139 | 140 | Talk to us on IRC, in #twinfra (irc.freenode.net), or join the mailing list: 141 | [http://groups.google.com/group/flockdb](http://groups.google.com/group/flockdb) 142 | --------------------------------------------------------------------------------