├── doc ├── schema.png ├── flockdb-layout.png ├── find-and-delete.png └── blog.md ├── project ├── release.properties ├── build.properties ├── plugins │ └── Plugins.scala └── build │ └── FlockDBProject.scala ├── .gitignore ├── .ensime ├── src ├── main │ ├── scala │ │ └── com │ │ │ └── twitter │ │ │ └── flockdb │ │ │ ├── JobSchedulable.scala │ │ │ ├── conversions │ │ │ └── Numeric.scala │ │ │ ├── Page.scala │ │ │ ├── EdgeQuery.scala │ │ │ ├── SelectQuery.scala │ │ │ ├── Priority.scala │ │ │ ├── QueryTerm.scala │ │ │ ├── operations │ │ │ ├── ExecuteOperation.scala │ │ │ ├── ExecuteOperations.scala │ │ │ ├── ExecuteOperationType.scala │ │ │ ├── SelectOperation.scala │ │ │ └── SelectOperationType.scala │ │ │ ├── Cursor.scala │ │ │ ├── Main.scala │ │ │ ├── Direction.scala │ │ │ ├── State.scala │ │ │ ├── UuidGenerator.scala │ │ │ ├── ForwardingManager.scala │ │ │ ├── config │ │ │ └── FlockDB.scala │ │ │ ├── queries │ │ │ ├── SimpleQuery.scala │ │ │ ├── UnionQuery.scala │ │ │ ├── WhereInQuery.scala │ │ │ ├── Query.scala │ │ │ ├── DifferenceQuery.scala │ │ │ ├── IntersectionQuery.scala │ │ │ ├── ExecuteCompiler.scala │ │ │ └── SelectCompiler.scala │ │ │ ├── Metadata.scala │ │ │ ├── Edge.scala │ │ │ ├── shards │ │ │ ├── Shard.scala │ │ │ ├── ReadWriteShardAdapter.scala │ │ │ └── Optimism.scala │ │ │ ├── StatsCollectingQuery.scala │ │ │ ├── jobs │ │ │ ├── Legacy.scala │ │ │ ├── multi │ │ │ │ └── Multi.scala │ │ │ ├── single │ │ │ │ └── Single.scala │ │ │ └── Copy.scala │ │ │ ├── Select.scala │ │ │ ├── ResultWindow.scala │ │ │ └── EdgesService.scala │ └── thrift │ │ └── Flockdb.thrift ├── test │ └── scala │ │ └── com │ │ └── twitter │ │ └── flockdb │ │ ├── ConfigValidationSpec.scala │ │ ├── unit │ │ ├── EdgeSpec.scala │ │ ├── UnionQuerySpec.scala │ │ ├── SeqQuery.scala │ │ ├── IntersectionQuerySpec.scala │ │ ├── DifferenceQuerySpec.scala │ │ ├── WhereInQuerySpec.scala │ │ ├── SimpleQuerySpec.scala │ │ ├── EdgesSpec.scala │ │ ├── LegacyJobParserSpec.scala │ │ ├── JobSpec.scala 
│ │ └── SelectCompilerSpec.scala │ │ ├── integration │ │ ├── FlockFixRegressionSpec.scala │ │ ├── IntersectionSpec.scala │ │ ├── SelectCompilerSpec.scala │ │ ├── OptimisticLockRegressionSpec.scala │ │ ├── BlackHoleLockingRegressionSpec.scala │ │ └── CopySpec.scala │ │ └── ConfiguredSpecification.scala └── scripts │ ├── setup-env.sh │ ├── mkshards.rb │ └── start.sh ├── LICENSE ├── TODO ├── config ├── development.scala ├── test.scala └── production.scala └── README.markdown /doc/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twitter-archive/flockdb/HEAD/doc/schema.png -------------------------------------------------------------------------------- /doc/flockdb-layout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twitter-archive/flockdb/HEAD/doc/flockdb-layout.png -------------------------------------------------------------------------------- /doc/find-and-delete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twitter-archive/flockdb/HEAD/doc/find-and-delete.png -------------------------------------------------------------------------------- /project/release.properties: -------------------------------------------------------------------------------- 1 | #Automatically generated by ReleaseManagement 2 | #Mon Apr 02 14:57:48 PDT 2012 3 | version=1.8.15 4 | sha1=d005f33e04350fbc74d0dd5ddee4214c2f973e4a 5 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | #Project properties 2 | #Mon Apr 02 14:57:48 PDT 2012 3 | project.organization=com.twitter 4 | project.name=flockdb 5 | sbt.version=0.7.4 6 | project.version=1.8.16-SNAPSHOT 7 | build.scala.versions=2.8.1 8 | project.initialize=false 9 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist/* 2 | *.log 3 | flock.ipr 4 | flock.iws 5 | target/ 6 | .DS_Store 7 | kestrel/* 8 | *.hprof.txt 9 | lib_managed/ 10 | src_managed/ 11 | project/boot/ 12 | project/plugins/project/ 13 | ignore/ 14 | flockdb.tmproj 15 | *.iml 16 | .idea/ 17 | -------------------------------------------------------------------------------- /.ensime: -------------------------------------------------------------------------------- 1 | ;; This config was generated using ensime-config-gen. Feel free to customize its contents manually. 2 | 3 | ( 4 | 5 | :project-package "com.twitter.flockdb" 6 | 7 | :use-sbt t 8 | 9 | :sources ("target/gen-java") 10 | 11 | :compile-jars ("lib_managed") 12 | 13 | ) 14 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/JobSchedulable.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.flockdb 2 | 3 | import com.twitter.gizzard.scheduler._ 4 | 5 | trait JobSchedulable { 6 | def schedule(tableId: Int, forwardingManager: ForwardingManager, scheduler: PrioritizingJobScheduler, priority: Int) 7 | } 8 | -------------------------------------------------------------------------------- /project/plugins/Plugins.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | 3 | class Plugins(info: ProjectInfo) extends PluginDefinition(info) { 4 | val twttrRepo = "twitter.com" at "http://maven.twttr.com" 5 | 6 | val standardProject = "com.twitter" % "standard-project" % "0.12.6" 7 | val scrooge = "com.twitter" % "sbt-scrooge" % "2.3.1" 8 | } 9 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/conversions/Numeric.scala: 
-------------------------------------------------------------------------------- 1 | package com.twitter.flockdb.conversions 2 | 3 | object Numeric { 4 | class RichAnyVal(wrapped: AnyVal) { 5 | def toLong = { 6 | wrapped match { 7 | case i: Int => i.toLong 8 | case n: Long => n 9 | } 10 | } 11 | 12 | def toInt = { 13 | wrapped match { 14 | case i: Int => i 15 | case n: Long => n.toInt 16 | } 17 | } 18 | } 19 | 20 | implicit def anyValToRichAnyVal(v: AnyVal) = new RichAnyVal(v) 21 | } 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2011 Twitter, Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/Page.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | case class Page(count: Int, cursor: Cursor) 20 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/EdgeQuery.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | case class EdgeQuery(term: QueryTerm, page: Page) 20 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/ConfigValidationSpec.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.flockdb 2 | 3 | import org.specs.Specification 4 | import com.twitter.util.Eval 5 | import java.io.File 6 | import com.twitter.flockdb 7 | 8 | 9 | object ConfigValidationSpec extends Specification { 10 | "Configuration Validation" should { 11 | "production.scala" >> { 12 | val config = Eval[flockdb.config.FlockDB](new File("config/production.scala")) 13 | config mustNot beNull 14 | } 15 | "development.scala" >> { 16 | val config = Eval[flockdb.config.FlockDB](new File("config/development.scala")) 17 | config mustNot beNull 18 | } 19 | 20 | "test.scala" >> { 21 | val config = Eval[flockdb.config.FlockDB](new File("config/test.scala")) 22 | config mustNot beNull 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/SelectQuery.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | import operations.SelectOperation 20 | 21 | case class SelectQuery(operations: Seq[SelectOperation], page: Page) 22 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/Priority.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | 20 | object Priority extends Enumeration { 21 | val Low = Value(1) 22 | val Medium = Value(2) 23 | val High = Value(3) 24 | } 25 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/QueryTerm.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | 20 | case class QueryTerm(sourceId: Long, graphId: Int, isForward: Boolean, 21 | destinationIds: Option[Seq[Long]], var states: Seq[State]) 22 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/operations/ExecuteOperation.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package operations 19 | 20 | 21 | case class ExecuteOperation(operationType: ExecuteOperationType.Value, term: QueryTerm, 22 | position: Option[Long]) 23 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/operations/ExecuteOperations.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package operations 19 | 20 | 21 | case class ExecuteOperations(operations: Seq[ExecuteOperation], executeAt: Option[Int], 22 | priority: Priority.Value) 23 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | TODO: 2 | 3 | move com.twitter.service.flock.Reset into gizzard 4 | move com.twitter.results into gizzard or its own package. 5 | move ByteSwapper into gizzard 6 | merge in Gizzard's no_reflection branch. 7 | move StatsCollectingQuery where?? 8 | create a `main` file by copying glock 9 | create simple exceptionWrappingProxy using new gizzard niceness interface (cf Rowz) 10 | rename Edges.scala to Flockdb.scala 11 | figure out where Flock.thrift goes 12 | move State into com.twitter.flockdb; make a copy for Groups; they're only identical on accident. not the same thing. 13 | copy the production.conf config from glock (but be careful with passwords!!!!) 14 | rename Edges.thrift Flock.thrift 15 | figure out how to package conf file?? 16 | 17 | then: 18 | make sure it works in development mode 19 | use flocker.rb to create a bunch of shards 20 | use the ruby gem to insert a bunch of data and make a bunch of queries. 21 | write readme's. 22 | write blog post. 
-------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/operations/ExecuteOperationType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb.operations 18 | 19 | 20 | object ExecuteOperationType extends Enumeration { 21 | val Add = Value(1) 22 | val Remove = Value(2) 23 | val Archive = Value(3) 24 | val Negate = Value(4) 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/operations/SelectOperation.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package operations 19 | 20 | 21 | case class SelectOperation(operationType: SelectOperationType.Value, term: Option[QueryTerm]) { 22 | override def clone() = SelectOperation(operationType, term) 23 | } 24 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/operations/SelectOperationType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb.operations 18 | 19 | 20 | object SelectOperationType extends Enumeration { 21 | val SimpleQuery = Value(1) 22 | val Intersection = Value(2) 23 | val Union = Value(3) 24 | val Difference = Value(4) 25 | } 26 | -------------------------------------------------------------------------------- /project/build/FlockDBProject.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import Process._ 3 | import com.twitter.sbt._ 4 | 5 | class FlockDBProject(info: ProjectInfo) extends StandardLibraryProject(info) 6 | with CompileThriftScrooge 7 | with DefaultRepos 8 | with SubversionPublisher { 9 | 10 | override def filterScalaJars = false 11 | val scalaTools = "org.scala-lang" % "scala-compiler" % "2.8.1" 12 | 13 | val gizzard = "com.twitter" % "gizzard" % "3.0.13" withSources() 14 | val scrooge = "com.twitter" % "scrooge-runtime" % "1.0.3" withSources() 15 | 16 | val asm = "asm" % "asm" % "1.5.3" % "test" 17 | val cglib = "cglib" % "cglib" % "2.2" % "test" 18 | val hamcrest = "org.hamcrest" % "hamcrest-all" % "1.1" % "test" 19 | val jmock = "org.jmock" % "jmock" % "2.4.0" % "test" 20 | val objenesis = "org.objenesis" % "objenesis" % "1.1" % "test" 21 | val specs = "org.scala-tools.testing" % "specs_2.8.1" % "1.6.6" % "test" 22 | 23 | override def subversionRepository = Some("https://svn.twitter.biz/maven-public/") 24 | } 25 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/Cursor.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | object Cursor { 20 | def cursorZip(seq: Seq[Long]) = for (i <- seq) yield (i, Cursor(i)) 21 | 22 | val End = new Cursor(0) 23 | val Start = new Cursor(-1) 24 | } 25 | 26 | case class Cursor(position: Long) extends Ordered[Cursor] { 27 | def compare(that: Cursor) = position.compare(that.position) 28 | def reverse = new Cursor(-position) 29 | def magnitude = new Cursor(math.abs(position)) 30 | } 31 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/Main.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.flockdb 2 | 3 | import com.twitter.util.Eval 4 | import com.twitter.logging.Logger 5 | import com.twitter.ostrich.admin.{Service, ServiceTracker, RuntimeEnvironment, AdminHttpService} 6 | import java.io.File 7 | 8 | import com.twitter.flockdb.config.{FlockDB => FlockDBConfig} 9 | 10 | object Main { 11 | val log = Logger.get 12 | 13 | var adminServer: Option[AdminHttpService] = None 14 | 15 | def main(args: Array[String]) { 16 | try { 17 | log.info("Starting FlockDB.") 18 | 19 | val eval = new Eval 20 | val config = eval[FlockDBConfig](args.map(new File(_)): _*) 21 | val runtime = new RuntimeEnvironment(this) 22 | 23 | Logger.configure(config.loggers) 24 | adminServer = config.adminConfig()(runtime) 25 | 26 | val service = new FlockDB(config) 27 | 28 | ServiceTracker.register(service) 29 | service.start() 30 | 31 | } catch { 32 | case e => { 33 | 
log.fatal(e, "Exception in initialization: ", e.getMessage) 34 | log.fatal(e.getStackTrace.toString) 35 | System.exit(1) 36 | } 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/Direction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | abstract sealed class Direction(val id: Int) { 20 | val opposite: Direction 21 | } 22 | 23 | object Direction { 24 | def apply(id: Int) = id match { 25 | case Forward.id => Forward 26 | case Backward.id => Backward 27 | } 28 | 29 | def apply(isForward: Boolean) = if (isForward) Forward else Backward 30 | 31 | case object Forward extends Direction(0) { 32 | val opposite = Direction.Backward 33 | } 34 | 35 | case object Backward extends Direction(1) { 36 | val opposite = Direction.Forward 37 | } 38 | 39 | val All = List(Forward, Backward) 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/State.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | abstract class State(val id: Int, val name: String, val ordinal: Int) extends Ordered[State] { 20 | def max(other: State) = if (this > other) this else other 21 | def compare(s: State) = ordinal.compare(s.ordinal) 22 | } 23 | 24 | object State { 25 | def apply(id: Int) = id match { 26 | case Normal.id => Normal 27 | case Removed.id => Removed 28 | case Archived.id => Archived 29 | case Negative.id => Negative 30 | } 31 | 32 | case object Normal extends State(0, "Normal", 0) 33 | case object Negative extends State(3, "Negative", 1) 34 | case object Removed extends State(1, "Removed", 3) 35 | case object Archived extends State(2, "Archived", 2) 36 | } 37 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/unit/EdgeSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package unit 19 | 20 | import com.twitter.util.Time 21 | import org.specs.mock.{ClassMocker, JMocker} 22 | import jobs.single._ 23 | 24 | object EdgeSpec extends ConfiguredSpecification with JMocker with ClassMocker { 25 | val now = Time.fromSeconds(124) 26 | val source = 1 27 | val dest = 2 28 | val pos = 0 29 | val graph = 5 30 | val count = 0 31 | val forwardingManager = mock[ForwardingManager] 32 | 33 | "Edge" should { 34 | "becomes correct job" in { 35 | val edge = new Edge(source, dest, pos, now, count, State.Normal) 36 | edge.toJob(graph, forwardingManager) mustEqual new Single(source, graph, dest, pos, State.Normal, now, forwardingManager, OrderedUuidGenerator) 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/UuidGenerator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | import java.util.Random 20 | 21 | trait UuidGenerator extends (Long => Long) { 22 | def apply(updatedAt: Long): Long 23 | def unapply(uuid: Long): Option[Long] 24 | } 25 | 26 | object OrderedUuidGenerator extends UuidGenerator { 27 | private val randomGenerator = new Random 28 | // 64 bits - 20 leaves 44 bits of milliseconds, or over 500 years. 29 | private val unusedBits = 20 30 | private val randomMask = (1 << unusedBits) - 1 31 | 32 | def apply(updatedAt: Long) = { 33 | (updatedAt << unusedBits) | (randomGenerator.nextInt() & randomMask) 34 | } 35 | 36 | def unapply(uuid: Long) = { 37 | Some(uuid >> unusedBits) 38 | } 39 | } 40 | 41 | object IdentityUuidGenerator extends UuidGenerator { 42 | def apply(updatedAt: Long) = updatedAt 43 | 44 | def unapply(uuid: Long) = Some(uuid) 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/ForwardingManager.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
package com.twitter.flockdb

import com.twitter.gizzard.nameserver.MultiForwarder
import com.twitter.gizzard.shards.{RoutingNode, ShardException}
import com.twitter.flockdb.shards.{Shard, ReadWriteShardAdapter}


/**
 * Resolves which shard owns the edges for a (sourceId, graphId, direction)
 * triple. Backward edges live under the negated graph id (see translate).
 */
class ForwardingManager(val forwarder: MultiForwarder[Shard]) {
  @throws(classOf[ShardException])
  def find(sourceId: Long, graphId: Int, direction: Direction): Shard = {
    new ReadWriteShardAdapter(findNode(sourceId, graphId, direction))
  }

  // Raw routing node lookup; find() wraps the result in a ReadWriteShardAdapter.
  @throws(classOf[ShardException])
  def findNode(sourceId: Long, graphId: Int, direction: Direction) = {
    forwarder.find(translate(graphId, direction), sourceId)
  }

  // Backward direction is encoded as the negative graph id in the forwarding table.
  private def translate(graphId: Int, direction: Direction) = {
    if (direction == Direction.Backward) -1 * graphId else graphId
  }
}

package com.twitter.flockdb.config

import com.twitter.gizzard.config._
import com.twitter.ostrich.admin.config.AdminServiceConfig
import com.twitter.querulous.config.{Connection, AsyncQueryEvaluator}
import com.twitter.util.TimeConversions._
import com.twitter.flockdb.queries.QueryTree
import com.twitter.flockdb.queries


// Thrift server settings for the edges service.
class FlockDBServer {
  var name = "flockdb_edges"
  var port = 7915
  var maxConcurrentRequests = 10000
}

// Tunables shared by intersection/difference queries, doubling as a factory
// so configuration builds the query nodes with these settings applied.
trait IntersectionQuery {
  var intersectionTimeout = 100.millis
  // Assumed fraction of a page surviving intersection — tuning knob; TODO confirm.
  var averageIntersectionProportion = 0.1
  var intersectionPageSizeMax = 4000

  def intersect(query1: QueryTree, query2: QueryTree) = new queries.IntersectionQuery(query1, query2, averageIntersectionProportion, intersectionPageSizeMax, intersectionTimeout)
  def difference(query1: QueryTree, query2: QueryTree) = new queries.DifferenceQuery(query1, query2, averageIntersectionProportion, intersectionPageSizeMax, intersectionTimeout)
}

// Top-level FlockDB configuration: server settings, query tuning, and the
// database evaluators the concrete config must supply.
trait FlockDB extends GizzardServer {
  var server = new FlockDBServer

  var intersectionQuery: IntersectionQuery = new IntersectionQuery { }
  var aggregateJobsPageSize = 500

  def databaseConnection: Connection

  def edgesQueryEvaluator: AsyncQueryEvaluator
  def lowLatencyQueryEvaluator: AsyncQueryEvaluator
  def materializingQueryEvaluator: AsyncQueryEvaluator

  def adminConfig: AdminServiceConfig
}
package com.twitter.flockdb
package unit

import org.specs.mock.JMocker

// Exercises UnionQuery over two in-memory SeqQuery fixtures.
class UnionQuerySpec extends ConfiguredSpecification with JMocker {
  "UnionQuery" should {
    val query1 = new queries.SeqQuery(List(1,2,3,4,5,6,7,8,9,10))
    val query2 = new queries.SeqQuery(List(1,2,3,4,11))

    // The union's estimate is the max of the two sides' sizes.
    "sizeEstimate" in {
      val unionQuery = new queries.UnionQuery(query1, query2)
      unionQuery.sizeEstimate()() mustEqual 10
    }

    // Ids present in either side survive the filter.
    "selectWhereIn" in {
      val unionQuery = new queries.UnionQuery(query1, query2)
      unionQuery.selectWhereIn(List(1, 2, 3, 12))().toList mustEqual List(1, 2, 3)
    }

    // Paging merges both sides, walking down from the cursor position.
    "selectPage" in {
      val unionQuery = new queries.UnionQuery(query1, query2)
      unionQuery.selectPage(10, Cursor(9))().toTuple mustEqual (List(8,7,6,5,4,3,2,1), Cursor.End, Cursor(-8))
    }
  }
}
package com.twitter.flockdb
package queries

import scala.util.Sorting
import com.twitter.gizzard.thrift.conversions.Sequences._
import com.twitter.util.Future

/**
 * Test fixture: a query node backed by an in-memory sequence, held in
 * descending order to mimic how shards return destination ids.
 */
class SeqQuery(s: Seq[Long]) extends SimpleQueryNode {
  val seq = descending(s)

  def sizeEstimate = Future(seq.size)

  // Multiset intersection with the given page, returned in descending order.
  def selectWhereIn(i: Seq[Long]) = {
    val common = seq.toList intersect i.toList
    Future(descending(common).toList)
  }

  protected def selectPage(count: Int, cursor: Cursor) = selectPageByDestinationId(count, cursor)

  def selectPageByDestinationId(count: Int, cursor: Cursor) = {
    // Start serves everything; End serves nothing; otherwise resume at the cursor.
    val remaining: Seq[Long] =
      if (cursor == Cursor.Start) seq
      else if (cursor == Cursor.End) Seq()
      else seq.filter(_ <= cursor.position)

    Future(new ResultWindow(Cursor.cursorZip(remaining), count, cursor))
  }

  // Stable sort, largest id first.
  private def descending(xs: Seq[Long]) = Sorting.stableSort(xs, (a: Long, b: Long) => b < a)

  override def toString =
    ""
}
package com.twitter.flockdb
package unit

import org.specs.mock.JMocker

// Exercises IntersectionQuery built via the configured factory.
// Declared as a class (was `object`) for consistency with the sibling specs
// (UnionQuerySpec, DifferenceQuerySpec, WhereInQuerySpec), which all extend
// ConfiguredSpecification as classes.
class IntersectionQuerySpec extends ConfiguredSpecification with JMocker {
  "IntersectionQuery" should {
    val query1 = new queries.SeqQuery(List(1,2,3,4,5,6,7,8,9,10))
    val query2 = new queries.SeqQuery(List(1,2,3,4,11))
    val queryConfig = config.intersectionQuery

    // Estimate = smaller side's size scaled by the configured proportion.
    "sizeEstimate" in {
      val intersectionQuery = queryConfig.intersect(query1, query2)
      intersectionQuery.sizeEstimate()() mustEqual (5 * queryConfig.averageIntersectionProportion).toInt
    }

    // Only ids present in both sides survive, descending.
    "selectWhereIn" in {
      val intersectionQuery = queryConfig.intersect(query1, query2)
      intersectionQuery.selectWhereIn(List(1, 2, 12, 13))() mustEqual List(2, 1)
    }

    "selectPage" in {
      val intersectionQuery = queryConfig.intersect(query1, query2)
      intersectionQuery.selectPage(5, Cursor.Start)().toTuple mustEqual (List(4, 3, 2, 1), Cursor.End, Cursor.End)
    }
  }
}
package com.twitter.flockdb
package queries

import shards.Shard
import com.twitter.gizzard.Stats

/**
 * Leaf query node that reads a single source id's edges directly from one
 * shard, restricted to the given states.
 */
class SimpleQuery(shard: Shard, sourceId: Long, states: Seq[State]) extends SimpleQueryNode {
  // Consistency fix: record the Stats note inside time { }, matching the other
  // three operations below, so the note and the timing cover the same call.
  def sizeEstimate() = time {
    Stats.transaction.record("Selecting counts from "+shard)
    shard.count(sourceId, states)
  }

  // Filters `page` down to ids this source actually has edges to.
  def selectWhereIn(page: Seq[Long]) = time {
    Stats.transaction.record("Intersecting "+page.size+" ids from "+shard)
    shard.intersect(sourceId, states, page)
  }

  def selectPageByDestinationId(count: Int, cursor: Cursor) = time {
    Stats.transaction.record("Selecting "+count+" destinationIds from "+shard)
    shard.selectByDestinationId(sourceId, states, count, cursor)
  }

  def selectPage(count: Int, cursor: Cursor) = time {
    Stats.transaction.record("Selecting "+count+" edges from "+shard)
    shard.selectByPosition(sourceId, states, count, cursor)
  }

  override def toString = {
    ""
  }
}
package com.twitter.flockdb
package queries

import scala.util.Sorting

/**
 * Union of two query subtrees: pages are merged result windows, membership
 * filters keep ids present in either side.
 */
class UnionQuery(query1: QueryTree, query2: QueryTree) extends ComplexQueryNode(query1, query2) {
  // Lower bound on the union's size: at least as large as the bigger side.
  def sizeEstimate() = getSizeEstimates() map { case (count1, count2) => count1 max count2 }

  def selectPage(count: Int, cursor: Cursor) = selectPageByDestinationId(count, cursor)

  // Fetch both sides in parallel, then merge the result windows.
  def selectPageByDestinationId(count: Int, cursor: Cursor) = time {
    val f1 = query1.selectPageByDestinationId(count, cursor)
    val f2 = query2.selectPageByDestinationId(count, cursor)

    for (result1 <- f1; result2 <- f2) yield result1.merge(result2)
  }

  // Dedups the two sides' hits and returns them stably sorted.
  // (Removed an unused private merge(Seq, Seq) helper that duplicated this.)
  def selectWhereIn(page: Seq[Long]) = time {
    val f1 = query1.selectWhereIn(page)
    val f2 = query2.selectWhereIn(page)

    for (page1 <- f1; page2 <- f2) yield {
      Sorting.stableSort((page1 ++ page2).toSet.toSeq)
    }
  }

  override def toString =
    ""
}
package com.twitter.flockdb
package unit

import org.specs.mock.JMocker

// Exercises DifferenceQuery (query1 minus query2) over in-memory SeqQuery fixtures.
class DifferenceQuerySpec extends ConfiguredSpecification with JMocker {
  "DifferenceQuery" should {
    val query1 = new queries.SeqQuery(List(1,2,3,4,5,6,7,8,9,10,11,12))
    val query2 = new queries.SeqQuery(List(3,4,7,11))
    val queryConfig = config.intersectionQuery

    // The difference's estimate is simply query1's size (an upper bound).
    "sizeEstimate" in {
      val differenceQuery = queryConfig.difference(query1, query2)
      differenceQuery.sizeEstimate()() mustEqual 12
    }

    // Ids in query1 but not query2 survive, descending.
    "selectWhereIn" in {
      val differenceQuery = queryConfig.difference(query1, query2)
      differenceQuery.selectWhereIn(List(1, 2, 3, 4, 5, 11, 12, 13))().toList mustEqual List(12,5,2,1)
    }

    // Paging re-fetches until a full page or the end of query1 is reached.
    "selectPage" in {
      val differenceQuery = queryConfig.difference(query1, query2)

      differenceQuery.selectPage(5, Cursor.Start)().toTuple mustEqual (List(12,10,9,8,6), Cursor(6), Cursor.End)
      differenceQuery.selectPage(10, Cursor(12L))().toTuple mustEqual (List(10,9,8,6,5,2,1), Cursor.End, Cursor(-10))
      differenceQuery.selectPage(10, Cursor.Start)().toTuple mustEqual (List(12,10,9,8,6,5,2,1), Cursor.End, Cursor.End)
    }
  }
}
package com.twitter.flockdb
package queries

import com.twitter.util.{Duration, Future}
import com.twitter.gizzard.Stats
import shards.Shard

/**
 * Query node restricted to a fixed candidate set of destination ids; every
 * select reduces to a shard intersection against that candidate set.
 */
class WhereInQuery(shard: Shard, sourceId: Long, states: Seq[State], destinationIds: Seq[Long]) extends SimpleQueryNode {

  // The candidate list bounds the result size.
  def sizeEstimate() = Future(destinationIds.size)

  def selectWhereIn(page: Seq[Long]) = time {
    val common = (destinationIds.toSet & page.toSet).toSeq
    Stats.transaction.record("Intersecting "+common.size+" ids from "+shard)
    shard.intersect(sourceId, states, common)
  }

  def selectPageByDestinationId(count: Int, cursor: Cursor) = time {
    Stats.transaction.record("Selecting "+ count +" edges from an intersection of "+ destinationIds.size +" ids")
    shard.intersect(sourceId, states, destinationIds) map { matched =>
      Stats.transaction.record("Selected "+ matched.size +" rows.")
      new ResultWindow(matched map { id => (id, Cursor(id)) }, count, cursor)
    }
  }

  def selectPage(count: Int, cursor: Cursor) = selectPageByDestinationId(count, cursor)

  override def toString = {
    ""
  }
}
package com.twitter.flockdb
package queries

import com.twitter.util.{Time, Duration, Future}

/**
 * Read-side query interface: size estimation, membership filtering, and
 * cursor-based paging over destination ids.
 */
trait Query {
  def sizeEstimate(): Future[Int]
  // Filters `page` down to the ids this query contains.
  def selectWhereIn(page: Seq[Long]): Future[Seq[Long]]
  def select(page: Page) = selectPage(page.count, page.cursor)
  def selectPageByDestinationId(count: Int, cursor: Cursor): Future[ResultWindow[Long]]

  protected def selectPage(count: Int, cursor: Cursor): Future[ResultWindow[Long]]
}

// Mixin recording how long the wrapped Future took to complete.
trait Timed {
  var duration: Option[Duration] = None

  protected def time[A](f: => Future[A]): Future[A] = {
    val start = Time.now
    f map { rv => duration = Some(Time.now - start); rv }
  }
}

// A node of the compiled query plan; complexity/depth guide plan limits.
sealed abstract class QueryTree extends Query with Timed {
  def getComplexity(): Int
  def getDepth(): Int
}

// NOTE(review): `abstract case class` is deprecated in later Scala 2 releases;
// consider plain abstract classes with explicit extractors if the compiler is upgraded.
abstract case class ComplexQueryNode(left: QueryTree, right: QueryTree) extends QueryTree {
  val complexity = (left.getComplexity() + right.getComplexity()) + 1
  val depth = (left.getDepth() max right.getDepth) + 1
  def getComplexity(): Int = complexity
  def getDepth(): Int = depth

  // Kicks off both children's estimates before joining, so they run in parallel.
  def getSizeEstimates() = {
    val f1 = left.sizeEstimate
    val f2 = right.sizeEstimate
    for (count1 <- f1; count2 <- f2) yield (count1, count2)
  }

  // Returns (smaller, larger) by estimated size, so callers can drive work
  // from the cheaper side.
  def orderQueries() = {
    getSizeEstimates() map { case (count1, count2) =>
      if (count1 < count2) {
        (left, right)
      } else {
        (right, left)
      }
    }
  }

}

// Leaf node: contributes nothing to plan complexity or depth.
abstract case class SimpleQueryNode() extends QueryTree {
  def getComplexity(): Int = 0
  def getDepth(): Int = 0
}
package com.twitter.flockdb
package unit

import com.twitter.util.Future
import org.specs.mock.JMocker
import shards.Shard

// Exercises WhereInQuery against a mocked shard.
class WhereInQuerySpec extends ConfiguredSpecification with JMocker {
  "WhereInQuery" should {
    var shard: Shard = null
    val sourceId = 900
    val destinationIds = List(55L, 60L, 65L, 70L, 75L, 80L, 85L)

    doBefore {
      shard = mock[Shard]
    }

    // Estimate is just the candidate-list length.
    "sizeEstimate" in {
      val whereInQuery = new queries.WhereInQuery(shard, sourceId, List(State.Normal), destinationIds)
      whereInQuery.sizeEstimate()() mustEqual destinationIds.size
    }

    // Only the overlap of the page and the candidate list reaches the shard.
    "selectWhereIn" in {
      val page = List(65L, 63L, 60L)
      expect {
        one(shard).intersect(sourceId, List(State.Normal), List(60L, 65L)) willReturn Future(List(60L))
      }
      val whereInQuery = new queries.WhereInQuery(shard, sourceId, List(State.Normal), destinationIds)
      whereInQuery.selectWhereIn(page)().toList mustEqual List(60L)
    }

    // Paging windows the shard's intersection result around the cursor.
    "selectPage" in {
      expect {
        allowing(shard).intersect(sourceId, List(State.Normal), destinationIds) willReturn Future(List(85L, 75L, 65L, 55L))
      }

      val whereInQuery = new queries.WhereInQuery(shard, sourceId, List(State.Normal), destinationIds)

      whereInQuery.selectPage(10, Cursor(90L))().toTuple mustEqual (List(85L, 75L, 65L, 55L), Cursor.End, Cursor.End)
      whereInQuery.selectPage(10, Cursor(75L))().toTuple mustEqual (List(65L, 55L), Cursor.End, Cursor(-65L))
      whereInQuery.selectPage(2, Cursor(-65L))().toTuple mustEqual (List(85L, 75L), Cursor(75L), Cursor.End)
    }
  }
}
package com.twitter.flockdb

import com.twitter.util.Time
import com.twitter.gizzard.scheduler._
import jobs.multi._

object Metadata {
  def apply(sourceId: Long, state: State, count: Int, updatedAt: Time) = new Metadata(sourceId, state, count, updatedAt)
  def apply(sourceId: Long, state: State, updatedAt: Time) = new Metadata(sourceId, state, updatedAt)
  // Sentinel with maximal sourceId; note its updatedAt is epoch, not max —
  // presumably only the sourceId matters for the comparisons it's used in; TODO confirm.
  val Max = Metadata(Long.MaxValue, State.Normal, Time.fromSeconds(0))
}

/**
 * Per-source-id bookkeeping row: state, edge count, and last-update time.
 * Ordered by (updatedAt, state) so the freshest write wins on merge.
 */
case class Metadata(sourceId: Long, state: State, count: Int, updatedAtSeconds: Int) extends Ordered[Metadata] {

  def this(sourceId: Long, state: State, count: Int, updatedAt: Time) =
    this(sourceId, state, count, updatedAt.inSeconds)

  def this(sourceId: Long, state: State, updatedAt: Time) =
    this(sourceId, state, 0, updatedAt.inSeconds)

  val updatedAt = Time.fromSeconds(updatedAtSeconds)


  // Freshness ordering: updatedAt first, state breaks ties.
  def compare(other: Metadata) = {
    val out = updatedAt.compare(other.updatedAt)
    if (out == 0) {
      state.compare(other.state)
    } else {
      out
    }
  }

  def max(other: Metadata) = if (this > other) this else other

  // Enqueue a Multi job that re-applies this row's state across the table.
  // NOTE(review): the job itself is built with Priority.Medium and page size
  // 500 regardless of the `priority` argument, which only selects the queue
  // it's put on — verify this asymmetry is intended.
  def schedule(
    tableId: Int,
    forwardingManager: ForwardingManager,
    scheduler: PrioritizingJobScheduler,
    priority: Int
  ) = {
    val job = new Multi(
      sourceId,
      tableId,
      (if (tableId > 0) Direction.Forward else Direction.Backward),
      state,
      updatedAt,
      Priority.Medium,
      500,
      forwardingManager,
      scheduler
    )

    scheduler.put(priority, job)
  }

  // Raw compare result on sourceId (not normalized to -1/0/1, unlike Edge.similar).
  def similar(other: Metadata) = {
    sourceId.compare(other.sourceId)
  }
}
package com.twitter.flockdb

import com.twitter.util.Time
import com.twitter.flockdb.jobs.single._
import com.twitter.gizzard.scheduler.{PrioritizingJobScheduler, JsonJob}

/** Companion: Time-based convenience constructor plus a sentinel Max edge. */
object Edge {
  def apply(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time, count: Int, state: State) =
    new Edge(sourceId, destinationId, position, updatedAt, count, state)
  val Max = Edge(Long.MaxValue, Long.MaxValue, Long.MaxValue, 0, 0, State.Normal)
}

/**
 * A single directed edge. Ordered by (updatedAt, state) so that the freshest
 * copy wins when two versions of the same edge are merged.
 */
case class Edge(sourceId: Long, destinationId: Long, position: Long, updatedAtSeconds: Int, count: Int,
                state: State) extends Ordered[Edge] {

  def this(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time, count: Int, state: State) =
    this(sourceId, destinationId, position, updatedAt.inSeconds, count, state)

  val updatedAt = Time.fromSeconds(updatedAtSeconds)

  // Enqueue the single-edge job onto the given priority queue.
  def schedule(tableId: Int, forwardingManager: ForwardingManager, scheduler: PrioritizingJobScheduler, priority: Int) = {
    scheduler.put(priority, toJob(tableId, forwardingManager))
  }

  // Build the Single job that re-applies this edge to the table.
  def toJob(tableId: Int, forwardingManager: ForwardingManager) = {
    new Single(
      sourceId,
      tableId,
      destinationId,
      OrderedUuidGenerator.unapply(position).get,
      state,
      updatedAt,
      forwardingManager,
      OrderedUuidGenerator
    )
  }

  // Identity ordering: source id first, then destination id; returns -1, 0, or 1.
  def similar(other: Edge) = {
    val bySource = sourceId.compare(other.sourceId)
    if (bySource < 0) -1
    else if (bySource > 0) 1
    else destinationId.compare(other.destinationId)
  }

  // Freshness ordering: updatedAt first, state breaks ties.
  def compare(other: Edge) = {
    val byTime = updatedAt.compare(other.updatedAt)
    if (byTime != 0) byTime else state.compare(other.state)
  }

  // On a tie this returns `other`, matching the original's `if (this > other)` form.
  def max(other: Edge) = if (this > other) this else other
}
package com.twitter.flockdb
package integration

import com.twitter.gizzard.scheduler.{JsonJob, PrioritizingJobScheduler}
import com.twitter.gizzard.shards.ShardInfo
import com.twitter.util.Time
import com.twitter.conversions.time._
import org.specs.mock.{ClassMocker, JMocker}
import jobs.multi.Multi
import shards.{Shard, SqlShard}

// Regression test: running the Multi ("flock fix") job must restore archived
// edges without disturbing the ordering of the already-normal edges.
class FlockFixRegressionSpec extends IntegrationSpecification {
  val alice = 1L
  val FOLLOWS = 1
  val pageSize = 100

  // Fetches alice's outbound normal edges as a single page.
  def alicesFollowings() = {
    val term = QueryTerm(alice, FOLLOWS, true, None, List(State.Normal))
    val query = EdgeQuery(term, Page(pageSize, Cursor.Start))
    val resultsList = flockService.selectEdges(List(query))()
    resultsList.size mustEqual 1
    resultsList(0).toList
  }

  "select results" should {
    "be in order and still in order after unarchive" in {
      reset(config) // I don't know why this isn't working in doBefore

      // Even ids are added normal; odd ids are archived.
      for(i <- 0 until 10) {
        if (i % 2 == 0) {
          execute(Select(alice, FOLLOWS, i).add)
        } else {
          execute(Select(alice, FOLLOWS, i).archive)
        }
        Thread.sleep(1000) // prevent same-millisecond collision
      }

      flock.jobScheduler.size must eventually(be(0)) // Make sure adds get applied. I can't wait for Time.asOf()

      alicesFollowings().size must eventually(be_==(5))
      alicesFollowings().toList.map(_.destinationId) mustEqual List(8,6,4,2,0)

      Thread.sleep(1000)

      // Unarchive everything via the Multi job, then expect all 10 edges back, ordered.
      val job = new Multi(alice, FOLLOWS, Direction.Forward, State.Normal, Time.now, Priority.High, pageSize, flock.forwardingManager, flock.jobScheduler)
      job()

      alicesFollowings().size must eventually(be(10))

      alicesFollowings().toList.map(_.destinationId) mustEqual List(9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
    }
  }

}

// -----------------------------------------------------------------------------
// /src/scripts/setup-env.sh
// -----------------------------------------------------------------------------
#!/bin/bash
#
# Copyright 2010 Twitter, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

VERSION="@VERSION@"

# Reject Java 1.5 (NOTE(review): this greps the whole version banner for "1.5",
# which also prints matches and could false-positive on other fields — consider grep -q).
if java -version 2>&1 |grep "1\.5"; then
  echo "Java must be at least 1.6"
  exit 1
fi

if [ "x$DB_USERNAME" = "x" ]; then
  echo "Please set DB_USERNAME and/or DB_PASSWORD."
  exit 1
fi

if gizzmo --help > /dev/null; then
  gizzmo="gizzmo -H localhost -P 7920"
else
  echo "Make sure you have gizzmo available on your path."
  echo "Find it here: http://github.com/twitter/gizzmo"
  exit 1
fi

# Build the mysql invocation once, with or without a password flag.
MYSQL_COMMAND=$(if [ "x$DB_PASSWORD" = "x" ]; then
  echo "mysql -u$DB_USERNAME"
else
  echo "mysql -u$DB_USERNAME -p$DB_PASSWORD"
fi)

function exec_sql {
  echo $1 | $MYSQL_COMMAND
}

echo "Killing any running flockdb..."
curl http://localhost:9990/shutdown >/dev/null 2>/dev/null
sleep 3

echo "Launching flockdb..."
exec_sql "DROP DATABASE IF EXISTS flockdb_development"
exec_sql "CREATE DATABASE IF NOT EXISTS flockdb_development"

JAVA_OPTS="-Xms256m -Xmx256m -XX:NewSize=64m -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -server"
java $JAVA_OPTS -jar ./dist/flockdb/flockdb-${VERSION}.jar config/development.scala &
sleep 10

# Create forward/backward shards for graphs 1..15 and register their forwardings.
echo "Creating shards..."
i=1
while [ $i -le 15 ]; do
  /bin/echo -n "$i "
  exec_sql "DROP TABLE IF EXISTS edges_development.forward_${i}_0000_edges"
  exec_sql "DROP TABLE IF EXISTS edges_development.forward_${i}_0000_metadata"
  exec_sql "DROP TABLE IF EXISTS edges_development.backward_${i}_0000_edges"
  exec_sql "DROP TABLE IF EXISTS edges_development.backward_${i}_0000_metadata"
  forward_shard=$($gizzmo create -s "INT UNSIGNED" -d "INT UNSIGNED" "com.twitter.flockdb.SqlShard" "localhost/forward_${i}_0000")
  backward_shard=$($gizzmo create -s "INT UNSIGNED" -d "INT UNSIGNED" "com.twitter.flockdb.SqlShard" "localhost/backward_${i}_0000")
  $gizzmo addforwarding -- $i 0 $forward_shard
  $gizzmo addforwarding -- -$i 0 $backward_shard
  i=$((i + 1))
done
echo
$gizzmo -f reload
echo "Done."
package com.twitter.flockdb
package integration

import com.twitter.querulous.evaluator.QueryEvaluatorFactory

// Integration coverage for intersection paging, run under both a tiny and a
// huge internal page size so both fetch paths get exercised.
object IntersectionSpec extends IntegrationSpecification {

  val FOLLOWS = 1

  val alice = 1L
  val bob = 2L
  val carl = 3L
  val darcy = 4L
  var queryEvaluatorFactories: List[QueryEvaluatorFactory] = null


  // Pages through the intersection of two users' followings.
  def intersectionOf(user1: Long, user2: Long, page: Page) = {
    select(Select(user1, FOLLOWS, ()) intersect Select(user2, FOLLOWS, ()), page)
  }

  // Shared example body, run once per intersectionPageSizeMax setting.
  def intersectAlot = {
    "intersection_for" in {
      "pagination" in {
        reset(config)
        execute(Select(alice, FOLLOWS, bob).add)
        execute(Select(alice, FOLLOWS, carl).add)
        execute(Select(alice, FOLLOWS, darcy).add)
        execute(Select(carl, FOLLOWS, bob).add)
        execute(Select(carl, FOLLOWS, darcy).add)

        flockService.contains(carl, FOLLOWS, darcy)() must eventually(beTrue)

        intersectionOf(alice, carl, new Page(1, Cursor.Start)) mustEqual ((List(darcy), Cursor(darcy), Cursor.End))
        intersectionOf(alice, carl, new Page(1, Cursor(darcy))) mustEqual ((List(bob), Cursor.End, Cursor(-bob)))
        intersectionOf(alice, carl, new Page(2, Cursor.Start)) mustEqual ((List(darcy, bob), Cursor.End, Cursor.End))
      }

      "one list is empty" in {
        reset(config)
        for (i <- 1 until 11) execute(Select(alice, FOLLOWS, i).add)
        count(Select(alice, FOLLOWS, ())) must eventually(be_==(10))

        intersectionOf(alice, carl, new Page(10, Cursor.Start)) mustEqual (Nil, Cursor.End, Cursor.End)
      }
    }
  }

  "Intersection" should {
    "with a large intersection" >> {
      // Page size 1 forces many internal fetch iterations.
      config.intersectionQuery.intersectionPageSizeMax = 1

      intersectAlot
    }

    "with a small intersection" >> {
      // Effectively unbounded page size: everything in one fetch.
      config.intersectionQuery.intersectionPageSizeMax = Integer.MAX_VALUE - 1

      intersectAlot
    }
  }
}
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package queries 19 | 20 | import com.twitter.util.{Duration, Future} 21 | 22 | class DifferenceQuery(query1: QueryTree, query2: QueryTree, averageIntersectionProportion: Double, 23 | intersectionPageSizeMax: Int, intersectionTimeout: Duration) 24 | extends ComplexQueryNode(query1, query2) { 25 | def sizeEstimate = query1.sizeEstimate 26 | 27 | def selectPage(count: Int, cursor: Cursor) = selectPageByDestinationId(count, cursor) 28 | 29 | def selectPageByDestinationId(count: Int, cursor: Cursor) = time { 30 | val guessedPageSize = (count + count * averageIntersectionProportion).toInt 31 | val internalPageSize = guessedPageSize min intersectionPageSizeMax 32 | val timeout = intersectionTimeout.inMillis 33 | val startTime = System.currentTimeMillis 34 | 35 | def loop(currCursor: Cursor): Future[ResultWindow[Long]] = { 36 | pageDifference(internalPageSize, count, currCursor) flatMap { resultWindow => 37 | if (resultWindow.page.size < count && 38 | resultWindow.continueCursor != Cursor.End && 39 | System.currentTimeMillis - startTime < timeout) { 40 | loop(resultWindow.continueCursor) map { resultWindow ++ _ } 41 | } else { 42 | Future(resultWindow) 43 | } 44 | } 45 | } 46 | 47 | loop(cursor) 48 | } 49 | 50 | def selectWhereIn(page: Seq[Long]) = time { 51 | for { 52 | results <- query1.selectWhereIn(page) 53 | rejects <- query2.selectWhereIn(results) 54 | } yield { 55 | val rejectsSet = rejects.toSet 56 | results.filterNot { rejectsSet.contains(_) } // use the Set: Seq.contains would be O(n) per element 57 | } 58 | } 59 | 60 | private def pageDifference(internalPageSize: Int, count: Int, cursor: Cursor) = { 61 | for { 62 | results <- query1.selectPageByDestinationId(internalPageSize, cursor) 63 | rejects <- query2.selectWhereIn(results.view) 64 | } yield results.diff(rejects, count) 65 | } 66 | 67 | override def toString = 68 | "" 69 | } 70 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/shards/Shard.scala:
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package shards 19 | 20 | import com.twitter.util.{Future, Time} 21 | 22 | trait Shard { 23 | def get(sourceId: Long, destinationId: Long): Future[Option[Edge]] 24 | def getMetadata(sourceId: Long): Future[Option[Metadata]] 25 | def getMetadataForWrite(sourceId: Long): Future[Option[Metadata]] 26 | 27 | def count(sourceId: Long, states: Seq[State]): Future[Int] 28 | 29 | def selectAll(cursor: (Cursor, Cursor), count: Int): Future[(Seq[Edge], (Cursor, Cursor))] 30 | def selectAllMetadata(cursor: Cursor, count: Int): Future[(Seq[Metadata], Cursor)] 31 | def selectIncludingArchived(sourceId: Long, count: Int, cursor: Cursor): Future[ResultWindow[Long]] 32 | def selectByDestinationId(sourceId: Long, states: Seq[State], count: Int, cursor: Cursor): Future[ResultWindow[Long]] 33 | def selectByPosition(sourceId: Long, states: Seq[State], count: Int, cursor: Cursor): Future[ResultWindow[Long]] 34 | def selectEdges(sourceId: Long, states: Seq[State], count: Int, cursor: Cursor): Future[ResultWindow[Edge]] 35 | 36 | def writeCopies(edge: Seq[Edge]): Future[Unit] 37 | def updateMetadata(metadata: Metadata): Future[Unit] 38 | def writeMetadata(metadata: Metadata): Future[Unit] 39 | def writeMetadatas(metadata: 
Seq[Metadata]): Future[Unit] 40 | 41 | def bulkUnsafeInsertEdges(edge: Seq[Edge]): Future[Unit] 42 | def bulkUnsafeInsertMetadata(edge: Seq[Metadata]): Future[Unit] 43 | 44 | def archive(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time): Future[Unit] 45 | def archive(sourceId: Long, updatedAt: Time): Future[Unit] 46 | 47 | def remove(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time): Future[Unit] 48 | def remove(sourceId: Long, updatedAt: Time): Future[Unit] 49 | 50 | def add(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time): Future[Unit] 51 | def add(sourceId: Long, updatedAt: Time): Future[Unit] 52 | 53 | def negate(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time): Future[Unit] 54 | def negate(sourceId: Long, updatedAt: Time): Future[Unit] 55 | 56 | def intersect(sourceId: Long, states: Seq[State], destinationIds: Seq[Long]): Future[Seq[Long]] 57 | def intersectEdges(sourceId: Long, states: Seq[State], destinationIds: Seq[Long]): Future[Seq[Edge]] 58 | } 59 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/StatsCollectingQuery.scala: -------------------------------------------------------------------------------- 1 | package com.twitter.flockdb 2 | 3 | import com.twitter.gizzard.Stats 4 | import com.twitter.querulous.database.{Database, DatabaseFactory, DatabaseProxy} 5 | import com.twitter.querulous.query.{Query, QueryFactory, QueryClass, QueryProxy} 6 | import com.twitter.util.{Time, Duration} 7 | import java.sql.Connection 8 | 9 | class TransactionStatsCollectingQueryFactory(queryFactory: QueryFactory) 10 | extends QueryFactory { 11 | 12 | def apply(connection: Connection, queryClass: QueryClass, query: String, params: Any*) = { 13 | new TransactionStatsCollectingQuery(queryFactory(connection, queryClass, query, params: _*), queryClass, query) 14 | } 15 | } 16 | 17 | class TransactionStatsCollectingQuery(query: 
Query, queryClass: QueryClass, queryString: String) extends QueryProxy(query) { 18 | override def delegate[A](f: => A) = { 19 | Stats.transaction.record("Executing "+queryClass.name+" query: "+queryString) 20 | val start = Time.now 21 | try { 22 | val rv = f 23 | val duration = Time.now - start 24 | Stats.transaction.record("Query duration: "+duration.inMillis) 25 | rv 26 | } catch { 27 | case e => 28 | Stats.transaction.record("Failure executing query: "+e) 29 | val duration = Time.now - start 30 | Stats.transaction.record("Query duration: "+duration.inMillis) 31 | throw e 32 | } 33 | } 34 | } 35 | 36 | class TransactionStatsCollectingDatabaseFactory(databaseFactory: DatabaseFactory) extends DatabaseFactory { 37 | def apply(dbhosts: List[String], dbname: String, username: String, password: String, urlOptions: Map[String, String], driverName: String) = { 38 | new TransactionStatsCollectingDatabase(databaseFactory(dbhosts, dbname, username, password, urlOptions, driverName), dbhosts) 39 | } 40 | } 41 | 42 | class TransactionStatsCollectingDatabase(val database: Database, dbhosts: List[String]) extends DatabaseProxy { 43 | override def open(): Connection = { 44 | Stats.transaction.record("Opening a connection to: "+dbhosts.mkString(",")) 45 | val start = Time.now 46 | try { 47 | val rv = database.open() 48 | val duration = Time.now-start 49 | Stats.transaction.record("Open duration: "+duration.inMillis) 50 | rv 51 | } catch { 52 | case e => 53 | Stats.transaction.record("Failure opening a connection: "+e) 54 | val duration = Time.now-start 55 | Stats.transaction.record("Open duration: "+duration.inMillis) 56 | throw e 57 | } 58 | } 59 | 60 | override def close(connection: Connection) = { 61 | Stats.transaction.record("Closing connection to: "+dbhosts.mkString(",")) 62 | val start = Time.now 63 | try { 64 | val rv = database.close(connection) 65 | val duration = Time.now - start 66 | Stats.transaction.record("Close duration: "+duration.inMillis) 67 | rv 68 | } catch { 
69 | case e => 70 | Stats.transaction.record("Failure closing a connection: "+e) 71 | val duration = Time.now-start 72 | Stats.transaction.record("Close duration: "+duration.inMillis) 73 | throw e 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/scripts/mkshards.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # 3 | # Copyright 2010 Twitter, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); you may 6 | # not use this file except in compliance with the License. You may obtain 7 | # a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | $:.push(File.dirname($0)) 18 | require 'optparse' 19 | require 'yaml' 20 | 21 | options = { 22 | :config_filename => ENV['HOME'] + "/.shards.yml", 23 | :count => 500, 24 | } 25 | 26 | $stderr.puts "WARNING: This script is deprecated. Use 'gizzmo create-table' instead." 
27 | 28 | parser = OptionParser.new do |opts| 29 | opts.banner = "Usage: #{$0} [options] " 30 | opts.separator "Example: #{$0} -f shards.yml 11" 31 | 32 | opts.on("-f", "--config=FILENAME", "load shard database config (default: #{options[:config_filename]})") do |filename| 33 | options[:config_filename] = filename 34 | end 35 | opts.on("-n", "--count=N", "create N bins (default: #{options[:count]})") do |count| 36 | options[:count] = count.to_i 37 | end 38 | end 39 | 40 | parser.parse!(ARGV) 41 | 42 | if ARGV.size < 1 43 | puts 44 | puts parser 45 | puts 46 | exit 1 47 | end 48 | 49 | config = YAML.load_file(options[:config_filename]) rescue {} 50 | 51 | app_host, app_port = (config['app_host'] || 'localhost').split(':') 52 | app_port ||= 7920 53 | 54 | namespace = config['namespace'] || nil 55 | db_trees = Array(config['databases'] || 'localhost') 56 | graph_id = ARGV[0].to_i 57 | 58 | gizzmo = lambda do |cmd| 59 | `gizzmo --host=#{app_host} --port=#{app_port} #{cmd}` 60 | end 61 | 62 | 63 | print "Creating bins" 64 | STDOUT.flush 65 | options[:count].times do |i| 66 | table_name = [ namespace, "edges_#{graph_id}_%04d" % i ].compact.join("_") 67 | hosts = Array(db_trees[i % db_trees.size]) 68 | lower_bound = (1 << 60) / options[:count] * i 69 | types = "-s 'INT UNSIGNED' -d 'INT UNSIGNED'" 70 | 71 | [ "forward", "backward" ].each do |direction| 72 | gizzmo.call "create com.twitter.gizzard.shards.ReplicatingShard localhost/#{table_name}_#{direction}_replicating" 73 | 74 | distinct = 1 75 | hosts.each do |host| 76 | host, weight = host.split(':') 77 | weight ||= 1 78 | gizzmo.call "create #{types} com.twitter.flockdb.SqlShard #{host}/#{table_name}_#{direction}_#{distinct}" 79 | gizzmo.call "addlink localhost/#{table_name}_#{direction}_replicating #{host}/#{table_name}_#{direction}_#{distinct} #{weight}" 80 | distinct += 1 81 | end 82 | end 83 | 84 | gizzmo.call "addforwarding -- #{graph_id} #{lower_bound} localhost/#{table_name}_forward_replicating" 85 | gizzmo.call 
"addforwarding -- -#{graph_id} #{lower_bound} localhost/#{table_name}_backward_replicating" 86 | 87 | print "." 88 | print "#{i+1}" if (i + 1) % 100 == 0 89 | STDOUT.flush 90 | end 91 | puts "Done." 92 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/integration/SelectCompilerSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package integration 19 | 20 | import com.twitter.util.Time 21 | import com.twitter.conversions.time._ 22 | import org.specs.mock.{ClassMocker, JMocker} 23 | 24 | object SelectCompilerSpec extends IntegrationSpecification with JMocker with ClassMocker { 25 | "SelectCompiler integration" should { 26 | val FOLLOWS = 1 27 | 28 | val alice = 1L 29 | val bob = 2L 30 | val carl = 3L 31 | val darcy = 4L 32 | 33 | def setup1() { 34 | execute(Select(alice, FOLLOWS, bob).add) 35 | execute(Select(alice, FOLLOWS, carl).add) 36 | execute(Select(alice, FOLLOWS, darcy).add) 37 | execute(Select(carl, FOLLOWS, bob).add) 38 | execute(Select(carl, FOLLOWS, darcy).add) 39 | 40 | flockService.contains(carl, FOLLOWS, darcy)() must eventually(beTrue) 41 | } 42 | 43 | def setup2() { 44 | for (i <- 1 until 11) execute(Select(alice, FOLLOWS, i).add) 45 | for (i <- 1 until 7) execute(Select(bob, FOLLOWS, i * 2).add) 46 | 47 | count(Select(alice, FOLLOWS, ())) must eventually(be_==(10)) 48 | count(Select(bob, FOLLOWS, ())) must eventually(be_==(6)) 49 | } 50 | 51 | "pagination" in { 52 | reset(config) 53 | setup1() 54 | 55 | val program = Select(alice, FOLLOWS, ()) intersect Select(carl, FOLLOWS, ()) 56 | 57 | select(program, Page(1, Cursor.Start)) mustEqual ((List(darcy), Cursor(darcy), Cursor.End)) 58 | select(program, new Page(1, Cursor(darcy))) mustEqual ((List(bob), Cursor.End, Cursor(-bob))) 59 | select(program, Page(2, Cursor.Start)) mustEqual ((List(darcy, bob), Cursor.End, Cursor.End)) 60 | } 61 | 62 | "one list is empty" in { 63 | reset(config) 64 | setup2() 65 | 66 | val program = Select(alice, FOLLOWS, ()) intersect Select(carl, FOLLOWS, ()) 67 | 68 | select(program, new Page(10, Cursor.Start)) mustEqual ((List(), Cursor.End, Cursor.End)) 69 | } 70 | 71 | "difference" in { 72 | reset(config) 73 | setup2() 74 | 75 | val program = Select(alice, FOLLOWS, ()) difference Select(bob, FOLLOWS, ()) 76 | 77 | select(program, new Page(10, 
Cursor.Start)) mustEqual ((List(9,7,5,3,1), Cursor.End, Cursor.End)) 78 | select(program, new Page(2, Cursor.Start)) mustEqual ((List(9,7), Cursor(7), Cursor.End)) 79 | select(program, new Page(2, Cursor(7))) mustEqual ((List(5,3), Cursor(3), Cursor(-5))) 80 | select(program, new Page(2, Cursor(3))) mustEqual ((List(1), Cursor.End, Cursor(-1))) 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/unit/SimpleQuerySpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package unit 19 | 20 | import com.twitter.util.Future 21 | import org.specs.mock.JMocker 22 | import shards.Shard 23 | 24 | object SimpleQuerySpec extends ConfiguredSpecification with JMocker { 25 | "SimpleQuery" should { 26 | var shard: Shard = null 27 | var simpleQuery: queries.SimpleQuery = null 28 | val sourceId = 900 29 | 30 | doBefore { 31 | shard = mock[Shard] 32 | } 33 | 34 | "sizeEstimate" in { 35 | "when the state is normal" >> { 36 | expect { 37 | one(shard).count(sourceId, List(State.Normal)) willReturn Future(10) 38 | } 39 | simpleQuery = new queries.SimpleQuery(shard, sourceId, List(State.Normal)) 40 | simpleQuery.sizeEstimate()() mustEqual 10 41 | } 42 | 43 | "when the state is abnormal" >> { 44 | expect { 45 | one(shard).count(sourceId, List(State.Removed)) willReturn Future(10) 46 | } 47 | simpleQuery = new queries.SimpleQuery(shard, sourceId, List(State.Removed)) 48 | simpleQuery.sizeEstimate()() mustEqual 10 49 | } 50 | } 51 | 52 | "selectWhereIn" in { 53 | val page = List(1L, 2L, 3L, 4L) 54 | expect { 55 | one(shard).intersect(sourceId, List(State.Normal), page) willReturn Future(List(1L, 2L)) 56 | } 57 | simpleQuery = new queries.SimpleQuery(shard, sourceId, List(State.Normal)) 58 | simpleQuery.selectWhereIn(page)().toList mustEqual List(1L, 2L) 59 | } 60 | 61 | "selectPage" in { 62 | var edges = List[Long](101L, 103L, 104L, 107L, 108L) 63 | val cursor = Cursor(102L) 64 | val count = 5 65 | expect { 66 | allowing(shard).selectByPosition(sourceId, List(State.Normal), count, cursor) willReturn Future(new ResultWindow(Cursor.cursorZip(edges), Cursor.End, Cursor.End, count, cursor)) 67 | } 68 | simpleQuery = new queries.SimpleQuery(shard, sourceId, List(State.Normal)) 69 | simpleQuery.selectPage(count, cursor)().toTuple mustEqual (edges, Cursor.End, Cursor.End) 70 | } 71 | 72 | "selectPageByDestinationId" in { 73 | val edges = List[Long](101L, 103L, 104L, 107L, 108L) 74 | val cursor = Cursor(102L) 
75 | val count = 5 76 | expect { 77 | allowing(shard).selectByDestinationId(sourceId, List(State.Normal), count, cursor) willReturn Future(new ResultWindow(Cursor.cursorZip(edges), Cursor.End, Cursor.End, count, cursor)) 78 | } 79 | simpleQuery = new queries.SimpleQuery(shard, sourceId, List(State.Normal)) 80 | simpleQuery.selectPageByDestinationId(count, cursor)().toTuple mustEqual (edges, Cursor.End, Cursor.End) 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/queries/IntersectionQuery.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package queries 19 | 20 | import com.twitter.util.{Duration, Future} 21 | import com.twitter.gizzard.Stats 22 | 23 | class IntersectionQuery(query1: QueryTree, query2: QueryTree, averageIntersectionProportion: Double, intersectionPageSizeMax: Int, intersectionTimeout: Duration) extends ComplexQueryNode(query1, query2) { 24 | def sizeEstimate() = { 25 | getSizeEstimates() map { case (count1, count2) => 26 | ((count1 min count2) * averageIntersectionProportion).toInt 27 | } 28 | } 29 | 30 | def selectPage(count: Int, cursor: Cursor) = selectPageByDestinationId(count, cursor) 31 | 32 | def selectPageByDestinationId(count: Int, cursor: Cursor) = time { 33 | getSizeEstimates() flatMap { case (count1, count2) => 34 | if (count1 == 0 || count2 == 0) { 35 | Future(new ResultWindow(List[(Long,Cursor)](), count, cursor)) 36 | } else { 37 | val guessedPageSize = (count / averageIntersectionProportion).toInt 38 | val internalPageSize = guessedPageSize min intersectionPageSizeMax.toInt 39 | val timeout = intersectionTimeout.inMillis 40 | val startTime = System.currentTimeMillis 41 | 42 | def loop(smaller: Query, larger: Query, currCursor: Cursor): Future[ResultWindow[Long]] = { 43 | pageIntersection(smaller, larger, internalPageSize, count, currCursor) flatMap { resultWindow => 44 | if (resultWindow.page.size < count && 45 | resultWindow.continueCursor != Cursor.End && 46 | System.currentTimeMillis - startTime < timeout) { 47 | loop(smaller, larger, resultWindow.continueCursor) map { resultWindow ++ _ } 48 | } else { 49 | Future(resultWindow) 50 | } 51 | } 52 | } 53 | 54 | orderQueries() flatMap { case (smaller, larger) => loop(smaller, larger, cursor) } 55 | } 56 | } 57 | } 58 | 59 | def selectWhereIn(page: Seq[Long]) = time { 60 | orderQueries() flatMap { case (smaller, larger) => 61 | smaller.selectWhereIn(page) flatMap { larger.selectWhereIn(_) } 62 | } 63 | } 64 | 65 | private def pageIntersection(smallerQuery: Query, 
largerQuery: Query, internalPageSize: Int, count: Int, cursor: Cursor) = { 66 | for { 67 | results <- smallerQuery.selectPageByDestinationId(internalPageSize, cursor) 68 | whereIn <- largerQuery.selectWhereIn(results.view) 69 | } yield { 70 | new ResultWindow(Cursor.cursorZip(whereIn), results.nextCursor, results.prevCursor, count, cursor) 71 | } 72 | } 73 | 74 | override def toString = 75 | "" 76 | } 77 | -------------------------------------------------------------------------------- /src/scripts/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # flockdb init.d script. 4 | # 5 | 6 | APP_NAME="flock" 7 | ADMIN_PORT="9990" 8 | VERSION="@VERSION@" 9 | APP_HOME="/usr/local/$APP_NAME/current" 10 | DAEMON="/usr/local/bin/daemon" 11 | 12 | JAR_NAME="flockdb-$VERSION.jar" 13 | STAGE="production" 14 | 15 | HEAP_OPTS="-Xmx4096m -Xms4096m -XX:NewSize=768m" 16 | GC_OPTS="-verbosegc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+UseConcMarkSweepGC -XX:+UseParNewGC" 17 | DEBUG_OPTS="-XX:ErrorFile=/var/log/$APP_NAME/java_error%p.log" 18 | JAVA_OPTS="-server -Dstage=$STAGE $GC_OPTS $HEAP_OPTS $DEBUG_OPTS" 19 | 20 | pidfile="/var/run/$APP_NAME/$APP_NAME.pid" 21 | daemon_pidfile="/var/run/$APP_NAME/$APP_NAME-daemon.pid" 22 | daemon_args="--name $APP_NAME --pidfile $daemon_pidfile --core --chdir /" 23 | daemon_start_args="--stdout=/var/log/$APP_NAME/stdout --stderr=/var/log/$APP_NAME/error" 24 | 25 | 26 | function running() { 27 | $DAEMON $daemon_args --running 28 | } 29 | 30 | function find_java() { 31 | if [ ! -z "$JAVA_HOME" ]; then 32 | return 33 | fi 34 | for dir in /opt/jdk /System/Library/Frameworks/JavaVM.framework/Versions/CurrentJDK/Home /usr/java/default; do 35 | if [ -x $dir/bin/java ]; then 36 | JAVA_HOME=$dir 37 | break 38 | fi 39 | done 40 | } 41 | 42 | find_java 43 | 44 | 45 | case "$1" in 46 | start) 47 | echo -n "Starting $APP_NAME... " 48 | 49 | if [ ! 
-r $APP_HOME/$JAR_NAME ]; then 50 | echo "FAIL" 51 | echo "*** $APP_NAME jar missing: $APP_HOME/$JAR_NAME - not starting" 52 | exit 1 53 | fi 54 | if [ ! -x $JAVA_HOME/bin/java ]; then 55 | echo "FAIL" 56 | echo "*** $JAVA_HOME/bin/java doesn't exist -- check JAVA_HOME?" 57 | exit 1 58 | fi 59 | if running; then 60 | echo "already running." 61 | exit 0 62 | fi 63 | 64 | ulimit -n 32768 || echo -n " (no ulimit)" 65 | ulimit -c unlimited || echo -n " (no coredump)" 66 | $DAEMON $daemon_args $daemon_start_args -- sh -c "echo "'$$'" > $pidfile; exec ${JAVA_HOME}/bin/java ${JAVA_OPTS} -jar ${APP_HOME}/${JAR_NAME}" 67 | tries=0 68 | while ! running; do 69 | tries=$((tries + 1)) 70 | if [ $tries -ge 5 ]; then 71 | echo "FAIL" 72 | exit 1 73 | fi 74 | sleep 1 75 | done 76 | echo "done." 77 | ;; 78 | 79 | stop) 80 | echo -n "Stopping $APP_NAME... " 81 | if ! running; then 82 | echo "wasn't running." 83 | exit 0 84 | fi 85 | 86 | curl -s http://localhost:${ADMIN_PORT}/shutdown.txt > /dev/null 87 | tries=0 88 | while running; do 89 | tries=$((tries + 1)) 90 | if [ $tries -ge 15 ]; then 91 | echo "FAILED SOFT SHUTDOWN, TRYING HARDER" 92 | if [ -f $pidfile ]; then 93 | kill $(cat $pidfile) 94 | else 95 | echo "CAN'T FIND PID, TRY KILL MANUALLY" 96 | exit 1 97 | fi 98 | hardtries=0 99 | while running; do 100 | hardtries=$((hardtries + 1)) 101 | if [ $hardtries -ge 5 ]; then 102 | echo "FAILED HARD SHUTDOWN, TRY KILL -9 MANUALLY" 103 | exit 1 104 | fi 105 | sleep 1 106 | done 107 | fi 108 | sleep 1 109 | done 110 | echo "done." 111 | ;; 112 | 113 | status) 114 | if running; then 115 | echo "$APP_NAME is running." 116 | else 117 | echo "$APP_NAME is NOT running." 
118 | fi 119 | ;; 120 | 121 | restart) 122 | $0 stop 123 | sleep 2 124 | $0 start 125 | ;; 126 | 127 | *) 128 | echo "Usage: /etc/init.d/${APP_NAME}.sh {start|stop|restart|status}" 129 | exit 1 130 | ;; 131 | esac 132 | 133 | exit 0 134 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/queries/ExecuteCompiler.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package queries 19 | 20 | import scala.collection.mutable 21 | import com.twitter.gizzard.Stats 22 | import com.twitter.gizzard.scheduler.{JsonJob, JsonNestedJob, PrioritizingJobScheduler} 23 | import com.twitter.gizzard.shards.ShardException 24 | import com.twitter.gizzard.thrift.conversions.Sequences._ 25 | import com.twitter.util.Time 26 | import com.twitter.util.TimeConversions._ 27 | import jobs.single.Single 28 | import jobs.multi.Multi 29 | import operations.{ExecuteOperations, ExecuteOperationType} 30 | 31 | 32 | class ExecuteCompiler(scheduler: PrioritizingJobScheduler, forwardingManager: ForwardingManager, aggregateJobPageSize: Int) { 33 | @throws(classOf[ShardException]) 34 | def apply(program: ExecuteOperations) { 35 | val now = Time.now 36 | val operations = program.operations 37 | val results = new mutable.ArrayBuffer[JsonJob] 38 | if (operations.size == 0) throw new InvalidQueryException("You must have at least one operation") 39 | 40 | for (op <- operations) { 41 | val term = op.term 42 | val time = program.executeAt.map(Time.fromSeconds).getOrElse(Time.now) 43 | val position = op.position.getOrElse(Time.now.inMillis) 44 | 45 | // force an exception for nonexistent graphs 46 | forwardingManager.find(0, term.graphId, Direction.Forward) 47 | 48 | val state = op.operationType match { 49 | case ExecuteOperationType.Add => State.Normal 50 | case ExecuteOperationType.Remove => State.Removed 51 | case ExecuteOperationType.Archive => State.Archived 52 | case ExecuteOperationType.Negate => State.Negative 53 | case n => throw new InvalidQueryException("Unknown operation " + n) 54 | } 55 | 56 | results ++= processDestinations(term) { (sourceId, destinationId) => 57 | new Single( 58 | sourceId, 59 | term.graphId, 60 | destinationId, 61 | position, 62 | state, 63 | time, 64 | null, 65 | null 66 | ) 67 | } { 68 | new Multi( 69 | term.sourceId, 70 | term.graphId, 71 | Direction(term.isForward), 72 | state, 73 | 
time, 74 | program.priority, 75 | aggregateJobPageSize, 76 | null, 77 | null 78 | ) 79 | } 80 | } 81 | 82 | Stats.transaction.set("job", results.map { _.toJson }.mkString(", ")) 83 | scheduler.put(program.priority.id, new JsonNestedJob(results)) 84 | } 85 | 86 | private def processDestinations(term: QueryTerm)(handleItemInCollection: (Long, Long) => JsonJob)(noDestinations: JsonJob) = { 87 | if (term.destinationIds.isDefined) { 88 | for (d <- term.destinationIds.get) yield { 89 | val (sourceId, destinationId) = if (term.isForward) { 90 | (term.sourceId, d) 91 | } else { 92 | (d, term.sourceId) 93 | } 94 | handleItemInCollection(sourceId, destinationId) 95 | } 96 | } else { 97 | List(noDestinations) 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /config/development.scala: -------------------------------------------------------------------------------- 1 | import scala.collection.JavaConversions._ 2 | import com.twitter.flockdb.config._ 3 | import com.twitter.gizzard.config._ 4 | import com.twitter.querulous.config._ 5 | import com.twitter.querulous.StatsCollector 6 | import com.twitter.conversions.time._ 7 | import com.twitter.conversions.storage._ 8 | import com.twitter.flockdb.shards.QueryClass 9 | import com.twitter.flockdb.Priority 10 | import com.twitter.ostrich.admin.config.AdminServiceConfig 11 | import com.twitter.logging.Level 12 | import com.twitter.logging.config._ 13 | 14 | trait Credentials extends Connection { 15 | val env = System.getenv().toMap 16 | val username = env.get("DB_USERNAME").getOrElse("root") 17 | val password = env.get("DB_PASSWORD").getOrElse("") 18 | } 19 | 20 | class ProductionQueryEvaluator extends AsyncQueryEvaluator { 21 | override var workPoolSize = 40 22 | database.memoize = true 23 | database.pool = new ThrottledPoolingDatabase { 24 | size = workPoolSize 25 | openTimeout = 100.millis 26 | } 27 | 28 | query.timeouts = Map( 29 | QueryClass.Select -> 
QueryTimeout(1.second), 30 | QueryClass.Execute -> QueryTimeout(1.second), 31 | QueryClass.SelectCopy -> QueryTimeout(15.seconds), 32 | QueryClass.SelectModify -> QueryTimeout(3.seconds), 33 | QueryClass.SelectSingle -> QueryTimeout(1.second), 34 | QueryClass.SelectIntersection -> QueryTimeout(1.second), 35 | QueryClass.SelectIntersectionSmall -> QueryTimeout(1.second), 36 | QueryClass.SelectMetadata -> QueryTimeout(1.second) 37 | ) 38 | } 39 | 40 | class ProductionNameServerReplica(host: String) extends Mysql { 41 | val connection = new Connection with Credentials { 42 | val hostnames = Seq(host) 43 | val database = "flockdb_development" 44 | } 45 | 46 | queryEvaluator = new QueryEvaluator { 47 | database.memoize = true 48 | database.pool = new ThrottledPoolingDatabase { 49 | size = 1 50 | openTimeout = 1.second 51 | } 52 | } 53 | } 54 | 55 | new FlockDB { 56 | aggregateJobsPageSize = 500 57 | 58 | mappingFunction = ByteSwapper 59 | jobRelay = NoJobRelay 60 | nameServerReplicas = Seq(new ProductionNameServerReplica("localhost")) 61 | jobInjector.timeout = 100.millis 62 | jobInjector.idleTimeout = 60.seconds 63 | jobInjector.threadPool.minThreads = 30 64 | 65 | val databaseConnection = new Credentials { 66 | val hostnames = Seq("localhost") 67 | val database = "edges_development" 68 | urlOptions = Map("rewriteBatchedStatements" -> "true") 69 | } 70 | 71 | val edgesQueryEvaluator = new ProductionQueryEvaluator 72 | 73 | val lowLatencyQueryEvaluator = edgesQueryEvaluator 74 | 75 | val materializingQueryEvaluator = new ProductionQueryEvaluator { 76 | workPoolSize = 1 77 | database.pool = new ThrottledPoolingDatabase { 78 | size = workPoolSize 79 | openTimeout = 1.second 80 | } 81 | } 82 | 83 | class DevelopmentScheduler(val name: String) extends Scheduler { 84 | override val jobQueueName = name + "_jobs" 85 | val schedulerType = new KestrelScheduler { 86 | val queuePath = "." 
87 | } 88 | 89 | errorLimit = 100 90 | errorRetryDelay = 15.minutes 91 | errorStrobeInterval = 1.second 92 | perFlushItemLimit = 100 93 | jitterRate = 0 94 | } 95 | 96 | val jobQueues = Map( 97 | Priority.High.id -> new DevelopmentScheduler("edges") { threads = 32 }, 98 | Priority.Medium.id -> new DevelopmentScheduler("copy") { threads = 12; errorRetryDelay = 60.seconds }, 99 | Priority.Low.id -> new DevelopmentScheduler("edges_slow") { threads = 2 } 100 | ) 101 | 102 | val adminConfig = new AdminServiceConfig { 103 | httpPort = Some(9990) 104 | } 105 | 106 | loggers = List(new LoggerConfig { 107 | level = Some(Level.INFO) 108 | handlers = List( 109 | new ThrottledHandlerConfig { 110 | duration = 60.seconds 111 | maxToDisplay = 10 112 | handler = new FileHandlerConfig { 113 | filename = "development.log" 114 | roll = Policy.Hourly 115 | } 116 | } 117 | ) 118 | }) 119 | } 120 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/unit/EdgesSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package unit 19 | 20 | import com.twitter.gizzard.scheduler._ 21 | import com.twitter.gizzard.shards.ShardInfo 22 | import com.twitter.util.{Future, Time} 23 | import com.twitter.conversions.time._ 24 | import org.specs.mock.{ClassMocker, JMocker} 25 | import jobs.single.Single 26 | import shards.Shard 27 | import State._ 28 | import com.twitter.flockdb.operations._ 29 | 30 | 31 | object EdgesSpec extends ConfiguredSpecification with JMocker with ClassMocker { 32 | "Edges" should { 33 | val FOLLOWS = 1 34 | 35 | val bob = 1L 36 | val mary = 2L 37 | 38 | val nestedJob = capturingParam[JsonNestedJob] 39 | val uuidGenerator = mock[UuidGenerator] 40 | val forwardingManager = mock[ForwardingManager] 41 | val shard = mock[Shard] 42 | val scheduler = mock[PrioritizingJobScheduler] 43 | val flock = new EdgesService(forwardingManager, scheduler, config.intersectionQuery, config.aggregateJobsPageSize) 44 | 45 | def toExecuteOperations(e: Execute) = ExecuteOperations(e.toOperations, None, Priority.High) 46 | 47 | "add" in { 48 | Time.withCurrentTimeFrozen { time => 49 | val job = new Single(bob, FOLLOWS, mary, Time.now.inMillis, State.Normal, Time.now, null, null) 50 | expect { 51 | one(forwardingManager).find(0, FOLLOWS, Direction.Forward) 52 | one(scheduler).put(will(beEqual(Priority.High.id)), nestedJob.capture) 53 | } 54 | flock.execute(toExecuteOperations(Select(bob, FOLLOWS, mary).add))() 55 | jsonMatching(List(job), nestedJob.captured.jobs) 56 | } 57 | } 58 | 59 | "add_at" in { 60 | Time.withCurrentTimeFrozen { time => 61 | val job = new Single(bob, FOLLOWS, mary, Time.now.inMillis, State.Normal, Time.now, null, null) 62 | expect { 63 | one(forwardingManager).find(0, FOLLOWS, Direction.Forward) 64 | one(scheduler).put(will(beEqual(Priority.High.id)), nestedJob.capture) 65 | } 66 | flock.execute(toExecuteOperations(Select(bob, FOLLOWS, mary).addAt(Time.now)))() 67 | jsonMatching(List(job), nestedJob.captured.jobs) 68 | 
} 69 | } 70 | 71 | "remove" in { 72 | Time.withCurrentTimeFrozen { time => 73 | val job = new Single(bob, FOLLOWS, mary, Time.now.inMillis, State.Removed, Time.now, null, null) 74 | expect { 75 | one(forwardingManager).find(0, FOLLOWS, Direction.Forward) 76 | one(scheduler).put(will(beEqual(Priority.High.id)), nestedJob.capture) 77 | } 78 | flock.execute(toExecuteOperations(Select(bob, FOLLOWS, mary).remove))() 79 | jsonMatching(List(job), nestedJob.captured.jobs) 80 | } 81 | } 82 | 83 | "remove_at" in { 84 | Time.withCurrentTimeFrozen { time => 85 | val job = new Single(bob, FOLLOWS, mary, Time.now.inMillis, State.Removed, Time.now, null, null) 86 | expect { 87 | one(forwardingManager).find(0, FOLLOWS, Direction.Forward) 88 | one(scheduler).put(will(beEqual(Priority.High.id)), nestedJob.capture) 89 | } 90 | flock.execute(toExecuteOperations(Select(bob, FOLLOWS, mary).removeAt(Time.now)))() 91 | jsonMatching(List(job), nestedJob.captured.jobs) 92 | } 93 | } 94 | 95 | "contains" in { 96 | Time.withCurrentTimeFrozen { time => 97 | expect { 98 | one(forwardingManager).find(bob, FOLLOWS, Direction.Forward) willReturn shard 99 | one(shard).get(bob, mary) willReturn Future(Some(new Edge(bob, mary, 0, Time.now, 0, State.Normal))) 100 | } 101 | flock.contains(bob, FOLLOWS, mary)() must beTrue 102 | } 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/queries/SelectCompiler.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain
 * a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.twitter.flockdb
package queries

import scala.collection.mutable
import com.twitter.gizzard.Stats
import operations.{SelectOperation, SelectOperationType}
import thrift.FlockException


class InvalidQueryException(reason: String) extends FlockException(reason)

/**
 * Compiles a postfix (reverse polish) program of SelectOperations into an
 * executable Query tree, recording per-graph stats and the query plan in the
 * current Stats transaction.
 */
class SelectCompiler(forwardingManager: ForwardingManager, intersectionConfig: config.IntersectionQuery) {

  // Fold step: tracks how many operands would be on the evaluation stack.
  // A literal pushes one; each binary operator consumes two and pushes one.
  // Throws InvalidQueryException as soon as an operator lacks two operands.
  private def validateProgram(acc: Int, op: SelectOperation) = op.operationType match {
    case SelectOperationType.SimpleQuery => acc + 1
    case SelectOperationType.Intersection =>
      if (acc < 2) throw new InvalidQueryException("Need two sub-queries to do an intersection")
      acc - 1
    case SelectOperationType.Union =>
      if (acc < 2) throw new InvalidQueryException("Need two sub-queries to do a union")
      acc - 1
    case SelectOperationType.Difference =>
      if (acc < 2) throw new InvalidQueryException("Need two sub-queries to do a difference")
      acc - 1
    case n => throw new InvalidQueryException("Unknown operation " + n)
  }

  def apply(program: Seq[SelectOperation]): Query = {

    // program is a list representation of a compound query in reverse polish (postfix) notation
    // with one literal (SimpleQuery) and three binary operators (Intersection, Union, Difference)
    // left fold over list to ensure that a valid parsing exists
    val items = program.foldLeft(0)(validateProgram)
    // FIX: "instaed" typo in the error message corrected to "instead".
    if (items != 1) throw new InvalidQueryException("Left " + items + " items on the stack instead of 1")

    // FIX: stack is never reassigned, so declare it as a val rather than a var.
    val stack = new mutable.Stack[QueryTree]
    val graphIds = new mutable.HashSet[String]
    for (op <- program) op.operationType match {
      case SelectOperationType.SimpleQuery =>
        val term = op.term.get

        // denote n for a backwards query
        graphIds += (if (term.isForward) "" else "n") + term.graphId

        val shard = forwardingManager.find(term.sourceId, term.graphId, Direction(term.isForward))
        val states = if (term.states.isEmpty) List(State.Normal) else term.states
        val query = if (term.destinationIds.isDefined) {
          new WhereInQuery(shard, term.sourceId, states, term.destinationIds.get)
        } else {
          new SimpleQuery(shard, term.sourceId, states)
        }
        stack.push(query)
      case SelectOperationType.Intersection =>
        // Intersection and union are commutative, so pop order doesn't matter here.
        stack.push(intersectionConfig.intersect(stack.pop, stack.pop))
      case SelectOperationType.Union =>
        stack.push(new UnionQuery(stack.pop, stack.pop))
      case SelectOperationType.Difference =>
        // Difference is NOT commutative: the right operand is on top of the stack.
        val rightSide = stack.pop
        val leftSide = stack.pop
        stack.push(intersectionConfig.difference(leftSide, rightSide))
    }
    val rv = stack.pop

    // complexity == 0 indicates only a single literal (no binary operators) -- program is length 1
    val complexity = rv.getComplexity()
    val name = if (complexity > 0) {
      "select-complex-" + complexity
    } else {
      "select" + (rv match {
        case query: WhereInQuery => if (program.head.term.get.destinationIds.get.size == 1) "-single" else "-simple"
        case query: SimpleQuery => if (program.head.term.get.states.size > 1) "-multistate" else ""
      })
    }

    // collect stats per graph
    for (graphId <- graphIds) {
      Stats.incr(name + "-graph_" + graphId + "-count");
    }

    Stats.transaction.record("Query Plan: " + rv.toString)
    Stats.transaction.name = name
    rv
  }
}
-------------------------------------------------------------------------------- /config/test.scala: -------------------------------------------------------------------------------- 1 | import com.twitter.flockdb.config._ 2 | import com.twitter.gizzard.config._ 3 | import com.twitter.gizzard.TransactionalStatsProvider 4 | import com.twitter.querulous.config._ 5 | import com.twitter.querulous.database.DatabaseFactory 6 | import com.twitter.querulous.query.QueryFactory 7 | import com.twitter.querulous.StatsCollector 8 | import com.twitter.conversions.time._ 9 | import com.twitter.conversions.storage._ 10 | import com.twitter.flockdb.shards.QueryClass 11 | import com.twitter.flockdb.{MemoizedQueryEvaluators, Priority} 12 | import com.twitter.ostrich.admin.config.AdminServiceConfig 13 | import com.twitter.logging.{Level, Logger} 14 | import com.twitter.logging.config.{FileHandlerConfig, LoggerConfig} 15 | 16 | 17 | trait Credentials extends Connection { 18 | import scala.collection.JavaConversions._ 19 | val env = System.getenv().toMap 20 | val username = env.get("DB_USERNAME").getOrElse("root") 21 | val password = env.get("DB_PASSWORD").getOrElse("") 22 | urlOptions = Map("connectTimeout" -> "0") 23 | } 24 | 25 | class TestQueryEvaluator(label: String) extends AsyncQueryEvaluator { 26 | query.debug = { s => Logger.get("query").debug(s) } 27 | override var workPoolSize = 2 28 | singletonFactory = true 29 | database.memoize = true 30 | database.pool = new ThrottledPoolingDatabase { 31 | size = workPoolSize 32 | openTimeout = 5.seconds 33 | } 34 | 35 | query.timeouts = Map( 36 | QueryClass.Select -> QueryTimeout(5.seconds), 37 | QueryClass.SelectModify -> QueryTimeout(5.seconds), 38 | QueryClass.SelectCopy -> QueryTimeout(15.seconds), 39 | QueryClass.Execute -> QueryTimeout(5.seconds), 40 | QueryClass.SelectSingle -> QueryTimeout(5.seconds), 41 | QueryClass.SelectIntersection -> QueryTimeout(5.seconds), 42 | QueryClass.SelectIntersectionSmall -> QueryTimeout(5.seconds), 
43 | QueryClass.SelectMetadata -> QueryTimeout(5.seconds) 44 | ) 45 | } 46 | 47 | class NameserverQueryEvaluator extends QueryEvaluator { 48 | singletonFactory = true 49 | database.memoize = true 50 | database.pool = new ThrottledPoolingDatabase { 51 | size = 1 52 | openTimeout = 5.seconds 53 | } 54 | } 55 | 56 | new FlockDB { 57 | mappingFunction = Identity 58 | jobRelay = NoJobRelay 59 | 60 | nameServerReplicas = Seq(new Mysql { 61 | queryEvaluator = new NameserverQueryEvaluator 62 | 63 | val connection = new Connection with Credentials { 64 | val hostnames = Seq("localhost") 65 | val database = "flock_edges_test" 66 | } 67 | }) 68 | 69 | jobInjector.timeout = 100.milliseconds 70 | jobInjector.idleTimeout = 60.seconds 71 | jobInjector.threadPool.minThreads = 30 72 | 73 | // Database Connectivity 74 | 75 | val databaseConnection = new Credentials { 76 | val hostnames = Seq("localhost") 77 | val database = "edges_test" 78 | } 79 | 80 | val edgesQueryEvaluator = new TestQueryEvaluator("edges") 81 | val lowLatencyQueryEvaluator = edgesQueryEvaluator 82 | val materializingQueryEvaluator = edgesQueryEvaluator 83 | 84 | // schedulers 85 | 86 | class TestScheduler(val name: String) extends Scheduler { 87 | jobQueueName = name + "_jobs" 88 | 89 | val schedulerType = new KestrelScheduler { 90 | path = "/tmp" 91 | keepJournal = false 92 | maxMemorySize = 36.megabytes 93 | } 94 | 95 | threads = 2 96 | errorLimit = 25 97 | errorRetryDelay = 900.seconds 98 | errorStrobeInterval = 30.seconds 99 | perFlushItemLimit = 1000 100 | jitterRate = 0.0f 101 | } 102 | 103 | val jobQueues = Map( 104 | Priority.High.id -> new TestScheduler("edges"), 105 | Priority.Medium.id -> new TestScheduler("copy"), 106 | Priority.Low.id -> new TestScheduler("edges_slow") 107 | ) 108 | 109 | 110 | // Admin/Logging 111 | 112 | val adminConfig = new AdminServiceConfig { 113 | httpPort = Some(9990) 114 | } 115 | 116 | loggers = List(new LoggerConfig { 117 | level = Some(Level.INFO) 118 | handlers = 
List(new FileHandlerConfig { filename = "test.log" }) 119 | }) 120 | 121 | queryStats.consumers = Seq(new AuditingTransactionalStatsConsumer { 122 | names = Set("execute") 123 | override def apply() = { new com.twitter.gizzard.AuditingTransactionalStatsConsumer(new com.twitter.gizzard.LoggingTransactionalStatsConsumer("audit_log") { 124 | def transactionToString(t: TransactionalStatsProvider) = { t.get("job").asInstanceOf[String] } 125 | }, names)}}) 126 | } 127 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/jobs/Legacy.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb.jobs 18 | 19 | import com.twitter.logging.Logger 20 | import com.twitter.util.Time 21 | import com.twitter.gizzard.scheduler._ 22 | import com.twitter.gizzard.shards._ 23 | import com.twitter.flockdb.{State, ForwardingManager, Cursor, UuidGenerator, Direction, Priority} 24 | import com.twitter.flockdb.conversions.Numeric._ 25 | import com.twitter.flockdb.jobs.single.Single 26 | import com.twitter.flockdb.jobs.multi.Multi 27 | 28 | 29 | // Legacy parsers for old format jobs without state 30 | // XXX: remove once we're off of the old format, or factor out common code with above. 
31 | 32 | object LegacySingleJobParser { 33 | def Add(forwardingManager: ForwardingManager, uuidGenerator: UuidGenerator) = { 34 | new LegacySingleJobParser(forwardingManager, uuidGenerator, State.Normal) 35 | } 36 | 37 | def Negate(forwardingManager: ForwardingManager, uuidGenerator: UuidGenerator) = { 38 | new LegacySingleJobParser(forwardingManager, uuidGenerator, State.Negative) 39 | } 40 | 41 | def Archive(forwardingManager: ForwardingManager, uuidGenerator: UuidGenerator) = { 42 | new LegacySingleJobParser(forwardingManager, uuidGenerator, State.Archived) 43 | } 44 | 45 | def Remove(forwardingManager: ForwardingManager, uuidGenerator: UuidGenerator) = { 46 | new LegacySingleJobParser(forwardingManager, uuidGenerator, State.Removed) 47 | } 48 | } 49 | 50 | object LegacyMultiJobParser { 51 | def Archive( 52 | forwardingManager: ForwardingManager, 53 | scheduler: PrioritizingJobScheduler, 54 | aggregateJobPageSize: Int 55 | ) = { 56 | new LegacyMultiJobParser(forwardingManager, scheduler, aggregateJobPageSize, State.Archived) 57 | } 58 | 59 | def Unarchive( 60 | forwardingManager: ForwardingManager, 61 | scheduler: PrioritizingJobScheduler, 62 | aggregateJobPageSize: Int 63 | ) = { 64 | new LegacyMultiJobParser(forwardingManager, scheduler, aggregateJobPageSize, State.Normal) 65 | } 66 | 67 | def RemoveAll( 68 | forwardingManager: ForwardingManager, 69 | scheduler: PrioritizingJobScheduler, 70 | aggregateJobPageSize: Int 71 | ) = { 72 | new LegacyMultiJobParser(forwardingManager, scheduler, aggregateJobPageSize, State.Removed) 73 | } 74 | 75 | def Negate( 76 | forwardingManager: ForwardingManager, 77 | scheduler: PrioritizingJobScheduler, 78 | aggregateJobPageSize: Int 79 | ) = { 80 | new LegacyMultiJobParser(forwardingManager, scheduler, aggregateJobPageSize, State.Negative) 81 | } 82 | } 83 | 84 | class LegacySingleJobParser( 85 | forwardingManager: ForwardingManager, 86 | uuidGenerator: UuidGenerator, 87 | state: State) 88 | extends JsonJobParser { 89 | 90 | 
def log = Logger.get 91 | 92 | def apply(attributes: Map[String, Any]): JsonJob = { 93 | val casted = attributes.asInstanceOf[Map[String, AnyVal]] 94 | 95 | new Single( 96 | casted("source_id").toLong, 97 | casted("graph_id").toInt, 98 | casted("destination_id").toLong, 99 | casted("position").toLong, 100 | state, // ONLY DIFFERENCE FROM SingleJobParser 101 | Time.fromSeconds(casted("updated_at").toInt), 102 | forwardingManager, 103 | uuidGenerator 104 | ) 105 | } 106 | } 107 | 108 | class LegacyMultiJobParser( 109 | forwardingManager: ForwardingManager, 110 | scheduler: PrioritizingJobScheduler, 111 | aggregateJobPageSize: Int, 112 | state: State) 113 | extends JsonJobParser { 114 | 115 | def apply(attributes: Map[String, Any]): JsonJob = { 116 | val casted = attributes.asInstanceOf[Map[String, AnyVal]] 117 | 118 | new Multi( 119 | casted("source_id").toLong, 120 | casted("graph_id").toInt, 121 | Direction(casted("direction").toInt), 122 | state, 123 | Time.fromSeconds(casted("updated_at").toInt), 124 | Priority(casted.get("priority").map(_.toInt).getOrElse(Priority.Low.id)), 125 | aggregateJobPageSize, 126 | forwardingManager, 127 | scheduler 128 | ) 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /config/production.scala: -------------------------------------------------------------------------------- 1 | import com.twitter.flockdb.config._ 2 | import com.twitter.gizzard.config._ 3 | import com.twitter.querulous.config._ 4 | import com.twitter.querulous.StatsCollector 5 | import com.twitter.conversions.time._ 6 | import com.twitter.conversions.storage._ 7 | import com.twitter.flockdb.shards.QueryClass 8 | import com.twitter.flockdb.Priority 9 | import com.twitter.ostrich.admin.config.AdminServiceConfig 10 | import com.twitter.logging.Level 11 | import com.twitter.logging.config._ 12 | 13 | trait Credentials extends Connection { 14 | val username = "root" 15 | val password = "" 16 | } 17 | 18 | class 
ProductionQueryEvaluator extends AsyncQueryEvaluator { 19 | override var workPoolSize = 40 20 | database.memoize = true 21 | database.pool = new ThrottledPoolingDatabase { 22 | size = workPoolSize 23 | openTimeout = 100.millis 24 | } 25 | 26 | query.timeouts = Map( 27 | QueryClass.Select -> QueryTimeout(1.second), 28 | QueryClass.Execute -> QueryTimeout(1.second), 29 | QueryClass.SelectCopy -> QueryTimeout(15.seconds), 30 | QueryClass.SelectModify -> QueryTimeout(3.seconds), 31 | QueryClass.SelectSingle -> QueryTimeout(1.second), 32 | QueryClass.SelectIntersection -> QueryTimeout(1.second), 33 | QueryClass.SelectIntersectionSmall -> QueryTimeout(1.second), 34 | QueryClass.SelectMetadata -> QueryTimeout(1.second) 35 | ) 36 | } 37 | 38 | class ProductionNameServerReplica(host: String) extends Mysql { 39 | val connection = new Connection with Credentials { 40 | val hostnames = Seq(host) 41 | val database = "flock_edges_production" 42 | } 43 | 44 | queryEvaluator = new QueryEvaluator { 45 | database.memoize = true 46 | database.pool = new ThrottledPoolingDatabase { 47 | size = 1 48 | openTimeout = 1.second 49 | } 50 | } 51 | } 52 | 53 | new FlockDB { 54 | mappingFunction = ByteSwapper 55 | jobRelay = NoJobRelay 56 | 57 | nameServerReplicas = Seq( 58 | new ProductionNameServerReplica("flockdb001.twitter.com"), 59 | new ProductionNameServerReplica("flockdb002.twitter.com") 60 | ) 61 | 62 | jobInjector.timeout = 100.millis 63 | jobInjector.idleTimeout = 60.seconds 64 | jobInjector.threadPool.minThreads = 30 65 | 66 | val databaseConnection = new Credentials { 67 | val hostnames = Seq("localhost") 68 | val database = "edges" 69 | urlOptions = Map("rewriteBatchedStatements" -> "true") 70 | } 71 | 72 | val edgesQueryEvaluator = new ProductionQueryEvaluator 73 | val lowLatencyQueryEvaluator = new ProductionQueryEvaluator 74 | 75 | val materializingQueryEvaluator = new ProductionQueryEvaluator { 76 | workPoolSize = 1 77 | database.pool = new ThrottledPoolingDatabase { 78 | 
size = workPoolSize 79 | openTimeout = 1.second 80 | } 81 | } 82 | 83 | class ProductionScheduler(val name: String) extends Scheduler { 84 | jobQueueName = name + "_jobs" 85 | 86 | val schedulerType = new KestrelScheduler { 87 | path = "/var/spool/kestrel" 88 | maxMemorySize = 36.megabytes 89 | } 90 | 91 | errorLimit = 100 92 | errorRetryDelay = 15.minutes 93 | errorStrobeInterval = 1.second 94 | perFlushItemLimit = 100 95 | jitterRate = 0 96 | } 97 | 98 | val jobQueues = Map( 99 | Priority.High.id -> new ProductionScheduler("edges") { threads = 32 }, 100 | Priority.Medium.id -> new ProductionScheduler("copy") { threads = 12; errorRetryDelay = 60.seconds }, 101 | Priority.Low.id -> new ProductionScheduler("edges_slow") { threads = 2 } 102 | ) 103 | 104 | val adminConfig = new AdminServiceConfig { 105 | httpPort = Some(9990) 106 | } 107 | 108 | loggers = List( 109 | new LoggerConfig { 110 | level = Some(Level.INFO) 111 | handlers = List( 112 | new ThrottledHandlerConfig { 113 | duration = 60.seconds 114 | maxToDisplay = 10 115 | handler = new FileHandlerConfig { 116 | filename = "/var/log/flock/production.log" 117 | roll = Policy.Hourly 118 | } 119 | }) 120 | }, 121 | new LoggerConfig { 122 | node = "stats" 123 | useParents = false 124 | level = Some(Level.INFO) 125 | handlers = List(new ScribeHandlerConfig { 126 | category = "flock-stats" 127 | }) 128 | }, 129 | new LoggerConfig { 130 | node = "bad_jobs" 131 | useParents = false 132 | level = Some(Level.INFO) 133 | handlers = List(new FileHandlerConfig { 134 | roll = Policy.Never 135 | filename = "/var/log/flock/bad_jobs.log" 136 | }) 137 | }) 138 | } 139 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/integration/OptimisticLockRegressionSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package integration 19 | 20 | import org.specs.mock.{ClassMocker, JMocker} 21 | import org.specs.util.{Duration => SpecsDuration} 22 | import org.specs.matcher.Matcher 23 | import com.twitter.gizzard.scheduler.{JsonJob, PrioritizingJobScheduler} 24 | import com.twitter.gizzard.shards._ 25 | import com.twitter.gizzard.nameserver.NameServer 26 | import com.twitter.util.Time 27 | import com.twitter.conversions.time._ 28 | import com.twitter.flockdb.operations._ 29 | import jobs.single._ 30 | import shards.{Shard, SqlShard} 31 | 32 | 33 | class OptimisticLockRegressionSpec extends IntegrationSpecification() { 34 | val FOLLOWS = 1 35 | val alice = 1 36 | 37 | val MIN = 3 38 | val MAX = 100 39 | val errorLimit = 5 40 | 41 | "Inserting conflicting items" should { 42 | "recover via the optimistic lock" in { 43 | reset(config) 44 | 45 | val scheduler = flock.jobScheduler(Priority.High.id) 46 | val errors = scheduler.errorQueue 47 | 48 | // No thrift api for this, so this is the best I know how to do. 
49 | scheduler.put(new Single(1, FOLLOWS, 5106, 123456, State.Normal, Time.now, flock.forwardingManager, OrderedUuidGenerator)) 50 | 51 | execute(Select(1, FOLLOWS, ()).archive) 52 | 53 | playNormalJobs() 54 | 55 | var found = false 56 | while (errors.size > 0) { 57 | val job = errors.get.get.job 58 | if (job.errorMessage.indexOf("lost optimistic lock") > 0) { 59 | found = true 60 | } 61 | job() 62 | } 63 | playScheduledJobs() 64 | 65 | found mustEqual true 66 | 67 | flockService.get(1, FOLLOWS, 5106)().state must eventually(be_==(State.Archived)) 68 | } 69 | 70 | 71 | "still work even if we spam a ton of operations" in { 72 | // println("gogo") 73 | reset(config) 74 | 75 | val scheduler = flock.jobScheduler(Priority.High.id) 76 | val errors = scheduler.errorQueue 77 | 78 | // println("spamming edges") 79 | for(i <- 1 to 500) { 80 | (i % 2) match { 81 | case 0 => execute(Select(1, FOLLOWS, i).add) 82 | case 1 => execute(Select(1, FOLLOWS, i).archive) 83 | } 84 | } 85 | 86 | // println("spamming removes") 87 | for(i <- 1 to 50) { 88 | execute(Select((), FOLLOWS, i * 10).remove) 89 | } 90 | 91 | // println("spamming bulks") 92 | for(i <- 1 to 10) { 93 | (i % 2) match { 94 | case 0 => execute(Select(1, FOLLOWS, ()).add) 95 | case 1 => execute(Select(1, FOLLOWS, ()).archive) 96 | } 97 | } 98 | 99 | // println("final state") 100 | execute(Select(1, FOLLOWS, ()).archive) 101 | 102 | // println("draining") 103 | playNormalJobs() 104 | 105 | while (errors.size > 0) { 106 | // println("looping through the error queue") 107 | val job = errors.get.get.job 108 | try { 109 | job() 110 | } catch { 111 | case e => { 112 | job.errorCount += 1 113 | job.errorMessage = e.toString 114 | if (job.errorCount > errorLimit) { 115 | throw new RuntimeException("too many bad jobs") 116 | } else { 117 | errors.put(job) 118 | } 119 | } 120 | } 121 | 122 | playNormalJobs() 123 | } 124 | 125 | Thread.sleep(1000) 126 | 127 | val selectArchived = SimpleSelect( 128 | SelectOperation( 129 | 
SelectOperationType.SimpleQuery, 130 | Some(QueryTerm(alice, FOLLOWS, true, None, List(State.Archived))) 131 | ) 132 | ) 133 | 134 | count(selectArchived) must eventually(be_==(450)) 135 | count(Select(1, FOLLOWS, ())) mustEqual 0 136 | 137 | for(i <- 1 to 500) { 138 | (i % 10) match { 139 | case 0 => () 140 | case _ => flockService.get(1, FOLLOWS, i)().state mustEqual State.Archived 141 | } 142 | } 143 | } 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/Select.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | import com.twitter.conversions.time._ 20 | import com.twitter.util.Time 21 | import com.twitter.flockdb.operations.SelectOperationType._ 22 | import com.twitter.flockdb.operations._ 23 | 24 | 25 | object Select { 26 | def apply(sourceId: Unit, graphId: Int, destinationId: Long) = { 27 | new SimpleSelect(new SelectOperation(SimpleQuery, Some(new QueryTerm(destinationId, graphId, false, None, List(State.Normal))))) 28 | } 29 | 30 | def apply(sourceId: Long, graphId: Int, destinationId: Unit) = { 31 | new SimpleSelect(new SelectOperation(SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal))))) 32 | } 33 | 34 | def apply(sourceId: Long, graphId: Int, destinationId: Long) = { 35 | new SimpleSelect(new SelectOperation(SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, Some(List[Long](destinationId)), List(State.Normal))))) 36 | } 37 | 38 | def apply(sourceId: Long, graphId: Int, destinationIds: Seq[Long]) = { 39 | new SimpleSelect(new SelectOperation(SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, Some(destinationIds), List(State.Normal))))) 40 | } 41 | 42 | def apply(sourceIds: Seq[Long], graphId: Int, destinationId: Long) = { 43 | new SimpleSelect(new SelectOperation(SimpleQuery, Some(new QueryTerm(destinationId, graphId, false, Some(sourceIds), List(State.Normal))))) 44 | } 45 | } 46 | 47 | trait Select { 48 | def toList: List[SelectOperation] 49 | def intersect(that: Select): Select = new CompoundSelect(Intersection, this, that) 50 | def difference(that: Select): Select = new CompoundSelect(Difference, this, that) 51 | } 52 | 53 | trait Execute { 54 | def toOperations: List[ExecuteOperation] 55 | def at(time: Time): Execute 56 | def +(execute: Execute): Execute 57 | } 58 | 59 | // FIXME this is infinity-select not null :) 60 | object NullSelect extends Select { 61 | override def intersect(that: Select) = that 62 | def toList = { throw new Exception("Not Applicable") } 
63 | } 64 | 65 | case class SimpleSelect(operation: SelectOperation) extends Select { 66 | def toList = List(operation) 67 | 68 | def addAt(at: Time) = execute(ExecuteOperationType.Add, at) 69 | def add = addAt(Time.now) 70 | def archiveAt(at: Time) = execute(ExecuteOperationType.Archive, at) 71 | def archive = archiveAt(Time.now) 72 | def removeAt(at: Time) = execute(ExecuteOperationType.Remove, at) 73 | def remove = removeAt(Time.now) 74 | def negateAt(at: Time) = execute(ExecuteOperationType.Negate, at) 75 | def negate = negateAt(Time.now) 76 | private def execute(executeOperationType: ExecuteOperationType.Value, at: Time) = 77 | new SimpleExecute(new ExecuteOperation(executeOperationType, operation.term.get, 78 | Some(Time.now.inMillis)), at) 79 | 80 | def negative = { 81 | val negativeOperation = operation.clone 82 | negativeOperation.term.get.states = List(State.Negative) 83 | new SimpleSelect(negativeOperation) 84 | } 85 | 86 | def states(states: State*) = { 87 | val statefulOperation = operation.clone 88 | statefulOperation.term.get.states = states 89 | new SimpleSelect(statefulOperation) 90 | } 91 | } 92 | 93 | case class CompoundSelect(operation: SelectOperationType.Value, operand1: Select, operand2: Select) extends Select { 94 | def toList = operand1.toList ++ operand2.toList ++ List(new SelectOperation(operation, None)) 95 | } 96 | 97 | case class SimpleExecute(operation: ExecuteOperation, at: Time) extends Execute { 98 | def toOperations = List(operation) 99 | def at(time: Time) = new SimpleExecute(operation, time) 100 | def +(execute: Execute) = new CompoundExecute(this, execute, at, Priority.High) 101 | } 102 | 103 | case class CompoundExecute(operand1: Execute, operand2: Execute, at: Time, priority: Priority.Value) extends Execute { 104 | def toOperations = operand1.toOperations ++ operand2.toOperations 105 | 106 | def +(execute: Execute) = new CompoundExecute(this, execute, at, priority) 107 | def withPriority(priority: Priority.Value) = new 
CompoundExecute(operand1, operand2, at, priority) 108 | def at(time: Time) = new CompoundExecute(operand1, operand2, time, priority) 109 | } 110 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # STATUS 2 | 3 | Twitter is no longer maintaining this project or responding to issues or PRs. 4 | 5 | # FlockDB 6 | 7 | FlockDB is a distributed graph database for storing adjancency lists, with 8 | goals of supporting: 9 | 10 | - a high rate of add/update/remove operations 11 | - potientially complex set arithmetic queries 12 | - paging through query result sets containing millions of entries 13 | - ability to "archive" and later restore archived edges 14 | - horizontal scaling including replication 15 | - online data migration 16 | 17 | Non-goals include: 18 | 19 | - multi-hop queries (or graph-walking queries) 20 | - automatic shard migrations 21 | 22 | FlockDB is much simpler than other graph databases such as neo4j because it 23 | tries to solve fewer problems. It scales horizontally and is designed for 24 | on-line, low-latency, high throughput environments such as web-sites. 25 | 26 | Twitter uses FlockDB to store social graphs (who follows whom, who blocks 27 | whom) and secondary indices. As of April 2010, the Twitter FlockDB cluster 28 | stores 13+ billion edges and sustains peak traffic of 20k writes/second and 29 | 100k reads/second. 30 | 31 | 32 | # It does what? 33 | 34 | If, for example, you're storing a social graph (user A follows user B), and 35 | it's not necessarily symmetrical (A can follow B without B following A), then 36 | FlockDB can store that relationship as an edge: node A points to node B. It 37 | stores this edge with a sort position, and in both directions, so that it can 38 | answer the question "Who follows A?" as well as "Whom is A following?" 39 | 40 | This is called a directed graph.
(Technically, FlockDB stores the adjacency 41 | lists of a directed graph.) Each edge has a 64-bit source ID, a 64-bit 42 | destination ID, a state (normal, removed, archived), and a 32-bit position 43 | used for sorting. The edges are stored in both a forward and backward 44 | direction, meaning that an edge can be queried based on either the source or 45 | destination ID. 46 | 47 | For example, if node 134 points to node 90, and its sort position is 5, then 48 | there are two rows written into the backing store: 49 | 50 | forward: 134 -> 90 at position 5 51 | backward: 90 <- 134 at position 5 52 | 53 | If you're storing a social graph, the graph might be called "following", and 54 | you might use the current time as the position, so that a listing of followers 55 | is in recency order. In that case, if user 134 is Nick, and user 90 is Robey, 56 | then FlockDB can store: 57 | 58 | forward: Nick follows Robey at 9:54 today 59 | backward: Robey is followed by Nick at 9:54 today 60 | 61 | The (source, destination) must be unique: only one edge can point from node A 62 | to node B, but the position and state may be modified at any time. Position is 63 | used only for sorting the results of queries, and state is used to mark edges 64 | that have been removed or archived (placed into cold sleep). 65 | 66 | 67 | # Building 68 | 69 | In theory, building is as simple as 70 | 71 | $ sbt clean update package-dist 72 | 73 | but there are some pre-requisites. You need: 74 | 75 | - java 1.6 76 | - sbt 0.7.4 77 | - thrift 0.5.0 78 | 79 | If you haven't used sbt before, this page has a quick setup: 80 | [http://code.google.com/p/simple-build-tool/wiki/Setup](http://code.google.com/p/simple-build-tool/wiki/Setup). 
81 | My `~/bin/sbt` looks like this: 82 | 83 | #!/bin/bash 84 | java -server -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=256m -Xmx1024m -jar `dirname $0`/sbt-launch-0.7.4.jar "$@" 85 | 86 | Apache Thrift 0.5.0 is pre-requisite for building java stubs of the thrift 87 | IDL. It can't be installed via jar, so you'll need to install it separately 88 | before you build. It can be found on the apache thrift site: 89 | [http://thrift.apache.org/](http://thrift.apache.org/). 90 | You can find the download for 0.5.0 here: 91 | [http://archive.apache.org/dist/incubator/thrift/0.5.0-incubating/](http://archive.apache.org/dist/incubator/thrift/0.5.0-incubating/). 92 | 93 | In addition, the tests require a local mysql instance to be running, and for 94 | `DB_USERNAME` and `DB_PASSWORD` env vars to contain login info for it. You can 95 | skip the tests if you want (but you should feel a pang of guilt): 96 | 97 | $ NO_TESTS=1 sbt package-dist 98 | 99 | 100 | # Running 101 | 102 | Check out 103 | [the demo](http://github.com/twitter/flockdb/blob/master/doc/demo.markdown) 104 | for instructions on how to start up a local development instance of FlockDB. 105 | It also shows how to add edges, query them, etc. 106 | 107 | 108 | # Community 109 | 110 | - Twitter: #flockdb 111 | - IRC: #twinfra on freenode (irc.freenode.net) 112 | - Mailing list: [subscribe](http://groups.google.com/group/flockdb) 113 | 114 | 115 | # Contributors 116 | 117 | - Nick Kallen @nk 118 | - Robey Pointer @robey 119 | - John Kalucki @jkalucki 120 | - Ed Ceaser @asdf 121 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/ResultWindow.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | import scala.util.Sorting 20 | import com.twitter.util.Time 21 | import com.twitter.util.TimeConversions._ 22 | 23 | 24 | case class ResultWindowRow[T](id: T, cursor: Cursor) extends Ordered[ResultWindowRow[T]] { 25 | def compare(that: ResultWindowRow[T]) = that.cursor.compare(cursor) 26 | } 27 | 28 | class ResultWindowRows[T](data: Seq[ResultWindowRow[T]]) extends Seq[ResultWindowRow[T]] { 29 | def length = data.length 30 | def apply(i: Int) = data(i) 31 | def iterator = data.iterator 32 | } 33 | 34 | class ResultWindow[T](val data: ResultWindowRows[T], val inNextCursor: Cursor, val inPrevCursor: Cursor, val count: Int, val cursor: Cursor) extends Seq[T] { 35 | def this(data: Seq[(T, Cursor)], inNextCursor: Cursor, inPrevCursor: Cursor, count: Int, cursor: Cursor) = 36 | this(new ResultWindowRows(data.map { datum => ResultWindowRow(datum._1, datum._2) }), inNextCursor, inPrevCursor, count, cursor) 37 | def this(data: Seq[(T, Cursor)], count: Int, cursor: Cursor) = 38 | this(data, Cursor.End, Cursor.End, count, cursor) 39 | def this() = 40 | this(List[(T, Cursor)](), 0, Cursor.End) 41 | 42 | var page: Seq[ResultWindowRow[T]] = data 43 | var nextChanged, prevChanged = false 44 | if (cursor < Cursor.Start) { 45 | page = data.takeWhile(_.cursor > cursor.magnitude) 46 | nextChanged = page.size < data.size 47 | prevChanged = page.size > count 48 | page = page.drop(page.size - count) 49 | } else if (cursor == Cursor.Start) { 50 | nextChanged = page.size > count 51 | page = 
page.take(count) 52 | } else { 53 | page = data.dropWhile(_.cursor >= cursor) 54 | nextChanged = page.size > count 55 | prevChanged = page.size < data.size 56 | page = page.take(count) 57 | } 58 | val nextCursor = if (nextChanged && !page.isEmpty) page(page.size - 1).cursor else inNextCursor 59 | val prevCursor = if (prevChanged && !page.isEmpty) page(0).cursor.reverse else inPrevCursor 60 | 61 | def ++(other: ResultWindow[T]) = concat(other) 62 | 63 | def concat(other: ResultWindow[T], newCount: Int = count) = { 64 | if (cursor < Cursor.Start) { 65 | new ResultWindow(new ResultWindowRows(other.page ++ page), nextCursor, other.prevCursor, newCount, cursor) 66 | } else { 67 | new ResultWindow(new ResultWindowRows(page ++ other.page), other.nextCursor, prevCursor, newCount, cursor) 68 | } 69 | } 70 | 71 | 72 | def merge(other: ResultWindow[T]) = { 73 | val newPage = Sorting.stableSort((Set((page ++ other.page): _*)).toSeq) 74 | val newNextCursor = if (nextCursor == Cursor.End && other.nextCursor == Cursor.End) Cursor.End else newPage(newPage.size - 1).cursor 75 | val newPrevCursor = if (prevCursor == Cursor.End && other.prevCursor == Cursor.End) Cursor.End else newPage(0).cursor.reverse 76 | new ResultWindow(new ResultWindowRows(newPage), newNextCursor, newPrevCursor, count, cursor) 77 | } 78 | 79 | def --(values: Seq[T]) = diff(values) 80 | 81 | def diff(values: Seq[T], newCount: Int = count) = { 82 | val rejects = Set(values: _*) 83 | val newPage = page.filter { row => !rejects.contains(row.id) } 84 | val newNextCursor = if (nextCursor == Cursor.End || newPage.size == 0) Cursor.End else newPage(newPage.size - 1).cursor 85 | val newPrevCursor = if (prevCursor == Cursor.End || newPage.size == 0) Cursor.End else newPage(0).cursor.reverse 86 | new ResultWindow(new ResultWindowRows(newPage), newNextCursor, newPrevCursor, newCount, cursor) 87 | } 88 | 89 | def length = page.length 90 | def apply(i: Int) = page(i).id 91 | def iterator = page.view.map(_.id).iterator 92 | 
def continueCursor = if (cursor < Cursor.Start) prevCursor else nextCursor 93 | override def headOption = page.headOption.map { _.id } 94 | 95 | override def toString = (iterator.toList, nextCursor, prevCursor, count, cursor).toString 96 | 97 | override def equals(that: Any) = that match { 98 | case that: ResultWindow[_] => iterator.toList == that.iterator.toList && nextCursor == that.nextCursor && prevCursor == that.prevCursor && cursor == that.cursor 99 | case _ => false 100 | } 101 | 102 | // convenience method that makes for easier matching in tests 103 | def toTuple = (iterator.toList, nextCursor, prevCursor) 104 | } 105 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/jobs/multi/Multi.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb.jobs.multi 18 | 19 | import com.twitter.gizzard.scheduler._ 20 | import com.twitter.gizzard.shards.ShardBlackHoleException 21 | import com.twitter.util.Time 22 | import com.twitter.util.TimeConversions._ 23 | import com.twitter.flockdb.{State, ForwardingManager, Cursor, Priority, Direction} 24 | import com.twitter.flockdb.conversions.Numeric._ 25 | import com.twitter.flockdb.shards.Shard 26 | import com.twitter.flockdb.jobs.single.Single 27 | 28 | // TODO: Make this async. 29 | class MultiJobParser( 30 | forwardingManager: ForwardingManager, 31 | scheduler: PrioritizingJobScheduler, 32 | aggregateJobPageSize: Int) 33 | extends JsonJobParser { 34 | 35 | def apply(attributes: Map[String, Any]): JsonJob = { 36 | val casted = attributes.asInstanceOf[Map[String, AnyVal]] 37 | 38 | new Multi( 39 | casted("source_id").toLong, 40 | casted("graph_id").toInt, 41 | Direction(casted("direction").toInt), 42 | State(casted("state").toInt), 43 | Time.fromSeconds(casted("updated_at").toInt), 44 | Priority(casted.get("priority").map(_.toInt).getOrElse(Priority.Low.id)), 45 | aggregateJobPageSize, 46 | casted.get("cursor").map( c => Cursor(c.toLong)).getOrElse(Cursor.Start), 47 | forwardingManager, 48 | scheduler 49 | ) 50 | } 51 | } 52 | 53 | class Multi( 54 | sourceId: Long, 55 | graphId: Int, 56 | direction: Direction, 57 | preferredState: State, 58 | updatedAt: Time, 59 | priority: Priority.Value, 60 | aggregateJobPageSize: Int, 61 | var cursor: Cursor, 62 | forwardingManager: ForwardingManager, 63 | scheduler: PrioritizingJobScheduler) 64 | extends JsonJob { 65 | 66 | def this( 67 | sourceId: Long, 68 | graphId: Int, 69 | direction: Direction, 70 | preferredState: State, 71 | updatedAt: Time, 72 | priority: Priority.Value, 73 | aggregateJobPageSize: Int, 74 | forwardingManager: ForwardingManager, 75 | scheduler: PrioritizingJobScheduler 76 | ) = { 77 | this( 78 | sourceId, 79 | graphId, 80 | direction, 81 | preferredState, 82 | 
updatedAt, 83 | priority, 84 | aggregateJobPageSize, 85 | Cursor.Start, 86 | forwardingManager, 87 | scheduler 88 | ) 89 | } 90 | 91 | def toMap = Map( 92 | "source_id" -> sourceId, 93 | "updated_at" -> updatedAt.inSeconds, 94 | "graph_id" -> graphId, 95 | "direction" -> direction.id, 96 | "priority" -> priority.id, 97 | "state" -> preferredState.id, 98 | "cursor" -> cursor.position 99 | ) 100 | 101 | def apply() { 102 | val forwardShard = forwardingManager.find(sourceId, graphId, direction) 103 | 104 | if (cursor == Cursor.Start) try { 105 | updateMetadata(forwardShard, preferredState) 106 | } catch { 107 | case e: ShardBlackHoleException => return 108 | } 109 | 110 | while (cursor != Cursor.End) { 111 | val resultWindow = forwardShard.selectIncludingArchived(sourceId, aggregateJobPageSize, cursor)() 112 | 113 | val chunkOfTasks = resultWindow.map { destinationId => 114 | val (a, b) = if (direction == Direction.Backward) (destinationId, sourceId) else (sourceId, destinationId) 115 | singleEdgeJob(a, graphId, b, preferredState) 116 | } 117 | 118 | scheduler.put(priority.id, new JsonNestedJob(chunkOfTasks)) 119 | 120 | // "commit" the current iteration by saving the next cursor. 121 | // if the job blows up in the next round, it will be re-serialized 122 | // with this cursor. 123 | cursor = resultWindow.nextCursor 124 | } 125 | } 126 | 127 | // XXX: since this job gets immediately serialized, pass null for forwardingManager and uuidGenerator. 
128 | protected def singleEdgeJob(sourceId: Long, graphId: Int, destinationId: Long, state: State) = { 129 | new Single(sourceId, graphId, destinationId, updatedAt.inMillis, state, updatedAt, null, null) 130 | } 131 | 132 | protected def updateMetadata(shard: Shard, state: State) = state match { 133 | case State.Normal => shard.add(sourceId, updatedAt)() 134 | case State.Removed => shard.remove(sourceId, updatedAt)() 135 | case State.Archived => shard.archive(sourceId, updatedAt)() 136 | case State.Negative => shard.negate(sourceId, updatedAt)() 137 | } 138 | 139 | override def equals(o: Any) = o match { 140 | case o: Multi => this.toMap == o.toMap 141 | case _ => false 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/ConfiguredSpecification.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | import java.io.File 20 | import org.specs.Specification 21 | import com.twitter.gizzard.shards.{Busy, ShardId, ShardInfo} 22 | import com.twitter.gizzard.nameserver.Forwarding 23 | import com.twitter.gizzard.scheduler._ 24 | import com.twitter.gizzard.test.NameServerDatabase 25 | import com.twitter.util.{Eval, Time} 26 | import com.twitter.querulous.evaluator.QueryEvaluatorFactory 27 | import com.twitter.logging.Logger 28 | import scala.collection.mutable 29 | import com.twitter.flockdb 30 | import com.twitter.flockdb.operations._ 31 | 32 | object MemoizedQueryEvaluators { 33 | val evaluators = mutable.Map[String,QueryEvaluatorFactory]() 34 | } 35 | 36 | object Config { 37 | val config = { 38 | val c = Eval[flockdb.config.FlockDB](new File("config/test.scala")) 39 | try { 40 | c.loggers.foreach { _() } 41 | c 42 | } catch { 43 | case e: Exception => { 44 | e.printStackTrace() 45 | throw e 46 | } 47 | } 48 | } 49 | } 50 | 51 | abstract class ConfiguredSpecification extends Specification { 52 | val config = Config.config 53 | 54 | def jsonMatching(list1: Iterable[JsonJob], list2: Iterable[JsonJob]) = { 55 | list1 must eventually(verify(l1 => { l1.map(_.toJson).sameElements(list2.map(_.toJson))})) 56 | } 57 | } 58 | 59 | abstract class IntegrationSpecification extends ConfiguredSpecification with NameServerDatabase { 60 | lazy val flock = { 61 | val f = new FlockDB(config) 62 | f.jobScheduler.start() 63 | f 64 | } 65 | 66 | lazy val flockService = flock.flockService 67 | 68 | def execute(e: Execute, t: Option[Time] = None) = { 69 | flockService.execute(ExecuteOperations(e.toOperations, t map { _.inSeconds }, Priority.High))() 70 | } 71 | 72 | def count(s: Select) = { 73 | flockService.count(Seq(s.toList))().head 74 | } 75 | 76 | def select(s: Select, page: Page) = { 77 | flockService.select(SelectQuery(s.toList, page))().toTuple 78 | } 79 | 80 | def reset(config: flockdb.config.FlockDB) { reset(config, 1) } 81 | 
82 | def reset(config: flockdb.config.FlockDB, count: Int) { 83 | materialize(config) 84 | flock.nameServer.reload() 85 | 86 | val rootQueryEvaluator = config.edgesQueryEvaluator()(config.databaseConnection.withoutDatabase) 87 | //rootQueryEvaluator.execute("DROP DATABASE IF EXISTS " + config.databaseConnection.database) 88 | val queryEvaluator = config.edgesQueryEvaluator()(config.databaseConnection) 89 | 90 | for (graph <- (1 until 10)) { 91 | Seq("forward", "backward").foreach { direction => 92 | val tableId = if (direction == "forward") graph else graph * -1 93 | val replicatingShardId = ShardId("localhost", "replicating_" + direction + "_" + graph) 94 | flock.shardManager.createAndMaterializeShard( 95 | ShardInfo(replicatingShardId, "com.twitter.gizzard.shards.ReplicatingShard", "", "", Busy.Normal) 96 | ) 97 | flock.shardManager.setForwarding(Forwarding(tableId, 0, replicatingShardId)) 98 | 99 | for (sqlShardId <- (1 to count)) { 100 | val shardId = ShardId("localhost", direction + "_" + sqlShardId + "_" + graph) 101 | 102 | flock.shardManager.createAndMaterializeShard(ShardInfo(shardId, 103 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal)) 104 | flock.shardManager.addLink(replicatingShardId, shardId, 1) 105 | 106 | queryEvaluator.execute("DELETE FROM " + shardId.tablePrefix + "_edges")() 107 | queryEvaluator.execute("DELETE FROM " + shardId.tablePrefix + "_metadata")() 108 | } 109 | } 110 | } 111 | 112 | flock.nameServer.reload() 113 | } 114 | 115 | def playScheduledJobs() { 116 | Thread.sleep(100) 117 | val s = flock.jobScheduler 118 | while (s.size > 0 || s.errorSize > 0 || s.activeThreads > 0) { 119 | s.retryErrors() 120 | 121 | Thread.sleep(50) 122 | } 123 | } 124 | 125 | def playNormalJobs() { 126 | Thread.sleep(100) 127 | val s = flock.jobScheduler 128 | while (s.size > 0 || s.activeThreads > 0) { 129 | Thread.sleep(50) 130 | } 131 | } 132 | 133 | def reset(config: flockdb.config.FlockDB, db: String) { 134 | try { 135 | 
evaluator(config).execute("DROP DATABASE IF EXISTS " + db) 136 | } catch { 137 | case e => 138 | e.printStackTrace() 139 | throw e 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/jobs/single/Single.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb.jobs.single 18 | 19 | import com.twitter.logging.Logger 20 | import com.twitter.util.{Time, Return, Throw} 21 | import com.twitter.gizzard.scheduler._ 22 | import com.twitter.gizzard.shards._ 23 | import com.twitter.conversions.time._ 24 | import com.twitter.flockdb.{State, ForwardingManager, Cursor, UuidGenerator, Direction} 25 | import com.twitter.flockdb.conversions.Numeric._ 26 | import com.twitter.flockdb.shards.Shard 27 | import com.twitter.flockdb.shards.LockingNodeSet._ 28 | 29 | 30 | // TODO: Make this async. 
31 | class SingleJobParser( 32 | forwardingManager: ForwardingManager, 33 | uuidGenerator: UuidGenerator) 34 | extends JsonJobParser { 35 | 36 | def log = Logger.get 37 | 38 | def apply(attributes: Map[String, Any]): JsonJob = { 39 | val writeSuccesses = try { 40 | attributes.get("write_successes") map { 41 | _.asInstanceOf[Seq[Seq[String]]] map { case Seq(h, tp) => ShardId(h, tp) } 42 | } getOrElse Nil 43 | } catch { 44 | case e => { 45 | log.warning("Error parsing write successes. falling back to non-memoization", e) 46 | Nil 47 | } 48 | } 49 | 50 | val casted = attributes.asInstanceOf[Map[String, AnyVal]] 51 | 52 | new Single( 53 | casted("source_id").toLong, 54 | casted("graph_id").toInt, 55 | casted("destination_id").toLong, 56 | casted("position").toLong, 57 | State(casted("state").toInt), 58 | Time.fromSeconds(casted("updated_at").toInt), 59 | forwardingManager, 60 | uuidGenerator, 61 | writeSuccesses.toList 62 | ) 63 | } 64 | } 65 | 66 | class Single( 67 | sourceId: Long, 68 | graphId: Int, 69 | destinationId: Long, 70 | position: Long, 71 | preferredState: State, 72 | updatedAt: Time, 73 | forwardingManager: ForwardingManager, 74 | uuidGenerator: UuidGenerator, 75 | var successes: List[ShardId] = Nil) 76 | extends JsonJob { 77 | 78 | def toMap = { 79 | val base = Map( 80 | "source_id" -> sourceId, 81 | "graph_id" -> graphId, 82 | "destination_id" -> destinationId, 83 | "position" -> position, 84 | "state" -> preferredState.id, 85 | "updated_at" -> updatedAt.inSeconds 86 | ) 87 | 88 | if (successes.isEmpty) { 89 | base 90 | } else { 91 | base + ("write_successes" -> (successes map { case ShardId(h, tp) => Seq(h, tp) })) 92 | } 93 | } 94 | 95 | def apply() = { 96 | val forward = forwardingManager.findNode(sourceId, graphId, Direction.Forward).write 97 | val backward = forwardingManager.findNode(destinationId, graphId, Direction.Backward).write 98 | val uuid = uuidGenerator(position) 99 | 100 | var currSuccesses: List[ShardId] = Nil 101 | var currErrs: 
List[Throwable] = Nil 102 | 103 | forward.optimistically(sourceId) { left => 104 | backward.optimistically(destinationId) { right => 105 | val state = left max right max preferredState 106 | val forwardResults = writeToShard(forward, sourceId, destinationId, uuid, state) 107 | val backwardResults = writeToShard(backward, destinationId, sourceId, uuid, state) 108 | 109 | List(forwardResults, backwardResults) foreach { 110 | _ foreach { 111 | case Return(id) => currSuccesses = id :: currSuccesses 112 | case Throw(e) => currErrs = e :: currErrs 113 | } 114 | } 115 | } 116 | } 117 | 118 | // add successful writes here, since we are only successful if an optimistic lock exception is not raised. 119 | successes = successes ++ currSuccesses 120 | 121 | currErrs.headOption foreach { e => throw e } 122 | } 123 | 124 | def writeToShard(shards: NodeSet[Shard], sourceId: Long, destinationId: Long, uuid: Long, state: State) = { 125 | shards.skip(successes) all { (shardId, shard) => 126 | state match { 127 | case State.Normal => shard.add(sourceId, destinationId, uuid, updatedAt)() 128 | case State.Removed => shard.remove(sourceId, destinationId, uuid, updatedAt)() 129 | case State.Archived => shard.archive(sourceId, destinationId, uuid, updatedAt)() 130 | case State.Negative => shard.negate(sourceId, destinationId, uuid, updatedAt)() 131 | } 132 | 133 | shardId 134 | } 135 | } 136 | 137 | override def equals(o: Any) = o match { 138 | case o: Single => this.toMap == o.toMap 139 | case _ => false 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/shards/ReadWriteShardAdapter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package shards 19 | 20 | import com.twitter.gizzard.shards.RoutingNode 21 | import com.twitter.util.Future 22 | import com.twitter.util.Time 23 | 24 | class ReadWriteShardAdapter(shard: RoutingNode[Shard]) extends Shard { 25 | def selectIncludingArchived(sourceId: Long, count: Int, cursor: Cursor) = shard.read.futureAny { _.selectIncludingArchived(sourceId, count, cursor) } 26 | def intersect(sourceId: Long, states: Seq[State], destinationIds: Seq[Long]) = shard.read.futureAny { _.intersect(sourceId, states, destinationIds) } 27 | def intersectEdges(sourceId: Long, states: Seq[State], destinationIds: Seq[Long]) = shard.read.futureAny { _.intersectEdges(sourceId, states, destinationIds) } 28 | def getMetadata(sourceId: Long) = shard.read.futureAny { _.getMetadata(sourceId) } 29 | def getMetadataForWrite(sourceId: Long) = shard.read.futureAny { _.getMetadataForWrite(sourceId) } 30 | def selectByDestinationId(sourceId: Long, states: Seq[State], count: Int, cursor: Cursor) = shard.read.futureAny { _.selectByDestinationId(sourceId, states, count, cursor) } 31 | def selectByPosition(sourceId: Long, states: Seq[State], count: Int, cursor: Cursor) = shard.read.futureAny { _.selectByPosition(sourceId, states, count, cursor) } 32 | def selectEdges(sourceId: Long, states: Seq[State], count: Int, cursor: Cursor) = shard.read.futureAny { _.selectEdges(sourceId, states, count, cursor) } 33 | def selectAll(cursor: (Cursor, Cursor), count: Int) = shard.read.futureAny { _.selectAll(cursor, 
count) } 34 | def selectAllMetadata(cursor: Cursor, count: Int) = shard.read.futureAny { _.selectAllMetadata(cursor, count) } 35 | def get(sourceId: Long, destinationId: Long) = shard.read.futureAny { _.get(sourceId, destinationId) } 36 | def count(sourceId: Long, states: Seq[State]) = shard.read.futureAny { _.count(sourceId, states) } 37 | 38 | def bulkUnsafeInsertEdges(edges: Seq[Edge]) = Future.join(shard.write.fmap { _.bulkUnsafeInsertEdges(edges) }) 39 | def bulkUnsafeInsertMetadata(metadata: Seq[Metadata]) = Future.join(shard.write.fmap { _.bulkUnsafeInsertMetadata(metadata) }) 40 | 41 | def writeCopies(edges: Seq[Edge]) = Future.join(shard.write.fmap { _.writeCopies(edges) }) 42 | def writeMetadata(metadata: Metadata) = Future.join(shard.write.fmap { _.writeMetadata(metadata) }) 43 | def writeMetadatas(metadata: Seq[Metadata]) = Future.join(shard.write.fmap { _.writeMetadatas(metadata) }) 44 | def updateMetadata(metadata: Metadata) = Future.join(shard.write.fmap { _.updateMetadata(metadata) }) 45 | def remove(sourceId: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.remove(sourceId, updatedAt) }) 46 | def remove(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.remove(sourceId, destinationId, position, updatedAt) }) 47 | def add(sourceId: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.add(sourceId, updatedAt) }) 48 | def add(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.add(sourceId, destinationId, position, updatedAt) }) 49 | def negate(sourceId: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.negate(sourceId, updatedAt) }) 50 | def negate(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.negate(sourceId, destinationId, position, updatedAt) }) 51 | def archive(sourceId: Long, destinationId: Long, position: Long, updatedAt: Time) = Future.join(shard.write.fmap { 
_.archive(sourceId, destinationId, position, updatedAt) }) 52 | def archive(sourceId: Long, updatedAt: Time) = Future.join(shard.write.fmap { _.archive(sourceId, updatedAt) }) 53 | } 54 |
-------------------------------------------------------------------------------- /src/main/thrift/Flockdb.thrift: --------------------------------------------------------------------------------
/*
 * Copyright 2010 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may obtain
 * a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

namespace java com.twitter.flockdb.thrift
namespace rb Flock.Edges

exception FlockException {
  1: string description
}

struct Results {
  # byte-packed list of i64, little-endian:
  1: binary ids
  2: i64 next_cursor
  3: i64 prev_cursor
}

# Set Cursor = -1 when requesting the first Page. Cursor = 0 indicates the end of the result set.
struct Page {
  1: i32 count
  2: i64 cursor
}

struct Metadata {
  1: i64 source_id
  2: i32 state_id
  3: i32 count
  4: i32 updated_at
}

struct Edge {
  1: i64 source_id
  2: i64 destination_id
  3: i64 position
  4: i32 updated_at
  5: i32 count
  6: i32 state_id
}

enum SelectOperationType {
  SimpleQuery = 1
  Intersection = 2
  Union = 3
  Difference = 4
}

# Add and Negate set an edge positive or negative, which are both "normal" states. You can use them
# to track 2 different "colors" of edge. Often, negative means private.
# Archive will change positive/negative edges to archived.
# Remove will change any edge to removed.
enum ExecuteOperationType {
  Add = 1
  Remove = 2
  Archive = 3
  Negate = 4
}

enum EdgeState {
  Positive = 0
  Negative = 3
  Removed = 1
  Archived = 2
}

# Basic FlockDB query term.
# Terms can query a specific edge in either direction, or a one-to-many edge in either direction.
# Wildcard queries can be specified by leaving `destination_ids` empty.
# Only edges matching the set of given `state_ids` are included.
#
# Examples:
#   (30, 2, true, [40], [Positive])
#     -- in graph 2, is there an edge from 30 -> 40?
#   (30, 1, false, [40, 50, 60], [Positive])
#     -- in graph 1, which of (40 -> 30, 50 -> 30, 60 -> 30) exist?
#   (30, 3, false, [], [Removed, Archived])
#     -- in graph 3, which edges point to -> 30, and are either removed or archived?
struct QueryTerm {
  1: i64 source_id
  2: i32 graph_id
  3: bool is_forward
  # byte-packed list of i64, little-endian. if not present, it means "all":
  4: optional binary destination_ids
  # NOTE(review): extraction stripped the generic parameters from every container in this
  # file; element types below are restored from usage (state ids are i32 enum ids).
  5: optional list<i32> state_ids
}

struct SelectOperation {
  1: SelectOperationType operation_type
  2: optional QueryTerm term
}

enum Priority {
  Low = 1
  Medium = 2
  High = 3
}

struct ExecuteOperation {
  1: ExecuteOperationType operation_type
  2: QueryTerm term
  3: optional i64 position
}

struct ExecuteOperations {
  1: list<ExecuteOperation> operations
  2: optional i32 execute_at
  3: Priority priority
}

struct SelectQuery {
  1: list<SelectOperation> operations
  2: Page page
}

struct EdgeQuery {
  1: QueryTerm term
  2: Page page
}

struct EdgeResults {
  1: list<Edge> edges
  2: i64 next_cursor
  3: i64 prev_cursor
}

service FlockDB {
  # return true if the edge exists.
  bool contains(1: i64 source_id, 2: i32 graph_id, 3: i64 destination_id) throws(1: FlockException ex)

  # return all data about an edge if it exists (otherwise, throw an exception).
  Edge get(1: i64 source_id, 2: i32 graph_id, 3: i64 destination_id) throws(1: FlockException ex)

  # return all data about a node if it exists (otherwise, throw an exception).
  Metadata get_metadata(1: i64 source_id, 2: i32 graph_id) throws(1: FlockException ex)

  # return true if the node exists.
  bool contains_metadata(1: i64 source_id, 2: i32 graph_id) throws(1: FlockException ex)

  # perform a list of queries in parallel. each query may be paged, and may be compound.
  list<Results> select2(1: list<SelectQuery> queries) throws(1: FlockException ex)

  # perform a list of queries in parallel, and return the counts of results.
  # if the queries are compound, the counts will be estimates.
  binary count2(1: list<list<SelectOperation>> queries) throws(1: FlockException ex)

  # perform a list of simple queries and return the results as full Edge objects.
  # compound queries are not supported.
  list<EdgeResults> select_edges(1: list<EdgeQuery> queries) throws(1: FlockException ex)

  void execute(1: ExecuteOperations operations) throws(1: FlockException ex)

  # deprecated:
  i32 count(1: list<SelectOperation> operations) throws(1: FlockException ex)
  Results select(1: list<SelectOperation> operations, 2: Page page) throws(1: FlockException ex)
}
-------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/integration/BlackHoleLockingRegressionSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package integration 19 | 20 | import com.twitter.gizzard.scheduler.{JsonJob, PrioritizingJobScheduler} 21 | import com.twitter.gizzard.shards.{ShardInfo, ShardId, Busy} 22 | import com.twitter.gizzard.nameserver.Forwarding 23 | import com.twitter.util.Time 24 | import com.twitter.conversions.time._ 25 | import org.specs.mock.{ClassMocker, JMocker} 26 | import com.twitter.flockdb 27 | import com.twitter.flockdb.config.{FlockDB => FlockDBConfig} 28 | import shards.{Shard, SqlShard} 29 | 30 | 31 | class BlackHoleLockingRegressionSpec extends IntegrationSpecification { 32 | override def reset(config: FlockDBConfig, name: String) { 33 | materialize(config) 34 | flock.nameServer.reload() 35 | 36 | val queryEvaluator = config.edgesQueryEvaluator()(config.databaseConnection) 37 | 38 | for (graph <- (1 until 10)) { 39 | Seq("forward", "backward").foreach { direction => 40 | val tableId = if (direction == "forward") graph else graph * -1 41 | if (direction == "forward") { 42 | val shardId1 = ShardId("localhost", direction + "_" + graph + "_a") 43 | val shardId2 = ShardId("localhost", direction + "_" + graph + "_b") 44 | val replicatingShardId = ShardId("localhost", "replicating_" + direction + "_" + graph) 45 | 46 | flock.shardManager.createAndMaterializeShard(ShardInfo(shardId1, 47 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal)) 48 | flock.shardManager.createAndMaterializeShard(ShardInfo(shardId2, 49 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal)) 50 | flock.shardManager.createAndMaterializeShard(ShardInfo(replicatingShardId, 51 | "ReplicatingShard", "", "", Busy.Normal)) 52 | flock.shardManager.addLink(replicatingShardId, shardId1, 1) 53 | flock.shardManager.addLink(replicatingShardId, shardId2, 1) 54 | flock.shardManager.setForwarding(Forwarding(tableId, 0, replicatingShardId)) 55 | 56 | queryEvaluator.execute("DELETE FROM " + direction + "_" + graph + 
"_a_edges")() 57 | queryEvaluator.execute("DELETE FROM " + direction + "_" + graph + "_a_metadata")() 58 | queryEvaluator.execute("DELETE FROM " + direction + "_" + graph + "_b_edges")() 59 | queryEvaluator.execute("DELETE FROM " + direction + "_" + graph + "_b_metadata")() 60 | } else { 61 | val shardId1 = ShardId("localhost", direction + "_" + graph + "_replicating") 62 | val shardId2 = ShardId("localhost", direction + "_" + graph + "_a") 63 | val shardId3 = ShardId("localhost", direction + "_" + graph + "_b") 64 | flock.shardManager.createAndMaterializeShard(ShardInfo(shardId1, "ReplicatingShard", "", "", Busy.Normal)) 65 | flock.shardManager.createAndMaterializeShard(ShardInfo(shardId2, name, "", "", Busy.Normal)) 66 | flock.shardManager.createAndMaterializeShard(ShardInfo(shardId3, 67 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal)) 68 | 69 | flock.shardManager.addLink(shardId1, shardId2, 1) 70 | flock.shardManager.addLink(shardId2, shardId3, 1) 71 | flock.shardManager.setForwarding(Forwarding(tableId, 0, shardId1)) 72 | } 73 | } 74 | } 75 | 76 | flock.nameServer.reload() 77 | } 78 | 79 | val alice = 1L 80 | val FOLLOWS = 1 81 | val pageSize = 100 82 | 83 | def alicesFollowings() = { 84 | val term = QueryTerm(alice, FOLLOWS, true, None, List(State.Normal)) 85 | val query = EdgeQuery(term, new Page(pageSize, Cursor.Start)) 86 | val resultsList = flockService.selectEdges(List(query))() 87 | resultsList.size mustEqual 1 88 | resultsList(0).toList 89 | } 90 | 91 | "select results" should { 92 | "black hole" in { 93 | reset(config, "com.twitter.gizzard.shards.BlackHoleShard") // I don't know why this isn't working in doBefore 94 | 95 | for(i <- 0 until 10) { 96 | execute(Select(alice, FOLLOWS, i).add) 97 | } 98 | 99 | alicesFollowings.size must eventually(be(10)) 100 | } 101 | } 102 | 103 | "select results" should { 104 | "read-only" in { 105 | reset(config, "com.twitter.gizzard.shards.ReadOnlyShard") // I don't know why this isn't 
working in doBefore 106 | 107 | for(i <- 0 until 10) { 108 | execute(Select(alice, FOLLOWS, i).add) 109 | } 110 | 111 | val scheduler = flock.jobScheduler(flockdb.Priority.High.id) 112 | val errors = scheduler.errorQueue 113 | errors.size must eventually(be(10)) 114 | } 115 | } 116 | 117 | "select results" should { 118 | "write-only" in { 119 | reset(config, "com.twitter.gizzard.shards.WriteOnlyShard") // I don't know why this isn't working in doBefore 120 | 121 | for(i <- 0 until 10) { 122 | execute(Select(alice, FOLLOWS, i).add) 123 | } 124 | 125 | val scheduler = flock.jobScheduler(flockdb.Priority.High.id) 126 | val errors = scheduler.errorQueue 127 | alicesFollowings.size must eventually(be(10)) 128 | } 129 | } 130 | 131 | } 132 | 133 |
-------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/shards/Optimism.scala: --------------------------------------------------------------------------------
package com.twitter.flockdb
package shards

import com.twitter.util.{Time, Try, Return, Throw}
import com.twitter.logging.Logger
import com.twitter.gizzard.shards.{ShardException, NodeSet}

// Thrown when the dominant node state observed after the write differs from the one
// observed before it (see OptimisticStateMonitor.optimistically below).
class OptimisticLockException(message: String) extends ShardException(message)

/**
 * This handles a new optimistic lock of one direction of an edge. In order to lock an
 * entire edge, you should nest two of these--one for each direction. This lock is *only*
 * responsible for maintaining color/state consistency.
 *
 * Inconsistencies between replicas are handled by consensus. The most recent and therefore
 * highest priority row wins. It helps to visualize each replication set as a unified state
 * of the union of all rows, taking the highest priority rows where conflicts happen. If two
 * rows are different, and we play an operation, that operation can't make things worse, because
 * it will move the timeline forwards.
 *
 * This lock does a read across every available replica. If any read fails, the entire
 * lock fails (i.e. reenqueues happen). However, the writes will still proceed first. We
 * want to propagate writes, as they're "better" than the old state, while still not
 * neccessarily being the final state.
 *
 * There would be two ways to make things worse:
 * 1. regress the timeline (by writing an old operation, which is impossible under the contract
 *    of the shards themselves)
 * 2. write a color that doesn't represent the current state of the consensus at some time at or
 *    after the row is written. The key case to worry about here is when the consensus looks like
 *    Seq(old_row, Offline(new_row)). In this case, the non-unanimous consensus would write the
 *    wrong color. We require that the consensus be unanimous for this reason.
 *
 * The current iteration of the optimism branch is (perhaps overly) conservative by maintaining
 * a lock over all replicas. We could choose to go a couple different ways from here:
 *
 * 1. Hold the course. This works.
 * 2. Only lock over (any) one server of each replication set (so two total--one forwards, one
 *    backwards). This would reduce the number of reads. If our data is (a) eventually-but-not-
 *    yet-consistent and not just corrupted, and (b) two copies of a node have different states,
 *    then the job to update the state of the node to the newer state is still active. If we write
 *    the wrong edge color based upon the old node, it is true that there is a multi job in some
 *    presently existing queue that will fix the edge.
 * 3. Do read repair. When we do the optimistic lock read over all replicas, we're already grabbing
 *    all of the information we need to repair the graphs. Instead of just throwing an exception,
 *    or ignoring the inconsistency, we could enqueue the appropriate multi job to perform the repair.
 *    My chief concern would be that we could issue a storm of multi jobs.
 *
 * Eventually, we will probably choose (2) or (3), depending on how we weigh the tradeoff between
 * repairing consistency and raw performance. However, we should stick to (1) for now, and evaluate
 * the other options soon.
 *
 * Also, in the short term, it's worth understanding (2), so that you can realize that adding
 * replicas doesn't screw things up.
 */
// TODO: Make this async.
trait OptimisticStateMonitor {

  // One Try per replica read: Throw on read failure, Return(None) when the replica has
  // no metadata row for sourceId.
  def getMetadatas(sourceId: Long): Seq[Try[Option[Metadata]]]

  // implementation

  private val log = Logger.get(getClass.getName)

  /**
   * Runs `f` with the dominant state observed before the write, then re-reads and throws
   * OptimisticLockException if the dominant state changed underneath us. Read failures are
   * rethrown — but only *after* `f` has run, so the write itself still propagates
   * (the caller is expected to reenqueue on the rethrown exception).
   */
  def optimistically(sourceId: Long)(f: State => Unit) = {
    try {
      log.debug("Optimistic Lock: starting optimistic lock for " + sourceId)

      val (beforeStateOpt, beforeEx) = getDominantState(sourceId)
      val beforeState = beforeStateOpt.getOrElse(State.Normal)

      // If *no* replica returned a state, we can't even pick a best-effort state; fail now.
      if (beforeStateOpt.isEmpty) beforeEx.foreach(throw _)

      f(beforeState)

      // We didn't do this immediately if we got a result from one shard, because we still want to propagate writes with best effort.
      // We should reenqueue if the optimistic lock only covers a subset of the intended targets.
      beforeEx.foreach(throw _)

      val (afterStateOpt, afterEx) = getDominantState(sourceId)
      val afterState = afterStateOpt.getOrElse(State.Normal)

      afterEx.foreach(throw _)

      if(beforeState != afterState) {
        val msg = "Optimistic Lock: lost optimistic lock for " + sourceId + ": was " + beforeState +", now " + afterState

        log.debug(msg)
        throw new OptimisticLockException(msg)
      }

      log.debug("Optimistic Lock: successful optimistic lock for " + sourceId)

    } catch {
      // Log-and-rethrow only; the lock never swallows failures.
      case e => {
        log.debug("Optimistic Lock: exception in optimistic lock for " + sourceId + ": " + e.getMessage)
        throw e
      }
    }
  }

  /**
   * Reads every replica and returns (dominant state if any replica had metadata,
   * first read failure if any). The dominant row is chosen via Metadata's `max`
   * (highest priority row wins, per the class comment above).
   */
  def getDominantState(sourceId: Long) = {
    // The default metadata
    var winning: Option[Metadata] = None
    var exceptions: List[Throwable] = Nil

    getMetadatas(sourceId).foreach {
      case Throw(ex) => exceptions = ex :: exceptions
      case Return(Some(metadata)) => winning = winning.map(_ max metadata).orElse(Some(metadata))
      case Return(None) => ()
    }

    (winning.map(_.state), exceptions.headOption)
  }
}

object LockingNodeSet {
  // Pimp a NodeSet[Shard] into an OptimisticStateMonitor.
  implicit def asLockingNodeSet(n: NodeSet[Shard]) = new LockingNodeSet(n)
}

// Monitor over a gizzard replica set: reads (and blocks on) metadata from every replica.
class LockingNodeSet(node: NodeSet[Shard]) extends OptimisticStateMonitor {
  def getMetadatas(id: Long) = node.all { _.getMetadataForWrite(id)() }
}
-------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/unit/LegacyJobParserSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License.
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb.unit 18 | 19 | import com.twitter.util.Time 20 | import com.twitter.gizzard.scheduler.JsonCodec 21 | import com.twitter.flockdb.ConfiguredSpecification 22 | import com.twitter.flockdb.{Direction, State, Priority} 23 | import com.twitter.flockdb.jobs.single.Single 24 | import com.twitter.flockdb.jobs.multi.Multi 25 | import com.twitter.flockdb.jobs._ 26 | 27 | 28 | class LegacyJobParserSpec extends ConfiguredSpecification { 29 | 30 | val updatedAt = Time.fromSeconds(1111) 31 | val codec = new JsonCodec(_ => ()) 32 | 33 | codec += ("com.twitter.flockdb.jobs.single.Add".r, LegacySingleJobParser.Add(null, null)) 34 | codec += ("com.twitter.flockdb.jobs.single.Remove".r, LegacySingleJobParser.Remove(null, null)) 35 | codec += ("com.twitter.flockdb.jobs.single.Negate".r, LegacySingleJobParser.Negate(null, null)) 36 | codec += ("com.twitter.flockdb.jobs.single.Archive".r, LegacySingleJobParser.Archive(null, null)) 37 | codec += ("com.twitter.flockdb.jobs.multi.Archive".r, LegacyMultiJobParser.Archive(null, null, 500)) 38 | codec += ("com.twitter.flockdb.jobs.multi.Unarchive".r, LegacyMultiJobParser.Unarchive(null, null, 500)) 39 | codec += ("com.twitter.flockdb.jobs.multi.RemoveAll".r, LegacyMultiJobParser.RemoveAll(null, null, 500)) 40 | codec += ("com.twitter.flockdb.jobs.multi.Negate".r, LegacyMultiJobParser.Negate(null, null, 500)) 41 | 42 | "LegacySingleJobParser" should { 43 | "correctly generate a new style job from an old serialized Add job" in { 44 | val map = Map( 45 | 
"com.twitter.flockdb.jobs.single.Add" -> Map( 46 | "source_id" -> 22, 47 | "graph_id" -> 1, 48 | "destination_id" -> 11, 49 | "position" -> 1111, 50 | "updated_at" -> 1111 51 | ) 52 | ) 53 | 54 | codec.inflate(map) mustEqual new Single(22, 1, 11, 1111, State.Normal, updatedAt, null, null) 55 | } 56 | 57 | "correctly generate a new style job from an old serialized Remove job" in { 58 | val map = Map( 59 | "com.twitter.flockdb.jobs.single.Remove" -> Map( 60 | "source_id" -> 22, 61 | "graph_id" -> 1, 62 | "destination_id" -> 11, 63 | "position" -> 1111, 64 | "updated_at" -> 1111 65 | ) 66 | ) 67 | 68 | codec.inflate(map) mustEqual new Single(22, 1, 11, 1111, State.Removed, updatedAt, null, null) 69 | } 70 | 71 | "correctly generate a new style job from an old serialized Negate job" in { 72 | val map = Map( 73 | "com.twitter.flockdb.jobs.single.Negate" -> Map( 74 | "source_id" -> 22, 75 | "graph_id" -> 1, 76 | "destination_id" -> 11, 77 | "position" -> 1111, 78 | "updated_at" -> 1111 79 | ) 80 | ) 81 | 82 | codec.inflate(map) mustEqual new Single(22, 1, 11, 1111, State.Negative, updatedAt, null, null) 83 | } 84 | 85 | "correctly generate a new style job from an old serialized Archive job" in { 86 | val map = Map( 87 | "com.twitter.flockdb.jobs.single.Archive" -> Map( 88 | "source_id" -> 22, 89 | "graph_id" -> 1, 90 | "destination_id" -> 11, 91 | "position" -> 1111, 92 | "updated_at" -> 1111 93 | ) 94 | ) 95 | 96 | codec.inflate(map) mustEqual new Single(22, 1, 11, 1111, State.Archived, updatedAt, null, null) 97 | } 98 | } 99 | 100 | "LegacyMultiJobParser" should { 101 | "correctly generate a new style job from an old serialized Archive job" in { 102 | val map = Map( 103 | "com.twitter.flockdb.jobs.multi.Archive" -> Map( 104 | "source_id" -> 22, 105 | "graph_id" -> 1, 106 | "direction" -> 0, 107 | "updated_at" -> 1111, 108 | "priority" -> 1 109 | ) 110 | ) 111 | 112 | val job = new Multi(22, 1, Direction.Forward, State.Archived, updatedAt, Priority.Low, 500, null, null) 
113 | 114 | codec.inflate(map) mustEqual job 115 | } 116 | 117 | "correctly generate a new style job from an old serialized Unarchive job" in { 118 | val map = Map( 119 | "com.twitter.flockdb.jobs.multi.Unarchive" -> Map( 120 | "source_id" -> 22, 121 | "graph_id" -> 1, 122 | "direction" -> 0, 123 | "updated_at" -> 1111, 124 | "priority" -> 1 125 | ) 126 | ) 127 | 128 | val job = new Multi(22, 1, Direction.Forward, State.Normal, updatedAt, Priority.Low, 500, null, null) 129 | 130 | codec.inflate(map) mustEqual job 131 | } 132 | 133 | "correctly generate a new style job from an old serialized RemoveAll job" in { 134 | val map = Map( 135 | "com.twitter.flockdb.jobs.multi.RemoveAll" -> Map( 136 | "source_id" -> 22, 137 | "graph_id" -> 1, 138 | "direction" -> 0, 139 | "updated_at" -> 1111, 140 | "priority" -> 1 141 | ) 142 | ) 143 | 144 | val job = new Multi(22, 1, Direction.Forward, State.Removed, updatedAt, Priority.Low, 500, null, null) 145 | 146 | codec.inflate(map) mustEqual job 147 | } 148 | 149 | "correctly generate a new style job from an old serialized Negate job" in { 150 | val map = Map( 151 | "com.twitter.flockdb.jobs.multi.Negate" -> Map( 152 | "source_id" -> 22, 153 | "graph_id" -> 1, 154 | "direction" -> 0, 155 | "updated_at" -> 1111, 156 | "priority" -> 1 157 | ) 158 | ) 159 | 160 | val job = new Multi(22, 1, Direction.Forward, State.Negative, updatedAt, Priority.Low, 500, null, null) 161 | 162 | codec.inflate(map) mustEqual job 163 | } 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/unit/JobSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package unit 19 | 20 | import scala.collection.mutable 21 | import com.twitter.gizzard.scheduler.{JsonJob, PrioritizingJobScheduler} 22 | import com.twitter.gizzard.shards._ 23 | import com.twitter.util.{Future, Time} 24 | import com.twitter.util.TimeConversions._ 25 | import org.specs.mock.{ClassMocker, JMocker} 26 | import com.twitter.flockdb 27 | import flockdb.Direction._ 28 | import flockdb.State._ 29 | import shards.{Shard, SqlShard, ReadWriteShardAdapter, OptimisticLockException} 30 | import jobs.single.Single 31 | import jobs.multi.Multi 32 | 33 | class JobSpec extends ConfiguredSpecification with JMocker with ClassMocker { 34 | val FOLLOWS = 1 35 | 36 | val bob = 1L 37 | val mary = 23L 38 | val carl = 42L 39 | val jane = 56L 40 | val darcy = 62L 41 | 42 | val uuidGenerator = IdentityUuidGenerator 43 | val forwardingManager = mock[ForwardingManager] 44 | val mocks = (0 to 3) map { _ => mock[Shard] } 45 | 46 | // allow the readwrite shard adapter to implement optimistically 47 | val shards = mocks map { m => LeafRoutingNode(m) } 48 | val scheduler = mock[PrioritizingJobScheduler] 49 | 50 | def test( 51 | desc: String, 52 | jobState: State, 53 | bobBefore: State, 54 | maryBefore: State, 55 | bobAfter: State, 56 | maryAfter: State, 57 | applied: State, 58 | f: jobs.single.Single => Unit 59 | ) = { 60 | desc in { 61 | Time.withCurrentTimeFrozen { time => 62 | val job = new Single(bob, FOLLOWS, mary, 1, jobState, Time.now, forwardingManager, uuidGenerator) 63 | 64 | expect 
{ 65 | allowing(forwardingManager).findNode(bob, FOLLOWS, Forward) willReturn shards(0) 66 | allowing(forwardingManager).findNode(mary, FOLLOWS, Backward) willReturn shards(1) 67 | 68 | // Before 69 | one(mocks(0)).getMetadataForWrite(bob) willReturn Future(Some(new Metadata(bob, bobBefore, 1, Time.now - 1.second))) 70 | one(mocks(1)).getMetadataForWrite(mary) willReturn Future(Some(new Metadata(mary, maryBefore, 1, Time.now - 1.second))) 71 | 72 | // After 73 | allowing(mocks(0)).getMetadataForWrite(bob) willReturn Future(Some(new Metadata(mary, bobAfter, 1, Time.now))) 74 | allowing(mocks(1)).getMetadataForWrite(mary) willReturn Future(Some(new Metadata(mary, maryAfter, 1, Time.now))) 75 | 76 | // Results 77 | applied match { 78 | case Normal => { 79 | one(mocks(0)).add(bob, mary, 1, Time.now)() 80 | one(mocks(1)).add(mary, bob, 1, Time.now)() 81 | } 82 | case Archived => { 83 | one(mocks(0)).archive(bob, mary, 1, Time.now)() 84 | one(mocks(1)).archive(mary, bob, 1, Time.now)() 85 | } 86 | case Removed => { 87 | one(mocks(0)).remove(bob, mary, 1, Time.now)() 88 | one(mocks(1)).remove(mary, bob, 1, Time.now)() 89 | } 90 | } 91 | } 92 | 93 | f(job) 94 | } 95 | } 96 | } 97 | 98 | "Single" should { 99 | "toJson" in { 100 | Time.withCurrentTimeFrozen { time => 101 | val job = new Single(bob, FOLLOWS, mary, 1, State.Normal, Time.now, forwardingManager, uuidGenerator) 102 | val json = job.toJson 103 | json mustMatch "Single" 104 | json mustMatch "\"source_id\":" + bob 105 | json mustMatch "\"graph_id\":" + FOLLOWS 106 | json mustMatch "\"destination_id\":" + mary 107 | json mustMatch "\"state\":" 108 | json mustMatch "\"updated_at\":" + Time.now.inSeconds 109 | } 110 | } 111 | 112 | "toJson with successes" in { 113 | Time.withCurrentTimeFrozen { time => 114 | val job = new Single(bob, FOLLOWS, mary, 1, State.Normal, Time.now, forwardingManager, uuidGenerator, List(ShardId("host", "prefix"))) 115 | val json = job.toJson 116 | json mustMatch "Single" 117 | json mustMatch 
"\"source_id\":" + bob 118 | json mustMatch "\"graph_id\":" + FOLLOWS 119 | json mustMatch "\"destination_id\":" + mary 120 | json mustMatch "\"updated_at\":" + Time.now.inSeconds 121 | json must include("\"write_successes\":[[\"host\",\"prefix\"]]") 122 | } 123 | } 124 | } 125 | 126 | "Add" should { 127 | // Input Before After Resulting 128 | // Job Bob Mary Bob Mary Job 129 | test("normal add", Normal, Normal, Normal, Normal, Normal, Normal, _.apply) 130 | test("lost lock add", Normal, Normal, Normal, Normal, Archived, Normal, _.apply must throwA[OptimisticLockException]) 131 | test("when bob archived", Normal, Archived, Normal, Archived, Normal, Archived, _.apply) 132 | test("when mary archived", Normal, Normal, Archived, Normal, Archived, Archived, _.apply) 133 | } 134 | 135 | "Remove" should { 136 | // Input Before After Resulting 137 | // Job Bob Mary Bob Mary Job 138 | test("normal remove", Removed, Normal, Normal, Normal, Normal, Removed, _.apply) 139 | } 140 | 141 | "Archive" should { 142 | // Input Before After Resulting 143 | // Job Bob Mary Bob Mary Job 144 | test("normal archive", Archived, Normal, Normal, Normal, Normal, Archived, _.apply) 145 | test("archive removed", Archived, Normal, Removed, Normal, Removed, Removed, _.apply) 146 | test("archive removed", Archived, Removed, Normal, Removed, Normal, Removed, _.apply) 147 | } 148 | 149 | 150 | "Multi" should { 151 | "toJson" in { 152 | Time.withCurrentTimeFrozen { time => 153 | val job = new Multi(bob, FOLLOWS, Direction.Forward, State.Normal, Time.now, Priority.Low, 500, null, null) 154 | val json = job.toJson 155 | json mustMatch "Multi" 156 | json mustMatch "\"source_id\":" + bob 157 | json mustMatch "\"updated_at\":" + Time.now.inSeconds 158 | json mustMatch "\"graph_id\":" + FOLLOWS 159 | json mustMatch "\"direction\":" + Direction.Forward.id 160 | json mustMatch "\"priority\":" + Priority.Low.id 161 | json mustMatch "\"state\":" + State.Normal.id 162 | json mustMatch "\"cursor\":" + 
Cursor.Start.position 163 | } 164 | } 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/integration/CopySpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | package integration 19 | 20 | import scala.collection.JavaConversions._ 21 | import scala.collection.mutable 22 | import com.twitter.gizzard.thrift.conversions.ShardInfo._ 23 | import com.twitter.gizzard.scheduler.{JsonJob, PrioritizingJobScheduler} 24 | import com.twitter.gizzard.thrift.conversions.Sequences._ 25 | import com.twitter.gizzard.shards.{ShardInfo, ShardId, Busy, RoutingNode} 26 | import com.twitter.gizzard.nameserver.Forwarding 27 | import com.twitter.util.Time 28 | import com.twitter.util.TimeConversions._ 29 | import org.specs.util.{Duration => SpecsDuration} 30 | import org.specs.mock.{ClassMocker, JMocker} 31 | import com.twitter.flockdb 32 | import com.twitter.flockdb.{Edge, Metadata} 33 | import com.twitter.flockdb.config.{FlockDB => FlockDBConfig} 34 | import shards.{Shard, SqlShard} 35 | import thrift._ 36 | 37 | class CopySpec extends IntegrationSpecification { 38 | "Copy" should { 39 | val sourceShardId = ShardId("localhost", "copy_test1") 40 | val destinationShardId = 
ShardId("localhost", "copy_test2") 41 | val shard3Id = ShardId("localhost", "copy_test3") 42 | val sourceShardInfo = new ShardInfo(sourceShardId, 43 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal) 44 | val destinationShardInfo = new ShardInfo(destinationShardId, 45 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal) 46 | val shard3Info = new ShardInfo(shard3Id, 47 | "com.twitter.flockdb.SqlShard", "INT UNSIGNED", "INT UNSIGNED", Busy.Normal) 48 | val time = Time.now 49 | 50 | doBefore { 51 | val queryEvaluator = config.edgesQueryEvaluator()(config.databaseConnection) 52 | 53 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test1_edges")() 54 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test1_metadata")() 55 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test2_edges")() 56 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test2_metadata")() 57 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test3_edges")() 58 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test3_metadata")() 59 | flock.nameServer.reload() 60 | flock.shardManager.createAndMaterializeShard(sourceShardInfo) 61 | flock.shardManager.createAndMaterializeShard(destinationShardInfo) 62 | flock.shardManager.createAndMaterializeShard(shard3Info) 63 | flock.shardManager.setForwarding(new Forwarding(0, Long.MinValue, sourceShardInfo.id)) 64 | 65 | } 66 | 67 | doAfter { 68 | val queryEvaluator = config.edgesQueryEvaluator()(config.databaseConnection) 69 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test1_edges")() 70 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test1_metadata")() 71 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test2_edges")() 72 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test2_metadata")() 73 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test3_edges")() 74 | queryEvaluator.execute("DROP TABLE IF EXISTS copy_test3_metadata")() 75 | } 76 | 77 | 78 | def writeEdges(shard: 
RoutingNode[Shard], num: Int, start: Int, step: Int, outdated: Boolean, state: State = State.Normal) { 79 | val edges = for (id <- start to num by step) yield { 80 | Edge(1L, id.toLong, id.toLong, (if (outdated) time-1.seconds else time), 0, state) 81 | } 82 | 83 | shard.write.foreach { _.writeCopies(edges)() } 84 | } 85 | 86 | def getEdges(shard: RoutingNode[Shard], num: Int) { 87 | shard.read.any { _.count(1L, Seq(State.Normal))() } mustEqual num 88 | } 89 | 90 | def validateEdges(shards: Seq[RoutingNode[Shard]], num: Int) { 91 | playScheduledJobs() 92 | 93 | val shardsEdges = shards map { _.read.any { _.selectAll((Cursor.Start, Cursor.Start), 2*num)()._1 } } 94 | shardsEdges.foreach { _.length mustEqual num } 95 | 96 | for (idx <- 0 until num) { 97 | val head :: others = shardsEdges 98 | 99 | others foreach { edges => 100 | head zip edges foreach { case (a, b) => 101 | a mustEqual b 102 | b.updatedAt.inSeconds mustEqual time.inSeconds 103 | } 104 | } 105 | } 106 | } 107 | 108 | "do nothing on equivalent shards" in { 109 | val numData = 100 110 | val shard1 = flock.nameServer.findShardById[Shard](sourceShardId) 111 | val shard2 = flock.nameServer.findShardById[Shard](destinationShardId) 112 | writeEdges(shard1, numData, 1, 1, false) 113 | writeEdges(shard2, numData, 1, 1, false) 114 | 115 | flock.managerServer.copy_shard(Seq(sourceShardInfo.toThrift.id, destinationShardInfo.toThrift.id)) 116 | 117 | validateEdges(Seq(shard1, shard2), numData) 118 | } 119 | 120 | "copy" in { 121 | val numData = 100 122 | val shard1 = flock.nameServer.findShardById[Shard](sourceShardId) 123 | val shard2 = flock.nameServer.findShardById[Shard](destinationShardId) 124 | writeEdges(shard1, numData, 1, 1, false) 125 | 126 | flock.managerServer.copy_shard(Seq(sourceShardInfo.toThrift.id, destinationShardInfo.toThrift.id)) 127 | 128 | validateEdges(Seq(shard1, shard2), numData) 129 | } 130 | 131 | "repair by merging" in { 132 | val numData = 100 133 | val shard1 = 
flock.nameServer.findShardById[Shard](sourceShardId) 134 | val shard2 = flock.nameServer.findShardById[Shard](destinationShardId) 135 | writeEdges(shard1, numData, 1, 2, false) 136 | writeEdges(shard2, numData, 2, 2, false) 137 | 138 | flock.managerServer.copy_shard(Seq(sourceShardInfo.toThrift.id, destinationShardInfo.toThrift.id)) 139 | 140 | validateEdges(Seq(shard1, shard2), numData) 141 | } 142 | 143 | "repair and fill out of date" in { 144 | val numData = 100 145 | 146 | val shard1 = flock.nameServer.findShardById[Shard](sourceShardId) 147 | val shard2 = flock.nameServer.findShardById[Shard](destinationShardId) 148 | val shard3 = flock.nameServer.findShardById[Shard](shard3Id) 149 | 150 | writeEdges(shard1, numData, 1, 2, false) 151 | writeEdges(shard2, numData/2, 2, 2, false) 152 | writeEdges(shard2, numData/2, 1, 2, true) 153 | writeEdges(shard2, numData, numData/2, 1, false) 154 | writeEdges(shard3, numData, 1, 3, true, State.Archived) 155 | 156 | shard1.write.foreach { _.writeMetadata(Metadata(1L, State.Normal, time))() } 157 | shard2.write.foreach { _.writeMetadata(Metadata(1L, State.Normal, time))() } 158 | shard3.write.foreach { _.writeMetadata(Metadata(1L, State.Archived, (time - 1.seconds)) )() } 159 | 160 | flock.managerServer.copy_shard(Seq(sourceShardInfo.toThrift.id, destinationShardInfo.toThrift.id, shard3Info.toThrift.id)) 161 | validateEdges(Seq(shard1, shard2, shard3), numData) 162 | } 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/EdgesService.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. 
You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.twitter.flockdb 18 | 19 | import com.twitter.logging.Logger 20 | import com.twitter.gizzard.Stats 21 | import com.twitter.gizzard.nameserver.{NameServer, NonExistentShard, InvalidShard} 22 | import com.twitter.gizzard.scheduler.{CopyJobFactory, JsonJob, PrioritizingJobScheduler} 23 | import com.twitter.gizzard.shards._ 24 | import com.twitter.flockdb.operations.{ExecuteOperations, SelectOperation} 25 | import com.twitter.flockdb.queries._ 26 | import com.twitter.flockdb.thrift.FlockException 27 | import com.twitter.util.Future 28 | 29 | class EdgesService( 30 | forwardingManager: ForwardingManager, 31 | schedule: PrioritizingJobScheduler, 32 | intersectionQueryConfig: config.IntersectionQuery, 33 | aggregateJobsPageSize: Int) { 34 | 35 | // TODO: Make serverName configurable. 
36 | private val serverName = "edges" 37 | private val log = Logger.get(getClass.getName) 38 | private val exceptionLog = Logger.get("exception") 39 | private val selectCompiler = new SelectCompiler(forwardingManager, intersectionQueryConfig) 40 | private val executeCompiler = new ExecuteCompiler(schedule, forwardingManager, aggregateJobsPageSize) 41 | 42 | def containsMetadata(sourceId: Long, graphId: Int): Future[Boolean] = { 43 | wrapRPC("contains_metadata") { 44 | val name = "contains-metadata" 45 | Stats.transaction.name = name 46 | Stats.incr(name + "-graph_" + graphId + "-count") 47 | forwardingManager.find(sourceId, graphId, Direction.Forward).getMetadata(sourceId) map { _.isDefined } 48 | } 49 | } 50 | 51 | def contains(sourceId: Long, graphId: Int, destinationId: Long): Future[Boolean] = { 52 | wrapRPC("contains") { 53 | val name = "contains" 54 | Stats.transaction.name = name 55 | Stats.incr(name + "-graph_" + graphId + "-count") 56 | forwardingManager.find(sourceId, graphId, Direction.Forward).get(sourceId, destinationId) map { 57 | _ map { edge => edge.state == State.Normal || edge.state == State.Negative } getOrElse false 58 | } 59 | } 60 | } 61 | 62 | def get(sourceId: Long, graphId: Int, destinationId: Long): Future[Edge] = { 63 | wrapRPC("get") { 64 | val name = "get" 65 | Stats.transaction.name = name 66 | Stats.incr(name + "-graph_" + graphId + "-count") 67 | forwardingManager.find(sourceId, graphId, Direction.Forward).get(sourceId, destinationId) flatMap { 68 | case Some(edge) => Future(edge) 69 | case _ => Future.exception(new FlockException("Record not found: (%d, %d, %d)".format(sourceId, graphId, destinationId))) 70 | } 71 | } 72 | } 73 | 74 | def getMetadata(sourceId: Long, graphId: Int): Future[Metadata] = { 75 | wrapRPC("get_metadata") { 76 | val name = "get-metadata" 77 | Stats.transaction.name = name 78 | Stats.incr(name + "-graph_" + graphId + "-count") 79 | forwardingManager.find(sourceId, graphId, 
Direction.Forward).getMetadata(sourceId) flatMap { 80 | case Some(metadata) => Future(metadata) 81 | case _ => Future.exception(new FlockException("Record not found: (%d, %d)".format(sourceId, graphId))) 82 | } 83 | } 84 | } 85 | 86 | def select(query: SelectQuery): Future[ResultWindow[Long]] = select(List(query)) map { _.head } 87 | 88 | def select(queries: Seq[SelectQuery]): Future[Seq[ResultWindow[Long]]] = { 89 | wrapRPC("select") { 90 | Future.collect(queries map { query => 91 | val queryTree = selectCompiler(query.operations) 92 | queryTree.select(query.page) onSuccess { _ => 93 | Stats.transaction.record(queryTree.toString) 94 | } rescue { 95 | case e: ShardBlackHoleException => 96 | Future.exception(new FlockException("Shard is blackholed: " + e)) 97 | } 98 | }) 99 | } 100 | } 101 | 102 | def selectEdges(queries: Seq[EdgeQuery]): Future[Seq[ResultWindow[Edge]]] = { 103 | wrapRPC("select_edges") { 104 | Future.collect(queries map { query => 105 | val term = query.term 106 | Stats.incr("select-edge-graph_" + (if (term.isForward) "" else "n") + term.graphId + "-count") 107 | val shard = forwardingManager.find(term.sourceId, term.graphId, Direction(term.isForward)) 108 | val states = if (term.states.isEmpty) List(State.Normal) else term.states 109 | 110 | if (term.destinationIds.isDefined) { 111 | shard.intersectEdges(term.sourceId, states, term.destinationIds.get) map { results => 112 | new ResultWindow(results.map { edge => (edge, Cursor(edge.destinationId)) }, query.page.count, query.page.cursor) 113 | } 114 | } else { 115 | shard.selectEdges(term.sourceId, states, query.page.count, query.page.cursor) 116 | } 117 | }) 118 | } 119 | } 120 | 121 | def execute(operations: ExecuteOperations): Future[Unit] = { 122 | wrapRPC("execute") { 123 | Stats.transaction.name = "execute" 124 | // TODO: This results in a kestrel enqueue, which can block on disk I/O. Consider moving this work 125 | // to a separate threadpool. 
126 | executeCompiler(operations) 127 | Future.Unit 128 | } 129 | } 130 | 131 | def count(queries: Seq[Seq[SelectOperation]]): Future[Seq[Int]] = { 132 | wrapRPC("count") { 133 | Future.collect(queries map { query => 134 | val queryTree = selectCompiler(query) 135 | queryTree.sizeEstimate onSuccess { _ => 136 | Stats.transaction.record(queryTree.toString) 137 | } 138 | }) 139 | } 140 | } 141 | 142 | private[this] def logAndWrapException(rpcName: String, e: Throwable) = { 143 | val endpoint = serverName +"/"+ rpcName 144 | e match { 145 | case e: NonExistentShard => 146 | Stats.incr("nonexistent_shard_error_count") 147 | log.error(e, "Nonexistent shard: %s", e) 148 | case e: InvalidShard => 149 | Stats.incr("invalid_shard_error_count") 150 | log.error(e, "Invalid shard: %s", e) 151 | case e: FlockException => 152 | Stats.incr("normal_error_count_" + endpoint) 153 | case e: ShardDatabaseTimeoutException => 154 | Stats.incr("timeout_count_" + endpoint) 155 | case e: ShardTimeoutException => 156 | Stats.incr("timeout_count_" + endpoint) 157 | case e: ShardOfflineException => 158 | Stats.incr("offline_count_" + endpoint) 159 | case _ => 160 | Stats.incr("internal_error_count_" + endpoint) 161 | exceptionLog.error(e, "Unhandled error in EdgesService", e) 162 | log.error("Unhandled error in EdgesService: " + e.toString) 163 | } 164 | 165 | e match { 166 | case e: FlockException => Future.exception(e) 167 | case _ => Future.exception(new FlockException("%s: %s".format(e.getClass.getName, e.getMessage))) 168 | } 169 | } 170 | 171 | private[this] def timeFuture[T](label: String)(f: => Future[T]) = { 172 | Stats.timeFutureMillis(serverName +"/"+ label)(f) 173 | } 174 | 175 | private[this] def wrapRPC[T](rpcName: String)(f: => Future[T]) = timeFuture(rpcName) { 176 | val rv = try { 177 | f 178 | } catch { 179 | case e => Future.exception(e) 180 | } 181 | 182 | rv respond { _ => 183 | Stats.incr(serverName +"/"+ rpcName +"_count") 184 | } onSuccess { _ => 185 | 
Stats.incr(serverName +"/"+ rpcName +"_success_count") 186 | } rescue { 187 | case e => logAndWrapException(rpcName, e) 188 | } 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /src/test/scala/com/twitter/flockdb/unit/SelectCompilerSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package unit 19 | 20 | import scala.collection.mutable 21 | import com.twitter.util.{Future, Time} 22 | import com.twitter.util.TimeConversions._ 23 | import org.specs.mock.{ClassMocker, JMocker} 24 | import com.twitter.flockdb 25 | import com.twitter.flockdb.{Page => FlockPage} 26 | import queries.{SelectCompiler, InvalidQueryException} 27 | import operations.{SelectOperation, SelectOperationType} 28 | import shards.Shard 29 | import thrift.{Page, Results} 30 | 31 | 32 | object SelectCompilerSpec extends ConfiguredSpecification with JMocker with ClassMocker { 33 | "SelectCompiler" should { 34 | var forwardingManager: ForwardingManager = null 35 | var shard: Shard = null 36 | var shard2: Shard = null 37 | var selectCompiler: SelectCompiler = null 38 | val sourceId = 900 39 | val graphId = 5 40 | val states = new mutable.ArrayBuffer[State] { 41 | override def equals(that: Any) = that match { 42 | case that: Seq[_] => this.toList == that.toList 43 | case that => false 44 | } 45 | } 46 | states += State.Normal 47 | 48 | doBefore { 49 | forwardingManager = mock[ForwardingManager] 50 | shard = mock[Shard] 51 | shard2 = mock[Shard] 52 | selectCompiler = new SelectCompiler(forwardingManager, new flockdb.config.IntersectionQuery { averageIntersectionProportion = 1.0 }) 53 | } 54 | 55 | "execute a simple wildcard query" in { 56 | "when the state is given" >> { 57 | expect { 58 | one(forwardingManager).find(sourceId, graphId, Direction.Forward) willReturn shard 59 | one(shard).count(sourceId, states) willReturn Future(23) 60 | } 61 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: Nil 62 | val query = selectCompiler(program) 63 | query.getClass.getName mustMatch "SimpleQuery" 64 | query.sizeEstimate()() mustEqual 23 65 | } 66 | } 67 | 68 | "should throw" in { 69 | "on an empty query" in { 70 | val program = Nil 71 | 
selectCompiler(program) must throwA[InvalidQueryException] 72 | } 73 | 74 | "on a malformed binary operation query" in { 75 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: 76 | new SelectOperation(SelectOperationType.Intersection, None) :: Nil 77 | selectCompiler(program) must throwA[InvalidQueryException] 78 | } 79 | 80 | "on a malformed dual-literal query" in { 81 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: 82 | new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: Nil 83 | selectCompiler(program) must throwA[InvalidQueryException] 84 | } 85 | } 86 | 87 | 88 | "execute a simple list query" in { 89 | expect { 90 | one(forwardingManager).find(sourceId, graphId, Direction.Forward) willReturn shard 91 | } 92 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, Some(List[Long](12, 13)), List(State.Normal)))) :: Nil 93 | val query = selectCompiler(program) 94 | query.getClass.getName mustMatch "WhereInQuery" 95 | query.sizeEstimate()() mustEqual 2 96 | } 97 | 98 | "execute a compound query" in { 99 | expect { 100 | one(forwardingManager).find(sourceId, graphId, Direction.Forward) willReturn shard 101 | one(forwardingManager).find(sourceId, graphId, Direction.Backward) willReturn shard 102 | one(shard).count(sourceId, states) willReturn Future(23) 103 | } 104 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: 105 | new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, false, Some(List[Long](12, 13)), List(State.Normal)))) :: 106 | new SelectOperation(SelectOperationType.Intersection, None) :: Nil 107 | val query = 
selectCompiler(program) 108 | query.getClass.getName mustMatch "IntersectionQuery" 109 | query.sizeEstimate()() mustEqual 2 110 | } 111 | 112 | "execute a nested compound query" in { 113 | expect { 114 | one(forwardingManager).find(sourceId, graphId, Direction.Forward) willReturn shard 115 | one(forwardingManager).find(sourceId, graphId, Direction.Backward) willReturn shard 116 | one(forwardingManager).find(sourceId + 1, graphId, Direction.Forward) willReturn shard2 117 | one(shard).count(sourceId, states) willReturn Future(23) 118 | one(shard2).count(sourceId + 1, states) willReturn Future(25) 119 | } 120 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: 121 | new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, false, Some(List[Long](12, 13)), List(State.Normal)))) :: 122 | new SelectOperation(SelectOperationType.Intersection, None) :: 123 | new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId + 1, graphId, true, None, List(State.Normal)))) :: 124 | new SelectOperation(SelectOperationType.Union, None) :: Nil 125 | val query = selectCompiler(program) 126 | query.getClass.getName mustMatch "UnionQuery" 127 | query.sizeEstimate()() mustEqual 25 128 | } 129 | 130 | "execute a difference query in the right order" in { 131 | expect { 132 | one(forwardingManager).find(sourceId, graphId, Direction.Forward) willReturn shard 133 | one(forwardingManager).find(sourceId + 1, graphId, Direction.Forward) willReturn shard2 134 | one(shard).count(sourceId, states) willReturn Future(10) 135 | allowing(shard2).count(sourceId + 1, states) willReturn Future(2) 136 | } 137 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, None, List(State.Normal)))) :: 138 | new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId + 1, graphId, true, None, 
List(State.Normal)))) :: 139 | new SelectOperation(SelectOperationType.Difference, None) :: Nil 140 | val query = selectCompiler(program) 141 | query.getClass.getName mustMatch "DifferenceQuery" 142 | query.sizeEstimate()() mustEqual 10 143 | } 144 | 145 | 146 | "time a simple list query" in { 147 | expect { 148 | one(forwardingManager).find(sourceId, graphId, Direction.Forward) willReturn shard 149 | one(shard).intersect(sourceId, List(State.Normal), List[Long](12, 13)) willReturn Future(List[Long](12,13)) 150 | } 151 | val program = new SelectOperation(SelectOperationType.SimpleQuery, Some(new QueryTerm(sourceId, graphId, true, Some(List[Long](12, 13)), List(State.Normal)))) :: Nil 152 | val queryTree = selectCompiler(program) 153 | queryTree.toString mustEqual "" 154 | val rv = queryTree.select(FlockPage(0,Cursor(0)))() 155 | queryTree.toString mustMatch "time" 156 | } 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/main/scala/com/twitter/flockdb/jobs/Copy.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Twitter, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | * not use this file except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.twitter.flockdb 18 | package jobs 19 | 20 | import com.twitter.gizzard.scheduler._ 21 | import com.twitter.gizzard.shards.{ShardId, RoutingNode} 22 | import com.twitter.gizzard.nameserver.NameServer 23 | import com.twitter.gizzard.Stats 24 | import com.twitter.util.TimeConversions._ 25 | import conversions.Numeric._ 26 | import shards.{Shard, ReadWriteShardAdapter} 27 | import scala.collection.mutable 28 | 29 | 30 | object Copy { 31 | type CopyCursor = (Cursor, Cursor) 32 | 33 | val START = (Cursor.Start, Cursor.Start) 34 | val END = (Cursor.End, Cursor.End) 35 | val COUNT = 10000 36 | } 37 | 38 | class CopyFactory(nameServer: NameServer, scheduler: JobScheduler) 39 | extends CopyJobFactory[Shard] { 40 | def apply(shardIds: Seq[ShardId]) = 41 | new MetadataCopy(shardIds, MetadataCopy.START, Copy.COUNT, 42 | nameServer, scheduler) 43 | } 44 | 45 | class CopyParser(nameServer: NameServer, scheduler: JobScheduler) 46 | extends CopyJobParser[Shard] { 47 | def deserialize(attributes: Map[String, Any], shardIds: Seq[ShardId], count: Int) = { 48 | val cursor = (Cursor(attributes("cursor1").asInstanceOf[AnyVal].toLong), 49 | Cursor(attributes("cursor2").asInstanceOf[AnyVal].toLong)) 50 | new Copy(shardIds, cursor, count, nameServer, scheduler) 51 | } 52 | 53 | 54 | } 55 | 56 | case class CopyState( 57 | var pos: Int, 58 | items: Seq[Edge], 59 | cursor: Copy.CopyCursor, 60 | total: Int, 61 | val diffs: mutable.ArrayBuffer[Edge] 62 | ) 63 | 64 | /** Given a seq of shards, compares them and copies the most up to date data between them */ 65 | class Copy(shardIds: Seq[ShardId], var cursor: Copy.CopyCursor, 66 | count: Int, nameServer: NameServer, scheduler: JobScheduler) 67 | extends CopyJob[Shard](shardIds, count, nameServer, scheduler) { 68 | 69 | def copyPage(nodes: Seq[RoutingNode[Shard]], count: Int) = { 70 | val shards = nodes map { new ReadWriteShardAdapter(_) } 71 | 72 | var cursors = Seq(cursor) 73 | 74 | while (!cursors.isEmpty) { 75 | 
cursor = cursors.min 76 | val shardStates = shards map { shard => 77 | val (edges, nextCursor) = shard.selectAll(cursor, count)() 78 | shard -> CopyState(0, edges, nextCursor, edges.size, mutable.ArrayBuffer[Edge]()) 79 | } toMap 80 | 81 | /* 82 | * Loop through the edges we got and add diffs to each. Stop when we either run out of edges to process or we get through 83 | * one shard's batch of edges but haven't reached its END. Stopping in the latter case saves cycles that we'll 84 | * repeat on the next iteration anyway and potentially saves us from recording useless diffs. 85 | */ 86 | while (shardStates.find { case (shard, state) => state.pos < state.total }.isDefined && shardStates.find { case (shard, state) => state.pos >= state.total && state.cursor != Copy.END}.isEmpty ) { 87 | val edges = shardStates.map { case (shard, state) => 88 | val edge = if (state.pos < state.total) state.items(state.pos) else Edge.Max 89 | (shard, edge) 90 | } 91 | 92 | val (minShard, minEdge) = edges.foldLeft(edges.head) { case (min, pair) => 93 | val minEdge = min._2 94 | val pairEdge = pair._2 95 | 96 | if (pairEdge.similar(minEdge) < 0) pair else min 97 | } 98 | 99 | val sameEdges = edges.filter { case (shard, edge) => edge.similar(minEdge) == 0 } 100 | 101 | val (bestShard, bestEdge) = sameEdges.foldLeft((minShard, minEdge)) { case (newest, pair) => 102 | if (pair._2.updatedAt > newest._2.updatedAt) pair else newest 103 | } 104 | edges.foreach { case (shard, edge) => 105 | if (bestEdge.similar(edge) < 0) { 106 | shardStates(shard).diffs += bestEdge 107 | } else if (bestEdge.similar(edge) == 0) { 108 | if (bestEdge.updatedAt > edge.updatedAt) { 109 | shardStates(shard).diffs += bestEdge 110 | } 111 | shardStates(shard).pos += 1 112 | } 113 | } 114 | } 115 | 116 | shardStates.foreach { case (shard, state) => 117 | shard.writeCopies(state.diffs)() 118 | Stats.incr("edges-copy", state.diffs.size) 119 | state.diffs.clear 120 | } 121 | 122 | cursors = shardStates.toSeq.map { case 
(shard, state) => state.cursor}.filterNot{ _ == Copy.END } 123 | } 124 | 125 | None 126 | } 127 | 128 | def serialize = Map("cursor1" -> cursor._1.position, "cursor2" -> cursor._2.position) 129 | } 130 | 131 | object MetadataCopy { 132 | type CopyCursor = Cursor 133 | val START = Cursor.Start 134 | val END = Cursor.End 135 | } 136 | 137 | class MetadataCopyParser(nameServer: NameServer, scheduler: JobScheduler) 138 | extends CopyJobParser[Shard] { 139 | def deserialize(attributes: Map[String, Any], shardIds: Seq[ShardId], count: Int) = { 140 | val cursor = Cursor(attributes("cursor").asInstanceOf[AnyVal].toLong) 141 | new MetadataCopy(shardIds, cursor, count, nameServer, scheduler) 142 | } 143 | } 144 | 145 | case class MetadataCopyState( 146 | var pos: Int, 147 | items: Seq[Metadata], 148 | cursor: MetadataCopy.CopyCursor, 149 | total: Int, 150 | val diffs: mutable.ArrayBuffer[Metadata] 151 | ) 152 | 153 | class MetadataCopy(shardIds: Seq[ShardId], var cursor: MetadataCopy.CopyCursor, 154 | count: Int, nameServer: NameServer, scheduler: JobScheduler) 155 | extends CopyJob[Shard](shardIds, count, nameServer, scheduler) { 156 | 157 | def copyPage(nodes: Seq[RoutingNode[Shard]], count: Int) = { 158 | val shards = nodes.map { new ReadWriteShardAdapter(_) } 159 | 160 | var cursors = Seq(cursor) 161 | 162 | while(!cursors.isEmpty) { 163 | cursor = cursors.min 164 | 165 | val shardStates = Map(shards.map { shard => 166 | val (items, nextCursor) = shard.selectAllMetadata(cursor, count)() 167 | (shard, MetadataCopyState(0, items, nextCursor, items.size, mutable.ArrayBuffer[Metadata]())) 168 | }: _*) 169 | 170 | while (shardStates.find{case (shard, state) => state.pos < state.total}.isDefined && shardStates.find{case (shard, state) => state.pos >= state.total && state.cursor != MetadataCopy.END}.isEmpty) { 171 | val items = shardStates.map { case (shard, state) => 172 | val item = if (state.pos < state.total) state.items(state.pos) else Metadata.Max 173 | (shard, item) 174 
| } 175 | 176 | val (minShard, minItem) = items.foldLeft(items.head) { case (min, pair) => 177 | val minItem = min._2 178 | val pairItem = pair._2 179 | 180 | if (pairItem.similar(minItem) < 0) pair else min 181 | } 182 | 183 | val sameItems = items.filter { case (shard, item) => item.similar(minItem) == 0 } 184 | 185 | val (bestShard, bestItem) = sameItems.foldLeft((minShard, minItem)) { case (newest, pair) => 186 | if (pair._2.updatedAt > newest._2.updatedAt) pair else newest } 187 | 188 | items.foreach { case (shard, item) => 189 | if (bestItem.similar(item) < 0) { 190 | shardStates(shard).diffs += bestItem 191 | } else if (bestItem.similar(item) == 0) { 192 | if (bestItem.updatedAt > item.updatedAt) { 193 | shardStates(shard).diffs += bestItem 194 | } 195 | shardStates(shard).pos += 1 196 | } 197 | } 198 | } 199 | 200 | shardStates.foreach { case (shard, state) => 201 | shard.writeMetadatas(state.diffs)() 202 | Stats.incr("edges-copy", state.diffs.size) 203 | state.diffs.clear 204 | } 205 | 206 | cursors = shardStates.toSeq.map { case (shard, state) => state.cursor }.filterNot{ _ == MetadataCopy.END } 207 | } 208 | 209 | Some(new Copy(shardIds, Copy.START, Copy.COUNT, nameServer, scheduler)) 210 | 211 | } 212 | 213 | def serialize = Map("cursor" -> cursor.position) 214 | } 215 | -------------------------------------------------------------------------------- /doc/blog.md: -------------------------------------------------------------------------------- 1 | 2 | # Introducing FlockDB 3 | 4 | Twitter stores many graphs of relationships between people: who you're following, who's following 5 | you, who you receive phone notifications from, and so on. 6 | 7 | Some of the features of these graphs have been challenging to store in scalable ways as we've grown. 8 | For example, instead of requiring each friendship to be requested and confirmed, you can build 9 | one-way relationships by just following other people. 
There's also no limit to how many people are 10 | allowed to follow you, so some people have millions of followers (like @aplusk), while others have 11 | only a few. 12 | 13 | To deliver a tweet, we need to be able to look up someone's followers and page through them rapidly. 14 | But we also need to handle heavy write traffic, as followers are added or removed, or spammers are 15 | caught and put on ice. And for some operations, like delivering a @mention, we need to do set 16 | arithmetic like "who's following both of these users?" These features are difficult to implement in a traditional relational database. 17 | 18 | ## A valiant effort 19 | 20 | We went through several storage layers in the early days, including abusive use of relational tables 21 | and key-value storage of denormalized lists. They were either good at handling write operations or 22 | good at paging through giant result sets, but never good at both. 23 | 24 | A little over a year ago, we could see that we needed to try something new. Our goals were: 25 | 26 | - Write the simplest possible thing that could work. 27 | 28 | - Use off-the-shelf MySQL as the storage engine, because we understand its behavior -- in normal use 29 | as well as under extreme load and unusual failure conditions. Give it enough memory to keep 30 | everything in cache. 31 | 32 | - Allow for horizontal partitioning so we can add more database hardware as the corpus grows. 33 | 34 | - Allow write operations to arrive out of order or be processed more than once. (Allow failures to 35 | result in redundant work rather than lost work.) 36 | 37 | FlockDB was the result. We finished migrating to it about 9 months ago and never looked back. 38 | 39 | ## A valiant-er effort 40 | 41 | FlockDB is a database that stores graph data, but it isn't a database optimized for graph-traversal 42 | operations. 
Instead, it's optimized for very large [adjacency 43 | lists](http://en.wikipedia.org/wiki/Adjacency_list), fast reads and writes, and page-able set 44 | arithmetic queries. 45 | 46 | It stores graphs as sets of edges between nodes identified by 64-bit integers. For a social graph, 47 | these node IDs will be user IDs, but in a graph storing "favorite" tweets, the destination may be a 48 | tweet ID. Each edge is also marked with a 64-bit position, used for sorting. (Twitter puts a 49 | timestamp here for the "following" graph, so that your follower list is displayed latest-first.) 50 | 51 | ![schema](schema.png) 52 | 53 | When an edge is "deleted", the row isn't actually deleted from MySQL; it's just marked as being in 54 | the deleted state, which has the effect of moving the primary key (a compound key of the source ID, 55 | state, and position). Similarly, users who delete their account can have their edges put into an 56 | archived state, allowing them to be restored later. We keep only a compound primary key and a 57 | secondary index for each row, and answer all queries from a single index. This kind of schema 58 | optimization allows MySQL to shine and gives us predictable performance. 59 | 60 | A complex query like "What's the intersection of people I follow and people who are following 61 | President Obama?" can be answered quickly by decomposing it into single-user queries ("Who is 62 | following President Obama?"). Data is partitioned by node, so these queries can each be answered by 63 | a single partition, using an indexed range query. Similarly, paging through long result sets is done 64 | by using the position field as a cursor, rather than using `LIMIT/OFFSET`, so any page of results 65 | for a query is indexed and is equally fast. 66 | 67 | Write operations are [idempotent](http://en.wikipedia.org/wiki/Idempotence) and 68 | [commutative](http://en.wikipedia.org/wiki/Commutative), based on the time they enter the system. 
We 69 | can process operations out of order and end up with the same result, so we can paper over temporary 70 | network and hardware failures, or even replay lost data from minutes or hours ago. This was 71 | especially helpful during the initial roll-out. 72 | 73 | Commutative writes also simplify the process of bringing up new partitions. A new partition can 74 | receive write traffic immediately, and receive a dump of data from the old partitions slowly in the 75 | background. Once the dump is over, the partition is immediately "live" and ready to receive reads. 76 | 77 | The app servers (affectionately called "flapps") are written in Scala, are stateless, and are 78 | horizontally scalable. We can add more as query load increases, independent of the databases. They 79 | expose a very small thrift API to clients, though we've written [a Ruby 80 | client](http://github.com/twitter/flockdb-client) with a much richer interface. 81 | 82 | ![it's in the cloud](flockdb-layout.png) 83 | 84 | We use [the Gizzard library](http://github.com/twitter/gizzard) to handle the partitioning layer. A 85 | forwarding layer maps ranges of source IDs to physical databases, and replication is handled by 86 | building a tree of such tables under the same forwarding address. Write operations are acknowledged 87 | after being journalled locally, so that disruptions in database availability or performance are 88 | decoupled from website response times. 89 | 90 | Each edge is actually stored twice: once in the "forward" direction (indexed and partitioned by the 91 | source ID) and once in the "backward" direction (indexed and partitioned by the destination ID). 92 | That way a query like "Who follows me?" is just as efficient as "Who do I follow?", and the answer 93 | to each query can be found entirely on a single partition. 94 | 95 | The end result is a cluster of commodity servers that we can expand as needed. 
Over the winter, we 96 | added 50% database capacity without anyone noticing. We currently store over **13 billion edges** 97 | and sustain peak traffic of **20k writes/second** and **100k reads/second**. 98 | 99 | ## Lessons learned 100 | 101 | Some helpful patterns fell out of our experience, even though they weren't goals originally: 102 | 103 | - **Use aggressive timeouts to cut off the long tail.** 104 | 105 | You can't ever shake out all the unfairness in the system, so some requests will take an 106 | unreasonably long time to finish -- way over the 99.9th percentile. If there are multiple 107 | stateless app servers, you can just cut a client loose when it has passed a "reasonable" amount of 108 | time, and let it try its luck with a different app server. 109 | 110 | - **Make every case an error case.** 111 | 112 | Or, to put it another way, use the same code path for errors as you use in normal operation. Don't 113 | create rarely-tested modules that only kick in during emergencies, when you're least likely to 114 | feel like trying new things. 115 | 116 | We queue all write operations locally (using [Kestrel](http://github.com/robey/kestrel) as a 117 | library), and any that fail are thrown into a separate error queue. This error queue is 118 | periodically flushed back into the write queue, so that retries use the same code path as the 119 | initial attempt. 120 | 121 | - **Do nothing automatically at first.** 122 | 123 | Provide lots of gauges and levers, and automate with scripts once patterns emerge. FlockDB 124 | measures the latency distribution of each query type across each service (MySQL, Kestrel, Thrift) 125 | so we can tune timeouts, and reports counts of each operation so we can see when a client library 126 | suddenly doubles its query load (or we need to add more hardware). 127 | 128 | Write operations that cycle through the error queue too many times are dumped into a log for 129 | manual inspection. 
If it turns out to be a bug, we can fix it, and re-inject the job. If it's a 130 | client error, we have a good bug report. 131 | 132 | ## Check it out 133 | 134 | The source is in github: [http://github.com/twitter/flockdb](http://github.com/twitter/flockdb) 135 | 136 | In particular, check out the demo to get a feel for the kind of data that can be stored and what you 137 | can do with it: 138 | [http://github.com/twitter/flockdb/blob/master/doc/demo.markdown](http://github.com/twitter/flockdb/blob/master/doc/demo.markdown) 139 | 140 | Talk to us on IRC, in #twinfra (irc.freenode.net), or join the mailing list: 141 | [http://groups.google.com/group/flockdb](http://groups.google.com/group/flockdb) 142 | --------------------------------------------------------------------------------