├── .gitignore
├── README.md
├── images
│   ├── inode.png
│   ├── sblock.png
│   └── snackfs-split.png
├── project
│   ├── SnackfsBuild.scala
│   ├── build.properties
│   └── plugins.sbt
└── src
    ├── it
    │   └── scala
    │       └── org
    │           └── apache
    │               └── hadoop
    │                   └── fs
    │                       ├── FSShellSpec.scala
    │                       └── SnackFSShellSpec.scala
    ├── main
    │   ├── resources
    │   │   └── core-site.xml
    │   ├── scala
    │   │   ├── com
    │   │   │   └── tuplejump
    │   │   │       └── snackfs
    │   │   │           ├── SnackFS.scala
    │   │   │           ├── api
    │   │   │           │   ├── model
    │   │   │           │   │   ├── AppendFileCommand.scala
    │   │   │           │   │   ├── CreateFileCommand.scala
    │   │   │           │   │   ├── DeleteCommand.scala
    │   │   │           │   │   ├── FileStatusCommand.scala
    │   │   │           │   │   ├── ListCommand.scala
    │   │   │           │   │   ├── MakeDirectoryCommand.scala
    │   │   │           │   │   ├── OpenFileCommand.scala
    │   │   │           │   │   ├── RenameCommand.scala
    │   │   │           │   │   └── SnackFileStatus.scala
    │   │   │           │   └── partial
    │   │   │           │       └── Command.scala
    │   │   │           ├── cassandra
    │   │   │           │   ├── model
    │   │   │           │   │   ├── ClientPoolFactory.scala
    │   │   │           │   │   ├── GenericOpSuccess.scala
    │   │   │           │   │   ├── Keyspace.scala
    │   │   │           │   │   ├── SnackFSConfiguration.scala
    │   │   │           │   │   └── ThriftClientAndSocket.scala
    │   │   │           │   ├── partial
    │   │   │           │   │   └── FileSystemStore.scala
    │   │   │           │   └── store
    │   │   │           │       └── ThriftStore.scala
    │   │   │           ├── fs
    │   │   │           │   ├── model
    │   │   │           │   │   ├── BlockMeta.scala
    │   │   │           │   │   ├── INode.scala
    │   │   │           │   │   └── SubBlockMeta.scala
    │   │   │           │   └── stream
    │   │   │           │       ├── BlockInputStream.scala
    │   │   │           │       ├── FileSystemInputStream.scala
    │   │   │           │       └── FileSystemOutputStream.scala
    │   │   │           └── util
    │   │   │               ├── AsyncUtil.scala
    │   │   │               └── LogConfiguration.scala
    │   │   └── org
    │   │       └── apache
    │   │           └── hadoop
    │   │               └── fs
    │   │                   └── SnackFSShell.scala
    │   └── scripts
    │       ├── hadoop
    │       └── snackfs
    └── test
        ├── java
        │   └── org
        │       └── apache
        │           └── hadoop
        │               └── fs
        │                   └── TestFileSystem.java
        ├── resources
        │   ├── small.txt
        │   └── vsmall.txt
        └── scala
            └── com
                └── tuplejump
                    └── snackfs
                        ├── SnackFSSpec.scala
                        ├── cassandra
                        │   └── store
                        │       └── ThriftStoreSpec.scala
                        └── fs
                            ├── model
                            │   └── INodeSpec.scala
                            └── stream
                                └── FileSystemStreamSpec.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | .idea_modules/
3 | build/
4 | lib_managed/
5 | project/project/
6 | project/target/
7 | target/
8 | fs_test/
9 | **/*~
10 |
11 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [SnackFS @ Calliope](http://tuplejump.github.io/calliope/snackfs.html)
2 |
3 |
4 | # SnackFS
5 |
6 | SnackFS is our bite-sized, lightweight, HDFS-compatible FileSystem built over Cassandra.
7 | With its unique fat-driver design it requires no additional SysOps work or setup on the Cassandra cluster. All you have to do is point it to your Cassandra cluster and you are ready to go.
8 | 
9 | As SnackFS was written as a drop-in replacement for HDFS, your existing HDFS-backed applications not only run as-is on SnackFS, they also run faster!
10 | A SnackFS cluster is also more resilient than an HDFS cluster, as there is no single point of failure like the NameNode.
11 |
12 | ## Prerequisites
13 |
14 | 1. SBT : It can be set up from the instructions [here](http://www.scala-sbt.org/release/docs/Getting-Started/Setup.html#installing-sbt).
15 |
16 | 2. Cassandra (v1.2.12): Instructions can be found [here](http://wiki.apache.org/cassandra/GettingStarted). An easier alternative is to use [CCM](https://github.com/pcmanus/ccm)
17 |
18 | ## Using SnackFS
19 |
20 | ### Use the binary
21 |
22 | * You can download the SnackFS distribution built with [Scala 2.9.x here](http://bit.ly/1eKV1ae) and [Scala 2.10.x here](http://bit.ly/1jI7vVw)
23 |
24 | * To add SnackFS to your SBT project, add the following dependency:
25 | 
26 | ```scala
27 | libraryDependencies += "com.tuplejump" %% "snackfs" % "0.6.1-EA"
28 | ```
29 | 
30 |
31 | * To add SnackFS to your Maven project,
32 |   with Scala 2.9.3 use:
33 |
34 | ```xml
35 | <dependency>
36 |   <groupId>com.tuplejump</groupId>
37 |   <artifactId>snackfs_2.9.3</artifactId>
38 |   <version>0.6.1-EA</version>
39 | </dependency>
40 | ```
41 |
42 | And with Scala 2.10.3,
43 | ```xml
44 | <dependency>
45 |   <groupId>com.tuplejump</groupId>
46 |   <artifactId>snackfs_2.10</artifactId>
47 |   <version>0.6.1-EA</version>
48 | </dependency>
49 | ```
50 |
51 | ### Build from Source
52 |
53 | 1. Check out the source from http://github.com/tuplejump/snackfs
54 | 
55 | 2. To build the SnackFS distribution, run sbt's dist command in the project directory:
56 | ```
57 | [snackfs]$ sbt dist
58 | ```
59 |
60 | This will result in a "snackfs-{version}.tgz" file in the "target" directory of "snackfs".
61 | Extract "snackfs-{version}.tgz" to the desired location.
62 |
63 | 3. Start Cassandra (the default setup for SnackFS assumes a cluster with 3 nodes)
64 |
65 | 4. It is possible to configure the file system by updating core-site.xml.
66 | The following properties can be added (a programmatic sketch of the same settings follows this list).
67 | * snackfs.cassandra.host (default 127.0.0.1)
68 | * snackfs.cassandra.port (default 9160)
69 | * snackfs.consistencyLevel.write (default QUORUM)
70 | * snackfs.consistencyLevel.read (default QUORUM)
71 | * snackfs.keyspace (default snackfs)
72 | * snackfs.subblock.size (default 8 MB (8 * 1024 * 1024))
73 | * snackfs.block.size (default 128 MB (128 * 1024 * 1024))
74 | * snackfs.replicationFactor (default 3)
75 | * snackfs.replicationStrategy (default org.apache.cassandra.locator.SimpleStrategy)
76 |
77 | 5. The SnackFS shell provides fs commands similar to the Hadoop shell. For example, to create a directory:
78 | ```
79 | [Snackfs(extracted)]$ bin/snackfs -mkdir snackfs:///random
80 | ```
81 |
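The properties in step 4 can also be set programmatically through Hadoop's standard `Configuration` API before the filesystem is obtained. The following is only a sketch (the values shown are the documented defaults, and the `snackfs://localhost:9000/` URI is just an example authority):

```scala
import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Sketch: override the SnackFS keys listed in step 4 on a Hadoop Configuration.
val conf = new Configuration()
conf.set("snackfs.cassandra.host", "127.0.0.1")
conf.setInt("snackfs.cassandra.port", 9160)
conf.set("snackfs.consistencyLevel.write", "QUORUM")
conf.set("snackfs.consistencyLevel.read", "QUORUM")
conf.set("snackfs.keyspace", "snackfs")
conf.setLong("snackfs.subblock.size", 8L * 1024 * 1024)   // 8 MB
conf.setLong("snackfs.block.size", 128L * 1024 * 1024)    // 128 MB
conf.setInt("snackfs.replicationFactor", 3)
conf.set("snackfs.replicationStrategy", "org.apache.cassandra.locator.SimpleStrategy")
conf.set("fs.snackfs.impl", "com.tuplejump.snackfs.SnackFS")

// Obtain the filesystem for a snackfs:// URI and create a directory, as in step 5.
val fs = FileSystem.get(URI.create("snackfs://localhost:9000/"), conf)
fs.mkdirs(new Path("/random"))
```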
82 | ### To build and use with Hadoop
83 |
84 | 1. Set up Apache Hadoop v1.0.4 (http://hadoop.apache.org/#Getting+Started). The base directory will be referred to as 'hadoop-1.0.4' in the following steps.
85 |
86 | 2. Execute the following command in the snackfs project directory.
87 | ```
88 | [snackfs]$ sbt package
89 | ```
90 |
91 | This will result in a "snackfs_<scala_version>-<version>.jar" file in the "target/scala-<scala_version>" directory of "snackfs".
92 | Copy the jar to 'hadoop-1.0.4/lib'.
93 |
94 | 3. Copy all the jars in snackfs/lib_managed and scala-library-<scala_version>.jar
95 | (located at '~/.ivy2/cache/org.scala-lang/scala-library/jars') to 'hadoop-1.0.4/lib'.
96 |
97 | 4. Copy snackfs/src/main/resources/core-site.xml to 'hadoop-1.0.4/conf'
98 |
99 | 5. Start Cassandra (the default setup for SnackFS assumes a cluster with 3 nodes)
100 |
101 | 6. Hadoop fs commands can now be run using snackfs. For example,
102 | ```
103 | [hadoop-1.0.4]$ bin/hadoop fs -mkdir snackfs:///random
104 | ```
105 |
106 | ### To configure logging
107 |
108 | #### In System Environment
109 |
110 | Set SNACKFS_LOG_LEVEL in the shell environment to one of the following values:
111 |
112 | * DEBUG
113 | * INFO
114 | * ERROR
115 | * ALL
116 | * OFF
117 |
118 | The default value, if not set, is ERROR. For example, running `export SNACKFS_LOG_LEVEL=DEBUG` before invoking the shell enables debug output.
119 |
120 | #### In code (for further control/tuning)
121 | If you want your logs in a file, update LogConfiguration.scala as shown below:
122 |
123 | ```scala
124 | val config = new LoggerFactory("", Option(Level.ALL), List(FileHandler("logs")), true)
125 | ```
126 |
127 | The arguments for LoggerFactory are:
128 | 
129 | 1. node - Name of the logging node. ("") is the top-level logger.
130 | 2. level - Log level for this node. Leaving it None implies the parent logger's level.
131 | 3. handlers - Where to send log messages.
132 | 4. useParents - Indicates whether log messages are passed up to parent nodes. To stop at this node level, set it to false (see the sketch after this list).
133 |
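As a hedged sketch of what such an update to LogConfiguration.scala could look like (argument names as listed above; the "com.tuplejump.snackfs" node and "snackfs.log" file name are only examples):

```scala
import com.twitter.logging.{FileHandler, Level, LoggerFactory}

// Sketch: log only the SnackFS namespace, at INFO and above, to a file,
// and stop propagation to parent loggers.
val config = new LoggerFactory(
  node = "com.tuplejump.snackfs",
  level = Option(Level.INFO),
  handlers = List(FileHandler("snackfs.log")),
  useParents = false
)
```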
134 | Additional logging configuration details can be found [here](https://github.com/twitter/util/tree/master/util-logging#configuring)
135 |
136 |
--------------------------------------------------------------------------------
/images/inode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tuplejump/snackfs/4e5f1575ac0dce62ee99fba16d8c5c7c2bfef8cd/images/inode.png
--------------------------------------------------------------------------------
/images/sblock.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tuplejump/snackfs/4e5f1575ac0dce62ee99fba16d8c5c7c2bfef8cd/images/sblock.png
--------------------------------------------------------------------------------
/images/snackfs-split.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tuplejump/snackfs/4e5f1575ac0dce62ee99fba16d8c5c7c2bfef8cd/images/snackfs-split.png
--------------------------------------------------------------------------------
/project/SnackfsBuild.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 |
20 | import sbt._
21 | import sbt.Keys._
22 |
23 | object SnackfsBuild extends Build {
24 |
25 | lazy val VERSION = "0.6.1-EA"
26 |
27 | lazy val CAS_VERSION = "1.2.12"
28 |
29 | lazy val THRIFT_VERSION = "0.7.0"
30 |
31 | lazy val TWITTER_UTIL_VERSION = "6.7.0"
32 |
33 | lazy val dist = TaskKey[Unit]("dist", "Generates project distribution")
34 |
35 |   lazy val pom = {
36 |     <scm>
37 |       <url>git@github.com:tuplejump/snackfs.git</url>
38 |       <connection>scm:git:git@github.com:tuplejump/snackfs.git</connection>
39 |     </scm>
40 |     <developers>
41 |       <developer>
42 |         <id>milliondreams</id>
43 |         <name>Rohit Rai</name>
44 |         <url>https://twitter.com/milliondreams</url>
45 |       </developer>
46 |       <developer>
47 |         <id>shiti</id>
48 |         <name>Shiti Saxena</name>
49 |         <url>https://eraoferrors.blogspot.com</url>
50 |       </developer>
51 |     </developers>
52 |   }
53 |
54 | def sparkDependency(scalaVersion: String) =
55 | scalaVersion match {
56 | case "2.9.3" => "org.apache.spark" %% "spark-core" % "0.8.1-incubating"
57 | case "2.10.3" => "org.apache.spark" %% "spark-core" % "0.9.0-incubating"
58 | case x => "org.apache.spark" %% "spark-core" % "0.9.0-incubating"
59 | }
60 |
61 |
62 | lazy val dependencies = Seq("org.apache.hadoop" % "hadoop-core" % "1.0.4" % "provided",
63 | "org.apache.cassandra" % "cassandra-thrift" % CAS_VERSION intransitive(),
64 | "org.apache.cassandra" % "cassandra-all" % CAS_VERSION intransitive(),
65 | "org.apache.thrift" % "libthrift" % THRIFT_VERSION exclude("org.slf4j", "slf4j-api") exclude("javax.servlet", "servlet-api"),
66 | "commons-pool" % "commons-pool" % "1.6",
67 | "com.twitter" % "util-logging" % TWITTER_UTIL_VERSION cross CrossVersion.binaryMapped {
68 | case "2.9.3" => "2.9.2"
69 | case "2.10.3" => "2.10"
70 | case x => x
71 | },
72 | "org.scalatest" %% "scalatest" % "1.9.1" % "it,test",
73 | "org.apache.commons" % "commons-io" % "1.3.2" % "it,test",
74 | "com.novocode" % "junit-interface" % "0.10" % "it,test",
75 | "org.apache.commons" % "commons-lang3" % "3.1" % "it,test"
76 | )
77 |
78 | lazy val snackSettings = Project.defaultSettings ++ Seq(
79 | name := "snackfs",
80 |
81 | organization := "com.tuplejump",
82 |
83 | version := VERSION,
84 |
85 | crossScalaVersions := Seq("2.9.3", "2.10.3"),
86 |
87 | parallelExecution in Test := false,
88 |
89 | retrieveManaged := true,
90 |
91 | libraryDependencies ++= dependencies,
92 |
93 | libraryDependencies <+= (scalaVersion)(sparkDependency),
94 |
95 | parallelExecution in Test := false,
96 |
97 | pomExtra := pom,
98 |
99 | publishArtifact in Test := false,
100 |
101 | pomIncludeRepository := {
102 | _ => false
103 | },
104 |
105 | publishMavenStyle := true,
106 |
107 | retrieveManaged := true,
108 |
109 | publishTo <<= version {
110 | (v: String) =>
111 | val nexus = "https://oss.sonatype.org/"
112 | if (v.trim.endsWith("SNAPSHOT"))
113 | Some("snapshots" at nexus + "content/repositories/snapshots")
114 | else
115 | Some("releases" at nexus + "service/local/staging/deploy/maven2")
116 | },
117 |
118 | licenses := Seq("Apache License, Version 2.0" -> url("http://www.apache.org/licenses/LICENSE-2.0")),
119 |
120 | homepage := Some(url("https://tuplejump.github.io/calliope/snackfs.html")),
121 |
122 | organizationName := "Tuplejump, Inc.",
123 |
124 | organizationHomepage := Some(url("http://www.tuplejump.com"))
125 |
126 | )
127 |
128 | lazy val snackfs = Project(
129 | id = "snackfs",
130 | base = file("."),
131 | settings = snackSettings ++ Seq(distTask)
132 | ).configs(IntegrationTest).settings(Defaults.itSettings: _*)
133 |
134 | def distTask = dist in Compile <<= (packageBin in Compile, scalaVersion in Compile, version in Compile, streams) map {
135 | (f: File, sv: String, v: String, s) =>
136 | val userHome = System.getProperty("user.home")
137 | val ivyHome = userHome + "/.ivy2/cache/" //should be updated to point to the ivy cache if it's not in the home directory
138 |
139 | val destination = "target/SnackFS_%s-%s/".format(sv, v)
140 | val lib = destination + "lib/"
141 | val bin = destination + "bin/"
142 | val conf = destination + "conf/"
143 | val spark = destination + "snack-spark/"
144 |
145 | def twitterUtil =
146 | sv match {
147 | case "2.9.3" => Seq("util-core_2.9.2-" + TWITTER_UTIL_VERSION + ".jar", "util-logging_2.9.2-" + TWITTER_UTIL_VERSION + ".jar")
148 | case "2.10.3" => Seq("util-core_2.10-" + TWITTER_UTIL_VERSION + ".jar", "util-logging_2.10-" + TWITTER_UTIL_VERSION + ".jar")
149 | case x => Seq("util-core_2.10-" + TWITTER_UTIL_VERSION + ".jar", "util-logging_2.10-" + TWITTER_UTIL_VERSION + ".jar")
150 | }
151 |
152 | val forSpark = Set("cassandra-all-" + CAS_VERSION + ".jar",
153 | "cassandra-thrift-" + CAS_VERSION + ".jar",
154 | "commons-pool-1.6.jar",
155 | "libthrift-" + THRIFT_VERSION + ".jar",
156 | "snackfs_" + sv + "-" + VERSION + ".jar") ++ twitterUtil
157 |
158 | IO.copyFile(f, new File(lib + f.getName))
159 | IO.copyFile(f, new File(spark + f.getName))
160 |
161 | /*Dependencies*/
162 | IO.copyFile(new File(ivyHome + "org.scala-lang/scala-library/jars/scala-library-" + sv + ".jar"),
163 | new File(lib + "scala-library-" + sv + ".jar"))
164 |
165 | val jars = getLibraries(sv)
166 | jars.foreach(j => {
167 | val jarFile = new File(j)
168 | IO.copyFile(jarFile, new File(lib + jarFile.getName))
169 | println(jarFile.getName)
170 | if (forSpark.contains(jarFile.getName)) {
171 | IO.copyFile(jarFile, new File(spark + jarFile.getName))
172 | }
173 | })
174 |
175 | /*script and configuration */
176 | val shellBin: sbt.File = new File(bin + "snackfs")
177 | IO.copyFile(new File("src/main/scripts/snackfs"), shellBin)
178 | shellBin.setExecutable(true, false)
179 | IO.copyFile(new File("src/main/resources/core-site.xml"), new File(conf + "core-site.xml"))
180 |
181 | val jarFiles = IO.listFiles(new File(lib))
182 | val configFiles = IO.listFiles(new File(conf))
183 | val scriptFiles = IO.listFiles(new File(bin))
184 | val allFiles = jarFiles ++ configFiles ++ scriptFiles
185 | val fileSeq = for (f <- allFiles) yield (f, f.getPath)
186 |
187 | val distTgz: sbt.File = new File("target/snackfs_%s-%s.tgz".format(sv, v))
188 | val tarball: sbt.File = makeTarball("snackfs_%s-%s".format(sv, v), new File(destination), new File("target"))
189 | IO.gzip(tarball, distTgz)
190 |
191 | IO.delete(tarball)
192 | IO.delete(new File(destination))
193 | s.log.info("SnackFS Distribution created at %s".format(distTgz.getAbsolutePath))
194 | }
195 |
196 | def getLibraries(sv: String): List[String] = {
197 | val jarSource = "lib_managed/jars/"
198 |
199 | val cassandra = jarSource + "org.apache.cassandra/"
200 | val cassandraRelated = List(cassandra + "cassandra-all/cassandra-all-" + CAS_VERSION + ".jar",
201 | cassandra + "cassandra-thrift/cassandra-thrift-" + CAS_VERSION + ".jar",
202 | jarSource + "org.apache.thrift/libthrift/libthrift-0.7.0.jar",
203 | jarSource + "commons-pool/commons-pool/commons-pool-1.6.jar"
204 | )
205 |
206 | val hadoopRelated = List(jarSource + "org.apache.hadoop/hadoop-core/hadoop-core-1.0.4.jar",
207 | jarSource + "commons-cli/commons-cli/commons-cli-1.2.jar",
208 | jarSource + "commons-configuration/commons-configuration/commons-configuration-1.6.jar",
209 | jarSource + "commons-lang/commons-lang/commons-lang-2.5.jar",
210 | jarSource + "commons-logging/commons-logging/commons-logging-1.1.1.jar"
211 | )
212 |
213 | val jackson = jarSource + "org.codehaus.jackson/"
214 | val log4j = "lib_managed/bundles/log4j/log4j/"
215 |
216 | val otherHadoopDeps = List(jackson + "jackson-core-asl/jackson-core-asl-1.8.8.jar",
217 | jackson + "jackson-mapper-asl/jackson-mapper-asl-1.8.8.jar",
218 | log4j + "log4j-1.2.17.jar",
219 | jarSource + "org.slf4j/slf4j-log4j12/slf4j-log4j12-1.7.2.jar",
220 | jarSource + "org.slf4j/slf4j-api/slf4j-api-1.7.2.jar"
221 | )
222 |
223 | val logger = jarSource + "com.twitter/"
224 |
225 | val loggingRelated =
226 | sv match {
227 | case "2.9.3" =>
228 | List(logger + "util-app_2.9.2/util-app_2.9.2-" + TWITTER_UTIL_VERSION + ".jar",
229 | logger + "util-core_2.9.2/util-core_2.9.2-" + TWITTER_UTIL_VERSION + ".jar",
230 | logger + "util-logging_2.9.2/util-logging_2.9.2-" + TWITTER_UTIL_VERSION + ".jar")
231 |
232 | case "2.10.3" =>
233 | List(logger + "util-app_2.10/util-app_2.10-" + TWITTER_UTIL_VERSION + ".jar",
234 | logger + "util-core_2.10/util-core_2.10-" + TWITTER_UTIL_VERSION + ".jar",
235 | logger + "util-logging_2.10/util-logging_2.10-" + TWITTER_UTIL_VERSION + ".jar")
236 |
237 | case x =>
238 | List(logger + "util-app_2.10/util-app_2.10-" + TWITTER_UTIL_VERSION + ".jar",
239 | logger + "util-core_2.10/util-core_2.10-" + TWITTER_UTIL_VERSION + ".jar",
240 | logger + "util-logging_2.10/util-logging_2.10-" + TWITTER_UTIL_VERSION + ".jar")
241 | }
242 |
243 | val requiredJars = cassandraRelated ++ hadoopRelated ++ otherHadoopDeps ++ loggingRelated
244 | requiredJars
245 | }
246 |
247 | def makeTarball(name: String, tarDir: File, rdir: File /* mappings: Seq[(File, String)]*/): File = {
248 | val tarball = new File("target") / (name + ".tar")
249 | val process: ProcessBuilder = Process(Seq("tar", "-pcf", tarball.getAbsolutePath, tarDir.getName), Some(rdir))
250 | process.! match {
251 | case 0 => ()
252 | case n => sys.error("Error tarballing " + tarball + ". Exit code: " + n)
253 | }
254 | tarball
255 | }
256 |
257 | }
258 |
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=0.12.4
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.5.1")
2 |
3 |
--------------------------------------------------------------------------------
/src/it/scala/org/apache/hadoop/fs/FSShellSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 |
20 | /**
21 | * To run the tests set projectHome(line no 65) and HADOOP_HOME
22 | */
23 | package org.apache.hadoop.fs
24 |
25 | import org.scalatest.{BeforeAndAfterAll, FlatSpec}
26 | import org.scalatest.matchers.MustMatchers
27 | import java.io.{File, IOException}
28 | import org.apache.commons.io.IOUtils
29 | import scala.io.Source
30 | import java.util.Date
31 |
32 | class FSShellSpec extends FlatSpec with BeforeAndAfterAll with MustMatchers {
33 |
34 | val hadhome = System.getenv("HADOOP_HOME")
35 |
36 | assert(hadhome != null && !hadhome.isEmpty, "Must have hadoop, and must set HADOOP_HOME before running these tests")
37 |
38 |
39 | def executeAndGetOutput(command: Seq[String]): String = {
40 |
41 | val hadoopHome = new File(hadhome + File.separator + "bin")
42 |
43 | val builder = new ProcessBuilder(command: _*)
44 | .directory(hadoopHome)
45 |
46 | val process = builder.start()
47 | val error = new String(IOUtils.toByteArray(process.getErrorStream))
48 | val output = IOUtils.readLines(process.getInputStream)
49 | if (error.length > 0) {
50 | throw new IOException(error)
51 | }
52 | output.toString
53 | }
54 |
55 | val isTrue = true
56 |
57 | val hadoopFSCommands = Seq("./hadoop", "fs")
58 |
59 | val timestamp = new Date().getTime
60 | val basePath = "testFSShell" + timestamp
61 |
62 | val base = "snackfs://localhost:9000/test" + timestamp + "/"
63 |
64 | val testingDir = base + "testFSShell/"
65 | val projectHome = "/snackfs/src/" //to be set
66 |
67 | val listCommand = hadoopFSCommands ++ Seq("-lsr", base)
68 |
69 | //mkdir
70 | it should "make a new directory" in {
71 | val command = hadoopFSCommands ++ Seq("-mkdir", testingDir)
72 | executeAndGetOutput(command)
73 | val listoutPut = executeAndGetOutput(listCommand)
74 | listoutPut must include("/testFSShell")
75 | }
76 |
77 | it should "not make a new directory with the name of an existing one" in {
78 | val command = hadoopFSCommands ++ Seq("-mkdir", testingDir)
79 | val exception = intercept[IOException] {
80 | executeAndGetOutput(command)
81 | }
82 | val errMsg = "mkdir: cannot create directory"
83 | exception.getMessage must startWith(errMsg)
84 | }
85 |
86 | //copyFromLocal
87 | it should "copy a file into the filesystem using copyFromLocal" in {
88 | val source = projectHome + "test/resources/small.txt"
89 | val command = hadoopFSCommands ++ Seq("-copyFromLocal", source, testingDir)
90 | executeAndGetOutput(command)
91 |
92 | val listoutPut = executeAndGetOutput(listCommand)
93 | listoutPut must include("small.txt")
94 | }
95 |
96 | it should "not overwrite a file into the filesystem using copyFromLocal" in {
97 | val source = projectHome + "test/resources/small.txt"
98 | val command = hadoopFSCommands ++ Seq("-copyFromLocal", source, testingDir)
99 | val exception = intercept[IOException] {
100 | executeAndGetOutput(command)
101 | }
102 | exception.getMessage must include("already exists")
103 | }
104 |
105 | //copyToLocal
106 | it should "copy a file from the filesystem using copyToLocal" in {
107 | val destination = projectHome + "test/resources/" + basePath + "/TestSmall.txt"
108 | val source = testingDir + "small.txt"
109 | val command = hadoopFSCommands ++ Seq("-copyToLocal", source, destination)
110 | executeAndGetOutput(command)
111 |
112 | val copiedFile = new File(destination)
113 | copiedFile.exists() must be(isTrue)
114 | }
115 |
116 | it should "not overwrite a file using copyToLocal" in {
117 | val destination = projectHome + "test/resources/small.txt"
118 | val source = testingDir + "small.txt"
119 | val command = hadoopFSCommands ++ Seq("-copyToLocal", source, destination)
120 | val exception = intercept[IOException] {
121 | executeAndGetOutput(command)
122 | }
123 | exception.getMessage must include("already exists")
124 | }
125 |
126 | //get
127 | it should "copy a file from the filesystem using get" in {
128 | val destination = projectHome + "test/resources/" + basePath + "/TestGetSmall.txt"
129 | val source = testingDir + "small.txt"
130 | val command = hadoopFSCommands ++ Seq("-copyToLocal", source, destination)
131 | executeAndGetOutput(command)
132 |
133 | val copiedFile = new File(destination)
134 | copiedFile.exists() must be(isTrue)
135 | }
136 |
137 | //cat
138 | it should "print file content" in {
139 | val source = projectHome + "test/resources/vsmall.txt"
140 | val writeCommand = hadoopFSCommands ++ Seq("-copyFromLocal", source, testingDir)
141 | executeAndGetOutput(writeCommand)
142 | val readCommand = hadoopFSCommands ++ Seq("-cat", testingDir + "/vsmall.txt")
143 | val output = executeAndGetOutput(readCommand)
144 | val fileContent = IOUtils.readLines(Source.fromFile(new File(source)).bufferedReader()).toString
145 | output must be(fileContent)
146 | }
147 |
148 | //cp
149 | it should "copy all files from a directory into another" in {
150 | val destName = "testCpCommand"
151 | val destination = base + destName + "/"
152 | val source = testingDir
153 | val command = hadoopFSCommands ++ Seq("-cp", source, destination)
154 | executeAndGetOutput(command)
155 | val listoutPut = executeAndGetOutput(listCommand)
156 | listoutPut must include("/" + destName + "/small.txt")
157 | listoutPut must include("/" + destName + "/vsmall.txt")
158 | }
159 |
160 | //du
161 | it should "display aggregate length of files in a directory" in {
162 | val command = hadoopFSCommands ++ Seq("-du", base)
163 | val output = executeAndGetOutput(command)
164 | output must include(base + "testFSShell")
165 | output must include(base + "testCpCommand")
166 | output must startWith("[Found 2 items, 598419")
167 | }
168 |
169 | it should "display aggregate length of file" in {
170 | val command = hadoopFSCommands ++ Seq("-du", testingDir + "vsmall.txt")
171 | val output = executeAndGetOutput(command)
172 | output must startWith("[Found 1 items, 623 ")
173 | output must endWith("/testFSShell/vsmall.txt]")
174 | }
175 |
176 | //dus
177 | it should "display summary of file lengths" in {
178 | val command = hadoopFSCommands ++ Seq("-dus", base)
179 | val output = executeAndGetOutput(command)
180 | output must include("/test" + timestamp)
181 | output must include("1196838")
182 | }
183 |
184 | //ls
185 | it should "list children of directory" in {
186 | val command = hadoopFSCommands ++ Seq("-ls", base)
187 | val output = executeAndGetOutput(command)
188 | output must startWith("[Found 2 items,")
189 | output must include("/testFSShell")
190 | output must include("/testCpCommand")
191 | }
192 |
193 | it should "list stats of a file" in {
194 | val command = hadoopFSCommands ++ Seq("-ls", testingDir + "vsmall.txt")
195 | val output = executeAndGetOutput(command)
196 | output must startWith("[Found 1 items,")
197 | output must include("/testFSShell/vsmall.txt")
198 | }
199 |
200 | //lsr
201 | it should "list children of directory recursive" in {
202 | val output = executeAndGetOutput(listCommand)
203 | output must include("/testFSShell")
204 | output must include("/testFSShell/small.txt")
205 | output must include("/testFSShell/vsmall.txt")
206 | output must include("/testCpCommand")
207 | output must include("/testCpCommand/small.txt")
208 | output must include("/testCpCommand/vsmall.txt")
209 | }
210 |
211 | /*//moveFromLocal -- docs and behaviour are contradicting
212 | it should "result in not implemented" in {
213 | val source = projectHome + "test/resources/small.txt"
214 | val command = hadoopFSCommands ++ Seq("-moveFromLocal", source, testingDir)
215 | val output = executeAndGetOutput(command)
216 | println(output)
217 | }*/
218 |
219 | it should "move a file" in {
220 | val source = testingDir + "small.txt"
221 | val destination = base + "small.txt"
222 | val command = hadoopFSCommands ++ Seq("-mv", source, destination)
223 | executeAndGetOutput(command)
224 | val output = executeAndGetOutput(listCommand)
225 | output must include("/testFSShell")
226 | output must not include "/testFSShell/small.txt"
227 | output must include("/small.txt")
228 | }
229 |
230 | //put (reading from stdin also works)
231 | it should "copy a file into the filesystem using put" in {
232 | val source = projectHome + "test/resources/vsmall.txt"
233 | val command = hadoopFSCommands ++ Seq("-put", source, base)
234 | executeAndGetOutput(command)
235 | val listOutPut = executeAndGetOutput(listCommand)
236 | listOutPut must include("/vsmall.txt")
237 | }
238 |
239 | it should "copy multiple files into the filesystem using put" in {
240 | val source1 = projectHome + "test/resources/small.txt"
241 | val source2 = projectHome + "test/resources/vsmall.txt"
242 | val destination = base + "testPutCommand/"
243 | val mkdirCommand = hadoopFSCommands ++ Seq("-mkdir", destination)
244 | executeAndGetOutput(mkdirCommand)
245 | val command = hadoopFSCommands ++ Seq("-put", source1, source2, destination)
246 | executeAndGetOutput(command)
247 | val listOutPut = executeAndGetOutput(listCommand)
248 | listOutPut must include("/testPutCommand/vsmall.txt")
249 | listOutPut must include("/testPutCommand/small.txt")
250 | }
251 |
252 | //stat
253 | it should "display stat" in {
254 | val command = hadoopFSCommands ++ Seq("-stat", base)
255 | val output = executeAndGetOutput(command)
256 | output must not be "[]"
257 | }
258 |
259 | //tail
260 | it should "display last KB of a file" in {
261 | val readCommand = hadoopFSCommands ++ Seq("-tail", base + "/vsmall.txt")
262 | val output = executeAndGetOutput(readCommand)
263 | output.length must not be 0
264 | }
265 |
266 | //touchz
267 | it should "create a file of zero length" in {
268 | val command = hadoopFSCommands ++ Seq("-touchz", base + "emptyFile.txt")
269 | executeAndGetOutput(command)
270 | val listOutPut = executeAndGetOutput(listCommand)
271 | listOutPut must include("/emptyFile.txt")
272 | }
273 |
274 | it should "move multiple files" in {
275 | val source1 = base + "small.txt"
276 | val source2 = base + "vsmall.txt"
277 | val destination = base + "testMvCommand/"
278 | val mkdirCommand = hadoopFSCommands ++ Seq("-mkdir", destination)
279 | executeAndGetOutput(mkdirCommand)
280 | val command = hadoopFSCommands ++ Seq("-mv", source1, source2, destination)
281 | executeAndGetOutput(command)
282 | val listOutPut = executeAndGetOutput(listCommand)
283 | listOutPut must include("/testMvCommand/small.txt")
284 | listOutPut must include("/testMvCommand/vsmall.txt")
285 | }
286 |
287 | //rm
288 | it should "remove a file" in {
289 | val command = hadoopFSCommands ++ Seq("-rm", testingDir + "vsmall.txt")
290 | val output = executeAndGetOutput(command)
291 | output must startWith("[Deleted")
292 | output must include("/testFSShell/vsmall.txt")
293 | }
294 |
295 | //rmr
296 | it should "remove a directory and all its contents" in {
297 | val command = hadoopFSCommands ++ Seq("-rmr", base + "testPutCommand/")
298 | val output = executeAndGetOutput(command)
299 | output must startWith("[Deleted")
300 | output must include("/testPutCommand")
301 | }
302 |
303 | override def afterAll() = {
304 | //remove files generated in resources
305 | val rmdirCommand = hadoopFSCommands ++ Seq("-rmr", projectHome + "test/resources/" + basePath)
306 | executeAndGetOutput(rmdirCommand)
307 |
308 | //remove the test directory
309 | val rmTestCommand = hadoopFSCommands ++ Seq("-rmr", base)
310 | executeAndGetOutput(rmTestCommand)
311 | }
312 |
313 | override def beforeAll() = {
314 | //make directory in resources for test
315 | val mkdirCommand = hadoopFSCommands ++ Seq("-mkdir", projectHome + "test/resources/" + basePath)
316 | executeAndGetOutput(mkdirCommand)
317 | }
318 | }
319 |
--------------------------------------------------------------------------------
/src/it/scala/org/apache/hadoop/fs/SnackFSShellSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 |
20 | /**
21 | * To run the tests set projectHome (line no 64) and SNACKFS_HOME
22 | */
23 | package org.apache.hadoop.fs
24 |
25 | import org.scalatest.{BeforeAndAfterAll, FlatSpec}
26 | import org.scalatest.matchers.MustMatchers
27 | import java.io.{File, IOException}
28 | import org.apache.commons.io.IOUtils
29 | import scala.io.Source
30 | import java.util.Date
31 |
32 | class SnackFSShellSpec extends FlatSpec with BeforeAndAfterAll with MustMatchers {
33 |
34 | val snackFS = System.getenv("SNACKFS_HOME")
35 |
36 | assert(snackFS != null && !snackFS.isEmpty, "Must have SNACKFS, and must set SNACKFS_HOME before running these tests")
37 |
38 | def executeAndGetOutput(command: Seq[String]): String = {
39 |
40 | val snackFSHome = new File(snackFS + File.separator + "bin")
41 |
42 | val builder = new ProcessBuilder(command: _*)
43 | .directory(snackFSHome)
44 |
45 | val process = builder.start()
46 | val error = new String(IOUtils.toByteArray(process.getErrorStream))
47 | val output = IOUtils.readLines(process.getInputStream)
48 | if (error.length > 0) {
49 | throw new IOException(error)
50 | }
51 | output.toString
52 | }
53 |
54 | val isTrue = true
55 |
56 | val snackFSCommands = Seq("./snackfs")
57 |
58 | val timestamp = new Date().getTime
59 | val basePath = "testFSShell" + timestamp
60 |
61 | val base = "snackfs://localhost:9000/test" + timestamp + "/"
62 |
63 | val testingDir = base + "testFSShell/"
64 | val projectHome = "/snackfs/src/" //to be set
65 |
66 | val listCommand = snackFSCommands ++ Seq("-lsr", base)
67 |
68 | //mkdir
69 | it should "make a new directory" in {
70 | val command = snackFSCommands ++ Seq("-mkdir", testingDir)
71 | executeAndGetOutput(command)
72 | val listoutPut = executeAndGetOutput(listCommand)
73 | listoutPut must include("/testFSShell")
74 | }
75 |
76 | it should "not make a new directory with the name of an existing one" in {
77 | val command = snackFSCommands ++ Seq("-mkdir", testingDir)
78 | val exception = intercept[IOException] {
79 | executeAndGetOutput(command)
80 | }
81 | val errMsg = "mkdir: cannot create directory"
82 | exception.getMessage must startWith(errMsg)
83 | }
84 |
85 | //copyFromLocal
86 | it should "copy a file into the filesystem using copyFromLocal" in {
87 | val source = projectHome + "test/resources/small.txt"
88 | val command = snackFSCommands ++ Seq("-copyFromLocal", source, testingDir)
89 | executeAndGetOutput(command)
90 |
91 | val listoutPut = executeAndGetOutput(listCommand)
92 | listoutPut must include("small.txt")
93 | }
94 |
95 | it should "not overwrite a file into the filesystem using copyFromLocal" in {
96 | val source = projectHome + "test/resources/small.txt"
97 | val command = snackFSCommands ++ Seq("-copyFromLocal", source, testingDir)
98 | val exception = intercept[IOException] {
99 | executeAndGetOutput(command)
100 | }
101 | exception.getMessage must include("already exists")
102 | }
103 |
104 | //copyToLocal
105 | it should "copy a file from the filesystem using copyToLocal" in {
106 | val destination = projectHome + "test/resources/" + basePath + "/TestSmall.txt"
107 | val source = testingDir + "small.txt"
108 | val command = snackFSCommands ++ Seq("-copyToLocal", source, destination)
109 | executeAndGetOutput(command)
110 |
111 | val copiedFile = new File(destination)
112 | copiedFile.exists() must be(isTrue)
113 | }
114 |
115 | it should "not overwrite a file using copyToLocal" in {
116 | val destination = projectHome + "test/resources/small.txt"
117 | val source = testingDir + "small.txt"
118 | val command = snackFSCommands ++ Seq("-copyToLocal", source, destination)
119 | val exception = intercept[IOException] {
120 | executeAndGetOutput(command)
121 | }
122 | exception.getMessage must include("already exists")
123 | }
124 |
125 | //get
126 | it should "copy a file from the filesystem using get" in {
127 | val destination = projectHome + "test/resources/" + basePath + "/TestGetSmall.txt"
128 | val source = testingDir + "small.txt"
129 | val command = snackFSCommands ++ Seq("-copyToLocal", source, destination)
130 | executeAndGetOutput(command)
131 |
132 | val copiedFile = new File(destination)
133 | copiedFile.exists() must be(isTrue)
134 | }
135 |
136 | //cat
137 | it should "print file content" in {
138 | val source = projectHome + "test/resources/vsmall.txt"
139 | val writeCommand = snackFSCommands ++ Seq("-copyFromLocal", source, testingDir)
140 | executeAndGetOutput(writeCommand)
141 | val readCommand = snackFSCommands ++ Seq("-cat", testingDir + "/vsmall.txt")
142 | val output = executeAndGetOutput(readCommand)
143 | val fileContent = IOUtils.readLines(Source.fromFile(new File(source)).bufferedReader()).toString
144 | output must be(fileContent)
145 | }
146 |
147 | //cp
148 | it should "copy all files from a directory into another" in {
149 | val destName = "testCpCommand"
150 | val destination = base + destName + "/"
151 | val source = testingDir
152 | val command = snackFSCommands ++ Seq("-cp", source, destination)
153 | executeAndGetOutput(command)
154 | val listoutPut = executeAndGetOutput(listCommand)
155 | listoutPut must include("/" + destName + "/small.txt")
156 | listoutPut must include("/" + destName + "/vsmall.txt")
157 | }
158 |
159 | //du
160 | it should "display aggregate length of files in a directory" in {
161 | val command = snackFSCommands ++ Seq("-du", base)
162 | val output = executeAndGetOutput(command)
163 | output must include(base + "testFSShell")
164 | output must include(base + "testCpCommand")
165 | output must startWith("[Found 2 items, 598419")
166 | }
167 |
168 | it should "display aggregate length of file" in {
169 | val command = snackFSCommands ++ Seq("-du", testingDir + "vsmall.txt")
170 | val output = executeAndGetOutput(command)
171 | output must startWith("[Found 1 items, 623 ")
172 | output must endWith("/testFSShell/vsmall.txt]")
173 | }
174 |
175 | //dus
176 | it should "display summary of file lengths" in {
177 | val command = snackFSCommands ++ Seq("-dus", base)
178 | val output = executeAndGetOutput(command)
179 | output must include("/test" + timestamp)
180 | output must include("1196838")
181 | }
182 |
183 | //ls
184 | it should "list children of directory" in {
185 | val command = snackFSCommands ++ Seq("-ls", base)
186 | val output = executeAndGetOutput(command)
187 | output must startWith("[Found 2 items,")
188 | output must include("/testFSShell")
189 | output must include("/testCpCommand")
190 | }
191 |
192 | it should "list stats of a file" in {
193 | val command = snackFSCommands ++ Seq("-ls", testingDir + "vsmall.txt")
194 | val output = executeAndGetOutput(command)
195 | output must startWith("[Found 1 items,")
196 | output must include("/testFSShell/vsmall.txt")
197 | }
198 |
199 | //lsr
200 | it should "list children of directory recursive" in {
201 | val output = executeAndGetOutput(listCommand)
202 | output must include("/testFSShell")
203 | output must include("/testFSShell/small.txt")
204 | output must include("/testFSShell/vsmall.txt")
205 | output must include("/testCpCommand")
206 | output must include("/testCpCommand/small.txt")
207 | output must include("/testCpCommand/vsmall.txt")
208 | }
209 |
210 | /*//moveFromLocal -- docs and behaviour are contradicting
211 | it should "result in not implemented" in {
212 | val source = projectHome + "test/resources/small.txt"
213 | val command = hadoopFSCommands ++ Seq("-moveFromLocal", source, testingDir)
214 | val output = executeAndGetOutput(command)
215 | println(output)
216 | }*/
217 |
218 | it should "move a file" in {
219 | val source = testingDir + "small.txt"
220 | val destination = base + "small.txt"
221 | val command = snackFSCommands ++ Seq("-mv", source, destination)
222 | executeAndGetOutput(command)
223 | val output = executeAndGetOutput(listCommand)
224 | output must include("/testFSShell")
225 | output must not include "/testFSShell/small.txt"
226 | output must include("/small.txt")
227 | }
228 |
229 | //put (reading from stdin also works)
230 | it should "copy a file into the filesystem using put" in {
231 | val source = projectHome + "test/resources/vsmall.txt"
232 | val command = snackFSCommands ++ Seq("-put", source, base)
233 | executeAndGetOutput(command)
234 | val listOutPut = executeAndGetOutput(listCommand)
235 | listOutPut must include("/vsmall.txt")
236 | }
237 |
238 | it should "copy multiple files into the filesystem using put" in {
239 | val source1 = projectHome + "test/resources/small.txt"
240 | val source2 = projectHome + "test/resources/vsmall.txt"
241 | val destination = base + "testPutCommand/"
242 | val mkdirCommand = snackFSCommands ++ Seq("-mkdir", destination)
243 | executeAndGetOutput(mkdirCommand)
244 | val command = snackFSCommands ++ Seq("-put", source1, source2, destination)
245 | executeAndGetOutput(command)
246 | val listOutPut = executeAndGetOutput(listCommand)
247 | listOutPut must include("/testPutCommand/vsmall.txt")
248 | listOutPut must include("/testPutCommand/small.txt")
249 | }
250 |
251 | //stat
252 | it should "display stat" in {
253 | val command = snackFSCommands ++ Seq("-stat", base)
254 | val output = executeAndGetOutput(command)
255 | output must not be "[]"
256 | }
257 |
258 | //tail
259 | it should "display last KB of a file" in {
260 | val readCommand = snackFSCommands ++ Seq("-tail", base + "/vsmall.txt")
261 | val output = executeAndGetOutput(readCommand)
262 | output.length must not be 0
263 | }
264 |
265 | //touchz
266 | it should "create a file of zero length" in {
267 | val command = snackFSCommands ++ Seq("-touchz", base + "emptyFile.txt")
268 | executeAndGetOutput(command)
269 | val listOutPut = executeAndGetOutput(listCommand)
270 | listOutPut must include("/emptyFile.txt")
271 | }
272 |
273 | it should "move multiple files" in {
274 | val source1 = base + "small.txt"
275 | val source2 = base + "vsmall.txt"
276 | val destination = base + "testMvCommand/"
277 | val mkdirCommand = snackFSCommands ++ Seq("-mkdir", destination)
278 | executeAndGetOutput(mkdirCommand)
279 | val command = snackFSCommands ++ Seq("-mv", source1, source2, destination)
280 | executeAndGetOutput(command)
281 | val listOutPut = executeAndGetOutput(listCommand)
282 | listOutPut must include("/testMvCommand/small.txt")
283 | listOutPut must include("/testMvCommand/vsmall.txt")
284 | }
285 |
286 | //rm
287 | it should "remove a file" in {
288 | val command = snackFSCommands ++ Seq("-rm", testingDir + "vsmall.txt")
289 | val output = executeAndGetOutput(command)
290 | output must startWith("[Deleted")
291 | output must include("/testFSShell/vsmall.txt")
292 | }
293 |
294 | //rmr
295 | it should "remove a directory and all its contents" in {
296 | val command = snackFSCommands ++ Seq("-rmr", base + "testPutCommand/")
297 | val output = executeAndGetOutput(command)
298 | output must startWith("[Deleted")
299 | output must include("/testPutCommand")
300 | }
301 |
302 | override def afterAll() = {
303 | //remove files generated in resources
304 | val rmdirCommand = snackFSCommands ++ Seq("-rmr", projectHome + "test/resources/" + basePath)
305 | executeAndGetOutput(rmdirCommand)
306 |
307 | //remove the test directory
308 | val rmTestCommand = snackFSCommands ++ Seq("-rmr", base)
309 | executeAndGetOutput(rmTestCommand)
310 | }
311 |
312 | override def beforeAll() = {
313 | //make directory in resources for test
314 | val mkdirCommand = snackFSCommands ++ Seq("-mkdir", projectHome + "test/resources/" + basePath)
315 | executeAndGetOutput(mkdirCommand)
316 | }
317 | }
318 |
--------------------------------------------------------------------------------
/src/main/resources/core-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3 | 
4 | <configuration>
5 |   <property>
6 |     <name>snackfs.cassandra.host</name>
7 |     <value>127.0.0.1</value>
8 |   </property>
9 |   <property>
10 |     <name>snackfs.cassandra.port</name>
11 |     <value>9160</value>
12 |   </property>
13 |   <property>
14 |     <name>snackfs.consistencyLevel.write</name>
15 |     <value>QUORUM</value>
16 |   </property>
17 |   <property>
18 |     <name>snackfs.consistencyLevel.read</name>
19 |     <value>QUORUM</value>
20 |   </property>
21 |   <property>
22 |     <name>snackfs.replicationFactor</name>
23 |     <value>3</value>
24 |   </property>
25 |   <property>
26 |     <name>snackfs.replicationStrategy</name>
27 |     <value>org.apache.cassandra.locator.SimpleStrategy</value>
28 |   </property>
29 |   <property>
30 |     <name>fs.snackfs.impl</name>
31 |     <value>com.tuplejump.snackfs.SnackFS</value>
32 |   </property>
33 |   <property>
34 |     <name>snackfs.keyspace</name>
35 |     <value>snackfs</value>
36 |   </property>
37 |   <property>
38 |     <name>snackfs.subblock.size</name>
39 |     <value>8388608</value>
40 |   </property>
41 |   <property>
42 |     <name>snackfs.block.size</name>
43 |     <value>134217728</value>
44 |   </property>
45 |   <property>
46 |     <name>fs.default.name</name>
47 |     <value>file:///</value>
48 |   </property>
49 | </configuration>
50 | 
51 | 
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/SnackFS.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs
20 |
21 | import java.net.URI
22 | import org.apache.hadoop.fs.permission.FsPermission
23 | import org.apache.hadoop.util.Progressable
24 | import org.apache.hadoop.conf.Configuration
25 | import scala.concurrent.Await
26 | import scala.concurrent.duration._
27 |
28 | import org.apache.hadoop.fs._
29 | import com.twitter.logging.Logger
30 | import java.util.UUID
31 | import com.tuplejump.snackfs.api.model._
32 | import com.tuplejump.snackfs.fs.model.BlockMeta
33 | import com.tuplejump.snackfs.cassandra.store.ThriftStore
34 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
35 | import com.tuplejump.snackfs.cassandra.model.SnackFSConfiguration
36 |
37 | case class SnackFS() extends FileSystem {
38 |
39 | private lazy val log = Logger.get(getClass)
40 |
41 | private var systemURI: URI = null
42 | private var currentDirectory: Path = null
43 | private var subBlockSize: Long = 0L
44 |
45 | private var atMost: FiniteDuration = null
46 | private var store: FileSystemStore = null
47 | private var customConfiguration: SnackFSConfiguration = _
48 |
49 | val processId = UUID.randomUUID()
50 |
51 | override def initialize(uri: URI, configuration: Configuration) = {
52 | log.debug("Initializing SnackFs")
53 | super.initialize(uri, configuration)
54 | setConf(configuration)
55 |
56 | systemURI = URI.create(uri.getScheme + "://" + uri.getAuthority)
57 |
58 | val directory = new Path("/user", System.getProperty("user.name"))
59 | currentDirectory = makeQualified(directory)
60 |
61 | log.debug("generating required configuration")
62 | customConfiguration = SnackFSConfiguration.get(configuration)
63 |
64 | store = new ThriftStore(customConfiguration)
65 | atMost = customConfiguration.atMost
66 | Await.ready(store.createKeyspace, atMost)
67 | store.init
68 |
69 | log.debug("creating base directory")
70 | mkdirs(new Path("/"))
71 |
72 | subBlockSize = customConfiguration.subBlockSize
73 | }
74 |
75 | private def makeAbsolute(path: Path): Path = {
76 | if (path.isAbsolute) path else new Path(currentDirectory, path)
77 | }
78 |
79 | def getUri: URI = systemURI
80 |
81 | def setWorkingDirectory(newDir: Path) = {
82 | currentDirectory = makeAbsolute(newDir)
83 | }
84 |
85 | def getWorkingDirectory: Path = currentDirectory
86 |
87 | def open(path: Path, bufferSize: Int): FSDataInputStream = {
88 | OpenFileCommand(store, path, bufferSize, atMost)
89 | }
90 |
91 | def mkdirs(path: Path, permission: FsPermission): Boolean = {
92 | val absolutePath = makeAbsolute(path)
93 | MakeDirectoryCommand(store, absolutePath, permission, atMost)
94 | }
95 |
96 | def create(filePath: Path, permission: FsPermission, overwrite: Boolean,
97 | bufferSize: Int, replication: Short, blockSize: Long,
98 | progress: Progressable): FSDataOutputStream = {
99 |
100 | CreateFileCommand(store, filePath, permission, overwrite, bufferSize, replication,
101 | blockSize, progress, processId, statistics, subBlockSize, atMost)
102 | }
103 |
104 | override def getDefaultBlockSize: Long = {
105 | customConfiguration.blockSize
106 | }
107 |
108 | def append(path: Path, bufferSize: Int, progress: Progressable): FSDataOutputStream = {
109 | AppendFileCommand(store, path, bufferSize, progress, atMost)
110 | }
111 |
112 | def getFileStatus(path: Path): FileStatus = {
113 | FileStatusCommand(store, path, atMost)
114 | }
115 |
116 | def delete(path: Path, isRecursive: Boolean): Boolean = {
117 | val absolutePath = makeAbsolute(path)
118 | DeleteCommand(store, absolutePath, isRecursive, atMost)
119 | }
120 |
121 | def rename(src: Path, dst: Path): Boolean = {
122 | val srcPath = makeAbsolute(src)
123 | val dstPath = makeAbsolute(dst)
124 | RenameCommand(store, srcPath, dstPath, atMost)
125 | }
126 |
127 |
128 | def listStatus(path: Path): Array[FileStatus] = {
129 | val absolutePath = makeAbsolute(path)
130 | ListCommand(store, absolutePath, atMost)
131 | }
132 |
133 | def delete(p1: Path): Boolean = delete(p1, isRecursive = false)
134 |
135 | def getFileBlockLocations(path: Path, start: Long, len: Long): Array[BlockLocation] = {
136 | log.debug("fetching block locations for %s", path)
137 | val blocks: Map[BlockMeta, List[String]] = Await.result(store.getBlockLocations(path), atMost)
138 | val locs = blocks.filterNot(x => x._1.offset + x._1.length < start)
139 | val locsMap = locs.map {
140 | case (b, ips) =>
141 | val bl = new BlockLocation()
142 | bl.setHosts(ips.toArray)
143 | bl.setNames(ips.map(i => "%s:%s".format(i, customConfiguration.CassandraPort)).toArray)
144 | bl.setOffset(b.offset)
145 | bl.setLength(b.length)
146 | bl
147 | }
148 | locsMap.toArray
149 | }
150 |
151 | override def getFileBlockLocations(file: FileStatus, start: Long, len: Long): Array[BlockLocation] = {
152 | getFileBlockLocations(file.getPath, start, len)
153 | }
154 | }
155 |
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/api/model/AppendFileCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.api.model
20 |
21 | import scala.concurrent.duration.FiniteDuration
22 | import java.io.IOException
23 | import org.apache.hadoop.fs.{FSDataOutputStream, Path}
24 | import org.apache.hadoop.util.Progressable
25 | import com.twitter.logging.Logger
26 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
27 | import com.tuplejump.snackfs.api.partial.Command
28 |
29 | object AppendFileCommand extends Command {
30 | private lazy val log = Logger.get(getClass)
31 |
32 | def apply(store: FileSystemStore,
33 | filePath: Path,
34 | bufferSize: Int,
35 | progress: Progressable,
36 | atMost: FiniteDuration): FSDataOutputStream = {
37 |
38 | val ex = new IOException("Appending to existing file is not supported.")
39 | log.error(ex, "Failed to append to file %s as it is not supported", filePath)
40 | throw ex
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/api/model/CreateFileCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.api.model
20 |
21 | import scala.concurrent.Await
22 | import scala.util.{Failure, Success, Try}
23 | import java.io.IOException
24 | import org.apache.hadoop.fs.permission.FsPermission
25 | import com.tuplejump.snackfs.fs.model.{FileType, INode}
26 | import com.tuplejump.snackfs.fs.stream.FileSystemOutputStream
27 | import org.apache.hadoop.fs.{Path, FSDataOutputStream}
28 | import scala.concurrent.duration.FiniteDuration
29 | import org.apache.hadoop.util.Progressable
30 | import java.util.UUID
31 | import org.apache.hadoop.fs.FileSystem.Statistics
32 | import com.twitter.logging.Logger
33 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
34 | import com.tuplejump.snackfs.api.partial.Command
35 |
36 | object CreateFileCommand extends Command {
37 |
38 | private lazy val log = Logger.get(getClass)
39 |
40 | def apply(store: FileSystemStore,
41 | filePath: Path,
42 | filePermission: FsPermission,
43 | overwrite: Boolean,
44 | bufferSize: Int,
45 | replication: Short,
46 | blockSize: Long,
47 | progress: Progressable,
48 | processId: UUID,
49 | statistics: Statistics,
50 | subBlockSize: Long,
51 | atMost: FiniteDuration): FSDataOutputStream = {
52 |
53 | val isCreatePossible = Await.result(store.acquireFileLock(filePath, processId), atMost)
54 | if (isCreatePossible) {
55 |
56 | try {
57 | val mayBeFile = Try(Await.result(store.retrieveINode(filePath), atMost))
58 | mayBeFile match {
59 |
60 | case Success(file: INode) =>
61 | if (file.isFile && !overwrite) {
62 | val ex = new IOException("File exists and cannot be overwritten")
63 | log.error(ex, "Failed to create file %s as it exists and cannot be overwritten", filePath)
64 | throw ex
65 |
66 | } else if (file.isDirectory) {
67 | val ex = new IOException("Directory with same name exists")
68 | log.error(ex, "Failed to create file %s as a directory with that name exists", filePath)
69 | throw ex
70 | }
71 |
72 | case Failure(e: Exception) =>
73 | val parentPath = filePath.getParent
74 |
75 | if (parentPath != null) {
76 | MakeDirectoryCommand(store, parentPath, filePermission, atMost)
77 | }
78 | }
79 |
80 | log.debug("creating file %s", filePath)
81 | val user = System.getProperty("user.name")
82 | val permissions = FsPermission.getDefault
83 | val timestamp = System.currentTimeMillis()
84 | val iNode = INode(user, user, permissions, FileType.FILE, List(), timestamp)
85 | Await.ready(store.storeINode(filePath, iNode), atMost)
86 |
87 | val fileStream = new FileSystemOutputStream(store, filePath, blockSize, subBlockSize, bufferSize, atMost)
88 | val fileDataStream = new FSDataOutputStream(fileStream, statistics)
89 |
90 | fileDataStream
91 | }
92 |
93 | finally {
94 | store.releaseFileLock(filePath)
95 | }
96 | }
97 | else {
98 | val ex = new IOException("Acquire lock failure")
99 | log.error(ex, "Could not get lock on file %s", filePath)
100 | throw ex
101 | }
102 | }
103 |
104 | }
105 |
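106 | // Illustrative usage sketch (not a prescribed flow; `store`, `config` and the literal values
107 | // below are placeholders for an initialized FileSystemStore and SnackFSConfiguration):
108 | //
109 | //   val out: FSDataOutputStream = CreateFileCommand(
110 | //     store, new Path("/tmp/sample.txt"), FsPermission.getDefault,
111 | //     overwrite = true, bufferSize = 4096, replication = 1,
112 | //     blockSize = config.blockSize, progress = null, processId = UUID.randomUUID(),
113 | //     statistics = null, subBlockSize = config.subBlockSize, atMost = config.atMost)
114 | //   out.write("hello snackfs".getBytes)
115 | //   out.close()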
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/api/model/DeleteCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.tuplejump.snackfs.api.model
19 |
20 | import scala.util.{Failure, Success, Try}
21 | import scala.concurrent.Await
22 | import com.tuplejump.snackfs.fs.model.INode
23 | import java.io.IOException
24 | import scala.concurrent.duration.FiniteDuration
25 | import org.apache.hadoop.fs.Path
26 | import com.twitter.logging.Logger
27 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
28 | import com.tuplejump.snackfs.api.partial.Command
29 |
30 |
31 | object DeleteCommand extends Command {
32 | private lazy val log = Logger.get(getClass)
33 |
34 | def apply(store: FileSystemStore,
35 | srcPath: Path,
36 | isRecursive: Boolean,
37 | atMost: FiniteDuration): Boolean = {
38 |
39 | val absolutePath = srcPath
40 | val mayBeSrc = Try(Await.result(store.retrieveINode(absolutePath), atMost))
41 | var result = true
42 |
43 | mayBeSrc match {
44 |
45 | case Success(src: INode) =>
46 | if (src.isFile) {
47 | log.debug("deleting file %s", srcPath)
48 | Await.ready(store.deleteINode(absolutePath), atMost)
49 | Await.ready(store.deleteBlocks(src), atMost)
50 |
51 | } else {
52 | val contents = ListCommand(store, srcPath, atMost)
53 |
54 | if (contents.length == 0) {
55 | log.debug("deleting directory %s", srcPath)
56 | Await.ready(store.deleteINode(absolutePath), atMost)
57 |
58 | } else if (!isRecursive) {
59 | val ex = new IOException("Directory is not empty")
60 | log.error(ex, "Failed to delete directory %s as it is not empty", srcPath)
61 | throw ex
62 |
63 | } else {
64 | log.debug("deleting directory %s and all its contents", srcPath)
65 | result = contents.map(p => DeleteCommand(store, p.getPath, isRecursive, atMost)).reduce(_ && _)
66 | Await.ready(store.deleteINode(absolutePath), atMost)
67 | }
68 | }
69 |
70 | case Failure(e) =>
71 | log.debug("failed to delete %s, as it doesn't exist", srcPath)
72 | result = false
73 | }
74 | result
75 | }
76 |
77 | }
78 |
79 |
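80 | // Illustrative usage sketch (assumes an initialized FileSystemStore `store` and a wait
81 | // duration `atMost`; the paths are placeholders):
82 | //
83 | //   DeleteCommand(store, new Path("/data/old.log"), isRecursive = false, atMost) // single file
84 | //   DeleteCommand(store, new Path("/data"), isRecursive = true, atMost)          // whole tree
85 | //
86 | // Deleting a non-empty directory with isRecursive = false throws IOException("Directory is not empty").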
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/api/model/FileStatusCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.api.model
20 |
21 | import scala.concurrent.duration.FiniteDuration
22 | import scala.util.{Failure, Success, Try}
23 | import scala.concurrent.Await
24 | import com.tuplejump.snackfs.fs.model.INode
25 | import java.io.FileNotFoundException
26 | import com.twitter.logging.Logger
27 | import org.apache.hadoop.fs.{FileStatus, Path}
28 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
29 | import com.tuplejump.snackfs.api.partial.Command
30 |
31 | object FileStatusCommand extends Command {
32 | private lazy val log = Logger.get(getClass)
33 |
34 | def apply(store: FileSystemStore,
35 | filePath: Path,
36 | atMost: FiniteDuration): FileStatus = {
37 |
38 | log.debug("getting status for %s", filePath)
39 | val maybeFile = Try(Await.result(store.retrieveINode(filePath), atMost))
40 |
41 | maybeFile match {
42 | case Success(file: INode) => SnackFileStatus(file, filePath)
43 | case Failure(e) =>
44 | val ex = new FileNotFoundException("No such file exists")
45 | log.error(ex, "Failed to get status for %s as it doesn't exist", filePath)
46 | throw ex
47 | }
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/api/model/ListCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.tuplejump.snackfs.api.model
19 |
20 | import scala.concurrent.duration.FiniteDuration
21 | import scala.util.{Failure, Success, Try}
22 | import scala.concurrent.Await
23 | import com.tuplejump.snackfs.fs.model.INode
24 | import java.io.FileNotFoundException
25 | import org.apache.hadoop.fs.{FileStatus, Path}
26 | import com.twitter.logging.Logger
27 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
28 | import com.tuplejump.snackfs.api.partial.Command
29 |
30 | object ListCommand extends Command {
31 | private lazy val log = Logger.get(getClass)
32 |
33 | def apply(store: FileSystemStore,
34 | path: Path,
35 | atMost: FiniteDuration): Array[FileStatus] = {
36 |
37 | var result: Array[FileStatus] = Array()
38 | val absolutePath = path
39 | val mayBeFile = Try(Await.result(store.retrieveINode(absolutePath), atMost))
40 |
41 | mayBeFile match {
42 | case Success(file: INode) =>
43 | if (file.isFile) {
44 | log.debug("fetching file status for %s", absolutePath)
45 | val fileStatus = SnackFileStatus(file, absolutePath)
46 | result = Array(fileStatus)
47 |
48 | } else {
49 | log.debug("fetching status for %s", absolutePath)
50 | val subPaths = Await.result(store.fetchSubPaths(absolutePath, isDeepFetch = false), atMost)
51 | result = subPaths.map(p => FileStatusCommand(store, p, atMost)).toArray
52 | }
53 |
54 | case Failure(e) =>
55 | val ex = new FileNotFoundException("No such file exists")
56 | log.error(ex, "Failed to list status of %s as it doesn't exist", path)
57 | throw ex
58 | }
59 | result
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/api/model/MakeDirectoryCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.api.model
20 |
21 | import org.apache.hadoop.fs.Path
22 | import org.apache.hadoop.fs.permission.FsPermission
23 | import scala.util.{Failure, Success, Try}
24 | import scala.concurrent.Await
25 | import com.tuplejump.snackfs.fs.model.{FileType, INode}
26 | import com.twitter.logging.Logger
27 | import scala.concurrent.duration.FiniteDuration
28 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
29 | import com.tuplejump.snackfs.api.partial.Command
30 |
31 | object MakeDirectoryCommand extends Command {
32 | private lazy val log = Logger.get(getClass)
33 |
34 | private def mkdir(store: FileSystemStore,
35 | filePath: Path,
36 | filePermission: FsPermission,
37 | atMost: FiniteDuration): Boolean = {
38 |
39 | val mayBeFile = Try(Await.result(store.retrieveINode(filePath), atMost))
40 | var result = true
41 |
42 | mayBeFile match {
43 | case Success(file: INode) =>
44 | if (file.isFile) {
45 | log.debug("Failed to make a directory for path %s since it's a file", filePath)
46 | result = false
47 | }
48 |
49 | case Failure(e: Exception) =>
50 | val user = System.getProperty("user.name")
51 | val timestamp = System.currentTimeMillis()
52 | val iNode = INode(user, user, filePermission, FileType.DIRECTORY, null, timestamp)
53 | log.debug("Creating directory for path %s", filePath)
54 | Await.ready(store.storeINode(filePath, iNode), atMost)
55 | }
56 | result
57 | }
58 |
59 | def apply(store: FileSystemStore,
60 | filePath: Path,
61 | filePermission: FsPermission,
62 | atMost: FiniteDuration) = {
63 |
64 | var absolutePath = filePath
65 | var paths = List[Path]()
66 | var result = true
67 |
68 | while (absolutePath != null) {
69 | paths = paths :+ absolutePath
70 | absolutePath = absolutePath.getParent
71 | }
72 |
73 | log.debug("Creating directories for path %s", filePath)
74 | result = paths.map(path => mkdir(store, path, filePermission, atMost)).reduce(_ && _)
75 | result
76 | }
77 |
78 | }
79 |
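80 | // Illustrative sketch of the ancestor walk in apply (assumes a FileSystemStore `store` and a
81 | // wait duration `atMost`; paths are placeholders): for filePath = /a/b/c the loop above collects
82 | // List(/a/b/c, /a/b, /a, /) and mkdir is invoked on each, creating any missing ancestor; the
83 | // result is false if a file already occupies any of those paths.
84 | //
85 | //   MakeDirectoryCommand(store, new Path("/a/b/c"), FsPermission.getDefault, atMost)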
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/api/model/OpenFileCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.api.model
20 |
21 | import scala.util.{Failure, Success, Try}
22 | import scala.concurrent.Await
23 | import java.io.IOException
24 | import org.apache.hadoop.fs.{Path, FSDataInputStream}
25 | import com.tuplejump.snackfs.fs.stream.FileSystemInputStream
26 | import scala.concurrent.duration.FiniteDuration
27 | import com.twitter.logging.Logger
28 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
29 | import com.tuplejump.snackfs.fs.model.INode
30 | import com.tuplejump.snackfs.api.partial.Command
31 |
32 | object OpenFileCommand extends Command {
33 | private lazy val log = Logger.get(getClass)
34 |
35 | //TODO check how bufferSize is applied
36 | def apply(store: FileSystemStore,
37 | filePath: Path,
38 | bufferSize: Int,
39 | atMost: FiniteDuration): FSDataInputStream = {
40 |
41 | val mayBeFile = Try(Await.result(store.retrieveINode(filePath), atMost))
42 |
43 | mayBeFile match {
44 | case Success(file: INode) =>
45 | if (file.isDirectory) {
46 | val ex = new IOException("Path %s is a directory.".format(filePath))
47 | log.error(ex, "Failed to open file %s as a directory exists at that path", filePath)
48 | throw ex
49 |
50 | } else {
51 | log.debug("opening file %s", filePath)
52 | val fileStream = new FSDataInputStream(FileSystemInputStream(store, filePath))
53 | fileStream
54 | }
55 |
56 | case Failure(e: Exception) =>
57 | val ex = new IOException("No such file.")
58 | log.error(ex, "Failed to open file %s as it doesn't exist", filePath)
59 | throw ex
60 | }
61 | }
62 | }
63 |
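64 | // Illustrative usage sketch (assumes an initialized FileSystemStore `store` and a wait
65 | // duration `atMost`; the path is a placeholder):
66 | //
67 | //   val in: FSDataInputStream = OpenFileCommand(store, new Path("/tmp/sample.txt"), bufferSize = 4096, atMost)
68 | //   val firstByte = in.read()
69 | //   in.close()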
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/api/model/RenameCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.api.model
20 |
21 | import org.apache.hadoop.fs.Path
22 | import com.tuplejump.snackfs.fs.model.INode
23 | import scala.concurrent.Await
24 | import scala.util.{Success, Failure, Try}
25 | import java.io.IOException
26 | import com.twitter.logging.Logger
27 | import scala.concurrent.duration.FiniteDuration
28 | import org.apache.hadoop.fs.permission.FsPermission
29 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
30 | import com.tuplejump.snackfs.api.partial.Command
31 |
32 | object RenameCommand extends Command {
33 | private lazy val log = Logger.get(getClass)
34 |
35 | private def renameINode(store: FileSystemStore, originalPath: Path, updatedPath: Path, iNode: INode, atMost: FiniteDuration) = {
36 | log.debug("deleting existing iNode %s", originalPath)
37 | Await.ready(store.deleteINode(originalPath), atMost)
38 | log.debug("storing iNode %s", updatedPath)
39 | Await.ready(store.storeINode(updatedPath, iNode), atMost)
40 | }
41 |
42 | private def renameDir(store: FileSystemStore, src: Path, dst: Path, atMost: FiniteDuration) = {
43 | MakeDirectoryCommand(store, dst, FsPermission.getDefault, atMost)
44 | val contents = Await.result(store.fetchSubPaths(src, isDeepFetch = true), atMost)
45 | if (contents.size > 0) {
46 | log.debug("renaming all child nodes %s", contents)
47 | val srcPathString = src.toUri.getPath
48 | val dstPathString = dst.toUri.getPath
49 | contents.map(path => {
50 | val actualINode = Await.result(store.retrieveINode(path), atMost)
51 | val oldPathString = path.toUri.getPath
52 | val changedPathString = oldPathString.replaceFirst(srcPathString, dstPathString)
53 | val changedPath = new Path(changedPathString)
54 | log.debug("renaming child node %s to %s", path, changedPath)
55 | MakeDirectoryCommand(store, changedPath.getParent, FsPermission.getDefault, atMost)
56 | renameINode(store, path, changedPath, actualINode, atMost)
57 | })
58 | }
59 | }
60 |
61 | /*
62 | * Renames Path src to Path dst.
63 | * 1. Fails if src doesn't exist.
64 | * 2. Fails if dst already exists.
65 | * 3. Fails if the parent of dst does not exist or is a file.
66 | *
67 | */
68 | def apply(store: FileSystemStore, srcPath: Path, dstPath: Path, atMost: FiniteDuration): Boolean = {
69 | if (srcPath != dstPath) {
70 | val mayBeSrc = Try(Await.result(store.retrieveINode(srcPath), atMost))
71 | mayBeSrc match {
72 | case Failure(e1) =>
73 | val ex = new IOException("No such file or directory: %s".format(srcPath))
74 | log.error(ex, "Failed to rename %s as it doesn't exist", srcPath)
75 | throw ex
76 | case Success(src: INode) =>
77 | val mayBeDst = Try(Await.result(store.retrieveINode(dstPath), atMost))
78 | mayBeDst match {
79 | case Success(dst: INode) =>
80 | val ex = new IOException("Destination already exists")
81 | log.error(ex, "Failed to rename %s as the given destination %s exists", srcPath, dstPath)
82 | throw ex
83 | case Failure(e) =>
84 | log.debug("%s does not exist. checking if %s exists", dstPath, dstPath.getParent)
85 | val maybeDstParent = Try(Await.result(store.retrieveINode(dstPath.getParent), atMost))
86 | maybeDstParent match {
87 | case Failure(e2) =>
88 | val ex = new IOException("Destination directory %s does not exist.".format(dstPath.getParent))
89 | log.error(ex, "Failed to rename %s as destination %s doesn't exist", srcPath, dstPath.getParent)
90 | throw ex
91 | case Success(dstParent: INode) =>
92 | if (dstParent.isFile) {
93 | val ex = new IOException("Parent of the destination is a file.")
94 | log.error(ex, "Failed to rename directory %s as given destination's parent %s is a file", srcPath, dstPath.getParent)
95 | throw ex
96 | }
97 | if (src.isDirectory) {
98 | log.debug("renaming directory %s to %s", srcPath, dstPath)
99 | renameDir(store, srcPath, dstPath, atMost)
100 | }
101 | renameINode(store, srcPath, dstPath, src, atMost)
102 | }
103 | }
104 | }
105 | }
106 | true
107 | }
108 | }
109 |
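110 | // Illustrative sketch of renameDir (assumes a FileSystemStore `store` and a wait duration
111 | // `atMost`; paths are placeholders): renaming /user/old to /user/new rewrites each descendant
112 | // path by replacing the source prefix once, e.g. /user/old/part-0 becomes /user/new/part-0;
113 | // each descendant INode is stored under its new path and deleted from the old one.
114 | //
115 | //   RenameCommand(store, new Path("/user/old"), new Path("/user/new"), atMost)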
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/api/model/SnackFileStatus.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.api.model
20 |
21 | import com.tuplejump.snackfs.fs.model.INode
22 | import org.apache.hadoop.fs.{FileStatus, Path}
23 |
24 | case class SnackFileStatus(iNode: INode, path: Path) extends FileStatus(
25 | SnackFileStatusHelper.length(iNode), //length
26 | iNode.isDirectory, //isDir
27 | 0, //block_replication
28 | SnackFileStatusHelper.blockSize(iNode), //blocksize
29 | iNode.timestamp, //modification_time
30 | 0L, //access_time
31 | iNode.permission,
32 | iNode.user,
33 | iNode.group,
34 | path)
35 |
36 | object SnackFileStatusHelper {
37 | def length(iNode: INode): Long = {
38 | var result = 0L
39 | if (iNode.isFile) {
40 | result = iNode.blocks.map(_.length).sum
41 | }
42 | result
43 | }
44 |
45 | def blockSize(iNode: INode): Long = {
46 | var result = 0L
47 | if (iNode.blocks != null && iNode.blocks.length > 0) {
48 | result = iNode.blocks(0).length
49 | }
50 | result
51 | }
52 | }
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/api/partial/Command.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.tuplejump.snackfs.api.partial
19 |
20 | trait Command {
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/cassandra/model/ClientPoolFactory.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 |
20 | package com.tuplejump.snackfs.cassandra.model
21 |
22 | import org.apache.commons.pool.BasePoolableObjectFactory
23 | import org.apache.thrift.async.TAsyncClientManager
24 | import org.apache.thrift.protocol.TBinaryProtocol
25 | import org.apache.cassandra.thrift.Cassandra.AsyncClient
26 | import org.apache.thrift.transport.TNonblockingSocket
27 | import scala.concurrent.Await
28 | import com.tuplejump.snackfs.util.AsyncUtil
29 | import org.apache.cassandra.thrift.Cassandra.AsyncClient.set_keyspace_call
30 | import scala.concurrent.duration._
31 |
32 | import com.twitter.logging.Logger
33 |
34 | class ClientPoolFactory(host: String, port: Int, keyspace: String) extends BasePoolableObjectFactory[ThriftClientAndSocket] {
35 |
36 | private lazy val log = Logger.get(getClass)
37 |
38 | private val clientManager = new TAsyncClientManager()
39 | private val protocolFactory = new TBinaryProtocol.Factory()
40 | private val clientFactory = new AsyncClient.Factory(clientManager, protocolFactory)
41 |
42 | def makeObject(): ThriftClientAndSocket = {
43 | val transport = new TNonblockingSocket(host, port)
44 | val client = clientFactory.getAsyncClient(transport)
45 | val x = Await.result(AsyncUtil.executeAsync[set_keyspace_call](client.set_keyspace(keyspace, _)), 10 seconds)
46 | try {
47 | x.getResult()
48 | log.debug("set keyspace %s for client", keyspace)
49 | ThriftClientAndSocket(client, transport)
50 | } catch {
51 | case e: Exception =>
52 | log.error(e, "failed to set keyspace %s for client ", keyspace)
53 | throw e
54 | }
55 | }
56 |
57 | override def destroyObject(obj: ThriftClientAndSocket) {
58 | obj.socket.close()
59 | super.destroyObject(obj)
60 | }
61 |
62 | def closePool() {
63 | clientManager.stop()
64 | }
65 | }
66 |
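67 | // Illustrative usage sketch, mirroring how ThriftStore.init wires this factory into a
68 | // commons-pool StackObjectPool (host, port and keyspace values are placeholders):
69 | //
70 | //   val pool = new StackObjectPool[ThriftClientAndSocket](
71 | //     new ClientPoolFactory("127.0.0.1", 9160, "snackfs"))
72 | //   val cs = pool.borrowObject()   // makeObject() sets the keyspace on a fresh async client
73 | //   try { /* use cs.client */ } finally { pool.returnObject(cs) }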
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/cassandra/model/GenericOpSuccess.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.cassandra.model
20 |
21 | case class GenericOpSuccess()
22 |
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/cassandra/model/Keyspace.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.cassandra.model
20 |
21 | case class Keyspace(name:String)
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/cassandra/model/SnackFSConfiguration.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 |
20 | package com.tuplejump.snackfs.cassandra.model
21 |
22 | import org.apache.hadoop.conf.Configuration
23 | import org.apache.cassandra.locator.SimpleStrategy
24 | import org.apache.cassandra.thrift.ConsistencyLevel
25 | import scala.concurrent.duration._
26 |
27 | case class SnackFSConfiguration(CassandraHost: String, CassandraPort: Int,
28 | readConsistencyLevel: ConsistencyLevel, writeConsistencyLevel: ConsistencyLevel,
29 | keySpace: String, blockSize: Long, subBlockSize: Long, atMost: FiniteDuration,
30 | replicationFactor: Int, replicationStrategy: String) {
31 | }
32 |
33 | object SnackFSConfiguration {
34 |
35 | private val CONSISTENCY_LEVEL = ConsistencyLevel.QUORUM
36 | private val REPLICATION_STRATEGY = classOf[SimpleStrategy].getCanonicalName
37 | private val KEYSPACE = "snackfs"
38 | private val HOST = "127.0.0.1"
39 | private val PORT: Int = 9160
40 | private val AT_MOST: Long = 10 * 1000
41 | private val SUB_BLOCK_SIZE: Long = 8 * 1024 * 1024 //8 MB
42 | private val BLOCK_SIZE: Long = 128 * 1024 * 1024 //128MB
43 | private val REPLICATION_FACTOR: Int = 3
44 |
45 | def get(userConf: Configuration): SnackFSConfiguration = {
46 | val cassandraHost = userConf.get("snackfs.cassandra.host")
47 | val host = optIfNull(cassandraHost, HOST)
48 |
49 | val port = userConf.getInt("snackfs.cassandra.port", PORT)
50 |
51 | val consistencyLevelWrite = userConf.get("snackfs.consistencyLevel.write")
52 | val writeLevel = getConsistencyLevel(consistencyLevelWrite)
53 |
54 | val consistencyLevelRead = userConf.get("snackfs.consistencyLevel.read")
55 | val readLevel = getConsistencyLevel(consistencyLevelRead)
56 |
57 | val keyspaceName: String = userConf.get("snackfs.keyspace")
58 | val keyspace = optIfNull(keyspaceName, KEYSPACE)
59 |
60 | val replicationFactor = userConf.getInt("snackfs.replicationFactor", REPLICATION_FACTOR)
61 |
62 | val strategy: String = userConf.get("snackfs.replicationStrategy")
63 | val replicationStrategy = optIfNull(strategy, REPLICATION_STRATEGY)
64 |
65 | val subBlockSize = userConf.getLong("snackfs.subblock.size", SUB_BLOCK_SIZE)
66 | val blockSize = userConf.getLong("snackfs.block.size", BLOCK_SIZE)
67 |
68 | val maxWaitDuration = userConf.getLong("snackfs.waitInterval", AT_MOST)
69 | val waitDuration = FiniteDuration(maxWaitDuration, MILLISECONDS)
70 |
71 | SnackFSConfiguration(host, port, readLevel, writeLevel, keyspace, blockSize,
72 | subBlockSize, waitDuration, replicationFactor, replicationStrategy)
73 | }
74 |
75 | private def getConsistencyLevel(level: String): ConsistencyLevel = {
76 | if (level != null) {
77 | ConsistencyLevel.valueOf(level)
78 | } else {
79 | CONSISTENCY_LEVEL
80 | }
81 | }
82 |
83 | private def optIfNull(valueToCheck: String, alternativeOption: String): String = {
84 | if (valueToCheck == null) {
85 | alternativeOption
86 | } else {
87 | valueToCheck
88 | }
89 | }
90 |
91 | }
92 |
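93 | // Illustrative configuration sketch (values are placeholders): any of the keys read above can
94 | // be set on the Hadoop Configuration (for example in core-site.xml) before calling get; keys
95 | // left unset fall back to the defaults defined above.
96 | //
97 | //   val conf = new Configuration()
98 | //   conf.set("snackfs.cassandra.host", "10.0.0.5")
99 | //   conf.setInt("snackfs.cassandra.port", 9160)
100 | //   conf.set("snackfs.consistencyLevel.write", "QUORUM")
101 | //   conf.setLong("snackfs.block.size", 64L * 1024 * 1024)
102 | //   val snackConf = SnackFSConfiguration.get(conf)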
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/cassandra/model/ThriftClientAndSocket.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.cassandra.model
20 |
21 | import org.apache.cassandra.thrift.Cassandra.AsyncClient
22 | import org.apache.thrift.transport.TNonblockingSocket
23 |
24 | case class ThriftClientAndSocket(client: AsyncClient, socket: TNonblockingSocket)
25 |
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/cassandra/partial/FileSystemStore.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.cassandra.partial
20 |
21 | import scala.concurrent.Future
22 | import org.apache.hadoop.fs.Path
23 | import java.util.UUID
24 | import java.nio.ByteBuffer
25 | import java.io.InputStream
26 | import com.tuplejump.snackfs.fs.model._
27 | import com.tuplejump.snackfs.cassandra.model.{GenericOpSuccess, Keyspace}
28 |
29 | trait FileSystemStore {
30 |
31 | def createKeyspace: Future[Keyspace]
32 |
33 | def init: Unit
34 |
35 | def storeINode(path: Path, iNode: INode): Future[GenericOpSuccess]
36 |
37 | def retrieveINode(path: Path): Future[INode]
38 |
39 | def storeSubBlock(blockId: UUID, subBlockMeta: SubBlockMeta, data: ByteBuffer): Future[GenericOpSuccess]
40 |
41 | def retrieveSubBlock(blockId: UUID, subBlockId: UUID, byteRangeStart: Long): Future[InputStream]
42 |
43 | def retrieveBlock(blockMeta: BlockMeta): InputStream
44 |
45 | def deleteINode(path: Path): Future[GenericOpSuccess]
46 |
47 | def deleteBlocks(iNode: INode): Future[GenericOpSuccess]
48 |
49 | def fetchSubPaths(path: Path, isDeepFetch: Boolean): Future[Set[Path]]
50 |
51 | def getBlockLocations(path: Path): Future[Map[BlockMeta, List[String]]]
52 |
53 | def acquireFileLock(path:Path,processId:UUID):Future[Boolean]
54 |
55 | def releaseFileLock(path:Path):Future[Boolean]
56 | }
57 |
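58 | // Illustrative usage sketch: implementations (e.g. ThriftStore) return Futures, and the command
59 | // objects in api.model block on them (path and atMost are placeholders; Await comes from
60 | // scala.concurrent, as imported in those commands):
61 | //
62 | //   val iNode = Await.result(store.retrieveINode(new Path("/tmp/x")), atMost)
63 | //   Await.ready(store.storeINode(new Path("/tmp/x"), iNode), atMost)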
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/cassandra/store/ThriftStore.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.cassandra.store
20 |
21 | import org.apache.cassandra.thrift.Cassandra.AsyncClient
22 | import org.apache.cassandra.thrift.Cassandra.AsyncClient._
23 |
24 | import org.apache.cassandra.utils.{UUIDGen, FBUtilities, ByteBufferUtil}
25 | import scala.concurrent._
26 | import scala.concurrent.ExecutionContext.Implicits.global
27 | import org.apache.cassandra.thrift._
28 | import scala.collection.JavaConversions._
29 | import scala.collection.JavaConverters._
30 | import java.nio.ByteBuffer
31 | import scala.util.Try
32 | import org.apache.hadoop.fs.Path
33 | import java.math.BigInteger
34 | import java.util.UUID
35 | import java.io.InputStream
36 | import com.tuplejump.snackfs.util.{LogConfiguration, AsyncUtil}
37 | import org.apache.cassandra.dht.Murmur3Partitioner
38 | import org.apache.thrift.async.TAsyncClientManager
39 | import org.apache.thrift.protocol.TBinaryProtocol
40 | import org.apache.thrift.transport.TNonblockingSocket
41 | import org.apache.commons.pool.ObjectPool
42 | import org.apache.commons.pool.impl.StackObjectPool
43 |
44 | import com.twitter.logging.Logger
45 | import com.tuplejump.snackfs.fs.model._
46 | import com.tuplejump.snackfs.cassandra.model._
47 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
48 | import com.tuplejump.snackfs.fs.model.SubBlockMeta
49 | import com.tuplejump.snackfs.cassandra.model.ThriftClientAndSocket
50 | import com.tuplejump.snackfs.fs.stream.BlockInputStream
51 | import com.tuplejump.snackfs.fs.model.BlockMeta
52 | import com.tuplejump.snackfs.cassandra.model.GenericOpSuccess
53 | import com.tuplejump.snackfs.cassandra.model.Keyspace
54 |
55 | class ThriftStore(configuration: SnackFSConfiguration) extends FileSystemStore {
56 |
57 | LogConfiguration.config()
58 |
59 | private lazy val log = Logger.get(getClass)
60 | private val PATH_COLUMN: ByteBuffer = ByteBufferUtil.bytes("path")
61 | private val PARENT_PATH_COLUMN: ByteBuffer = ByteBufferUtil.bytes("parent_path")
62 | private val SENTINEL_COLUMN: ByteBuffer = ByteBufferUtil.bytes("sentinel")
63 | private val SENTINEL_VALUE: ByteBuffer = ByteBufferUtil.bytes("x")
64 | private val DATA_COLUMN: ByteBuffer = ByteBufferUtil.bytes("data")
65 | private val GRACE_SECONDS: Int = 60
66 |
67 | private val INODE_COLUMN_FAMILY_NAME = "inode"
68 | private val BLOCK_COLUMN_FAMILY_NAME = "sblock"
69 |
70 | private val LOCK_COLUMN_FAMILY_NAME = "createlock"
71 |
72 | private val partitioner = new Murmur3Partitioner()
73 |
74 | private var clientPool: ObjectPool[ThriftClientAndSocket] = _
75 |
76 | private def executeWithClient[T](f: AsyncClient => Future[T])(implicit tm: ClassManifest[T]): Future[T] = {
77 | log.debug("Fetching client from pool")
78 | val thriftClientAndSocket = clientPool.borrowObject()
79 | val ret = f(thriftClientAndSocket.client)
80 |
81 | ret.onComplete {
82 | res =>
83 | clientPool.returnObject(thriftClientAndSocket)
84 | }
85 | ret
86 | }
87 |
88 | def createKeyspace: Future[Keyspace] = {
89 |
90 | val clientManager = new TAsyncClientManager()
91 | val protocolFactory = new TBinaryProtocol.Factory()
92 | val clientFactory = new AsyncClient.Factory(clientManager, protocolFactory)
93 |
94 | val transport = new TNonblockingSocket(configuration.CassandraHost, configuration.CassandraPort)
95 | val client = clientFactory.getAsyncClient(transport)
96 |
97 | val ksDef = buildSchema
98 | val prom = promise[Keyspace]()
99 | val ksDefFuture = AsyncUtil.executeAsync[describe_keyspace_call](client.describe_keyspace(ksDef.getName, _))
100 |
101 | ksDefFuture onSuccess {
102 | case p =>
103 |
104 | val mayBeKsDef: Try[KsDef] = Try(p.getResult)
105 |
106 | if (mayBeKsDef.isSuccess) {
107 | log.debug("Using existing keyspace %s", ksDef.getName)
108 | prom success new Keyspace(ksDef.getName)
109 |
110 | } else {
111 | log.debug("Creating new keyspace %s", ksDef.getName)
112 | val response = AsyncUtil.executeAsync[system_add_keyspace_call](
113 | client.system_add_keyspace(ksDef, _))
114 |
115 | response.onSuccess {
116 | case r =>
117 | log.debug("Created keyspace %s", ksDef.getName)
118 | prom success new Keyspace(r.getResult)
119 | }
120 |
121 | response.onFailure {
122 | case f =>
123 | log.error(f, "Failed to create keyspace %s", f.getMessage)
124 | prom failure f
125 | }
126 | }
127 | }
128 | prom.future
129 | }
130 |
131 | def init {
132 | log.debug("initializing thrift store with configuration %s", configuration.toString)
133 | clientPool = new StackObjectPool[ThriftClientAndSocket](
134 | new ClientPoolFactory(configuration.CassandraHost, configuration.CassandraPort,
135 | configuration.keySpace)) {
136 |
137 | override def close() {
138 | super.close()
139 | getFactory.asInstanceOf[ClientPoolFactory].closePool()
140 | }
141 |
142 | }
143 | }
144 |
145 | def dropKeyspace: Future[Unit] = executeWithClient({
146 | client =>
147 | val prom = promise[Unit]()
148 | val dropFuture = AsyncUtil.executeAsync[system_drop_keyspace_call](client.system_drop_keyspace(configuration.keySpace, _))
149 |
150 | dropFuture onSuccess {
151 | case p =>
152 | log.debug("deleted keyspace %s", configuration.keySpace)
153 | prom success p.getResult
154 | }
155 |
156 | dropFuture onFailure {
157 | case f =>
158 | log.error(f, "failed to delete keyspace %s", configuration.keySpace)
159 | prom failure f
160 | }
161 |
162 | prom.future
163 | })
164 |
165 | def disconnect() = {
166 | clientPool.close()
167 | }
168 |
169 | private def createINodeCF(cfName: String) = {
170 |
171 | val PATH_INDEX_LABEL = "path"
172 | val SENTINEL_INDEX_LABEL = "sentinel"
173 | val PARENT_PATH_INDEX_LABEL = "parent_path"
174 |
175 | val DATA_TYPE = "BytesType"
176 |
177 | val columnFamily = new CfDef(configuration.keySpace, cfName)
178 | columnFamily.setComparator_type(DATA_TYPE)
179 | columnFamily.setGc_grace_seconds(GRACE_SECONDS)
180 | columnFamily.setComment("Stores file meta data")
181 |
182 | val path = generateColumnDefinition(PATH_COLUMN, PATH_INDEX_LABEL)
183 | val sentinel = generateColumnDefinition(SENTINEL_COLUMN, SENTINEL_INDEX_LABEL)
184 | val parentPath = generateColumnDefinition(PARENT_PATH_COLUMN, PARENT_PATH_INDEX_LABEL)
185 |
186 | val metadata = List(path, sentinel, parentPath)
187 | columnFamily.setColumn_metadata(metadata)
188 |
189 | columnFamily
190 | }
191 |
192 | private def generateColumnDefinition(columnName: ByteBuffer, indexName: String): ColumnDef = {
193 | val DATA_TYPE = "BytesType"
194 | val cfDef = new ColumnDef(columnName, DATA_TYPE).setIndex_type(IndexType.KEYS).setIndex_name(indexName)
195 | cfDef
196 | }
197 |
198 | private def createSBlockCF(cfName: String, minCompaction: Int, maxCompaction: Int) = {
199 |
200 | val columnFamily = new CfDef()
201 | columnFamily.setName(cfName)
202 | columnFamily.setComparator_type("BytesType")
203 | columnFamily.setGc_grace_seconds(GRACE_SECONDS)
204 | columnFamily.setComment("Stores blocks of information associated with an inode")
205 | columnFamily.setKeyspace(configuration.keySpace)
206 |
207 | columnFamily.setMin_compaction_threshold(minCompaction)
208 | columnFamily.setMax_compaction_threshold(maxCompaction)
209 |
210 | columnFamily
211 | }
212 |
213 | private def createLockCF(cfName: String, minCompaction: Int, maxCompaction: Int) = {
214 |
215 | val columnFamily = new CfDef()
216 | columnFamily.setName(cfName)
217 | columnFamily.setComparator_type("UUIDType")
218 | columnFamily.setGc_grace_seconds(GRACE_SECONDS)
219 | columnFamily.setComment("Stores information about which process is trying to write an inode")
220 | columnFamily.setKeyspace(configuration.keySpace)
221 |
222 | columnFamily.setMin_compaction_threshold(minCompaction)
223 | columnFamily.setMax_compaction_threshold(maxCompaction)
224 |
225 | columnFamily
226 | }
227 |
228 | private def buildSchema: KsDef = {
229 | val MIN_COMPACTION = 16
230 | val MAX_COMPACTION = 64
231 | val inode = createINodeCF(INODE_COLUMN_FAMILY_NAME)
232 | val sblock = createSBlockCF(BLOCK_COLUMN_FAMILY_NAME, MIN_COMPACTION, MAX_COMPACTION)
233 |
234 | val createLock = createLockCF(LOCK_COLUMN_FAMILY_NAME, MIN_COMPACTION, MAX_COMPACTION)
235 |
236 | val ksDef: KsDef = new KsDef(configuration.keySpace, configuration.replicationStrategy,
237 | List(inode, sblock, createLock))
238 | ksDef.setStrategy_options(Map("replication_factor" -> configuration.replicationFactor.toString))
239 |
240 | ksDef
241 | }
242 |
243 | private def getPathKey(path: Path): ByteBuffer = {
244 | val pathBytes: ByteBuffer = ByteBufferUtil.bytes(path.toUri.getPath)
245 | val pathBytesAsInt: BigInteger = FBUtilities.hashToBigInteger(pathBytes)
246 | ByteBufferUtil.bytes(pathBytesAsInt.toString(16))
247 | }
248 |
249 | private def getParentForIndex(path: Path): String = {
250 | val parent = path.getParent
251 | var result = "null"
252 | if (parent != null) {
253 | result = parent.toUri.getPath
254 | }
255 | result
256 | }
257 |
258 | private def createMutationForCol(colName: ByteBuffer, value: ByteBuffer, ts: Long): Mutation = {
259 | val result = new Mutation().setColumn_or_supercolumn(
260 | new ColumnOrSuperColumn().setColumn(
261 | new Column()
262 | .setName(colName)
263 | .setValue(value)
264 | .setTimestamp(ts)))
265 | result
266 | }
267 |
268 | private def generateMutationforINode(data: ByteBuffer, path: Path, timestamp: Long): Map[ByteBuffer, java.util.Map[String, java.util.List[Mutation]]] = {
269 | val pathColMutation = createMutationForCol(PATH_COLUMN, ByteBufferUtil.bytes(path.toUri.getPath), timestamp)
270 | val parentColMutation = createMutationForCol(PARENT_PATH_COLUMN, ByteBufferUtil.bytes(getParentForIndex(path)), timestamp)
271 | val sentinelMutation = createMutationForCol(SENTINEL_COLUMN, SENTINEL_VALUE, timestamp)
272 | val dataMutation = createMutationForCol(DATA_COLUMN, data, timestamp)
273 | val mutations: java.util.List[Mutation] = List(pathColMutation, parentColMutation, sentinelMutation, dataMutation)
274 |
275 | val pathMutation: java.util.Map[String, java.util.List[Mutation]] = Map(INODE_COLUMN_FAMILY_NAME -> mutations)
276 | val mutationMap: Map[ByteBuffer, java.util.Map[String, java.util.List[Mutation]]] = Map(getPathKey(path) -> pathMutation)
277 |
278 | mutationMap
279 | }
280 |
281 | def storeINode(path: Path, iNode: INode): Future[GenericOpSuccess] = executeWithClient({
282 | client =>
283 | val data: ByteBuffer = iNode.serialize
284 | val timestamp = iNode.timestamp
285 | val mutationMap: Map[ByteBuffer, java.util.Map[String, java.util.List[Mutation]]] = generateMutationforINode(data, path, timestamp)
286 | val iNodeFuture = AsyncUtil.executeAsync[batch_mutate_call](client.batch_mutate(mutationMap, configuration.writeConsistencyLevel, _))
287 | val prom = promise[GenericOpSuccess]()
288 | iNodeFuture.onSuccess {
289 | case p =>
290 | log.debug("stored INode %s", iNode.toString)
291 | prom success GenericOpSuccess()
292 | }
293 |
294 | iNodeFuture.onFailure {
295 | case f =>
296 | log.error(f, "failed to store INode %s", iNode.toString)
297 | prom failure f
298 | }
299 |
300 | prom.future
301 | })
302 |
303 | private def performGet(client: AsyncClient, key: ByteBuffer, columnPath: ColumnPath): Future[ColumnOrSuperColumn] = {
304 | val prom = promise[ColumnOrSuperColumn]()
305 | val getFuture = AsyncUtil.executeAsync[get_call](client.get(key, columnPath, configuration.readConsistencyLevel, _))
306 |
307 | getFuture.onSuccess {
308 | case p =>
309 | try {
310 | val res = p.getResult
311 | log.debug("fetch INode/subblock data")
312 | prom success res
313 | } catch {
314 | case e: Exception =>
315 | log.error(e, "failed to get INode/subblock data")
316 | prom failure e
317 | }
318 | }
319 |
320 | getFuture.onFailure {
321 | case f =>
322 | log.error(f, "failed to get INode/subblock data")
323 | prom failure f
324 | }
325 |
326 | prom.future
327 | }
328 |
329 | def retrieveINode(path: Path): Future[INode] = executeWithClient({
330 | client =>
331 | val pathKey: ByteBuffer = getPathKey(path)
332 | val inodeDataPath = new ColumnPath(INODE_COLUMN_FAMILY_NAME).setColumn(DATA_COLUMN)
333 |
334 | val inodePromise = promise[INode]()
335 | log.debug("fetching Inode for path %s", path)
336 | val pathInfo = performGet(client, pathKey, inodeDataPath)
337 |
338 | pathInfo.onSuccess {
339 | case p =>
340 | log.debug("retrieved Inode for path %s", path)
341 | inodePromise success INode.deserialize(ByteBufferUtil.inputStream(p.column.value), p.column.getTimestamp)
342 | }
343 |
344 | pathInfo.onFailure {
345 | case f =>
346 | log.error(f, "failed to retrieve Inode for path %s", path)
347 | inodePromise failure f
348 | }
349 | inodePromise.future
350 | })
351 |
352 | def storeSubBlock(blockId: UUID, subBlockMeta: SubBlockMeta, data: ByteBuffer): Future[GenericOpSuccess] = executeWithClient({
353 | client =>
354 | val parentBlockId: ByteBuffer = ByteBufferUtil.bytes(blockId)
355 |
356 | val sblockParent = new ColumnParent(BLOCK_COLUMN_FAMILY_NAME)
357 |
358 | val column = new Column()
359 | .setName(ByteBufferUtil.bytes(subBlockMeta.id))
360 | .setValue(data)
361 | .setTimestamp(System.currentTimeMillis)
362 |
363 | val prom = promise[GenericOpSuccess]()
364 | val subBlockFuture = AsyncUtil.executeAsync[insert_call](
365 | client.insert(parentBlockId, sblockParent, column, configuration.writeConsistencyLevel, _))
366 |
367 | subBlockFuture.onSuccess {
368 | case p =>
369 | log.debug("stored subBlock %s for block with id %s", subBlockMeta.toString, blockId.toString)
370 | prom success GenericOpSuccess()
371 | }
372 |
373 | subBlockFuture.onFailure {
374 | case f =>
375 | log.error(f, "failed to store subBlock %s for block with id %s", subBlockMeta.toString, blockId.toString)
376 | prom failure f
377 | }
378 | prom.future
379 | })
380 |
381 | def retrieveSubBlock(blockId: UUID, subBlockId: UUID, byteRangeStart: Long): Future[InputStream] = executeWithClient({
382 | client =>
383 | val blockIdBuffer: ByteBuffer = ByteBufferUtil.bytes(blockId)
384 | val subBlockIdBuffer = ByteBufferUtil.bytes(subBlockId)
385 | log.debug("fetching subBlock with id %s", subBlockId.toString)
386 |
387 | val subBlockFuture = performGet(client, blockIdBuffer, new ColumnPath(BLOCK_COLUMN_FAMILY_NAME).setColumn(subBlockIdBuffer))
388 | val prom = promise[InputStream]()
389 |
390 | subBlockFuture.onSuccess {
391 | case p =>
392 | val stream: InputStream = ByteBufferUtil.inputStream(p.column.value)
393 | log.debug("retrieved subBlock with id %s and block id %s", subBlockId.toString, blockId.toString)
394 | prom success stream
395 | }
396 |
397 | subBlockFuture.onFailure {
398 | case f =>
399 | log.error(f, "failed to retrieve subBlock with id %s and block id %s", subBlockId.toString, blockId.toString)
400 | prom failure f
401 | }
402 |
403 | prom.future
404 | })
405 |
406 | def retrieveBlock(blockMeta: BlockMeta): InputStream = {
407 | log.debug("retrieve Block %s", blockMeta.toString)
408 | BlockInputStream(this, blockMeta, configuration.atMost)
409 | }
410 |
411 | def deleteINode(path: Path): Future[GenericOpSuccess] = executeWithClient({
412 | client =>
413 | val pathKey = getPathKey(path)
414 | val iNodeColumnPath = new ColumnPath(INODE_COLUMN_FAMILY_NAME)
415 | val timestamp = System.currentTimeMillis
416 |
417 | val result = promise[GenericOpSuccess]()
418 |
419 | val deleteInodeFuture = AsyncUtil.executeAsync[remove_call](
420 | client.remove(pathKey, iNodeColumnPath, timestamp, configuration.writeConsistencyLevel, _))
421 |
422 | deleteInodeFuture.onSuccess {
423 | case p =>
424 | log.debug("deleted INode with path %s", path)
425 | result success GenericOpSuccess()
426 | }
427 |
428 | deleteInodeFuture.onFailure {
429 | case f =>
430 | log.error(f, "failed to delete INode with path %s", path)
431 | result failure f
432 | }
433 |
434 | result.future
435 | })
436 |
437 | def deleteBlocks(iNode: INode): Future[GenericOpSuccess] = executeWithClient({
438 | client =>
439 | val mutationMap = generateINodeMutationMap(iNode)
440 |
441 | val result = promise[GenericOpSuccess]()
442 |
443 | val deleteFuture = AsyncUtil.executeAsync[batch_mutate_call](
444 | client.batch_mutate(mutationMap, configuration.writeConsistencyLevel, _))
445 |
446 | deleteFuture.onSuccess {
447 | case p =>
448 | log.debug("deleted blocks for INode %s", iNode.toString)
449 | result success GenericOpSuccess()
450 | }
451 |
452 | deleteFuture.onFailure {
453 | case f =>
454 | log.error(f, "failed to delete blocks for INode %s", iNode.toString)
455 | result failure f
456 | }
457 |
458 | result.future
459 | })
460 |
461 | private def generateINodeMutationMap(iNode: INode): Map[ByteBuffer, java.util.Map[String, java.util.List[Mutation]]] = {
462 | val timestamp = System.currentTimeMillis()
463 | val deletion = new Deletion()
464 | deletion.setTimestamp(timestamp)
465 |
466 | iNode.blocks.map {
467 | block =>
468 | (ByteBufferUtil.bytes(block.id), Map(BLOCK_COLUMN_FAMILY_NAME ->
469 | List(new Mutation().setDeletion(deletion)).asJava).asJava)
470 | }.toMap
471 | }
472 |
473 | def fetchSubPaths(path: Path, isDeepFetch: Boolean): Future[Set[Path]] = {
474 | val startPath = path.toUri.getPath
475 | val startPathBuffer = ByteBufferUtil.bytes(startPath)
476 |
477 | val sentinelIndexExpr = new IndexExpression(SENTINEL_COLUMN, IndexOperator.EQ, SENTINEL_VALUE)
478 | var startPathIndexExpr = new IndexExpression()
479 | var indexExpr = List[IndexExpression]()
480 |
481 | if (isDeepFetch) {
482 | startPathIndexExpr = new IndexExpression(PATH_COLUMN, IndexOperator.GT, startPathBuffer)
483 | if (startPath.length > 1) {
484 | indexExpr = indexExprForDeepFetch(startPath)
485 | }
486 | } else {
487 | startPathIndexExpr = new IndexExpression(PARENT_PATH_COLUMN, IndexOperator.EQ, startPathBuffer)
488 | }
489 |
490 | indexExpr = indexExpr ++ List(sentinelIndexExpr, startPathIndexExpr)
491 |
492 | def recursionStrategy: String = {
493 | if (isDeepFetch) {
494 | "recursively"
495 | } else {
496 | "non-recursively"
497 | }
498 | }
499 |
500 | log.debug("fetching subPaths for %s, %s ", path, recursionStrategy)
501 | fetchPaths(indexExpr)
502 | }
503 |
504 | private def fetchPaths(indexExpr: List[IndexExpression]): Future[Set[Path]] = executeWithClient({
505 | client =>
506 | val pathPredicate = new SlicePredicate().setColumn_names(List(PATH_COLUMN))
507 | val iNodeParent = new ColumnParent(INODE_COLUMN_FAMILY_NAME)
508 |
509 | val indexClause = new IndexClause(indexExpr, ByteBufferUtil.EMPTY_BYTE_BUFFER, 100000)
510 | val rowFuture = AsyncUtil.executeAsync[get_indexed_slices_call](
511 | client.get_indexed_slices(iNodeParent, indexClause, pathPredicate, configuration.readConsistencyLevel, _))
512 |
513 | val result = promise[Set[Path]]()
514 |
515 | rowFuture.onSuccess {
516 | case p =>
517 | val paths = p.getResult.flatMap(keySlice =>
518 | keySlice.getColumns.map(columnOrSuperColumn =>
519 | new Path(ByteBufferUtil.string(columnOrSuperColumn.column.value)))
520 | ).toSet
521 | log.debug("fetched subpaths for %s", indexExpr.toString())
522 | result success paths
523 | }
524 |
525 | rowFuture.onFailure {
526 | case f =>
527 | log.error(f, "failed to fetch subpaths for %s", indexExpr.toString())
528 | result failure f
529 | }
530 |
531 | result.future
532 | })
533 |
534 | private def indexExprForDeepFetch(startPath: String): List[IndexExpression] = {
535 | val lastChar = (startPath(startPath.length - 1) + 1).asInstanceOf[Char]
536 | val endPath = startPath.substring(0, startPath.length - 1) + lastChar
537 | val endPathBuffer = ByteBufferUtil.bytes(endPath)
538 | val endPathIndexExpr = new IndexExpression(PATH_COLUMN, IndexOperator.LT, endPathBuffer)
539 | List(endPathIndexExpr)
540 | }
541 |
542 |
543 | def getBlockLocations(path: Path): Future[Map[BlockMeta, List[String]]] = executeWithClient({
544 | client =>
545 |
546 | val result = promise[Map[BlockMeta, List[String]]]()
547 | val inodeFuture = retrieveINode(path)
548 |
549 | var response = Map.empty[BlockMeta, List[String]]
550 |
551 | inodeFuture.onSuccess {
552 | case inode =>
553 | log.debug("found iNode for %s, getting block locations", path)
554 | //Get the ring description from the server
555 | val ringFuture = AsyncUtil.executeAsync[describe_ring_call](
556 | client.describe_ring(configuration.keySpace, _)
557 | )
558 |
559 |
560 | ringFuture.onSuccess {
561 | case r =>
562 | log.debug("fetched ring details for keyspace %s", configuration.keySpace)
563 | val tf = partitioner.getTokenFactory
564 | val ring = r.getResult.map(p => (p.getEndpoints, p.getStart_token.toLong, p.getEnd_token.toLong))
565 |
566 | //For each block in the file, get the owner node
567 | inode.blocks.foreach(b => {
568 | val token = tf.fromByteArray(ByteBufferUtil.bytes(b.id))
569 |
570 | val xr = ring.filter {
571 | p =>
572 | if (p._2 < p._3) {
573 | p._2 <= token.token && p._3 >= token.token
574 | } else {
575 | (p._2 <= token.token && Long.MaxValue >= token.token) || (p._3 >= token.token && Long.MinValue <= token.token)
576 | }
577 | }
578 |
579 |
580 | if (xr.length > 0) {
581 | val endpoints: List[String] = xr.flatMap(_._1).toList
582 | response += (b -> endpoints)
583 | } else {
584 | response += (b -> ring(0)._1.toList)
585 | }
586 | })
587 | log.debug("found block locations for iNode %s", path)
588 | result success response
589 | }
590 |
591 | ringFuture.onFailure {
592 | case f =>
593 | log.error(f, "failed to get ring details for keyspace %s", configuration.keySpace)
594 | result failure f
595 | }
596 | }
597 |
598 | inodeFuture.onFailure {
599 | case e =>
600 | log.error(e, "iNode for %s not found", path)
601 | result failure e
602 | }
603 |
604 | result.future
605 | })
606 |
607 | /* Lock for writing a file
608 |  *
609 |  * Use case
610 |  *   one or more processes in the same cluster attempt to write to the same file
611 |  *   within a very small time window
612 |  *
613 |  * Algorithm (a usage sketch appears at the end of this file)
614 |  *   1. Write a column whose name is a timeUUID and whose value is the processId, for the given file path (row key).
615 |  *   2. Read back all columns for the path
616 |  *        case 1) count >= 1 && firstEntry.value == processId
617 |  *                the lock is acquired
618 |  *        case 2) otherwise the lock belongs to another process
619 |  *   3. Do the work that assumes the row is locked
620 |  *   4. Release the lock by deleting the row
621 |  *
622 |  */
623 |
624 | private def addLockColumn(path: Path, processId: UUID, client: AsyncClient): Future[insert_call] = {
625 | val key = getPathKey(path)
626 | val columnParent = new ColumnParent(LOCK_COLUMN_FAMILY_NAME)
627 |
628 | val timeStamp = UUIDGen.getTimeUUID
629 |
630 | val column = new Column()
631 | .setName(ByteBufferUtil.bytes(timeStamp))
632 | .setValue(ByteBufferUtil.bytes(processId))
633 | .setTimestamp(System.currentTimeMillis())
634 |
635 | val addColumnFuture = AsyncUtil.executeAsync[insert_call](
636 | client.insert(key, columnParent, column, ConsistencyLevel.QUORUM, _))
637 |
638 | log.debug("adding column")
639 | addColumnFuture
640 | }
641 |
642 | private def getLockRow(path: Path, client: Cassandra.AsyncClient): Future[get_slice_call] = {
643 | val key = getPathKey(path)
644 | val columnParent = new ColumnParent(LOCK_COLUMN_FAMILY_NAME)
645 | val sliceRange = new SliceRange().setStart(Array[Byte]()).setFinish(Array[Byte]())
646 | val slicePredicate = new SlicePredicate().setColumn_names(null).setSlice_range(sliceRange)
647 |
648 | val getRowFuture = AsyncUtil.executeAsync[get_slice_call](
649 | client.get_slice(key, columnParent, slicePredicate, ConsistencyLevel.QUORUM, _))
650 |
651 | log.debug("getting row")
652 | getRowFuture
653 | }
654 |
655 | private def isCreator(processId: UUID, columns: java.util.List[ColumnOrSuperColumn]): Boolean = {
656 | var result = false
657 | log.debug("checking for access to create a file for %s", processId)
658 |
659 | if (columns.length >= 1) {
660 | val firstEntry = columns.head.getColumn
661 | val entryIdString: String = new String(firstEntry.getValue)
662 | val processIdString: String = new String(ByteBufferUtil.bytes(processId).array())
663 | log.debug("value found %s", entryIdString)
664 | log.debug("given value %s", processIdString)
665 |
666 | if (entryIdString == processIdString) {
667 | result = true
668 | }
669 | }
670 | result
671 | }
672 |
673 | def acquireFileLock(path: Path, processId: UUID): Future[Boolean] = executeWithClient({
674 | client =>
675 | val prom = promise[Boolean]()
676 | log.debug("adding column for create lock")
677 | val addColumnFuture = addLockColumn(path, processId, client)
678 | addColumnFuture.onSuccess {
679 | case res =>
680 | log.debug("added column for create lock")
681 | val getRowFuture = getLockRow(path, client)
682 | log.debug("getting row for create lock")
683 |
684 | getRowFuture.onSuccess {
685 | case rowData =>
686 | val result = isCreator(processId, rowData.getResult)
687 | prom success result
688 | }
689 |
690 | getRowFuture.onFailure {
691 | case e =>
692 | log.error(e, "error in getting row")
693 | prom failure e
694 | }
695 | }
696 |
697 | addColumnFuture.onFailure {
698 | case e =>
699 | log.error(e, "error in adding column for create lock")
700 | prom failure e
701 | }
702 |
703 | prom.future
704 | })
705 |
706 | private def deleteLockRow(path: Path, client: Cassandra.AsyncClient): Future[remove_call] = {
707 | val columnPath = new ColumnPath(LOCK_COLUMN_FAMILY_NAME)
708 | val timestamp = System.currentTimeMillis
709 |
710 | val deleteLockFuture = AsyncUtil.executeAsync[remove_call](
711 | client.remove(getPathKey(path), columnPath, timestamp, ConsistencyLevel.QUORUM, _))
712 |
713 | deleteLockFuture
714 | }
715 |
716 | def releaseFileLock(path: Path): Future[Boolean] = executeWithClient({
717 | client =>
718 | val prom = promise[Boolean]()
719 | val deleteLockFuture = deleteLockRow(path, client)
720 |
721 | deleteLockFuture.onSuccess {
722 | case res =>
723 | log.debug("deleted lock")
724 | prom success true
725 | }
726 |
727 | deleteLockFuture.onFailure {
728 | case e =>
729 | log.error(e, "failed to delete lock")
730 | prom success false
731 | }
732 |
733 | prom.future
734 | })
735 |
736 | }
737 |
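A minimal usage sketch of the locking flow above (illustrative only, not part of the source tree): it assumes an already-initialized store exposing acquireFileLock and releaseFileLock as defined in ThriftStore, and the helper name withFileLock is hypothetical.

import java.util.UUID
import org.apache.hadoop.fs.Path
import scala.concurrent.Await
import scala.concurrent.duration._
import com.tuplejump.snackfs.cassandra.store.ThriftStore

// Illustrative helper: acquire the lock, run the guarded work, always release.
// The store instance and the 10-second timeout are assumptions for the sketch.
def withFileLock(store: ThriftStore, path: Path)(work: => Unit): Boolean = {
  val processId = UUID.randomUUID()
  val acquired = Await.result(store.acquireFileLock(path, processId), 10.seconds)
  if (acquired) {
    try work
    finally Await.result(store.releaseFileLock(path), 10.seconds)
  }
  acquired
}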
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/fs/model/BlockMeta.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.fs.model
20 |
21 | import java.util.UUID
22 |
23 | case class BlockMeta(id: UUID, offset: Long, length: Long, subBlocks: Seq[SubBlockMeta]) {
24 | override def toString = {
25 | val result = "Block[" + (id,offset,length).toString() + "]"
26 | result
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/fs/model/INode.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 |
20 | package com.tuplejump.snackfs.fs.model
21 |
22 | import org.apache.hadoop.fs.permission.FsPermission
23 | import java.io._
24 | import java.nio.ByteBuffer
25 | import java.util.UUID
26 | import com.twitter.logging.Logger
27 |
28 | object FileType extends Enumeration {
29 | val DIRECTORY, FILE = Value
30 | }
31 |
32 | case class INode(user: String, group: String, permission: FsPermission,
33 | fileType: FileType.Value, blocks: Seq[BlockMeta], timestamp: Long) {
34 |
35 | private lazy val log = Logger.get(getClass)
36 |
37 | def isDirectory: Boolean = this.fileType == FileType.DIRECTORY
38 |
39 | def isFile: Boolean = this.fileType == FileType.FILE
40 |
41 | def serialize: ByteBuffer = {
42 |
43 | log.debug("serializing iNode")
44 | // Write INode Header
45 | val byteStream: ByteArrayOutputStream = new ByteArrayOutputStream
46 | val outputStream: DataOutputStream = new DataOutputStream(byteStream)
47 |
48 | outputStream.writeInt(user.getBytes.length)
49 | outputStream.writeBytes(user)
50 | outputStream.writeInt(group.getBytes.length)
51 | outputStream.writeBytes(group)
52 | outputStream.writeShort(permission.toShort)
53 | outputStream.writeByte(fileType.id)
54 | if (isFile) {
55 |
56 | log.debug("serializing data for file iNode")
57 | //Write Blocks
58 | outputStream.writeInt(blocks.length)
59 | blocks.foreach(b => {
60 | outputStream.writeLong(b.id.getMostSignificantBits)
61 | outputStream.writeLong(b.id.getLeastSignificantBits)
62 | outputStream.writeLong(b.offset)
63 | outputStream.writeLong(b.length)
64 |
65 | outputStream.writeInt(b.subBlocks.length)
66 | // Write SubBlocks for this block
67 | b.subBlocks.foreach(sb => {
68 | outputStream.writeLong(sb.id.getMostSignificantBits)
69 | outputStream.writeLong(sb.id.getLeastSignificantBits)
70 | outputStream.writeLong(sb.offset)
71 | outputStream.writeLong(sb.length)
72 | })
73 | })
74 | }
75 | outputStream.close()
76 | ByteBuffer.wrap(byteStream.toByteArray)
77 | }
78 | }
79 |
80 | object INode {
81 | def deserialize(inputStream: InputStream, timestamp: Long): INode = {
82 | val log = Logger.get(getClass)
83 |
84 | var result: INode = null
85 | if (inputStream != null) {
86 | val dataInputStream: DataInputStream = new DataInputStream(inputStream)
87 |
88 | val userLength: Int = dataInputStream.readInt
89 | val userBuffer: Array[Byte] = new Array[Byte](userLength)
90 | dataInputStream.readFully(userBuffer)
91 |
92 | val groupLength: Int = dataInputStream.readInt
93 | val groupBuffer: Array[Byte] = new Array[Byte](groupLength)
94 | dataInputStream.readFully(groupBuffer)
95 |
96 | val perms: FsPermission = new FsPermission(dataInputStream.readShort)
97 |
98 | val fType: FileType.Value = FileType(dataInputStream.readByte)
99 |
100 | fType match {
101 | case FileType.DIRECTORY => {
102 | log.debug("deserializing inode directory")
103 | result = INode(new String(userBuffer), new String(groupBuffer), perms, fType, null, timestamp)
104 | }
105 | case FileType.FILE => {
106 | log.debug("deserializing data for file")
107 | val blockLength = dataInputStream.readInt
108 | var fileBlocks: Seq[BlockMeta] = Nil
109 | val blockRange = 0 until blockLength
110 | blockRange.foreach(_ => {
111 | val mostSigBits: Long = dataInputStream.readLong
112 | val leastSigBits: Long = dataInputStream.readLong
113 | val offset: Long = dataInputStream.readLong
114 | val length: Long = dataInputStream.readLong
115 |
116 | // Deserialize SubBlocks for this block
117 | val numSubBlocks: Int = dataInputStream.readInt
118 |
119 | var subBlocks: Seq[SubBlockMeta] = Nil
120 | val subBlockRange = 0 until numSubBlocks
121 | subBlockRange.foreach(_ => {
122 | val subMostSigBits: Long = dataInputStream.readLong
123 | val subLeastSigBits: Long = dataInputStream.readLong
124 | val subOffset: Long = dataInputStream.readLong
125 | val subLength: Long = dataInputStream.readLong
126 | subBlocks = subBlocks :+ SubBlockMeta(new UUID(subMostSigBits, subLeastSigBits), subOffset, subLength)
127 | })
128 | fileBlocks = fileBlocks :+ BlockMeta(new UUID(mostSigBits, leastSigBits), offset, length, subBlocks)
129 | })
130 | result = INode(new String(userBuffer), new String(groupBuffer), perms, fType, fileBlocks, timestamp)
131 | }
132 | case _ => {
133 | val ex = new IllegalArgumentException("Cannot deserialize INode.")
134 | log.error(ex, "Invalid data cannot deserailize")
135 | throw ex
136 | }
137 | }
138 | }
139 | result
140 | }
141 | }
142 |
143 |
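A round-trip sketch of the INode wire format above (illustrative only; the values are hypothetical): serialize writes the user and group as length-prefixed bytes, the permission as a short, the file type as a byte and, for files, the block and sub-block metadata; deserialize reads them back in the same order.

import java.io.ByteArrayInputStream
import java.util.UUID
import org.apache.hadoop.fs.permission.FsPermission
import com.tuplejump.snackfs.fs.model._

// Build a single-block, single-sub-block file iNode with made-up values.
val subBlock = SubBlockMeta(UUID.randomUUID, 0L, 128L)
val block = BlockMeta(UUID.randomUUID, 0L, 128L, List(subBlock))
val iNode = INode("alice", "users", FsPermission.getDefault,
                  FileType.FILE, List(block), System.currentTimeMillis())

// serialize produces a ByteBuffer; deserialize consumes the same bytes.
val buffer = iNode.serialize
val copy = INode.deserialize(new ByteArrayInputStream(buffer.array()), iNode.timestamp)

assert(copy.user == "alice" && copy.isFile)
assert(copy.blocks.head.subBlocks.head.length == 128L)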
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/fs/model/SubBlockMeta.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.fs.model
20 |
21 | import java.util.UUID
22 |
23 | case class SubBlockMeta(id:UUID,offset:Long,length:Long) {
24 | override def toString= {
25 | val result = "SubBlock["+(id,offset,length).toString()+"]"
26 | result
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/fs/stream/BlockInputStream.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.fs.stream
20 |
21 | import java.io.{IOException, InputStream}
22 | import scala.concurrent.Await
23 | import scala.concurrent.duration._
24 | import com.twitter.logging.Logger
25 | import com.tuplejump.snackfs.fs.model._
26 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
27 |
28 | case class BlockInputStream(store: FileSystemStore, blockMeta: BlockMeta, atMost: FiniteDuration)
29 |   extends InputStream {
30 | private lazy val log = Logger.get(getClass)
31 |
32 | private val LENGTH = blockMeta.length
33 |
34 | private var isClosed: Boolean = false
35 | private var inputStream: InputStream = null
36 | private var currentPosition: Long = 0
37 |
38 | private var targetSubBlockSize = 0L
39 | private var targetSubBlockOffset = 0L
40 |
41 |
42 | private def findSubBlock(targetPosition: Long): InputStream = {
43 | val subBlockLengthTotals = blockMeta.subBlocks.scanLeft(0L)(_ + _.length).tail
44 | val subBlockIndex = subBlockLengthTotals.indexWhere(p => targetPosition < p)
45 | if (subBlockIndex == -1) {
46 | val ex = new IOException("Impossible situation: could not find position " + targetPosition)
47 | log.error(ex, "Position %s could not be located", targetPosition.toString)
48 | throw ex
49 | }
50 | var offset = targetPosition
51 | if (subBlockIndex != 0) {
52 | offset -= subBlockLengthTotals(subBlockIndex - 1)
53 | }
54 | val subBlock = blockMeta.subBlocks(subBlockIndex)
55 | targetSubBlockSize = subBlock.length
56 | targetSubBlockOffset = subBlock.offset
57 | log.debug("fetching subBlock for block %s and position %s", blockMeta.id.toString, targetPosition.toString)
58 | Await.result(store.retrieveSubBlock(blockMeta.id, subBlock.id, offset), atMost)
59 | }
60 |
61 | def read: Int = {
62 | if (isClosed) {
63 | val ex = new IOException("Stream closed")
64 | log.error(ex,"Failed to read as stream is closed")
65 | throw ex
66 | }
67 | var result = -1
68 | if (currentPosition <= LENGTH - 1) {
69 | if (currentPosition > (targetSubBlockOffset + targetSubBlockSize - 1)) {
70 | if (inputStream != null) {
71 | inputStream.close()
72 | }
73 | log.debug("fetching next subblock")
74 | inputStream = findSubBlock(currentPosition)
75 | }
76 | log.debug("reading from subblock")
77 | result = inputStream.read()
78 | currentPosition += 1
79 | }
80 | result
81 | }
82 |
83 | override def read(buf: Array[Byte], off: Int, len: Int): Int = {
84 | if (isClosed) {
85 | val ex = new IOException("Stream closed")
86 | log.error(ex,"Failed to read as stream is closed")
87 | throw ex
88 | }
89 | if (buf == null) {
90 | val ex = new NullPointerException
91 | log.error(ex,"Failed to read as output buffer is null")
92 | throw ex
93 | }
94 | if ((off < 0) || (len < 0) || (len > buf.length - off)) {
95 | val ex = new IndexOutOfBoundsException
96 | log.error(ex,"Failed to read as one of offset,length or output buffer length is invalid")
97 | throw ex
98 | }
99 | var result = 0
100 | if (len > 0) {
101 | while ((result < len) && (currentPosition <= LENGTH - 1)) {
102 | if (currentPosition > (targetSubBlockOffset + targetSubBlockSize - 1)) {
103 | if (inputStream != null) {
104 | inputStream.close()
105 | }
106 | log.debug("fetching next subblock")
107 | inputStream = findSubBlock(currentPosition)
108 | }
109 | val remaining = len - result
110 | val size = math.min(remaining, targetSubBlockSize)
111 |
112 | log.debug("reading from subblock")
113 | val readSize = inputStream.read(buf, off + result, size.asInstanceOf[Int])
114 | result += readSize
115 | currentPosition += readSize
116 | }
117 | if (result == 0) {
118 | result = -1
119 | }
120 | }
121 | result
122 | }
123 |
124 | override def close() = {
125 | if (!isClosed) {
126 | if (inputStream != null) {
127 | log.debug("closing stream")
128 | inputStream.close()
129 | }
130 | super.close()
131 | isClosed = true
132 | }
133 | }
134 | }
135 |
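The sub-block lookup in findSubBlock above turns the sub-block lengths into cumulative totals (scanLeft) and picks the first total exceeding the target position. A small self-contained sketch with hypothetical lengths:

// Hypothetical sub-block lengths within one block: 64 + 64 + 32 bytes.
val subBlockLengths = Seq(64L, 64L, 32L)
val totals = subBlockLengths.scanLeft(0L)(_ + _).tail // Seq(64, 128, 160)

// Mirror of the index/offset arithmetic used by findSubBlock.
def locate(targetPosition: Long): (Int, Long) = {
  val index = totals.indexWhere(targetPosition < _)
  val offsetInSubBlock =
    if (index == 0) targetPosition else targetPosition - totals(index - 1)
  (index, offsetInSubBlock)
}

// Position 70 falls 6 bytes into the second sub-block (index 1).
assert(locate(70L) == (1, 6L))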
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/fs/stream/FileSystemInputStream.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.fs.stream
20 |
21 | import org.apache.hadoop.fs.{Path, FSInputStream}
22 | import java.io.{IOException, InputStream}
23 | import scala.concurrent.Await
24 | import scala.concurrent.duration._
25 | import com.twitter.logging.Logger
26 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
27 |
28 | case class FileSystemInputStream(store: FileSystemStore, path: Path) extends FSInputStream {
29 |
30 | private lazy val log = Logger.get(getClass)
31 |
32 | private val INODE = Await.result(store.retrieveINode(path), 10 seconds)
33 | private val FILE_LENGTH: Long = INODE.blocks.map(_.length).sum
34 |
35 | private var currentPosition: Long = 0L
36 |
37 | private var blockStream: InputStream = null
38 |
39 | private var currentBlockSize: Long = -1
40 |
41 | private var currentBlockOffset: Long = 0
42 |
43 | private var isClosed: Boolean = false
44 |
45 | def seek(target: Long) = {
46 | if (target > FILE_LENGTH) {
47 | val ex = new IOException("Cannot seek after EOF")
48 | log.error(ex, "EOF reached earlier")
49 | throw ex
50 | }
51 | currentPosition = target
52 | currentBlockSize = -1
53 | currentBlockOffset = 0
54 | }
55 |
56 | def getPos: Long = currentPosition
57 |
58 | def seekToNewSource(targetPos: Long): Boolean = false
59 |
60 | private def findBlock(targetPosition: Long): InputStream = {
61 | val blockIndex = INODE.blocks.indexWhere(b => b.offset + b.length > targetPosition)
62 | if (blockIndex == -1) {
63 | val ex = new IOException("Impossible situation: could not find position " + targetPosition)
64 | log.error(ex, "Position %s could not be located", targetPosition.toString)
65 | throw ex
66 | }
67 | val block = INODE.blocks(blockIndex)
68 | currentBlockSize = block.length
69 | currentBlockOffset = block.offset
70 |
71 | val offset = targetPosition - currentBlockOffset
72 | log.debug("fetching block at position %s", targetPosition.toString)
73 | val bis = store.retrieveBlock(block)
74 | bis.skip(offset)
75 | bis
76 | }
77 |
78 | def read(): Int = {
79 | if (isClosed) {
80 | val ex = new IOException("Stream closed")
81 | log.error(ex, "Failed to read as stream is closed")
82 | throw ex
83 | }
84 | var result: Int = -1
85 |
86 | if (currentPosition < FILE_LENGTH) {
87 | if (currentPosition > currentBlockOffset + currentBlockSize) {
88 | if (blockStream != null) {
89 | blockStream.close()
90 | }
91 | log.debug("fetching next block")
92 | blockStream = findBlock(currentPosition)
93 | }
94 | log.debug("reading from block")
95 | result = blockStream.read
96 | if (result >= 0) {
97 | currentPosition += 1
98 | }
99 | }
100 | result
101 | }
102 |
103 | override def available: Int = (FILE_LENGTH - currentPosition).asInstanceOf[Int]
104 |
105 | override def read(buf: Array[Byte], off: Int, len: Int): Int = {
106 | if (isClosed) {
107 | val ex = new IOException("Stream closed")
108 | log.error(ex, "Failed to read as stream is closed")
109 | throw ex
110 | }
111 | if (buf == null) {
112 | val ex = new NullPointerException
113 | log.error(ex, "Failed to read as output buffer is null")
114 | throw ex
115 | }
116 | if ((off < 0) || (len < 0) || (len > buf.length - off)) {
117 | val ex = new IndexOutOfBoundsException
118 | log.error(ex, "Failed to read as one of offset,length or output buffer length is invalid")
119 | throw ex
120 | }
121 |
122 | var result: Int = 0
123 | if (len > 0) {
124 | while ((result < len) && (currentPosition <= FILE_LENGTH - 1)) {
125 | if (currentPosition > currentBlockOffset + currentBlockSize - 1) {
126 |
127 | if (blockStream != null) {
128 | blockStream.close()
129 | }
130 | log.debug("fetching next block")
131 | blockStream = findBlock(currentPosition)
132 | }
133 | val realLen: Int = math.min(len - result, currentBlockSize + 1).asInstanceOf[Int]
134 | log.debug("reading from block")
135 | var readSize = blockStream.read(buf, off + result, realLen)
136 | result += readSize
137 | currentPosition += readSize
138 | }
139 | if (result == 0) {
140 | result = -1
141 | }
142 | }
143 | result
144 | }
145 |
146 | override def close() = {
147 | if (!isClosed) {
148 | if (blockStream != null) {
149 | log.debug("closing stream")
150 | blockStream.close()
151 | }
152 | super.close()
153 | isClosed = true
154 | }
155 | }
156 | }
157 |
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/fs/stream/FileSystemOutputStream.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.fs.stream
20 |
21 | import java.io.{IOException, OutputStream}
22 | import org.apache.hadoop.fs.Path
23 | import org.apache.cassandra.utils.UUIDGen
24 | import java.util.UUID
25 | import java.nio.ByteBuffer
26 | import org.apache.hadoop.fs.permission.FsPermission
27 | import scala.concurrent.Await
28 | import scala.concurrent.duration._
29 | import com.tuplejump.snackfs.fs.model._
30 | import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
31 | import com.twitter.logging.Logger
32 |
33 | case class FileSystemOutputStream(store: FileSystemStore, path: Path,
34 | blockSize: Long, subBlockSize: Long,
35 | bufferSize: Long, atMost: FiniteDuration) extends OutputStream {
36 |
37 | private lazy val log = Logger.get(getClass)
38 |
39 | private var isClosed: Boolean = false
40 |
41 | private var blockId: UUID = UUIDGen.getTimeUUID
42 |
43 | private var subBlockOffset = 0
44 | private var blockOffset = 0
45 | private var position = 0
46 | private var outBuffer = Array.empty[Byte]
47 |
48 | private var subBlocksMeta = List[SubBlockMeta]()
49 | private var blocksMeta = List[BlockMeta]()
50 |
51 | private var isClosing = false
52 |
53 | private var bytesWrittenToBlock = 0
54 |
55 | def write(p1: Int) = {
56 | if (isClosed) {
57 | val ex = new IOException("Stream closed")
58 | log.error(ex, "Failed to write as stream is closed")
59 | throw ex
60 | }
61 | outBuffer = outBuffer ++ Array(p1.toByte)
62 | position += 1
63 | if (position == subBlockSize) {
64 | flush()
65 | }
66 | }
67 |
68 | override def write(buf: Array[Byte], offset: Int, length: Int) = {
69 | if (isClosed) {
70 | val ex = new IOException("Stream closed")
71 | log.error(ex, "Failed to write as stream is closed")
72 | throw ex
73 | }
74 | var lengthTemp = length
75 | var offsetTemp = offset
76 | while (lengthTemp > 0) {
77 | val lengthToWrite = math.min(subBlockSize - position, lengthTemp).asInstanceOf[Int]
78 | val slice: Array[Byte] = buf.slice(offsetTemp, offsetTemp + lengthToWrite)
79 | outBuffer = outBuffer ++ slice
80 | lengthTemp -= lengthToWrite
81 | offsetTemp += lengthToWrite
82 | position += lengthToWrite
83 | if (position == subBlockSize) {
84 | flush()
85 | }
86 | }
87 | }
88 |
89 | private def endSubBlock() = {
90 | if (position != 0) {
91 | val subBlockMeta = SubBlockMeta(UUIDGen.getTimeUUID, subBlockOffset, position)
92 | log.debug("storing subblock")
93 | Await.ready(store.storeSubBlock(blockId, subBlockMeta, ByteBuffer.wrap(outBuffer)), atMost)
94 |
95 | subBlockOffset += position
96 | bytesWrittenToBlock += position
97 | subBlocksMeta = subBlocksMeta :+ subBlockMeta
98 | position = 0
99 | outBuffer = Array.empty[Byte]
100 | }
101 | }
102 |
103 | private def endBlock() = {
104 | val subBlockLengths = subBlocksMeta.map(_.length).sum
105 | val block = BlockMeta(blockId, blockOffset, subBlockLengths, subBlocksMeta)
106 | blocksMeta = blocksMeta :+ block
107 | val user = System.getProperty("user.name")
108 | val permissions = FsPermission.getDefault
109 | val timestamp = System.currentTimeMillis()
110 | val iNode = INode(user, user, permissions, FileType.FILE, blocksMeta, timestamp)
111 |
112 | log.debug("storing/updating block details for INode at %s", path)
113 | Await.ready(store.storeINode(path, iNode), atMost)
114 |
115 | blockOffset += subBlockLengths.asInstanceOf[Int]
116 | subBlocksMeta = List()
117 | subBlockOffset = 0
118 | blockId = UUIDGen.getTimeUUID
119 | bytesWrittenToBlock = 0
120 | }
121 |
122 | override def flush() = {
123 | if (isClosed) {
124 | val ex = new IOException("Stream closed")
125 | log.error(ex, "Failed to write as stream is closed")
126 | throw ex
127 | }
128 | log.debug("flushing data at %s", position)
129 | endSubBlock()
130 | if (bytesWrittenToBlock >= blockSize || isClosing) {
131 | endBlock()
132 | }
133 | }
134 |
135 | override def close() = {
136 | if (!isClosed) {
137 | log.debug("closing stream")
138 | isClosing = true
139 | flush()
140 | super.close()
141 | isClosed = true
142 | }
143 | }
144 | }
145 |
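A minimal write-path sketch using the stream above (illustrative only; the path, sizes and timeout are made-up values, and store stands for an already-initialized FileSystemStore): data is buffered until a sub-block of subBlockSize is full, each flush persists a sub-block, and the block's INode is stored whenever blockSize bytes accumulate or the stream is closed.

import org.apache.hadoop.fs.Path
import scala.concurrent.duration._
import com.tuplejump.snackfs.cassandra.partial.FileSystemStore
import com.tuplejump.snackfs.fs.stream.FileSystemOutputStream

def writeSketch(store: FileSystemStore): Unit = {
  val out = FileSystemOutputStream(
    store,
    new Path("/demo/report.txt"),
    blockSize = 128L * 1024 * 1024,  // start a new block once ~128 MB has been written
    subBlockSize = 8L * 1024 * 1024, // each stored sub-block holds at most 8 MB
    bufferSize = 4096L,
    atMost = 30.seconds)

  out.write("hello snackfs".getBytes)
  out.close() // flushes the last sub-block and stores the INode for the block
}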
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/util/AsyncUtil.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.util
20 |
21 | import org.apache.thrift.async.AsyncMethodCallback
22 |
23 | import scala.concurrent.{Future, promise}
24 |
25 | object AsyncUtil {
26 | /**
27 |  * Adapts a (partially applied) method expecting an AsyncMethodCallback into a
28 |  * Future completed on the callback's success or failure. A usage sketch appears at the end of this file.
29 |  *
30 |  * @param f the partially applied call, missing only its AsyncMethodCallback argument
31 |  * @tparam T the result type delivered to the callback
32 |  * @return a Future completed with the callback's result or failed with its error
33 |  */
34 | def executeAsync[T](f: AsyncMethodCallback[T] => Unit): Future[T] = {
35 |
36 | class PromisingHandler extends AsyncMethodCallback[T] {
37 | val p = promise[T]()
38 |
39 | def onComplete(p1: T) {
40 | p success p1
41 | }
42 |
43 | def onError(p1: Exception) {
44 | p failure p1
45 | }
46 | }
47 |
48 | val promisingHandler: PromisingHandler = new PromisingHandler()
49 |
50 | f(promisingHandler)
51 |
52 | promisingHandler.p.future
53 | }
54 | }
55 |
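A usage sketch of executeAsync (illustrative only; resolveOwner is a hypothetical callback-based operation standing in for a Thrift async call such as client.insert(..., _)):

import org.apache.thrift.async.AsyncMethodCallback
import scala.concurrent.Await
import scala.concurrent.duration._
import com.tuplejump.snackfs.util.AsyncUtil

// Hypothetical callback-style API: reports its result through AsyncMethodCallback.
def resolveOwner(path: String, callback: AsyncMethodCallback[String]): Unit =
  callback.onComplete("snackfs-user")

// Partially apply the call, leaving only the callback slot for executeAsync to fill.
val ownerFuture = AsyncUtil.executeAsync[String](resolveOwner("/tmp/demo", _))
assert(Await.result(ownerFuture, 1.second) == "snackfs-user")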
--------------------------------------------------------------------------------
/src/main/scala/com/tuplejump/snackfs/util/LogConfiguration.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.util
20 |
21 | import com.twitter.logging.{FileHandler, Level, LoggerFactory}
22 |
23 | object LogConfiguration {
24 |
25 | val level = System.getenv("SNACKFS_LOG_LEVEL") match {
26 | case "DEBUG" => Level.DEBUG
27 | case "INFO" => Level.INFO
28 | case "ERROR" => Level.ERROR
29 | case "ALL" => Level.ALL
30 | case "OFF" => Level.OFF
31 | case _ => Level.ERROR
32 | }
33 | val config = new LoggerFactory("", Some(level), List(FileHandler("snackfs.log")), true)
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/scripts/hadoop:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | BASE=`dirname "${BASH_SOURCE-$0}"`/..
4 |
5 | CONF=conf
6 | CONF_DIR=${CONF_DIR:-$BASE/$CONF}
7 |
8 | if [ -z $JAVA_HOME ]; then
9 | echo JAVA_HOME is not set, trying to determine it
10 | JAVA_HOME=`realpath $(dirname $(realpath $(which java)))/..`
11 | JAVA_HOME_SET=true
12 | fi
13 |
14 | if [ $JAVA_HOME_SET ]; then
15 | echo Java found at $JAVA_HOME
16 | fi
17 |
18 | if [ -z $JAVA_HOME ]; then
19 | echo You should install Java and set JAVA_HOME
20 | exit
21 | fi
22 |
23 | # if no args specified, show usage
24 | if [ $# = 0 ]; then
25 | echo "Usage: snackfs COMMAND"
26 | echo "where COMMAND is:"
27 | echo " fs run a generic filesystem user client"
28 | exit 1
29 | fi
30 |
31 | # get arguments
32 | COMMAND=$1
33 | shift
34 |
35 | JAVA=$JAVA_HOME/bin/java
36 | JAVA_HEAP_MAX=-Xmx1000m
37 |
38 | CLASSPATH="${CONF_DIR}"
39 | CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
40 |
41 | # add libs to CLASSPATH
42 | for f in $BASE/lib/*.jar; do
43 | CLASSPATH=${CLASSPATH}:$f;
44 | done
45 |
46 | if [ "$COMMAND" = "fs" ] ; then
47 | CLASS=org.apache.hadoop.fs.SnackFSShell
48 | else exit 1
49 | fi
50 |
51 | OPTS=""
52 | exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $OPTS -classpath "$CLASSPATH" $CLASS "$@"
53 |
54 |
55 |
--------------------------------------------------------------------------------
/src/main/scripts/snackfs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | BASE=`realpath $(dirname "${BASH_SOURCE-$0}")/..`
4 |
5 | CONF=conf
6 | CONF_DIR=${CONF_DIR:-$BASE/$CONF}
7 |
8 | if [ -z $JAVA_HOME ]; then
9 | echo JAVA_HOME is not set, trying to determine it
10 | JAVA_HOME=`realpath $(dirname $(realpath $(which java)))/..`
11 | JAVA_HOME_SET=true
12 | fi
13 |
14 | if [ $JAVA_HOME_SET ]; then
15 | echo Java found at $JAVA_HOME
16 | fi
17 |
18 | if [ -z $JAVA_HOME ]; then
19 | echo You should install Java and set JAVA_HOME
20 | exit
21 | fi
22 |
23 | JAVA=$JAVA_HOME/bin/java
24 | JAVA_HEAP_MAX=-Xmx1000m
25 |
26 | CLASSPATH="${CONF_DIR}"
27 | CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
28 |
29 | # add libs to CLASSPATH
30 | for f in $BASE/lib/*.jar; do
31 | CLASSPATH=${CLASSPATH}:$f;
32 | done
33 |
34 | OPTS=""
35 | exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $OPTS -classpath "$CLASSPATH" org.apache.hadoop.fs.SnackFSShell "$@"
36 |
37 |
38 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/hadoop/fs/TestFileSystem.java:
--------------------------------------------------------------------------------
1 | /*
2 | Test for FileSystem from Hadoop codebase
3 | */
4 | /*
5 | package org.apache.hadoop.fs;
6 |
7 | import java.io.DataInputStream;
8 | import java.io.IOException;
9 | import java.io.OutputStream;
10 | import java.security.PrivilegedExceptionAction;
11 | import java.util.Arrays;
12 | import java.util.Collections;
13 | import java.util.Random;
14 | import java.util.List;
15 | import java.util.ArrayList;
16 | import java.util.Set;
17 | import java.util.HashSet;
18 | import java.util.Map;
19 | import java.util.HashMap;
20 | import java.net.InetSocketAddress;
21 | import java.net.URI;
22 |
23 | import junit.framework.TestCase;
24 |
25 | import org.apache.commons.logging.Log;
26 | import org.apache.hadoop.conf.Configuration;
27 | import org.apache.hadoop.conf.Configured;
28 | import org.apache.hadoop.fs.shell.CommandFormat;
29 | import org.apache.hadoop.hdfs.MiniDFSCluster;
30 | import org.apache.hadoop.hdfs.server.namenode.NameNode;
31 | import org.apache.hadoop.io.LongWritable;
32 | import org.apache.hadoop.io.SequenceFile;
33 | import org.apache.hadoop.io.Text;
34 | import org.apache.hadoop.io.SequenceFile.CompressionType;
35 | import org.apache.hadoop.mapred.*;
36 | import org.apache.hadoop.mapred.lib.LongSumReducer;
37 | import org.apache.hadoop.security.UserGroupInformation;
38 |
39 | public class TestFileSystem extends TestCase {
40 | private static final Log LOG = FileSystem.LOG;
41 |
42 | private static Configuration conf = new Configuration();
43 | private static int BUFFER_SIZE = conf.getInt("io.file.buffer.size", 4096);
44 |
45 | private static final long MEGA = 1024 * 1024;
46 | private static final int SEEKS_PER_FILE = 4;
47 |
48 | private static String ROOT = System.getProperty("test.build.data", "fs_test");
49 | private static Path CONTROL_DIR = new Path(ROOT, "fs_control");
50 | private static Path WRITE_DIR = new Path(ROOT, "fs_write");
51 | private static Path READ_DIR = new Path(ROOT, "fs_read");
52 | private static Path DATA_DIR = new Path(ROOT, "fs_data");
53 |
54 | public void testFs() throws Exception {
55 | testFs(10 * MEGA, 100, 0);
56 | }
57 |
58 | public static void testFs(long megaBytes, int numFiles, long seed)
59 | throws Exception {
60 |
61 | FileSystem fs = SnackFS.get(conf);
62 |
63 | if (seed == 0)
64 | seed = new Random().nextLong();
65 |
66 | LOG.info("seed = " + seed);
67 |
68 | createControlFile(fs, megaBytes, numFiles, seed);
69 | writeTest(fs, false);
70 | readTest(fs, false);
71 | seekTest(fs, false);
72 | fs.delete(CONTROL_DIR, true);
73 | fs.delete(DATA_DIR, true);
74 | fs.delete(WRITE_DIR, true);
75 | fs.delete(READ_DIR, true);
76 | }
77 |
78 | public static void testCommandFormat() throws Exception {
79 | // This should go to TestFsShell.java when it is added.
80 | CommandFormat cf;
81 | cf = new CommandFormat("copyToLocal", 2, 2, "crc", "ignoreCrc");
82 | assertEquals(cf.parse(new String[]{"-get", "file", "-"}, 1).get(1), "-");
83 | try {
84 | cf.parse(new String[]{"-get", "file", "-ignoreCrc", "/foo"}, 1);
85 | fail("Expected parsing to fail as it should stop at first non-option");
86 | } catch (Exception e) {
87 | // Expected
88 | }
89 | cf = new CommandFormat("tail", 1, 1, "f");
90 | assertEquals(cf.parse(new String[]{"-tail", "fileName"}, 1).get(0), "fileName");
91 | assertEquals(cf.parse(new String[]{"-tail", "-f", "fileName"}, 1).get(0), "fileName");
92 | cf = new CommandFormat("setrep", 2, 2, "R", "w");
93 | assertEquals(cf.parse(new String[]{"-setrep", "-R", "2", "/foo/bar"}, 1).get(1), "/foo/bar");
94 | cf = new CommandFormat("put", 2, 10000);
95 | assertEquals(cf.parse(new String[]{"-put", "-", "dest"}, 1).get(1), "dest");
96 | }
97 |
98 | public static void createControlFile(FileSystem fs,
99 | long megaBytes, int numFiles,
100 | long seed) throws Exception {
101 |
102 | LOG.info("creating control file: " + megaBytes + " bytes, " + numFiles + " files");
103 |
104 | Path controlFile = new Path(CONTROL_DIR, "files");
105 | fs.delete(controlFile, true);
106 | Random random = new Random(seed);
107 |
108 | SequenceFile.Writer writer =
109 | SequenceFile.createWriter(fs, conf, controlFile,
110 | Text.class, LongWritable.class, CompressionType.NONE);
111 |
112 | long totalSize = 0;
113 | long maxSize = ((megaBytes / numFiles) * 2) + 1;
114 | try {
115 | while (totalSize < megaBytes) {
116 | Text name = new Text(Long.toString(random.nextLong()));
117 |
118 | long size = random.nextLong();
119 | if (size < 0)
120 | size = -size;
121 | size = size % maxSize;
122 |
123 | //LOG.info(" adding: name="+name+" size="+size);
124 |
125 | writer.append(name, new LongWritable(size));
126 |
127 | totalSize += size;
128 | }
129 | } finally {
130 | writer.close();
131 | }
132 | LOG.info("created control file for: " + totalSize + " bytes");
133 | }
134 |
135 | public static class WriteMapper extends Configured
136 | implements Mapper {
137 |
138 | private Random random = new Random();
139 | private byte[] buffer = new byte[BUFFER_SIZE];
140 | private FileSystem fs;
141 | private boolean fastCheck;
142 |
143 | // a random suffix per task
144 | private String suffix = "-" + random.nextLong();
145 |
146 | {
147 | try {
148 | fs = SnackFS.get(conf);
149 | } catch (IOException e) {
150 | throw new RuntimeException(e);
151 | }
152 | }
153 |
154 | public WriteMapper() {
155 | super(null);
156 | }
157 |
158 | public WriteMapper(Configuration conf) {
159 | super(conf);
160 | }
161 |
162 | public void configure(JobConf job) {
163 | setConf(job);
164 | fastCheck = job.getBoolean("fs.test.fastCheck", false);
165 | }
166 |
167 | public void map(Text key, LongWritable value,
168 | OutputCollector collector,
169 | Reporter reporter)
170 | throws IOException {
171 |
172 | String name = key.toString();
173 | long size = value.get();
174 | long seed = Long.parseLong(name);
175 |
176 | random.setSeed(seed);
177 | reporter.setStatus("creating " + name);
178 |
179 | // write to temp file initially to permit parallel execution
180 | Path tempFile = new Path(DATA_DIR, name + suffix);
181 | OutputStream out = fs.create(tempFile);
182 |
183 | long written = 0;
184 | try {
185 | while (written < size) {
186 | if (fastCheck) {
187 | Arrays.fill(buffer, (byte) random.nextInt(Byte.MAX_VALUE));
188 | } else {
189 | random.nextBytes(buffer);
190 | }
191 | long remains = size - written;
192 | int length = (remains <= buffer.length) ? (int) remains : buffer.length;
193 | out.write(buffer, 0, length);
194 | written += length;
195 | reporter.setStatus("writing " + name + "@" + written + "/" + size);
196 | }
197 | } finally {
198 | out.close();
199 | }
200 | // rename to final location
201 | fs.rename(tempFile, new Path(DATA_DIR, name));
202 |
203 | collector.collect(new Text("bytes"), new LongWritable(written));
204 |
205 | reporter.setStatus("wrote " + name);
206 | }
207 |
208 | public void close() {
209 | }
210 |
211 | }
212 |
213 | public static void writeTest(FileSystem fs, boolean fastCheck)
214 | throws Exception {
215 |
216 | fs.delete(DATA_DIR, true);
217 | fs.delete(WRITE_DIR, true);
218 |
219 | JobConf job = new JobConf(conf, TestFileSystem.class);
220 | job.setBoolean("fs.test.fastCheck", fastCheck);
221 |
222 | FileInputFormat.setInputPaths(job, CONTROL_DIR);
223 | job.setInputFormat(SequenceFileInputFormat.class);
224 |
225 | job.setMapperClass(WriteMapper.class);
226 | job.setReducerClass(LongSumReducer.class);
227 |
228 | FileOutputFormat.setOutputPath(job, WRITE_DIR);
229 | job.setOutputKeyClass(Text.class);
230 | job.setOutputValueClass(LongWritable.class);
231 | job.setNumReduceTasks(1);
232 | JobClient.runJob(job);
233 | }
234 |
235 | public static class ReadMapper extends Configured
236 | implements Mapper {
237 |
238 | private Random random = new Random();
239 | private byte[] buffer = new byte[BUFFER_SIZE];
240 | private byte[] check = new byte[BUFFER_SIZE];
241 | private FileSystem fs;
242 | private boolean fastCheck;
243 |
244 | {
245 | try {
246 | fs = SnackFS.get(conf);
247 | } catch (IOException e) {
248 | throw new RuntimeException(e);
249 | }
250 | }
251 |
252 | public ReadMapper() {
253 | super(null);
254 | }
255 |
256 | public ReadMapper(Configuration conf) {
257 | super(conf);
258 | }
259 |
260 | public void configure(JobConf job) {
261 | setConf(job);
262 | fastCheck = job.getBoolean("fs.test.fastCheck", false);
263 | }
264 |
265 | public void map(Text key, LongWritable value,
266 | OutputCollector collector,
267 | Reporter reporter)
268 | throws IOException {
269 |
270 | String name = key.toString();
271 | long size = value.get();
272 | long seed = Long.parseLong(name);
273 |
274 | random.setSeed(seed);
275 | reporter.setStatus("opening " + name);
276 |
277 | DataInputStream in =
278 | new DataInputStream(fs.open(new Path(DATA_DIR, name)));
279 |
280 | long read = 0;
281 | try {
282 | while (read < size) {
283 | long remains = size - read;
284 | int n = (remains <= buffer.length) ? (int) remains : buffer.length;
285 | in.readFully(buffer, 0, n);
286 | read += n;
287 | if (fastCheck) {
288 | Arrays.fill(check, (byte) random.nextInt(Byte.MAX_VALUE));
289 | } else {
290 | random.nextBytes(check);
291 | }
292 | if (n != buffer.length) {
293 | Arrays.fill(buffer, n, buffer.length, (byte) 0);
294 | Arrays.fill(check, n, check.length, (byte) 0);
295 | }
296 | assertTrue(Arrays.equals(buffer, check));
297 |
298 | reporter.setStatus("reading " + name + "@" + read + "/" + size);
299 |
300 | }
301 | } finally {
302 | in.close();
303 | }
304 |
305 | collector.collect(new Text("bytes"), new LongWritable(read));
306 |
307 | reporter.setStatus("read " + name);
308 | }
309 |
310 | public void close() {
311 | }
312 |
313 | }
314 |
315 | public static void readTest(FileSystem fs, boolean fastCheck)
316 | throws Exception {
317 |
318 | fs.delete(READ_DIR, true);
319 |
320 | JobConf job = new JobConf(conf, TestFileSystem.class);
321 | job.setBoolean("fs.test.fastCheck", fastCheck);
322 |
323 |
324 | FileInputFormat.setInputPaths(job, CONTROL_DIR);
325 | job.setInputFormat(SequenceFileInputFormat.class);
326 |
327 | job.setMapperClass(ReadMapper.class);
328 | job.setReducerClass(LongSumReducer.class);
329 |
330 | FileOutputFormat.setOutputPath(job, READ_DIR);
331 | job.setOutputKeyClass(Text.class);
332 | job.setOutputValueClass(LongWritable.class);
333 | job.setNumReduceTasks(1);
334 | JobClient.runJob(job);
335 | }
336 |
337 |
338 | public static class SeekMapper extends Configured
339 | implements Mapper {
340 |
341 | private Random random = new Random();
342 | private byte[] check = new byte[BUFFER_SIZE];
343 | private FileSystem fs;
344 | private boolean fastCheck;
345 |
346 | {
347 | try {
348 | fs = SnackFS.get(conf);
349 | } catch (IOException e) {
350 | throw new RuntimeException(e);
351 | }
352 | }
353 |
354 | public SeekMapper() {
355 | super(null);
356 | }
357 |
358 | public SeekMapper(Configuration conf) {
359 | super(conf);
360 | }
361 |
362 | public void configure(JobConf job) {
363 | setConf(job);
364 | fastCheck = job.getBoolean("fs.test.fastCheck", false);
365 | }
366 |
367 | public void map(Text key, LongWritable value,
368 | OutputCollector collector,
369 | Reporter reporter)
370 | throws IOException {
371 | String name = key.toString();
372 | long size = value.get();
373 | long seed = Long.parseLong(name);
374 |
375 | if (size == 0) return;
376 |
377 | reporter.setStatus("opening " + name);
378 |
379 | FSDataInputStream in = fs.open(new Path(DATA_DIR, name));
380 |
381 | try {
382 | for (int i = 0; i < SEEKS_PER_FILE; i++) {
383 | // generate a random position
384 | long position = Math.abs(random.nextLong()) % size;
385 |
386 | // seek file to that position
387 | reporter.setStatus("seeking " + name);
388 | in.seek(position);
389 | byte b = in.readByte();
390 |
391 | // check that byte matches
392 | byte checkByte = 0;
393 | // advance random state to that position
394 | random.setSeed(seed);
395 | for (int p = 0; p <= position; p += check.length) {
396 | reporter.setStatus("generating data for " + name);
397 | if (fastCheck) {
398 | checkByte = (byte) random.nextInt(Byte.MAX_VALUE);
399 | } else {
400 | random.nextBytes(check);
401 | checkByte = check[(int) (position % check.length)];
402 | }
403 | }
404 | assertEquals(b, checkByte);
405 | }
406 | } finally {
407 | in.close();
408 | }
409 | }
410 |
411 | public void close() {
412 | }
413 |
414 | }
415 |
416 | public static void seekTest(FileSystem fs, boolean fastCheck)
417 | throws Exception {
418 |
419 | fs.delete(READ_DIR, true);
420 |
421 | JobConf job = new JobConf(conf, TestFileSystem.class);
422 | job.setBoolean("fs.test.fastCheck", fastCheck);
423 |
424 | FileInputFormat.setInputPaths(job, CONTROL_DIR);
425 | job.setInputFormat(SequenceFileInputFormat.class);
426 |
427 | job.setMapperClass(SeekMapper.class);
428 | job.setReducerClass(LongSumReducer.class);
429 |
430 | FileOutputFormat.setOutputPath(job, READ_DIR);
431 | job.setOutputKeyClass(Text.class);
432 | job.setOutputValueClass(LongWritable.class);
433 | job.setNumReduceTasks(1);
434 | JobClient.runJob(job);
435 | }
436 |
437 |
438 | public static void main(String[] args) throws Exception {
439 | int megaBytes = 10;
440 | int files = 100;
441 | boolean noRead = false;
442 | boolean noWrite = false;
443 | boolean noSeek = false;
444 | boolean fastCheck = false;
445 | long seed = new Random().nextLong();
446 |
447 | String usage = "Usage: TestFileSystem -files N -megaBytes M [-noread] [-nowrite] [-noseek] [-fastcheck]";
448 |
449 | if (args.length == 0) {
450 | System.err.println(usage);
451 | System.exit(-1);
452 | }
453 | for (int i = 0; i < args.length; i++) { // parse command line
454 | if (args[i].equals("-files")) {
455 | files = Integer.parseInt(args[++i]);
456 | } else if (args[i].equals("-megaBytes")) {
457 | megaBytes = Integer.parseInt(args[++i]);
458 | } else if (args[i].equals("-noread")) {
459 | noRead = true;
460 | } else if (args[i].equals("-nowrite")) {
461 | noWrite = true;
462 | } else if (args[i].equals("-noseek")) {
463 | noSeek = true;
464 | } else if (args[i].equals("-fastcheck")) {
465 | fastCheck = true;
466 | }
467 | }
468 |
469 | LOG.info("seed = " + seed);
470 | LOG.info("files = " + files);
471 | LOG.info("megaBytes = " + megaBytes);
472 |
473 | FileSystem fs = SnackFS.get(conf);
474 |
475 | if (!noWrite) {
476 | createControlFile(fs, megaBytes * MEGA, files, seed);
477 | writeTest(fs, fastCheck);
478 | }
479 | if (!noRead) {
480 | readTest(fs, fastCheck);
481 | }
482 | if (!noSeek) {
483 | seekTest(fs, fastCheck);
484 | }
485 | }
486 |
487 | public void testFsCache() throws Exception {
488 | {
489 | long now = System.currentTimeMillis();
490 | String[] users = new String[]{"foo", "bar"};
491 | final Configuration conf = new Configuration();
492 | FileSystem[] fs = new FileSystem[users.length];
493 |
494 | for (int i = 0; i < users.length; i++) {
495 | UserGroupInformation ugi = UserGroupInformation.createRemoteUser(users[i]);
496 | fs[i] = ugi.doAs(new PrivilegedExceptionAction() {
497 | public FileSystem run() throws IOException {
498 | return SnackFS.get(conf);
499 | }
500 | });
501 | for (int j = 0; j < i; j++) {
502 | assertFalse(fs[j] == fs[i]);
503 | }
504 | }
505 | FileSystem.closeAll();
506 | }
507 |
508 | {
509 | try {
510 | runTestCache(NameNode.DEFAULT_PORT);
511 | } catch (java.net.BindException be) {
512 | LOG.warn("Cannot test NameNode.DEFAULT_PORT (="
513 | + NameNode.DEFAULT_PORT + ")", be);
514 | }
515 |
516 | runTestCache(0);
517 | }
518 | }
519 |
520 | static void runTestCache(int port) throws Exception {
521 | Configuration conf = new Configuration();
522 | MiniDFSCluster cluster = null;
523 | try {
524 | cluster = new MiniDFSCluster(port, conf, 2, true, true, null, null);
525 | URI uri = cluster.getFileSystem().getUri();
526 | LOG.info("uri=" + uri);
527 |
528 | {
529 | FileSystem fs = SnackFS.get(uri, new Configuration());
530 | checkPath(cluster, fs);
531 | for (int i = 0; i < 100; i++) {
532 | assertTrue(fs == SnackFS.get(uri, new Configuration()));
533 | }
534 | }
535 |
536 | if (port == NameNode.DEFAULT_PORT) {
537 | //test explicit default port
538 | URI uri2 = new URI(uri.getScheme(), uri.getUserInfo(),
539 | uri.getHost(), NameNode.DEFAULT_PORT, uri.getPath(),
540 | uri.getQuery(), uri.getFragment());
541 | LOG.info("uri2=" + uri2);
542 | FileSystem fs = SnackFS.get(uri2, conf);
543 | checkPath(cluster, fs);
544 | for (int i = 0; i < 100; i++) {
545 | assertTrue(fs == SnackFS.get(uri2, new Configuration()));
546 | }
547 | }
548 | } finally {
549 | if (cluster != null) cluster.shutdown();
550 | }
551 | }
552 |
553 | static void checkPath(MiniDFSCluster cluster, FileSystem fileSys) throws IOException {
554 | InetSocketAddress add = cluster.getNameNode().getNameNodeAddress();
555 | // Test upper/lower case
556 | fileSys.checkPath(new Path("hdfs://" + add.getHostName().toUpperCase() + ":" + add.getPort()));
557 | }
558 |
559 | public void testFsClose() throws Exception {
560 | {
561 | Configuration conf = new Configuration();
562 | new Path("file:///").getFileSystem(conf);
563 | FileSystem.closeAll();
564 | }
565 |
566 | {
567 | Configuration conf = new Configuration();
568 | new Path("hftp://localhost:12345/").getFileSystem(conf);
569 | FileSystem.closeAll();
570 | }
571 |
572 | {
573 | Configuration conf = new Configuration();
574 | FileSystem fs = new Path("hftp://localhost:12345/").getFileSystem(conf);
575 | FileSystem.closeAll();
576 | }
577 | }
578 |
579 | public void testFsShutdownHook() throws Exception {
580 | final Set closed = Collections.synchronizedSet(new HashSet());
581 | Configuration conf = new Configuration();
582 | Configuration confNoAuto = new Configuration();
583 |
584 | conf.setClass("fs.test.impl", TestShutdownFileSystem.class, FileSystem.class);
585 | confNoAuto.setClass("fs.test.impl", TestShutdownFileSystem.class, FileSystem.class);
586 | confNoAuto.setBoolean("fs.automatic.close", false);
587 |
588 | TestShutdownFileSystem fsWithAuto =
589 | (TestShutdownFileSystem) (new Path("test://a/").getFileSystem(conf));
590 | TestShutdownFileSystem fsWithoutAuto =
591 | (TestShutdownFileSystem) (new Path("test://b/").getFileSystem(confNoAuto));
592 |
593 | fsWithAuto.setClosedSet(closed);
594 | fsWithoutAuto.setClosedSet(closed);
595 |
596 | // Different URIs should result in different FS instances
597 | assertNotSame(fsWithAuto, fsWithoutAuto);
598 |
599 | FileSystem.CACHE.closeAll(true);
600 | assertEquals(1, closed.size());
601 | assertTrue(closed.contains(fsWithAuto));
602 |
603 | closed.clear();
604 |
605 | FileSystem.closeAll();
606 | assertEquals(1, closed.size());
607 | assertTrue(closed.contains(fsWithoutAuto));
608 | }
609 |
610 |
611 | public void testCacheKeysAreCaseInsensitive()
612 | throws Exception {
613 | Configuration conf = new Configuration();
614 |
615 | // check basic equality
616 | FileSystem.Cache.Key lowercaseCachekey1 = new SnackFS.Cache.Key(new URI("hftp://localhost:12345/"), conf);
617 | FileSystem.Cache.Key lowercaseCachekey2 = new SnackFS.Cache.Key(new URI("hftp://localhost:12345/"), conf);
618 | assertEquals(lowercaseCachekey1, lowercaseCachekey2);
619 |
620 | // check insensitive equality
621 | FileSystem.Cache.Key uppercaseCachekey = new SnackFS.Cache.Key(new URI("HFTP://Localhost:12345/"), conf);
622 | assertEquals(lowercaseCachekey2, uppercaseCachekey);
623 |
624 | // check behaviour with collections
625 | List list = new ArrayList();
626 | list.add(uppercaseCachekey);
627 | assertTrue(list.contains(uppercaseCachekey));
628 | assertTrue(list.contains(lowercaseCachekey2));
629 |
630 | Set set = new HashSet();
631 | set.add(uppercaseCachekey);
632 | assertTrue(set.contains(uppercaseCachekey));
633 | assertTrue(set.contains(lowercaseCachekey2));
634 |
635 | Map map = new HashMap();
636 | map.put(uppercaseCachekey, "");
637 | assertTrue(map.containsKey(uppercaseCachekey));
638 | assertTrue(map.containsKey(lowercaseCachekey2));
639 |
640 | }
641 |
642 | public static void testFsUniqueness(long megaBytes, int numFiles, long seed)
643 | throws Exception {
644 |
645 | // multiple invocations of FileSystem.get return the same object.
646 | FileSystem fs1 = SnackFS.get(conf);
647 | FileSystem fs2 = SnackFS.get(conf);
648 | assertTrue(fs1 == fs2);
649 |
650 | // multiple invocations of FileSystem.newInstance return different objects
651 | fs1 = SnackFS.newInstance(conf);
652 | fs2 = SnackFS.newInstance(conf);
653 | assertTrue(fs1 != fs2 && !fs1.equals(fs2));
654 | fs1.close();
655 | fs2.close();
656 | }
657 |
658 | public static class TestShutdownFileSystem extends RawLocalFileSystem {
659 | private Set closedSet;
660 |
661 | public void setClosedSet(Set closedSet) {
662 | this.closedSet = closedSet;
663 | }
664 |
665 | public void close() throws IOException {
666 | if (closedSet != null) {
667 | closedSet.add(this);
668 | }
669 | super.close();
670 | }
671 | }
672 | }*/
--------------------------------------------------------------------------------
/src/test/resources/vsmall.txt:
--------------------------------------------------------------------------------
1 | The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete
2 | by Mark Twain (Samuel Clemens)
3 |
4 | This eBook is for the use of anyone anywhere at no cost and with almost
5 | no restrictions whatsoever. You may copy it, give it away or re-use
6 | it under the terms of the Project Gutenberg License included with this
7 | eBook or online at www.gutenberg.net
8 |
9 | Title: Adventures of Huckleberry Finn, Complete
10 |
11 | Author: Mark Twain (Samuel Clemens)
12 |
13 | Release Date: August 20, 2006 [EBook #76]
14 |
15 | Last Updated: October 20, 2012]
16 |
17 | Language: English
18 |
19 |
20 | *** START OF THIS PROJECT GUTENBERG EBOOK HUCKLEBERRY FINN ***
21 |
22 | Produced by David Widger
--------------------------------------------------------------------------------
/src/test/scala/com/tuplejump/snackfs/SnackFSSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs
20 |
21 | import org.scalatest.{BeforeAndAfterAll, FlatSpec}
22 | import org.scalatest.matchers.MustMatchers
23 | import java.net.URI
24 | import org.apache.hadoop.conf.Configuration
25 | import org.apache.hadoop.fs.Path
26 | import java.io.{FileNotFoundException, IOException}
27 | import java.util.Date
28 | import org.apache.commons.lang3.RandomStringUtils
29 |
30 | class SnackFSSpec extends FlatSpec with BeforeAndAfterAll with MustMatchers {
31 |
32 | val isTrue = true
33 | val isFalse = false
34 |
35 | val fs = SnackFS()
36 | val uri = URI.create("snackfs://localhost:9000")
37 | fs.initialize(uri, new Configuration())
38 |
39 | val timestamp = new Date()
40 | val basePath = "/test" + timestamp.getTime
41 |
42 | it should "create a new filesystem with the given store" in {
43 | fs.getUri must be(uri)
44 | val user = System.getProperty("user.name", "none")
45 | fs.getWorkingDirectory must be(new Path("snackfs://localhost:9000/user/" + user))
46 | }
47 |
48 | it should "add a directory" in {
49 | val result = fs.mkdirs(new Path(basePath + "/mytestdir"))
50 | assert(result === isTrue)
51 | }
52 |
53 | it should "create an entry for a file" in {
54 | val fsData = fs.create(new Path(basePath + "/home/Downloads/JSONParser.js"))
55 | fsData.write("SOME CONTENT".getBytes)
56 | val position = fsData.getPos
57 | position must be(12)
58 | }
59 |
60 | it should "result in false when trying to add an existing file as a directory" in {
61 | val fsData = fs.create(new Path(basePath + "/home/Downloads/someTest"))
62 | fsData.write("SOME CONTENT".getBytes)
63 | fsData.close()
64 | val path = new Path(basePath + "/home/Downloads/someTest")
65 | fs.mkdirs(path) must be(isFalse)
66 | }
67 |
68 | it should "allow reading from a file" in {
69 | val fsData = fs.create(new Path(basePath + "/home/Downloads/random"))
70 | fsData.write("SOME CONTENT".getBytes)
71 | fsData.close()
72 |
73 | val is = fs.open(new Path(basePath + "/home/Downloads/random"))
74 | val dataArray = new Array[Byte](12)
75 | is.readFully(0, dataArray)
76 | is.close()
77 |
78 | val result = new String(dataArray)
79 | result must be("SOME CONTENT")
80 | }
81 |
82 | it should "throw an exception when trying to open a directory" in {
83 | val path = new Path(basePath + "/test")
84 | fs.mkdirs(path)
85 | val exception = intercept[IOException] {
86 | fs.open(path)
87 | }
88 | exception.getMessage must be("Path %s is a directory.".format(path))
89 | }
90 |
91 | it should "throw an exception when trying to open a file which doesn't exist" in {
92 | val path = new Path(basePath + "/newFile")
93 | val exception = intercept[IOException] {
94 | fs.open(path)
95 | }
96 | exception.getMessage must be("No such file.")
97 | }
98 |
99 | it should "get file status" in {
100 | val path = new Path(basePath + "/home/Downloads/testStatus")
101 | val fsData = fs.create(path)
102 | fsData.write("SOME CONTENT".getBytes)
103 | fsData.close()
104 |
105 | val status = fs.getFileStatus(path)
106 | !status.isDir must be(isTrue)
107 | status.getLen must be(12)
108 | status.getPath must be(path)
109 | }
110 |
111 | /* it should "get file block locations" in {
112 | val path = new Path("/home/Downloads/testLocations")
113 | val fsData = fs.create(path)
114 | fsData.write("This is a test to check the block location details".getBytes)
115 | fsData.write("This is a test to check the block location details".getBytes)
116 | fsData.write("This is a test to check the block location details".getBytes)
117 | fsData.write("This is a test to check the block location details".getBytes)
118 | fsData.write("This is a test to check the block location details".getBytes)
119 |
120 | fsData.close()
121 |
122 | val status = fs.getFileStatus(path)
123 | val locations = fs.getFileBlockLocations(status, 0, 10)
124 | assert(locations(0).getLength === 250)
125 | } */
126 |
127 | it should "list all files/directories within the given directory" in {
128 | val dirPath1 = new Path(basePath + "/tmp/user")
129 | fs.mkdirs(dirPath1)
130 | val dirPath2 = new Path(basePath + "/tmp/local")
131 | fs.mkdirs(dirPath2)
132 |
133 | val filePath1 = new Path(basePath + "/tmp/testFile")
134 | val fileData1 = fs.create(filePath1)
135 | fileData1.write("This is a test to check list functionality".getBytes)
136 | fileData1.close()
137 |
138 | val filePath2 = new Path(basePath + "/tmp/user/file")
139 | val fileData2 = fs.create(filePath2)
140 | fileData2.write("This is a test to check list functionality".getBytes)
141 | fileData2.close()
142 |
143 | val baseDirPath = new Path(basePath + "/tmp")
144 | val result = fs.listStatus(baseDirPath)
145 | result.length must be(3)
146 | result.filter(!_.isDir).length must be(1)
147 | result.filter(_.isDir).length must be(2)
148 | }
149 |
150 | it should "delete all files/directories within the given directory" in {
151 | val dirPath1 = new Path(basePath + "/tmp1/user1")
152 | fs.mkdirs(dirPath1)
153 | val dirPath2 = new Path(basePath + "/tmp1/local1")
154 | fs.mkdirs(dirPath2)
155 |
156 | val filePath1 = new Path(basePath + "/tmp1/testFile1")
157 | val fileData1 = fs.create(filePath1)
158 | fileData1.write("This is a test to check delete functionality".getBytes)
159 | fileData1.close()
160 |
161 | val filePath2 = new Path(basePath + "/tmp1/user1/file")
162 | val fileData2 = fs.create(filePath2)
163 | fileData2.write("This is a test to check delete functionality".getBytes)
164 | fileData2.close()
165 |
166 | val dirStatus = fs.getFileStatus(dirPath2)
167 | dirStatus.isDir must be(isTrue)
168 |
169 | val baseDirPath = new Path(basePath + "/tmp1")
170 | val result = fs.delete(baseDirPath, isTrue)
171 | result must be(isTrue)
172 |
173 | val exception1 = intercept[FileNotFoundException] {
174 | fs.getFileStatus(dirPath2)
175 | }
176 | exception1.getMessage must be("No such file exists")
177 |
178 | val exception2 = intercept[FileNotFoundException] {
179 | fs.getFileStatus(filePath2)
180 | }
181 | exception2.getMessage must be("No such file exists")
182 |
183 | val exception3 = intercept[FileNotFoundException] {
184 | fs.getFileStatus(baseDirPath)
185 | }
186 | exception3.getMessage must be("No such file exists")
187 |
188 | }
189 |
190 | it should "rename a file" in {
191 | val filePath1 = new Path(basePath + "/tmp2/testRename")
192 | val fileData1 = fs.create(filePath1)
193 | fileData1.write("This is a test to check rename functionality".getBytes)
194 | fileData1.close()
195 |
196 | val filePath2 = new Path(basePath + "/tmp2/newName")
197 |
198 | val result = fs.rename(filePath1, filePath2)
199 |
200 | result must be(isTrue)
201 |
202 | val exception2 = intercept[FileNotFoundException] {
203 | fs.getFileStatus(filePath1)
204 | }
205 | exception2.getMessage must be("No such file exists")
206 |
207 | val fileStatus = fs.getFileStatus(filePath2)
208 | !fileStatus.isDir must be(isTrue)
209 | }
210 |
211 | it should "rename a directory" in {
212 | val dirPath1 = new Path(basePath + "/abc/user")
213 | fs.mkdirs(dirPath1)
214 | val dirPath2 = new Path(basePath + "/abc/local")
215 | fs.mkdirs(dirPath2)
216 |
217 | val filePath1 = new Path(basePath + "/abc/testfile")
218 | val fileData1 = fs.create(filePath1)
219 | fileData1.write("This is a test to check rename functionality".getBytes)
220 | fileData1.close()
221 |
222 | val filePath2 = new Path(basePath + "/abc/jkl/testfile")
223 | val fileData2 = fs.create(filePath2)
224 | fileData2.write("This is a test to check rename functionality".getBytes)
225 | fileData2.close()
226 |
227 | val baseDirPath = new Path(basePath + "/abc")
228 | val dirStatus1 = fs.listStatus(baseDirPath)
229 | dirStatus1.filter(!_.isDir).length must be(1)
230 |
231 | fs.mkdirs(new Path(basePath + "/pqr"))
232 | fs.rename(baseDirPath, new Path(basePath + "/pqr/lmn"))
233 |
234 | val dirStatus = fs.listStatus(new Path(basePath + "/pqr/lmn"))
235 | dirStatus.filter(!_.isDir).length must be(1)
236 | dirStatus.filter(_.isDir).length must be(3)
237 |
238 | val fileStatus2 = fs.getFileStatus(new Path(basePath + "/pqr/lmn/jkl/testfile"))
239 | !fileStatus2.isDir must be(isTrue)
240 | }
241 |
242 | it should "be able to get locations for all blocks in a file" in {
243 | val path = new Path("/home/Downloads/testBlockLocations")
244 | val fsData = fs.create(path)
245 |
246 | println("Generating test data, this may take a few minutes, please wait . . .")
247 |
248 | val fileSize = 254 * 1024 * 1024
249 | val iters = fileSize / 20000
250 | val strToWrite: String = RandomStringUtils.randomAscii(20000) + "\n"
251 |
252 | 1L to iters foreach {
253 | i =>
254 | fsData.write(strToWrite.getBytes())
255 | }
256 |
257 | fsData.close()
258 |
259 | println("Data generated!")
260 |
261 | val status = fs.getFileStatus(path)
262 | val locations = fs.getFileBlockLocations(status, 0, status.getLen)
263 |
264 | assert(locations.size === 2)
265 | locations.foreach {
266 | block =>
267 | block.getHosts.size must be(3)
268 | }
269 | }
270 |
271 |
272 | override def afterAll() = {
273 | //remove the test directory
274 | fs.delete(new Path(basePath), isRecursive = true)
275 | }
276 | }
277 |
--------------------------------------------------------------------------------
/src/test/scala/com/tuplejump/snackfs/cassandra/store/ThriftStoreSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.cassandra.store
20 |
21 | import scala.concurrent.Await
22 |
23 | import org.scalatest.{BeforeAndAfterAll, FlatSpec}
24 | import org.apache.hadoop.fs.permission.FsPermission
25 | import java.util.UUID
26 | import java.net.URI
27 | import org.apache.hadoop.fs.Path
28 | import org.apache.cassandra.utils.ByteBufferUtil
29 | import org.apache.commons.io.IOUtils
30 | import org.scalatest.matchers.MustMatchers
31 | import org.apache.cassandra.thrift.NotFoundException
32 | import org.apache.hadoop.conf.Configuration
33 | import com.tuplejump.snackfs.cassandra.model.{SnackFSConfiguration, GenericOpSuccess, Keyspace}
34 | import com.tuplejump.snackfs.fs.model._
35 |
36 | class ThriftStoreSpec extends FlatSpec with BeforeAndAfterAll with MustMatchers {
37 |
38 | val configuration = new Configuration()
39 | configuration.set("snackfs.keyspace", "STORE")
40 | val snackFSConfiguration = SnackFSConfiguration.get(configuration)
41 | val store = new ThriftStore(snackFSConfiguration)
42 | store.init
43 |
44 | val timestamp = System.currentTimeMillis()
45 | val subBlocks = List(SubBlockMeta(UUID.randomUUID, 0, 128), SubBlockMeta(UUID.randomUUID, 128, 128))
46 | val block1 = BlockMeta(UUID.randomUUID, 0, 256, subBlocks)
47 | val block2 = BlockMeta(UUID.randomUUID, 0, 256, subBlocks)
48 | val blocks = List(block1, block2)
49 | val pathURI = URI.create("testFile.txt")
50 | val path = new Path(pathURI)
51 | val iNode = INode("user", "group", FsPermission.getDefault, FileType.FILE, blocks, timestamp)
52 |
53 | val subBlockMeta1 = SubBlockMeta(UUID.randomUUID, 0, 128)
54 | val data = ByteBufferUtil.bytes("Test to store subBLock")
55 |
56 | it should "create a keyspace with name STORE" in {
57 | val ks = store.createKeyspace
58 | val status = Await.result(ks, snackFSConfiguration.atMost)
59 | assert(status.isInstanceOf[Keyspace])
60 | }
61 |
62 | /* it should "set keyspace to STORE" in {
63 | val result = Await.result(store.init, snackFSConfiguration.atMost)
64 | assert(result.isInstanceOf[Unit])
65 | } */
66 |
67 | it should "create an INode" in {
68 | val response = store.storeINode(path, iNode)
69 | val responseValue: GenericOpSuccess = Await.result(response, snackFSConfiguration.atMost)
70 | assert(responseValue === GenericOpSuccess())
71 | }
72 |
73 |
74 | it should "fetch the created INode" in {
75 | val response = store.retrieveINode(path)
76 | val result: INode = Await.result(response, snackFSConfiguration.atMost)
77 | assert(result === iNode)
78 | }
79 |
80 | it should "fetch a stored subBlock" in {
81 | Await.ready(store.storeSubBlock(block1.id, subBlockMeta1, data), snackFSConfiguration.atMost)
82 | val storeResponse = store.retrieveSubBlock(block1.id, subBlockMeta1.id, 0)
83 | val response = Await.result(storeResponse, snackFSConfiguration.atMost)
84 | val responseString = new String(IOUtils.toByteArray(response))
85 | responseString must be(new String(data.array()))
86 | }
87 |
88 | it should "delete all the blocks of an INode" in {
89 | val blockId = UUID.randomUUID
90 | val blockIdSecond = UUID.randomUUID
91 |
92 | val subBlock = SubBlockMeta(UUID.randomUUID, 0, 128)
93 | val subBlockSecond = SubBlockMeta(UUID.randomUUID, 0, 128)
94 |
95 | Await.result(store.storeSubBlock(blockId, subBlock, ByteBufferUtil.bytes("Random test data")), snackFSConfiguration.atMost)
96 | Await.result(store.storeSubBlock(blockIdSecond, subBlockSecond, ByteBufferUtil.bytes("Random test data")), snackFSConfiguration.atMost)
97 |
98 | val blockMeta = BlockMeta(blockId, 0, 0, List(subBlock))
99 | val blockMetaSecond = BlockMeta(blockIdSecond, 0, 0, List(subBlockSecond)) // second block uses its own id and sub-block so deleteBlocks covers both
100 |
101 | val subBlockData = Await.result(store.retrieveSubBlock(blockMeta.id, subBlock.id, 0), snackFSConfiguration.atMost)
102 | val dataString = new String(IOUtils.toByteArray(subBlockData))
103 | dataString must be("Random test data")
104 |
105 | val iNode = INode("user", "group", FsPermission.getDefault, FileType.FILE, List(blockMeta, blockMetaSecond), timestamp)
106 |
107 | Await.ready(store.deleteBlocks(iNode), snackFSConfiguration.atMost)
108 |
109 | val exception = intercept[NotFoundException] {
110 | Await.result(store.retrieveSubBlock(blockMeta.id, subBlock.id, 0), snackFSConfiguration.atMost)
111 | }
112 | assert(exception.getMessage === null)
113 | }
114 |
115 | it should "fetch all sub-paths with a deep fetch" in {
116 | val path1 = new Path("/tmp")
117 | val iNode1 = INode("user", "group", FsPermission.getDefault, FileType.DIRECTORY, null, timestamp)
118 | Await.ready(store.storeINode(path1, iNode1), snackFSConfiguration.atMost)
119 |
120 | val path2 = new Path("/tmp/user")
121 | Await.ready(store.storeINode(path2, iNode1), snackFSConfiguration.atMost)
122 |
123 | val path3 = new Path("/tmp/user/file")
124 | Await.ready(store.storeINode(path3, iNode), snackFSConfiguration.atMost)
125 |
126 | val result = Await.result(store.fetchSubPaths(path1, isDeepFetch = true), snackFSConfiguration.atMost)
127 | //println(result.toString())
128 |
129 | result.size must be(2)
130 | }
131 |
132 | it should "fetch only immediate sub-paths without a deep fetch" in {
133 | val path1 = new Path("/tmp")
134 | val iNode1 = INode("user", "group", FsPermission.getDefault, FileType.DIRECTORY, null, timestamp)
135 | Await.ready(store.storeINode(path1, iNode1), snackFSConfiguration.atMost)
136 |
137 | val path2 = new Path("/tmp/user")
138 | Await.ready(store.storeINode(path2, iNode1), snackFSConfiguration.atMost)
139 |
140 | val path3 = new Path("/tmp/user/file")
141 | Await.ready(store.storeINode(path3, iNode), snackFSConfiguration.atMost)
142 |
143 | val result = Await.result(store.fetchSubPaths(path1, isDeepFetch = false), snackFSConfiguration.atMost)
144 | //println(result.toString())
145 |
146 | result.size must be(1)
147 | }
148 |
149 | it should "get block locations" in {
150 | val path1: Path = new Path("/tmp/user/file")
151 |
152 | val inode = Await.result(store.retrieveINode(path1), snackFSConfiguration.atMost)
153 |
154 | val map = Await.result(store.getBlockLocations(path1), snackFSConfiguration.atMost)
155 |
156 | map.size must be(inode.blocks.size)
157 |
158 | }
159 |
160 | /* File-lock related tests: locking a file so that another process cannot write to it */
161 | it should "get a lock when attempting to lock a file for the first time" in {
162 | val processId = UUID.randomUUID()
163 | val lockFuture = store.acquireFileLock(new Path("/testLock1"), processId)
164 | val result = Await.result(lockFuture, snackFSConfiguration.atMost)
165 | result must be(true)
166 | }
167 |
168 | it should "not get a lock on a file from another process if the lock has not been released" in {
169 | val processId = UUID.randomUUID()
170 |
171 | val lockFuture = store.acquireFileLock(new Path("/testLock2"), processId)
172 | val result = Await.result(lockFuture, snackFSConfiguration.atMost)
173 | result must be(true)
174 |
175 | val processId2 = UUID.randomUUID()
176 | val lockFuture2 = store.acquireFileLock(new Path("/testLock2"), processId2)
177 | val result2 = Await.result(lockFuture2, snackFSConfiguration.atMost)
178 | result2 must be(false)
179 | }
180 |
181 | it should "release a lock that was acquired" in {
182 | val processId = UUID.randomUUID()
183 | val lockFuture = store.acquireFileLock(new Path("/testLock3"), processId)
184 | val lockResult = Await.result(lockFuture, snackFSConfiguration.atMost)
185 | lockResult must be(true)
186 |
187 | val releaseFuture = store.releaseFileLock(new Path("/testLock3"))
188 | val releaseResult = Await.result(releaseFuture, snackFSConfiguration.atMost)
189 | releaseResult must be(true)
190 | }
191 |
192 | it should "get a lock after another process has acquired and released it" in {
193 | val processId = UUID.randomUUID()
194 |
195 | val lockFuture = store.acquireFileLock(new Path("/testLock4"), processId)
196 | val lockResult = Await.result(lockFuture, snackFSConfiguration.atMost)
197 | lockResult must be(true)
198 |
199 | val releaseFuture = store.releaseFileLock(new Path("/testLock4"))
200 | val releaseResult = Await.result(releaseFuture, snackFSConfiguration.atMost)
201 | releaseResult must be(true)
202 |
203 | val processId2 = UUID.randomUUID()
204 | val lockFuture2 = store.acquireFileLock(new Path("/testLock4"), processId2)
205 | val lockResult2 = Await.result(lockFuture2, snackFSConfiguration.atMost)
206 | lockResult2 must be(true)
207 | }
208 |
209 | override def afterAll() = {
210 | Await.ready(store.dropKeyspace, snackFSConfiguration.atMost)
211 | store.disconnect()
212 | }
213 |
214 | }
215 |
--------------------------------------------------------------------------------
/src/test/scala/com/tuplejump/snackfs/fs/model/INodeSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.fs.model
20 |
21 | import java.util.UUID
22 | import org.apache.hadoop.fs.Path
23 | import java.net.URI
24 | import org.apache.hadoop.fs.permission.FsPermission
25 | import org.scalatest.FlatSpec
26 | import java.io.ByteArrayInputStream
27 |
28 | class INodeSpec extends FlatSpec {
29 |
30 | val timestamp = System.currentTimeMillis()
31 | val subBlocks = List(SubBlockMeta(UUID.randomUUID, 0, 128), SubBlockMeta(UUID.randomUUID, 128, 128))
32 | val blocks = List(BlockMeta(UUID.randomUUID, 0, 256, subBlocks), BlockMeta(UUID.randomUUID, 0, 256, subBlocks))
33 | val path = new Path(URI.create("jquery.fixedheadertable.min.js"))
34 | val iNode = INode("user", "group", FsPermission.getDefault, FileType.FILE, blocks, timestamp)
35 |
36 | it should "result in correct serialization for a file" in {
37 | val input = new ByteArrayInputStream(iNode.serialize.array)
38 | assert(iNode === INode.deserialize(input, timestamp))
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/src/test/scala/com/tuplejump/snackfs/fs/stream/FileSystemStreamSpec.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to Tuplejump Software Pvt. Ltd. under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. Tuplejump Software Pvt. Ltd. licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | *
18 | */
19 | package com.tuplejump.snackfs.fs.stream
20 |
21 | import org.scalatest.{BeforeAndAfterAll, FlatSpec}
22 | import java.net.URI
23 | import org.apache.hadoop.fs.Path
24 | import org.apache.cassandra.utils.ByteBufferUtil
25 | import scala.concurrent.Await
26 | import java.nio.file.{FileSystems, Files}
27 | import org.apache.commons.io.IOUtils
28 | import org.scalatest.matchers.MustMatchers
29 | import org.apache.cassandra.locator.SimpleStrategy
30 | import org.apache.hadoop.conf.Configuration
31 | import com.tuplejump.snackfs.cassandra.store.ThriftStore
32 | import com.tuplejump.snackfs.cassandra.model.SnackFSConfiguration
33 |
34 | class FileSystemStreamSpec extends FlatSpec with BeforeAndAfterAll with MustMatchers {
35 | val configuration = new Configuration()
36 | configuration.set("snackfs.keyspace", "STREAM")
37 | val snackFSConfiguration = SnackFSConfiguration.get(configuration)
38 |
39 | val store = new ThriftStore(snackFSConfiguration)
40 | store.init
41 |
42 | val replicationStrategy = classOf[SimpleStrategy].getCanonicalName
43 | Await.result(store.createKeyspace, snackFSConfiguration.atMost)
44 | //Await.result(store.init, snackFSConfiguration.atMost)
45 |
46 | it should "fetch data which is equal to actual data" in {
47 | val pathURI = URI.create("outputStream.txt")
48 | val path = new Path(pathURI)
49 | val dataString: String = "Test Subblock insertion"
50 | val data = ByteBufferUtil.bytes(dataString)
51 |
52 | val outputStream = FileSystemOutputStream(store, path, 30, 10, 10, snackFSConfiguration.atMost)
53 |
54 | outputStream.write(data.array(), 0, data.array().length)
55 | outputStream.close()
56 |
57 | val inode = Await.result(store.retrieveINode(path), snackFSConfiguration.atMost)
58 | assert(inode.blocks.length === 1)
59 |
60 | val blockData = store.retrieveBlock(inode.blocks(0))
61 | val outBuf: Array[Byte] = new Array[Byte](23)
62 | blockData.read(outBuf, 0, 23)
63 | assert(outBuf != null)
64 | new String(outBuf) must be(dataString)
65 | }
66 |
67 | it should "fetch data loaded from a smaller (<2KB) file" in {
68 | val nioPath = FileSystems.getDefault.getPath("src/test/resources/vsmall.txt")
69 | val data = Files.readAllBytes(nioPath)
70 |
71 | //println("file size=" + data.length)
72 | val pathURI = URI.create("vsmall.txt")
73 | val path = new Path(pathURI)
74 | val maxBlockSize = 500
75 | val maxSubBlockSize = 50
76 | val outputStream = FileSystemOutputStream(store, path, maxBlockSize, maxSubBlockSize, data.length, snackFSConfiguration.atMost)
77 | outputStream.write(data, 0, data.length)
78 | outputStream.close()
79 |
80 | val inode = Await.result(store.retrieveINode(path), snackFSConfiguration.atMost)
81 | ////println("blocks=" + inode.blocks.length)
82 | val minSize: Int = data.length / maxBlockSize
83 | ////println(minSize)
84 | assert(inode.blocks.length >= minSize)
85 | val fetchedData: Array[Byte] = new Array[Byte](data.length)
86 | var offset = 0
87 | inode.blocks.foreach(block => {
88 | val blockData = store.retrieveBlock(block)
89 | val source = IOUtils.toByteArray(blockData)
90 | System.arraycopy(source, 0, fetchedData, offset, source.length)
91 | blockData.close()
92 | offset += block.length.asInstanceOf[Int]
93 | })
94 | //println("completed copy")
95 | new String(fetchedData) must be(new String(data))
96 | }
97 |
98 | it should "fetch data loaded from a medium (~600KB) file" in {
99 | val nioPath = FileSystems.getDefault.getPath("src/test/resources/small.txt")
100 | val data = Files.readAllBytes(nioPath)
101 |
102 | val dataString = new java.lang.String(data)
103 |
104 | //println("file size=" + data.length)
105 | val pathURI = URI.create("small.txt")
106 | val path = new Path(pathURI)
107 | val maxBlockSize: Int = 30000
108 | val maxSubBlockSize = 3000
109 | val outputStream = FileSystemOutputStream(store, path, maxBlockSize, maxSubBlockSize, data.length, snackFSConfiguration.atMost)
110 | outputStream.write(data, 0, data.length)
111 | outputStream.close()
112 |
113 | val inode = Await.result(store.retrieveINode(path), snackFSConfiguration.atMost)
114 | //println("blocks=" + inode.blocks.length)
115 | val minSize: Int = data.length / maxBlockSize
116 | //println(minSize)
117 | assert(inode.blocks.length >= minSize)
118 |
119 | var fetchedData: Array[Byte] = Array[Byte]()
120 | var offset = 0
121 | inode.blocks.foreach(block => {
122 | val blockData = store.retrieveBlock(block)
123 | val source = IOUtils.toByteArray(blockData)
124 | blockData.close()
125 | fetchedData = fetchedData ++ source
126 | offset += source.length
127 | })
128 | //println("completed copy")
129 | val fetchedDataString = new String(fetchedData)
130 | fetchedData.length must be(data.length)
131 | fetchedDataString must be(dataString)
132 | }
133 |
134 | it should "return small (<2KB) file data stored through the output stream when fetched from the input stream" in {
135 | val nioPath = FileSystems.getDefault.getPath("src/test/resources/vsmall.txt")
136 | val data = Files.readAllBytes(nioPath)
137 |
138 | //println("file size=" + data.length)
139 | val pathURI = URI.create("vsmall.txt")
140 | val path = new Path(pathURI)
141 |
142 | val inode = FileSystemInputStream(store, path)
143 | val inodeData = new Array[Byte](data.length)
144 | inode.read(inodeData, 0, data.length)
145 | inode.close()
146 | //println("completed copy")
147 | //println(inodeData.length)
148 | new String(inodeData) must be(new String(data))
149 | }
150 |
151 | it should "return medium (~600KB) file data stored through the output stream when fetched from the input stream" in {
152 | val nioPath = FileSystems.getDefault.getPath("src/test/resources/small.txt")
153 | val data = Files.readAllBytes(nioPath)
154 |
155 | //println("file size=" + data.length)
156 | val pathURI = URI.create("small.txt")
157 | val path = new Path(pathURI)
158 |
159 | val inode = FileSystemInputStream(store, path)
160 | val inodeData = new Array[Byte](data.length)
161 | inode.read(inodeData, 0, data.length)
162 | inode.close()
163 | //println("completed copy")
164 | //println(inodeData.length)
165 | inodeData must be(data)
166 | }
167 |
168 | it should "return small (<2KB) file data stored through the output stream when fetched from the input stream using readFully" in {
169 | val nioPath = FileSystems.getDefault.getPath("src/test/resources/vsmall.txt")
170 | val data = Files.readAllBytes(nioPath)
171 |
172 | //println("file size=" + data.length)
173 | val pathURI = URI.create("vsmall.txt")
174 | val path = new Path(pathURI)
175 |
176 | val inode = FileSystemInputStream(store, path)
177 | val inodeData = new Array[Byte](data.length)
178 | inode.readFully(0, inodeData)
179 | inode.close()
180 | //println("completed copy")
181 | //println(inodeData.length)
182 | inodeData.length must be(data.length)
183 | inodeData must be(data)
184 | }
185 |
186 | it should "return small (<2KB) file data stored through the output stream when fetched from the input stream using IOUtils.toByteArray" in {
187 | val nioPath = FileSystems.getDefault.getPath("src/test/resources/vsmall.txt")
188 | val data = Files.readAllBytes(nioPath)
189 |
190 | //println("file size=" + data.length)
191 | val pathURI = URI.create("vsmall.txt")
192 | val path = new Path(pathURI)
193 |
194 | val inode = FileSystemInputStream(store, path)
195 | val inodeData = IOUtils.toByteArray(inode)
196 | inode.close()
197 |
198 | //println("completed copy")
199 | //println(inodeData.length)
200 | inodeData.length must be(data.length)
201 | inodeData must be(data)
202 | }
203 |
204 | override def afterAll() = {
205 | Await.ready(store.dropKeyspace, snackFSConfiguration.atMost)
206 | store.disconnect()
207 | }
208 |
209 | }
210 |
--------------------------------------------------------------------------------