├── .gitignore
├── LICENSE
├── README.md
├── build.sbt
├── project
│   ├── build.properties
│   └── plugins.sbt
├── src
│   ├── main
│   │   ├── resources
│   │   │   └── logback.xml
│   │   └── scala
│   │       └── io
│   │           └── github
│   │               └── starofall
│   │                   └── s3hypersync
│   │                       ├── JobDefinition.scala
│   │                       ├── MainApp.scala
│   │                       ├── PekkoFileSyncCompareStage.scala
│   │                       ├── S3Connector.scala
│   │                       ├── SyncCommand.scala
│   │                       ├── SyncLogging.scala
│   │                       ├── SyncModel.scala
│   │                       ├── SyncS3Settings.scala
│   │                       ├── SyncStatistics.scala
│   │                       └── SyncUtil.scala
│   └── test
│       ├── resources
│       │   └── logback-test.xml
│       └── scala
│           └── io
│               └── github
│                   └── starofall
│                       └── s3hypersync
│                           └── SyncCommandTest.scala
└── tests
    └── simple
        ├── .storage
        │   ├── 7209184516538105857
        │   ├── 7209184677322555393
        │   ├── 7209184710918930433
        │   └── 7209184749493944321
        ├── bucket-a.bucket.meta
        ├── bucket-b.bucket.meta
        └── version

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.class
2 | *.log
3 | 
4 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
5 | hs_err_pid*
6 | 
7 | /.bsp/**
8 | /.idea/**
9 | /project/project/**
10 | /project/target/**
11 | /target/**
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2024 Starofall
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # S3HyperSync
2 | S3HyperSync is a high-performance, memory-efficient, and cost-effective tool for synchronizing files between S3-compatible storage services. Optimized for speed, reliability, and minimizing AWS costs, it's ideal for large-scale data synchronization and backup tasks. Utilizing Pekko, it adopts a stream-only approach to maintain low memory requirements.
3 | 
4 | ## Origin
5 | Developed for creating daily backups of huge S3 buckets with millions of files and terabytes of data to a separate AWS account.
6 | 
7 | ## Cost Effective
8 | To sync large S3 buckets, S3HyperSync compares directories using two iterator streams for the source and target, reducing the need for costly GetObject requests, especially for DEEP_ARCHIVE storage.
9 | It also minimizes expensive multipart uploads, as each part counts as a separate PutObject call.
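For a rough sense of the savings (request prices below are approximate us-east-1 rates and vary by region): enumerating two buckets of 10 million objects each takes about 2 x 10,000 ListObjectsV2 calls at 1,000 keys per call, which is roughly $0.10 at $0.005 per 1,000 requests, whereas issuing one HeadObject per key at roughly $0.0004 per 1,000 requests would cost around $8.00 for the 20 million keys, and take far longer.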
10 | 
11 | ## Performance
12 | Performance tests on AWS Fargate show iteration speeds between 8,000 and 100,000 files per second with the UUID booster feature.
13 | Copy speeds reach around 600 MB/s on a c6gn.4xlarge, or 800 files per second for smaller files.
14 | 
15 | ## UUID Booster
16 | The UUID booster feature can be used if data is suffixed with a UUID, e.g. s3://bucket/videos/$UUID.
17 | In this case the tool creates 16 iterators and processes them in parallel for extremely fast bucket comparison.
18 | 
19 | ## Installation
20 | 
21 | Download the JAR file from the Releases section or build it yourself with `sbt assembly`.
22 | 
23 | ## Usage
24 | 
25 | ```
26 | S3HyperSync.jar 0.1.5
27 | Usage: java -jar S3HyperSync.jar [OPTIONS]
28 | A fast, efficient, cost-reducing, and memory-efficient S3 sync tool.
29 | Options:
30 | --dry-run Show what would be copied without actually
31 | copying
32 | --multipart-size Size of each part in a multipart upload (in
33 | bytes)
34 | --no-color Disable colored output
35 | --put-cutoff-size Files larger than this size (in bytes) are
36 | uploaded using multipart
37 | --source-bucket Source S3 Bucket
38 | --source-endpoint Source S3 Endpoint
39 | --source-key Source S3 Key
40 | --source-path-style Use path style for source S3
41 | --source-prefix Source S3 Prefix (must end with /)
42 | --source-region Source S3 Region
43 | --source-secret Source S3 Secret
44 | --storage-tier Storage tier: STANDARD, INTELLIGENT_TIERING,
45 | GLACIER_IR, GLACIER_IR_AUTO, DEEP_ARCHIVE,
46 | DEEP_ARCHIVE_AUTO
47 | --sync Sync mode: ALWAYS, MISSING, CHANGED
48 | --target-bucket Target S3 Bucket
49 | --target-endpoint Target S3 Endpoint
50 | --target-key Target S3 Key
51 | --target-path-style Use path style for target S3
52 | --target-prefix Target S3 Prefix (must end with /)
53 | --target-region Target S3 Region
54 | --target-secret Target S3 Secret
55 | --timeout Kills the process after N seconds
56 | --uuid-boost Increase index speed if source prefix contains
57 | UUIDs
58 | -v, --verbose Verbose level (use multiple -v for increased
59 | verbosity)
60 | --workers Number of workers
61 | -h, --help Show help message
62 | --version Show version of this program
63 | ```
64 | ## Contributing
65 | 
66 | We welcome contributions from the community. If you find a bug or have a feature request, please open an issue on
67 | GitHub. If you want to contribute code, please fork the repository and submit a pull request.
68 | 
69 | ## License
70 | 
71 | S3HyperSync is released under the MIT License. See the LICENSE file for more details.
72 | 
73 | ## Acknowledgements
74 | 
75 | We would like to thank all the contributors and the open-source community for their support and contributions to this
76 | project.
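## Example

A typical first run might look like this; bucket names, credentials, and regions below are placeholders:

```
java -jar S3HyperSync.jar \
  --source-key AKIA...A --source-secret ... --source-region eu-west-1 \
  --source-bucket prod-data --source-prefix media/ \
  --target-key AKIA...B --target-secret ... --target-region eu-west-1 \
  --target-bucket prod-data-backup --target-prefix media/ \
  --sync MISSING --workers 64 --storage-tier DEEP_ARCHIVE_AUTO --dry-run
```

Review the dry-run output, then drop `--dry-run` to copy for real.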
77 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | import sbtassembly.MergeStrategy.defaultMergeStrategy 2 | 3 | import scala.collection.Seq 4 | 5 | lazy val root = (project in file(".")) 6 | .settings( 7 | name := "S3HyperSync", 8 | organization := "io.github.starofall", 9 | version := "0.1.6", 10 | scalaVersion := "2.13.16", 11 | assembly / assemblyMergeStrategy := { 12 | case x if x.contains("META-INF") && x.contains("module-info.class") => MergeStrategy.discard 13 | case x if x.contains("META-INF") && x.contains("license") => MergeStrategy.first 14 | case "application.conf" => MergeStrategy.concat 15 | case "logback.xml" => MergeStrategy.preferProject 16 | case sbtassembly.PathList("module-info.java") => MergeStrategy.discard 17 | case sbtassembly.PathList("module-info.class") => MergeStrategy.discard 18 | case x => defaultMergeStrategy(x) 19 | }, 20 | assembly / test := {}, 21 | fork := true, 22 | scalacOptions ++= Seq( 23 | "-Xasync", 24 | "-deprecation", // Emit warning and location for usages of deprecated APIs 25 | "-unchecked", // Enable additional warnings where generated code depends on assumptions 26 | "-Xfatal-warnings", // Fail the compilation if there are any warnings 27 | "-feature", 28 | "-Ybackend-parallelism", "8", 29 | "-Ybackend-worker-queue", "8", 30 | "-encoding", "utf8"), 31 | libraryDependencies ++= Seq( 32 | "org.rogach" %% "scallop" % "5.1.0", 33 | "org.apache.pekko" %% "pekko-connectors-s3" % "1.1.0", 34 | "org.apache.pekko" %% "pekko-stream" % "1.1.3", 35 | "ch.qos.logback" % "logback-core" % "1.4.14", 36 | "ch.qos.logback" % "logback-classic" % "1.4.14", 37 | // testing dependencies 38 | "org.scalatest" %% "scalatest" % "3.2.19" % "test", 39 | "io.github.robothy" % "local-s3-rest" % "1.15" % "test" 40 | ) 41 | ) 42 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.10.0 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | // packs app into a single jar 2 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.2.0") 3 | -------------------------------------------------------------------------------- /src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | %-5level | %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /src/main/scala/io/github/starofall/s3hypersync/JobDefinition.scala: -------------------------------------------------------------------------------- 1 | package io.github.starofall.s3hypersync 2 | 3 | import org.rogach.scallop.{ScallopConf, ScallopOption} 4 | 5 | /** CLI parser and config */ 6 | class JobDefinition(arguments: Seq[String]) extends ScallopConf(arguments) { 7 | version("S3HyperSync.jar 0.1.5") 8 | noshort = true 9 | banner( 10 | """Usage: java -jar S3HyperSync.jar [OPTIONS] 11 | |A fast, efficient, cost-reducing, and memory-efficient S3 sync tool. 
12 | |Options:
13 | |""".stripMargin)
14 |   footer("\n")
15 | 
16 |   /** Number of workers */
17 |   val numWorkers: ScallopOption[Int] = opt[Int](name = "workers", descr = "Number of workers", default = Some(64))
18 | 
19 |   /** Files larger than this size (in bytes) are uploaded using multipart */
20 |   val putCutoffSize: ScallopOption[Int] = opt[Int](name = "put-cutoff-size", descr = "Files larger than this size (in bytes) are uploaded using multipart", default = Some(52428800))
21 | 
22 |   /** Size of each part in a multipart upload (in bytes) */
23 |   val multipartSize: ScallopOption[Int] = opt[Int](name = "multipart-size", descr = "Size of each part in a multipart upload (in bytes)", default = Some(52428800))
24 | 
25 |   /** Sync mode: ALWAYS, MISSING, CHANGED */
26 |   val syncMode: ScallopOption[String] = opt[String](name = "sync", descr = "Sync mode: ALWAYS, MISSING, CHANGED", validate = Set("ALWAYS", "MISSING", "CHANGED"), default = Some("MISSING"))
27 | 
28 |   /** Source S3 Key */
29 |   val sourceKey: ScallopOption[String] = opt[String](name = "source-key", required = true, descr = "Source S3 Key")
30 | 
31 |   /** Source S3 Secret */
32 |   val sourceSecret: ScallopOption[String] = opt[String](name = "source-secret", required = true, descr = "Source S3 Secret")
33 | 
34 |   /** Source S3 Region */
35 |   val sourceRegion: ScallopOption[String] = opt[String](name = "source-region", required = true, descr = "Source S3 Region")
36 | 
37 |   /** Source S3 Bucket */
38 |   val sourceBucket: ScallopOption[String] = opt[String](name = "source-bucket", required = true, descr = "Source S3 Bucket")
39 | 
40 |   /** Source S3 Prefix (must end with /) */
41 |   val sourcePrefix: ScallopOption[String] = opt[String](name = "source-prefix", descr = "Source S3 Prefix (must end with /)", validate = _.endsWith("/"))
42 | 
43 |   /** Source S3 Endpoint */
44 |   val sourceEndpoint: ScallopOption[String] = opt[String](name = "source-endpoint", descr = "Source S3 Endpoint")
45 | 
46 |   /** Use path style for source S3 */
47 |   val sourcePathStyle: ScallopOption[Boolean] = opt[Boolean](name = "source-path-style", descr = "Use path style for source S3")
48 | 
49 |   /** Target S3 Key */
50 |   val targetKey: ScallopOption[String] = opt[String](name = "target-key", required = true, descr = "Target S3 Key")
51 | 
52 |   /** Target S3 Secret */
53 |   val targetSecret: ScallopOption[String] = opt[String](name = "target-secret", required = true, descr = "Target S3 Secret")
54 | 
55 |   /** Target S3 Region */
56 |   val targetRegion: ScallopOption[String] = opt[String](name = "target-region", required = true, descr = "Target S3 Region")
57 | 
58 |   /** Target S3 Bucket */
59 |   val targetBucket: ScallopOption[String] = opt[String](name = "target-bucket", required = true, descr = "Target S3 Bucket")
60 | 
61 |   /** Target S3 Prefix (must end with /) */
62 |   val targetPrefix: ScallopOption[String] = opt[String](name = "target-prefix", descr = "Target S3 Prefix (must end with /)", validate = _.endsWith("/"))
63 | 
64 |   /** Target S3 Endpoint */
65 |   val targetEndpoint: ScallopOption[String] = opt[String](name = "target-endpoint", descr = "Target S3 Endpoint")
66 | 
67 |   /** Use path style for target S3 */
68 |   val targetPathStyle: ScallopOption[Boolean] = opt[Boolean](name = "target-path-style", descr = "Use path style for target S3")
69 | 
70 |   /** Storage tier: STANDARD, INTELLIGENT_TIERING, GLACIER_IR, GLACIER_IR_AUTO, DEEP_ARCHIVE, DEEP_ARCHIVE_AUTO */
71 |   val storageTier: ScallopOption[String] = opt[String](name = "storage-tier", descr = "Storage tier: STANDARD, INTELLIGENT_TIERING, GLACIER_IR, GLACIER_IR_AUTO, DEEP_ARCHIVE, DEEP_ARCHIVE_AUTO", validate = Set("STANDARD", "INTELLIGENT_TIERING", "GLACIER_IR", "GLACIER_IR_AUTO", "DEEP_ARCHIVE", "DEEP_ARCHIVE_AUTO"), default = Some("STANDARD"))
72 | 
73 |   /** Verbose level (use multiple -v for increased verbosity) */
74 |   val verbose: ScallopOption[Int] = tally(descr = "Verbose level (use multiple -v for increased verbosity)", short = 'v', noshort = false)
75 | 
76 |   /** Increase index speed if source prefix contains UUIDs */
77 |   val uuidBoost: ScallopOption[Boolean] = opt[Boolean](name = "uuid-boost", descr = "Increase index speed if source prefix contains UUIDs", default = Some(false))
78 | 
79 |   /** Show what would be copied without actually copying */
80 |   val dryRun: ScallopOption[Boolean] = opt[Boolean](name = "dry-run", descr = "Show what would be copied without actually copying", default = Some(false))
81 | 
82 |   /** Disable colored output */
83 |   val noColor: ScallopOption[Boolean] = opt[Boolean](name = "no-color", descr = "Disable colored output", default = Some(false))
84 | 
85 |   /** Kill the process after N seconds */
86 |   val timeout: ScallopOption[Int] = opt[Int](name = "timeout", descr = "Kills the process after N seconds")
87 | 
88 |   verify()
89 | }
90 | 
--------------------------------------------------------------------------------
/src/main/scala/io/github/starofall/s3hypersync/MainApp.scala:
--------------------------------------------------------------------------------
1 | package io.github.starofall.s3hypersync
2 | 
3 | import io.github.starofall.s3hypersync.SyncLogging.Logger
4 | import io.github.starofall.s3hypersync.SyncUtil._
5 | import org.apache.pekko.actor.ActorSystem
6 | 
7 | import scala.concurrent.ExecutionContextExecutor
8 | 
9 | 
10 | object MainApp extends Logger {
11 | 
12 |   /** main method for CLI interaction */
13 |   def main(args: Array[String]): Unit = {
14 |     val conf = new JobDefinition(args.toIndexedSeq)
15 |     implicit val actorSystem: ActorSystem = ActorSystem(
16 |       "SyncSystem", buildConfig(conf))
17 |     implicit val exc: ExecutionContextExecutor = actorSystem.dispatcher
18 |     SyncLogging.initLogger(conf)
19 |     addTimeoutIfNeeded(conf)
20 |     new SyncCommand(conf)
21 |       .runSyncJob()
22 |       .onComplete(handleFinalResult)
23 |   }
24 | 
25 | }
26 | 
--------------------------------------------------------------------------------
/src/main/scala/io/github/starofall/s3hypersync/PekkoFileSyncCompareStage.scala:
--------------------------------------------------------------------------------
1 | package io.github.starofall.s3hypersync
2 | 
3 | import io.github.starofall.s3hypersync.SyncLogging.Logger
4 | import io.github.starofall.s3hypersync.SyncModel.{FileSyncState, SyncFile, SyncStatus}
5 | import org.apache.pekko.NotUsed
6 | import org.apache.pekko.stream._
7 | import org.apache.pekko.stream.scaladsl.{GraphDSL, Source}
8 | import org.apache.pekko.stream.stage.{GraphStage, GraphStageLogic, InHandler, OutHandler}
9 | 
10 | class PekkoFileSyncCompareStage extends GraphStage[FanInShape2[SyncFile, SyncFile, FileSyncState]] with Logger {
11 | 
12 |   //@formatter:disable
13 |   val inA  : Inlet[SyncFile]       = Inlet("CompareAndFilterStage.inA")
14 |   val inB  : Inlet[SyncFile]       = Inlet("CompareAndFilterStage.inB")
15 |   val out  : Outlet[FileSyncState] = Outlet("CompareAndFilterStage.out")
16 |   val shape: FanInShape2[SyncFile, SyncFile, FileSyncState] = new FanInShape2(inA, inB, out)
17 |   //@formatter:enable
18 | 
19 |   override def createLogic(inheritedAttributes: Attributes): GraphStageLogic = new GraphStageLogic(shape) {
20 | 
21 |     var aBuffer: Option[SyncFile] = None
22 |     var bBuffer: Option[SyncFile] = None
23 |     var aFinished = false
24 |     var bFinished = false
25 | 
26 |     setHandler(inA, new InHandler {
27 |       override def onPush(): Unit = {
28 |         aBuffer = Some(grab(inA))
29 |         log.trace(s"A Pushed - $aBuffer")
30 |         compareAndPush()
31 |       }
32 | 
33 |       override def onUpstreamFinish(): Unit = {
34 |         log.trace("A upstream finished")
35 |         aFinished = true
36 |         compareAndPush()
37 |       }
38 |     })
39 | 
40 |     setHandler(inB, new InHandler {
41 |       override def onPush(): Unit = {
42 |         bBuffer = Some(grab(inB))
43 |         log.trace(s"B Pushed - $bBuffer")
44 |         compareAndPush()
45 |       }
46 | 
47 |       override def onUpstreamFinish(): Unit = {
48 |         log.trace("B upstream finished")
49 |         bFinished = true
50 |         compareAndPush()
51 |       }
52 |     })
53 | 
54 |     setHandler(out, new OutHandler {
55 |       override def onPull(): Unit = {
56 |         log.trace("OUT pull")
57 |         if (aBuffer.isEmpty && !hasBeenPulled(inA) && !aFinished) {
58 |           pull(inA)
59 |         }
60 |         if (bBuffer.isEmpty && !hasBeenPulled(inB) && !bFinished) {
61 |           pull(inB)
62 |         }
63 |         compareAndPush()
64 |       }
65 |     })
66 | 
67 |     def compareAndPush(): Unit = {
68 |       if (isAvailable(out)) {
69 |         (aBuffer, bBuffer) match {
70 |           // if a is slower than b, then a is definitely missing
71 |           case (Some(a), Some(b)) if a.relativeKey < b.relativeKey =>
72 |             log.trace("-> missing")
73 |             push(out, FileSyncState(SyncStatus.Missing, a))
74 |             clearAandPull()
75 | 
76 |           // if a == b and size is different, call changed
77 |           case (Some(a), Some(b)) if a.relativeKey == b.relativeKey && a.size != b.size =>
78 |             log.trace("-> sizechanged")
79 |             // same key, but the size changed
80 |             push(out, FileSyncState(SyncStatus.SizeChanged, a))
81 |             clearAandPull()
82 | 
83 |           case (Some(a), Some(b)) if a.relativeKey == b.relativeKey => // aka same size
84 |             log.trace("-> exists")
85 |             // same key, same size
86 |             push(out, FileSyncState(SyncStatus.Exists, a))
87 |             clearAandPull()
88 | 
89 |           // this means a.relativeKey > b.relativeKey aka we need more B to continue
90 |           case (Some(a), Some(b)) if !bFinished =>
91 |             log.trace("-> we need more b")
92 |             clearBandPull()
93 | 
94 |           // there is a last element in B, but b is done so we just drop it
95 |           case (Some(a), Some(b)) => // aka if bFinished
96 |             log.trace("-> ignore the last b")
97 |             bBuffer = None
98 |             push(out, FileSyncState(SyncStatus.Missing, a))
99 |             clearAandPull()
100 | 
101 |           // if b is empty AND finished, all other A's are missing
102 |           case (Some(a), None) if bFinished =>
103 |             log.trace("-> b is empty")
104 |             push(out, FileSyncState(SyncStatus.Missing, a))
105 |             clearAandPull()
106 | 
107 |           // if b is empty but not finished, call for more Bs
108 |           case (Some(a), None) => // aka if !bFinished
109 |             log.trace("-> b empty (but not finished) need more bs")
110 |             clearBandPull()
111 | 
112 |           // we still have As to call
113 |           case (None, _) if !aFinished =>
114 |             log.trace("-> a empty and more needed")
115 |             clearAandPull()
116 | 
117 |           // a finished, so we are done
118 |           case (None, _) =>
119 |             log.trace("-> done a empty")
120 |             completeStage()
121 |         }
122 |       }
123 |     }
124 | 
125 |     private def clearBandPull(): Unit = {
126 |       bBuffer = None
127 |       if (!bFinished && !hasBeenPulled(inB)) pull(inB)
128 |     }
129 | 
130 |     private def clearAandPull(): Unit = {
131 |       aBuffer = None
132 |       if (!aFinished && !hasBeenPulled(inA)) pull(inA)
133 |     }
134 | 
135 |     override def preStart(): Unit = {
136 |       pull(inA)
137 |       pull(inB)
138 |     }
139 |   }
140 | }
141 | 
142 | object PekkoFileSyncCompareStage {
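  // Illustrative sketch only (not part of the original code and never called at
  // runtime): the GraphStage above performs a sorted-merge join over two
  // key-ordered listings. `mergeCompare` is a hypothetical list-based reference
  // model of the same decision logic, kept here purely to document the algorithm.
  private def mergeCompare(src: List[SyncFile], tgt: List[SyncFile]): List[FileSyncState] =
    (src, tgt) match {
      case (Nil, _) => Nil // source exhausted -> nothing left to report
      case (a :: as, Nil) => // target exhausted -> every remaining source file is missing
        FileSyncState(SyncStatus.Missing, a) :: mergeCompare(as, Nil)
      case (a :: as, b :: bs) =>
        if (a.relativeKey < b.relativeKey) // target stream is ahead -> a is missing
          FileSyncState(SyncStatus.Missing, a) :: mergeCompare(as, tgt)
        else if (a.relativeKey > b.relativeKey) // source stream is ahead -> drop b
          mergeCompare(src, bs)
        else if (a.size != b.size) // same key, different size
          FileSyncState(SyncStatus.SizeChanged, a) :: mergeCompare(as, bs)
        else // same key, same size
          FileSyncState(SyncStatus.Exists, a) :: mergeCompare(as, bs)
    }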
143 | 
144 |   /** compares the files from the given sources against each other */
145 |   def compareFilesToTarget(syncSource: Source[SyncFile, NotUsed],
146 |                            syncTarget: Source[SyncFile, NotUsed]): Source[FileSyncState, NotUsed] = {
147 |     Source.fromGraph(GraphDSL.create() { implicit builder =>
148 |       import GraphDSL.Implicits._
149 |       val compareAndFilter = builder.add(new PekkoFileSyncCompareStage)
150 |       syncSource ~> compareAndFilter.in0
151 |       syncTarget ~> compareAndFilter.in1
152 |       SourceShape(compareAndFilter.out)
153 |     })
154 |   }
155 | 
156 |   /** creates our source of files to sync from the job definition */
157 |   def createSyncSource(conf: JobDefinition, additionalPrefix: Option[String]): Source[FileSyncState, NotUsed] = {
158 |     PekkoFileSyncCompareStage
159 |       .compareFilesToTarget(
160 |         S3Connector.listBucket(conf.sourceBucket.toOption.get,
161 |                                conf.sourcePrefix.toOption,
162 |                                additionalPrefix,
163 |                                SyncS3Settings.sourceConfig(conf))
164 |           .buffer(10000, OverflowStrategy.backpressure).async,
165 |         S3Connector.listBucket(conf.targetBucket.toOption.get,
166 |                                conf.targetPrefix.toOption,
167 |                                additionalPrefix,
168 |                                SyncS3Settings.targetConfig(conf))
169 |           .buffer(10000, OverflowStrategy.backpressure).async)
170 |   }
171 | 
172 | }
173 | 
--------------------------------------------------------------------------------
/src/main/scala/io/github/starofall/s3hypersync/S3Connector.scala:
--------------------------------------------------------------------------------
1 | package io.github.starofall.s3hypersync
2 | 
3 | import io.github.starofall.s3hypersync.SyncLogging.Logger
4 | import io.github.starofall.s3hypersync.SyncModel.SyncFile
5 | import org.apache.pekko.NotUsed
6 | import org.apache.pekko.actor.ActorSystem
7 | import org.apache.pekko.stream.connectors.s3._
8 | import org.apache.pekko.stream.connectors.s3.scaladsl.S3
9 | import org.apache.pekko.stream.scaladsl.{Sink, Source}
10 | import org.apache.pekko.util.ByteString
11 | 
12 | import scala.concurrent.Future
13 | 
14 | object S3Connector extends Logger {
15 | 
16 |   /** copies a file from the source to the target bucket */
17 |   def copyFile(job: JobDefinition,
18 |                sKey: String,
19 |                tKey: String,
20 |                fileSize: Long)
21 |               (implicit actorSystem: ActorSystem,
22 |                statistics: SyncStatistics): Future[Any] = {
23 |     val s3Source: Source[ByteString, Future[ObjectMetadata]] = S3
24 |       .getObject(job.sourceBucket.toOption.get, sKey)
25 |       .withAttributes(S3Attributes.settings(SyncS3Settings.sourceConfig(job)))
26 | 
27 |     val storageString = {
28 |       job.storageTier.getOrElse("STANDARD") match {
29 |         case "STANDARD" => "STANDARD"
30 |         case "INTELLIGENT_TIERING" => "INTELLIGENT_TIERING"
31 |         case "GLACIER_IR" => "GLACIER_IR"
32 |         case "GLACIER_IR_AUTO" =>
33 |           if (fileSize < 128 * 1024) {
34 |             "STANDARD"
35 |           } else {
36 |             "GLACIER_IR"
37 |           }
38 |         case "DEEP_ARCHIVE" => "DEEP_ARCHIVE"
39 |         case "DEEP_ARCHIVE_AUTO" =>
40 |           if (fileSize < 128 * 1024) {
41 |             "STANDARD"
42 |           } else {
43 |             "DEEP_ARCHIVE"
44 |           }
45 |         case _ => throw new Exception("INVALID_STORAGE_TIER")
46 |       }
47 |     }
48 | 
49 |     if (fileSize < job.putCutoffSize.toOption.getOrElse(52428800)) {
50 |       log.trace(s"[COPY-PUT] ${job.sourceBucket.toOption.get} / $sKey -> ${job.targetBucket.toOption.get} / $tKey")
51 |       statistics.incrementAwsPutRequests(1)
52 |       S3.putObject(job.targetBucket.toOption.get, tKey,
53 |                    s3Headers = S3Headers().withCustomHeaders(Map("x-amz-storage-class" -> storageString)),
54 |                    data = s3Source,
55 |                    contentLength = fileSize)
56 |         .withAttributes(S3Attributes.settings(SyncS3Settings.targetConfig(job)))
57 |         .run()
58 |     } else {
59 |       log.trace(s"[COPY-MULTIPART] ${job.sourceBucket.toOption.get} / $sKey -> ${job.targetBucket.toOption.get} / $tKey")
60 |       val multiPartChunkSize = job.multipartSize.getOrElse(52428800)
61 |       statistics.incrementAwsPutRequests(2 + Math.max(1, (fileSize / multiPartChunkSize).toInt))
62 |       val s3Sink: Sink[ByteString, Future[MultipartUploadResult]] = S3
63 |         .multipartUploadWithHeaders(
64 |           job.targetBucket.toOption.get, tKey,
65 |           chunkSize = multiPartChunkSize,
66 |           s3Headers = S3Headers().withCustomHeaders(Map("x-amz-storage-class" -> storageString)))
67 |         .withAttributes(S3Attributes.settings(SyncS3Settings.targetConfig(job, isHugeFile = true)))
68 |       s3Source.runWith(s3Sink)
69 |     }
70 |   }
71 | 
72 |   def listBucket(bucket: String,
73 |                  prefix: Option[String],
74 |                  subPrefix: Option[String], // an additional prefix that does not influence the relative dir
75 |                  s3Settings: S3Settings): Source[SyncFile, NotUsed] = {
76 |     val searchPrefix = (prefix, subPrefix) match {
77 |       case (Some(x), Some(y)) => Some(x + y)
78 |       case (None, Some(y)) => Some(y)
79 |       case (Some(x), None) => Some(x)
80 |       case _ => None
81 |     }
82 |     S3.listBucket(bucket, searchPrefix).withAttributes(S3Attributes.settings(s3Settings))
83 |       .filterNot(x => x.size == 0 && x.key.endsWith("/")) // drop folders
84 |       .map(x => SyncFile(
85 |         x.bucketName, x.key, x.size,
86 |         prefix match {
87 |           case Some(value) => x.key.stripPrefix(value)
88 |           case None => x.key
89 |         }))
90 |   }
91 | 
92 | }
93 | 
--------------------------------------------------------------------------------
/src/main/scala/io/github/starofall/s3hypersync/SyncCommand.scala:
--------------------------------------------------------------------------------
1 | package io.github.starofall.s3hypersync
2 | 
3 | import io.github.starofall.s3hypersync.PekkoFileSyncCompareStage.createSyncSource
4 | import io.github.starofall.s3hypersync.SyncLogging.Logger
5 | import io.github.starofall.s3hypersync.SyncModel.{FileSyncState, SyncStatus}
6 | import io.github.starofall.s3hypersync.SyncUtil.retry
7 | import org.apache.pekko.actor.ActorSystem
8 | import org.apache.pekko.stream.scaladsl.{Merge, Source}
9 | 
10 | import scala.concurrent.duration.DurationInt
11 | import scala.concurrent.{ExecutionContext, Future}
12 | 
13 | class SyncCommand(conf: JobDefinition)
14 |                  (implicit actorSystem: ActorSystem, exc: ExecutionContext)
15 |   extends Logger {
16 | 
17 |   implicit val statistics: SyncStatistics = new SyncStatistics(conf)
18 | 
19 |   /** runs the main sync job */
20 |   def runSyncJob(): Future[Unit] = {
21 |     createSource()
22 |       .wireTap(x => statistics.statCall(x))
23 |       .filter(syncFilter)
24 |       .mapAsyncUnordered(conf.numWorkers())(x => handleFileSync(x))
25 |       .run()
26 |       .map(_ => statistics.printFinalStatistics())
27 |   }
28 | 
29 |   private def createSource() = {
30 |     if (conf.uuidBoost.toOption.getOrElse(false)) {
31 |       createUUIDBoosterSource()
32 |     } else {
33 |       createSyncSource(conf, None)
34 |     }
35 |   }
36 | 
37 |   /** if the prefix contains only UUIDs, we can just create 16 sources and merge them */
38 |   private def createUUIDBoosterSource() = {
39 |     assert(conf.sourcePrefix.isDefined, "UUID booster requires source prefix")
40 |     assert(conf.targetPrefix.isDefined, "UUID booster requires target prefix")
41 |     // if we know that the folder contains UUIDs,
42 |     // we can activate the iteration booster,
43 |     // which will spawn multiple iteration calls for each
44 |     // first character -> multiplexing the blocking calls for next1k
45 |     // as we still compare the same prefix in the code itself,
46 |     // the code should still work
47 |     val extraPrefixedSources = "0123456789abcdef"
48 |       .toCharArray.toList.map(c => createSyncSource(conf, Some(c.toString)))
49 |     Source.combine(Source.empty, extraPrefixedSources.head, extraPrefixedSources.tail: _*)(Merge(_))
50 |   }
51 | 
52 | 
53 |   /** handles the sync of an individual file */
54 |   private def handleFileSync(x: FileSyncState) = {
55 |     retry(retries = 3, delay = 2.seconds) {
56 |       if (conf.dryRun.getOrElse(false)) {
57 |         log.info("[DRYRUN-COPY]".yellow + s" ${x.file.key.green} ${"->"} " +
58 |                  s"${conf.targetPrefix.getOrElse("").magenta + x.file.relativeKey.cyan}")
59 |         Future.successful(())
60 |       } else {
61 |         log.debug("[COPY-START] " + x.file.key + " -> "
62 |                     + conf.targetPrefix.getOrElse("") + x.file.relativeKey)
63 |         S3Connector.copyFile(
64 |           conf,
65 |           x.file.key,
66 |           conf.targetPrefix.getOrElse("") + x.file.relativeKey,
67 |           x.file.size).map { _ =>
68 |           log.debug("[COPY-SUCCESS] " + x.file.key)
69 |           statistics.incrementFilesCopied(x.file.size)
70 |         }
71 |       }
72 |     }(actorSystem.dispatcher, actorSystem.scheduler)
73 |   }
74 | 
75 | 
76 |   /** A utility method to filter file synchronization based on the configured job definition */
77 |   private def syncFilter(x: FileSyncState): Boolean = {
78 |     conf.syncMode() match {
79 |       case "ALWAYS" => true // take all
80 |       case "CHANGED" => x.status == SyncStatus.SizeChanged || x.status == SyncStatus.Missing
81 |       case "MISSING" => x.status == SyncStatus.Missing
82 |     }
83 |   }
84 | }
85 | 
--------------------------------------------------------------------------------
/src/main/scala/io/github/starofall/s3hypersync/SyncLogging.scala:
--------------------------------------------------------------------------------
1 | package io.github.starofall.s3hypersync
2 | 
3 | import ch.qos.logback.classic.{Level, LoggerContext}
4 | import org.slf4j.LoggerFactory
5 | 
6 | import scala.language.implicitConversions
7 | 
8 | 
9 | object SyncLogging {
10 | 
11 |   var colorActive: Boolean = true
12 | 
13 |   def initLogger(conf: JobDefinition): Unit = {
14 |     colorActive = !conf.noColor.getOrElse(false)
15 |     setRootLogLevel(conf.verbose.getOrElse(0))
16 |   }
17 | 
18 |   /** sets the log level based on -vvv amount */
19 |   def setRootLogLevel(levelInt: Int): Unit = {
20 |     val level = levelInt match {
21 |       case 0 => Level.INFO
22 |       case 1 => Level.DEBUG
23 |       case _ => Level.TRACE
24 |     }
25 |     val iLoggerFactory = LoggerFactory.getILoggerFactory
26 |     iLoggerFactory match {
27 |       case loggerContext: LoggerContext =>
28 |         val rootLogger = loggerContext.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME)
29 |         rootLogger.setLevel(level)
30 |       case _ =>
31 |         throw new IllegalStateException(s"Unexpected ILoggerFactory implementation: ${iLoggerFactory.getClass}")
32 |     }
33 |   }
34 | 
35 | 
36 |   trait Logger {
37 |     lazy val log = org.slf4j.LoggerFactory.getLogger(getClass)
38 | 
39 |     implicit def hasRainbow(s: String): RainbowString = new RainbowString(s)
40 | 
41 |   }
42 | 
43 |   class RainbowString(s: String) {
44 | 
45 |     import Console._
46 | 
47 |     /** Colorize the given string foreground to ANSI black */
48 |     def black = if (colorActive) {
49 |       BLACK + s + RESET
50 |     } else {
51 |       s
52 |     }
53 | 
54 |     /** Colorize the given string foreground to ANSI red */
55 |     def red = if (colorActive) {
56 |       RED + s + RESET
57 |     } else {
58 |       s
59 |     }
60 | 
61 |     /** Colorize the given string foreground to ANSI green */
62 |     def green = if (colorActive) {
63 |       GREEN + s + RESET
64 |     } else {
65 |       s
66 |     }
67 | 
68 |     /** Colorize the given string foreground to ANSI yellow */
69 |     def yellow = if (colorActive) {
70 |       YELLOW + s + RESET
71 |     } else {
72 |       s
73 |     }
74 | 
75 |     /** Colorize the given string foreground to ANSI blue */
76 |     def blue = if (colorActive) {
77 |       BLUE + s + RESET
78 |     } else {
79 |       s
80 |     }
81 | 
82 |     /** Colorize the given string foreground to ANSI magenta */
83 |     def magenta = if (colorActive) {
84 |       MAGENTA + s + RESET
85 |     } else {
86 |       s
87 |     }
88 | 
89 |     /** Colorize the given string foreground to ANSI cyan */
90 |     def cyan = if (colorActive) {
91 |       CYAN + s + RESET
92 |     } else {
93 |       s
94 |     }
95 | 
96 |     /** Make the given string bold */
97 |     def bold = if (colorActive) {
98 |       BOLD + s + RESET
99 |     } else {
100 |       s
101 |     }
102 |   }
103 | 
104 | }
105 | 
--------------------------------------------------------------------------------
/src/main/scala/io/github/starofall/s3hypersync/SyncModel.scala:
--------------------------------------------------------------------------------
1 | package io.github.starofall.s3hypersync
2 | 
3 | object SyncModel {
4 | 
5 |   /** Represents the status of sync */
6 |   trait SyncStatus
7 | 
8 |   /** Describes the sync status of a given file */
9 |   case class FileSyncState(status: SyncStatus,
10 |                            file: SyncFile)
11 | 
12 |   /**
13 |    * references a file on s3
14 |    * @param bucket      the bucket this file exists on
15 |    * @param key         the full s3 key
16 |    * @param size        the byteSize of the file
17 |    * @param relativeKey the key relative to the root dir of the job
18 |    */
19 |   case class SyncFile(bucket: String,
20 |                       key: String,
21 |                       size: Long,
22 |                       relativeKey: String)
23 | 
24 |   /** Object contains statuses of sync */
25 |   object SyncStatus {
26 | 
27 |     /** Object exists in S3 */
28 |     case object Exists extends SyncStatus
29 | 
30 |     /** Size of the object has changed in S3 */
31 |     case object SizeChanged extends SyncStatus
32 | 
33 |     /** Object is missing in S3 */
34 |     case object Missing extends SyncStatus
35 | 
36 |   }
37 | 
38 | }
39 | 
--------------------------------------------------------------------------------
/src/main/scala/io/github/starofall/s3hypersync/SyncS3Settings.scala:
--------------------------------------------------------------------------------
1 | package io.github.starofall.s3hypersync
2 | 
3 | import com.typesafe.config.ConfigFactory
4 | import org.apache.pekko.stream.connectors.s3.S3Settings
5 | 
6 | import java.nio.file.Files
7 | import scala.jdk.CollectionConverters.MapHasAsJava
8 | 
9 | object SyncS3Settings {
10 | 
11 |   /** extracts the job definition source as s3 settings */
12 |   def sourceConfig(d: JobDefinition): S3Settings = {
13 |     buildS3Settings(d.sourceKey.toOption.get,
14 |                     d.sourceSecret.toOption.get,
15 |                     d.sourceRegion.toOption.get,
16 |                     d.sourceEndpoint.toOption,
17 |                     d.sourcePathStyle.toOption.getOrElse(false))
18 |   }
19 | 
20 |   /** extracts the job definition target as s3 settings */
21 |   def targetConfig(d: JobDefinition,
22 |                    isHugeFile: Boolean = false): S3Settings = {
23 |     buildS3Settings(d.targetKey.toOption.get,
24 |                     d.targetSecret.toOption.get,
25 |                     d.targetRegion.toOption.get,
26 |                     d.targetEndpoint.toOption,
27 |                     d.targetPathStyle.toOption.getOrElse(false),
28 |                     isHugeFile)
29 |   }
30 | 
31 |   /** creates a pekko config object */
32 |   private def buildS3Settings(keyId: String,
33 |                               accessKey: String,
34 |                               region: String,
35 |                               endpointOverwrite: Option[String],
36 |                               usePathAccessStyle: Boolean,
37 |                               isHugeFile: Boolean = false): S3Settings = {
38 |     val settingMap = scala.collection.mutable.Map(
39 |       "buffer"
-> "memory", 40 | "validate-object-key" -> "true", 41 | "retry-settings.max-retries" -> 6, 42 | "retry-settings.min-backoff" -> "200ms", 43 | "retry-settings.max-backoff" -> "10s", 44 | "retry-settings.random-factor" -> 0.0, 45 | "multipart-upload.retry-settings.max-retries" -> 6, 46 | "multipart-upload.retry-settings.min-backoff" -> "200ms", 47 | "multipart-upload.retry-settings.max-backoff" -> "10s", 48 | "multipart-upload.retry-settings.random-factor" -> 0.0, 49 | "sign-anonymous-requests" -> true, 50 | "access-style" -> "virtual", 51 | "aws.region.provider" -> "static", 52 | "aws.region.default-region" -> region, 53 | "aws.credentials.provider" -> "static", 54 | "aws.credentials.access-key-id" -> keyId, 55 | "aws.credentials.secret-access-key" -> accessKey 56 | ) 57 | // on huge files we use file buffering (else we might run OOM) 58 | if (isHugeFile) { 59 | settingMap.update("buffer", "disk") 60 | settingMap.update("disk-buffer-path", Files.createTempDirectory("s3hypersync").toAbsolutePath.toString) 61 | } 62 | // allow setting a custom endpoint 63 | if (endpointOverwrite.isDefined) { 64 | settingMap.update("endpoint-url", endpointOverwrite.get) 65 | } 66 | // for legacy storage systems like minio, we can use path style 67 | if (usePathAccessStyle) { 68 | settingMap.update("access-style", "path") 69 | } 70 | S3Settings.create(ConfigFactory.parseMap(settingMap.asJava)) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/scala/io/github/starofall/s3hypersync/SyncStatistics.scala: -------------------------------------------------------------------------------- 1 | package io.github.starofall.s3hypersync 2 | 3 | import io.github.starofall.s3hypersync.SyncLogging.Logger 4 | import io.github.starofall.s3hypersync.SyncModel.{FileSyncState, SyncStatus} 5 | import org.apache.pekko.actor.{ActorSystem, Cancellable} 6 | 7 | import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} 8 | import scala.concurrent.ExecutionContext 9 | import scala.concurrent.duration.DurationInt 10 | 11 | class SyncStatistics(conf: JobDefinition) 12 | (implicit actorSystem: ActorSystem, executionContext: ExecutionContext) 13 | extends Logger { 14 | 15 | val filesScanned = new AtomicInteger(0) 16 | val filesMissing = new AtomicInteger(0) 17 | val filesChanged = new AtomicInteger(0) 18 | val filesCopied = new AtomicInteger(0) 19 | val filesExisting = new AtomicInteger(0) 20 | val filesScannedLastSecond = new AtomicInteger(0) 21 | val awsPutRequests = new AtomicInteger(0) 22 | val bytesTransferredLastSecond: AtomicLong = new AtomicLong(0L) 23 | val totalBytesTransferred : AtomicLong = new AtomicLong(0L) 24 | var lastUpdateTime : Long = System.currentTimeMillis() 25 | var started = false 26 | 27 | initStatistics() 28 | 29 | def statCall(x: FileSyncState): Unit = { 30 | setStarted() 31 | x.status match { 32 | case SyncStatus.Missing => incrementFilesMissing() 33 | case SyncStatus.SizeChanged => incrementFilesChanged() 34 | case SyncStatus.Exists => incrementFilesExisting() 35 | } 36 | incrementFilesScanned() 37 | log.trace(x.status.toString + "->" + x.file.key) 38 | } 39 | 40 | def incrementFilesScanned(): Unit = { 41 | filesScanned.incrementAndGet() 42 | filesScannedLastSecond.incrementAndGet() 43 | } 44 | 45 | def incrementFilesMissing(): Unit = filesMissing.incrementAndGet() 46 | 47 | def incrementFilesChanged(): Unit = filesChanged.incrementAndGet() 48 | 49 | def incrementFilesExisting(): Unit = filesExisting.incrementAndGet() 50 | 51 | def setStarted(): Unit 
= started = true 52 | 53 | def incrementFilesCopied(size: Long): Unit = { 54 | filesCopied.incrementAndGet() 55 | bytesTransferredLastSecond.addAndGet(size) 56 | totalBytesTransferred.addAndGet(size) 57 | } 58 | 59 | def incrementAwsPutRequests(l: Int): Int = awsPutRequests.addAndGet(l) 60 | 61 | def printFinalStatistics(): Unit = { 62 | log.info("##############") 63 | log.info("## Sync Stats") 64 | log.info(s"# Missing | ${filesMissing.get().toString.yellow}") 65 | log.info(s"# Changed | ${filesChanged.get().toString.yellow}") 66 | log.info(s"# Exists | ${filesExisting.get().toString.yellow}") 67 | log.info("##############") 68 | log.info("## Copy Stats") 69 | log.info(s"# Files | ${filesCopied.get().toString.yellow}") 70 | log.info(s"# MB | ${(totalBytesTransferred.get() / 1024.0 / 1024.0).round.toString.yellow}") 71 | log.info("##############") 72 | log.info("## Cost Stats") 73 | log.info(s"# Puts | ${awsPutRequests.get().toString.yellow}") 74 | log.info("##############") 75 | } 76 | 77 | def initStatistics(): Cancellable = { 78 | log.info(s"[INIT] ".yellow + 79 | s"${conf.sourceBucket.toOption.get}/${conf.sourcePrefix.getOrElse("")} ".green + 80 | s"-> " + 81 | s"${conf.targetBucket.toOption.get}/${conf.targetPrefix.getOrElse("")}".cyan) 82 | // Schedule a task to print statistics every second 83 | actorSystem.scheduler.scheduleAtFixedRate(1.second, 5.second) { 84 | () => printStatistics() 85 | } 86 | } 87 | 88 | def printStatistics(): Unit = { 89 | val currentTime = System.currentTimeMillis() 90 | val duration = (currentTime - lastUpdateTime) / 1000.0 91 | val speed = filesScannedLastSecond.get() / duration 92 | val MBspeed = bytesTransferredLastSecond.get() / (1024.0 * 1024.0 * duration) // in MB/s 93 | lastUpdateTime = currentTime 94 | filesScannedLastSecond.set(0) 95 | bytesTransferredLastSecond.set(0) // Reset for the next interval 96 | if (started) { 97 | log.info(f"[STATS] ".yellow + 98 | f"Bandwidth: $MBspeed%.2f MB/s | ".magenta + 99 | f"Files: $speed%.2f f/sec".red + 100 | f" | Scanned: $filesScanned".cyan + 101 | f" | Copied: $filesCopied".cyan + 102 | f" | Missing: $filesMissing".cyan + 103 | f" | Changed: $filesChanged".cyan + 104 | f" | Existing: $filesExisting".cyan) 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/main/scala/io/github/starofall/s3hypersync/SyncUtil.scala: -------------------------------------------------------------------------------- 1 | package io.github.starofall.s3hypersync 2 | 3 | import com.typesafe.config.{Config, ConfigFactory} 4 | import io.github.starofall.s3hypersync.MainApp.log 5 | import io.github.starofall.s3hypersync.SyncLogging._ 6 | import io.github.starofall.s3hypersync.SyncModel.{FileSyncState, SyncStatus} 7 | import org.apache.pekko.actor.{ActorSystem, Cancellable, Scheduler, Terminated} 8 | import org.apache.pekko.pattern.after 9 | 10 | import scala.concurrent.duration.{DurationInt, FiniteDuration} 11 | import scala.concurrent.{ExecutionContext, Future} 12 | import scala.util.{Failure, Success, Try} 13 | 14 | object SyncUtil extends Logger { 15 | 16 | def handleFinalResult(result: Try[_]) 17 | (implicit actorSystem: ActorSystem): Future[Terminated] = { 18 | result match { 19 | case Failure(exception) => 20 | log.error("Error Running Sync") 21 | exception.printStackTrace(System.err) 22 | actorSystem.registerOnTermination(() => System.exit(1)) 23 | actorSystem.terminate() 24 | case Success(_) => 25 | actorSystem.registerOnTermination(() => System.exit(0)) 26 | 
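// Note: blocks registered via registerOnTermination run only once the actor
// system has fully stopped; the terminate() call below starts that coordinated
// shutdown and completes the returned Terminated future.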
actorSystem.terminate() 27 | } 28 | 29 | } 30 | 31 | /** if defined, we add a time bomb/timeout to our execution */ 32 | def addTimeoutIfNeeded(conf: JobDefinition) 33 | (implicit actorSystem: ActorSystem, exc: ExecutionContext): Option[Cancellable] = { 34 | // build a time-bomb for timeout 35 | conf.timeout.toOption.map(timeoutSeconds => { 36 | log.info("[TIMEOUT]".yellow + s" Set a timeout of $timeoutSeconds seconds".magenta) 37 | actorSystem.scheduler.scheduleOnce(timeoutSeconds.seconds) { 38 | log.error("[CRITICAL ERROR] THE PROCESS DID NOT FINISH - HAD TO SELF-KILL".red) 39 | System.exit(2) 40 | } 41 | }) 42 | } 43 | 44 | 45 | /** simple retry util in the pekko world */ 46 | def retry[T](retries: Int, delay: FiniteDuration) 47 | (f: => Future[T])(implicit ec: ExecutionContext, scheduler: Scheduler): Future[T] = { 48 | f.recoverWith { 49 | case _ if retries > 0 => 50 | log.debug("[RETRY] Had to retry...") 51 | after(delay, scheduler)(retry(retries - 1, delay)(f)) 52 | } 53 | } 54 | 55 | 56 | /** creates an adjusted config for pekko for our desired worker pool size */ 57 | def buildConfig(conf: JobDefinition): Config = { 58 | ConfigFactory.parseString( 59 | s"""pekko { 60 | loglevel = "ERROR" 61 | stdout-loglevel = "ERROR" 62 | actor { 63 | default-dispatcher { 64 | type = Dispatcher 65 | executor = "thread-pool-executor" 66 | thread-pool-executor { 67 | fixed-pool-size = ${conf.numWorkers.getOrElse(64) + 4} 68 | } 69 | throughput = 1000 70 | } 71 | } 72 | coordinated-shutdown.log-info = off 73 | http.host-connection-pool = { 74 | max-connections = ${conf.numWorkers.getOrElse(64) * 2 + 10} 75 | max-open-requests = ${conf.numWorkers.getOrElse(64) * 4 + 10} 76 | } 77 | }""") 78 | } 79 | 80 | 81 | 82 | } 83 | -------------------------------------------------------------------------------- /src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | %-5level | %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /src/test/scala/io/github/starofall/s3hypersync/SyncCommandTest.scala: -------------------------------------------------------------------------------- 1 | package io.github.starofall.s3hypersync 2 | 3 | import com.robothy.s3.rest.LocalS3 4 | import com.robothy.s3.rest.bootstrap.LocalS3Mode 5 | import org.apache.pekko.actor.ActorSystem 6 | import org.scalatest.funsuite.AnyFunSuiteLike 7 | import org.scalatest.matchers.must.Matchers.convertToAnyMustWrapper 8 | 9 | import scala.concurrent.duration.DurationInt 10 | import scala.concurrent.{Await, ExecutionContextExecutor} 11 | 12 | class SyncCommandTest extends AnyFunSuiteLike { 13 | 14 | implicit val actorSystem: ActorSystem = ActorSystem("TestSyncSystem") 15 | implicit val exc : ExecutionContextExecutor = actorSystem.dispatcher 16 | 17 | def createConfig(dryRun: Boolean): JobDefinition = { 18 | new JobDefinition(List( 19 | "--source-key", "DUMMY", 20 | "--source-secret", "DUMMY", 21 | "--source-region", "region", 22 | "--source-path-style", 23 | "--source-endpoint", "http://localhost:19090", 24 | "--target-key", "DUMMY2", 25 | "--target-secret", "DUMMY2", 26 | "--target-region", "region", 27 | "--target-endpoint", "http://localhost:19090", 28 | "--target-path-style", 29 | "--source-bucket", "bucket-a", 30 | "--target-bucket", "bucket-b") ++ (if (dryRun) List("--dry-run") else List())) 31 | } 32 | 33 | 34 | test("simple sync") { 35 | val localS3 = LocalS3.builder 36 | 
.mode(LocalS3Mode.IN_MEMORY) 37 | .dataPath("./tests/simple") 38 | .port(19090).build 39 | localS3.start() 40 | 41 | val dryRunConfig = createConfig(dryRun = true) 42 | SyncLogging.initLogger(dryRunConfig) 43 | val dryRunCommand = new SyncCommand(dryRunConfig) 44 | dryRunCommand.statistics.filesScanned.get() mustBe 0 45 | Await.result(dryRunCommand.runSyncJob(), 30.seconds) 46 | dryRunCommand.statistics.filesScanned.get() mustBe 3 47 | 48 | val syncConfig = createConfig(dryRun = false) 49 | val syncCommand = new SyncCommand(syncConfig) 50 | Await.result(syncCommand.runSyncJob(), 30.seconds) 51 | syncCommand.statistics.filesCopied.get() mustBe 3 52 | 53 | val checkConfig = createConfig(dryRun = true) 54 | SyncLogging.initLogger(checkConfig) 55 | val checkCommand = new SyncCommand(checkConfig) 56 | Await.result(checkCommand.runSyncJob(), 30.seconds) 57 | checkCommand.statistics.filesMissing.get() mustBe 0 58 | 59 | localS3.shutdown() 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /tests/simple/.storage/7209184516538105857: -------------------------------------------------------------------------------- 1 | test -------------------------------------------------------------------------------- /tests/simple/.storage/7209184677322555393: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Starofall/S3HyperSync/723a4a273938f12805c677898f4adff7047a1694/tests/simple/.storage/7209184677322555393 -------------------------------------------------------------------------------- /tests/simple/.storage/7209184710918930433: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Starofall/S3HyperSync/723a4a273938f12805c677898f4adff7047a1694/tests/simple/.storage/7209184710918930433 -------------------------------------------------------------------------------- /tests/simple/.storage/7209184749493944321: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Starofall/S3HyperSync/723a4a273938f12805c677898f4adff7047a1694/tests/simple/.storage/7209184749493944321 -------------------------------------------------------------------------------- /tests/simple/bucket-a.bucket.meta: -------------------------------------------------------------------------------- 1 | 
{"objectMap":{"dummy.flac":{"versionedObjectMap":{"7209184677322555392":{"etag":"a0c4882a57074887748f62eaf4bcbc94","contentType":"audio/x-flac","creationDate":1720449404961,"size":716938,"fileId":7209184677322555393,"tagging":null,"userMetadata":{},"deleted":false}},"virtualVersion":"7209184677322555392"},"subdir/":{"versionedObjectMap":{"7209184710918930432":{"etag":"d41d8cd98f00b204e9800998ecf8427e","contentType":null,"creationDate":1720449412971,"size":0,"fileId":7209184710918930433,"tagging":null,"userMetadata":{},"deleted":false}},"virtualVersion":"7209184710918930432"},"subdir/dummy2.flac":{"versionedObjectMap":{"7209184749493944320":{"etag":"a0c4882a57074887748f62eaf4bcbc94","contentType":"audio/x-flac","creationDate":1720449422168,"size":716938,"fileId":7209184749493944321,"tagging":null,"userMetadata":{},"deleted":false}},"virtualVersion":"7209184749493944320"},"test.file":{"versionedObjectMap":{"7209184516538105856":{"etag":"098f6bcd4621d373cade4e832627b4f6","contentType":null,"creationDate":1720449366627,"size":4,"fileId":7209184516538105857,"tagging":null,"userMetadata":{},"deleted":false}},"virtualVersion":"7209184516538105856"}},"creationDate":1720449248883,"region":"local","uploads":{},"versioningEnabled":null,"bucketName":"bucket-a","tagging":null,"acl":null,"policy":null,"replication":null,"encryption":null} -------------------------------------------------------------------------------- /tests/simple/bucket-b.bucket.meta: -------------------------------------------------------------------------------- 1 | {"objectMap":{},"creationDate":1720449254106,"region":"local","uploads":{},"versioningEnabled":null,"bucketName":"bucket-b","tagging":null,"acl":null,"policy":null,"replication":null,"encryption":null} -------------------------------------------------------------------------------- /tests/simple/version: -------------------------------------------------------------------------------- 1 | 1 --------------------------------------------------------------------------------