├── java ├── .gitignore ├── build ├── run-tests ├── hash-file ├── ensure-built └── src │ ├── HashFile.java │ ├── RunTests.java │ └── DropboxContentHasher.java ├── rust ├── .gitignore ├── Cargo.toml ├── src │ ├── hash_file.rs │ ├── lib.rs │ └── run_tests.rs └── Cargo.lock ├── csharp ├── .gitignore ├── ReadMe.md ├── NuGet.config ├── csharp.csproj └── src │ ├── CLI.cs │ ├── HashFile.cs │ ├── DropboxContentHasher.cs │ └── RunTests.cs ├── python ├── .gitignore ├── hash_file.py ├── run_tests.py └── dropbox_content_hasher.py ├── ReadMe.md ├── License.txt └── js-node ├── hash-file.js ├── run-tests.js └── dropbox-content-hasher.js /java/.gitignore: -------------------------------------------------------------------------------- 1 | /out 2 | /.idea 3 | -------------------------------------------------------------------------------- /rust/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.idea 3 | -------------------------------------------------------------------------------- /csharp/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /obj 3 | /.vscode 4 | -------------------------------------------------------------------------------- /python/.gitignore: -------------------------------------------------------------------------------- 1 | /__pycache__ 2 | /*.pyc 3 | -------------------------------------------------------------------------------- /java/build: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | set -euo pipefail 3 | 4 | [[ ! 
-e out ]] || rm -r out 5 | mkdir -p out 6 | touch out/stamp 7 | javac -d out src/*.java 8 | -------------------------------------------------------------------------------- /csharp/ReadMe.md: -------------------------------------------------------------------------------- 1 | Requirements: 2 | - Core CLR 2.x 3 | 4 | To build: 5 | 6 | # dotnet restore 7 | # dotnet build 8 | 9 | To run: 10 | 11 | # dotnet run 12 | -------------------------------------------------------------------------------- /java/run-tests: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | set -euo pipefail 3 | 4 | # A command-line tool that runs the DropboxContentHasher tests. 5 | 6 | ./ensure-built 7 | exec java -cp out RunTests "$@" 8 | -------------------------------------------------------------------------------- /java/hash-file: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | set -euo pipefail 3 | 4 | # A command-line tool that computes the Dropbox-Content-Hash of the given file. 5 | 6 | ./ensure-built 7 | exec java -cp out HashFile "$@" 8 | -------------------------------------------------------------------------------- /ReadMe.md: -------------------------------------------------------------------------------- 1 | # Dropbox API Content Hasher 2 | 3 | The Dropbox API returns file metadata with a "content_hash" field. This repository contains examples (in a few different programming languages) of how to compute that hash value. 4 | -------------------------------------------------------------------------------- /csharp/NuGet.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /java/ensure-built: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | rebuild=true 5 | if [[ -f out/stamp ]]; then 6 | if [[ "$(find build src -newer "out/stamp" | head -c 1)" == "" ]]; then 7 | rebuild=false 8 | fi 9 | fi 10 | if $rebuild; then 11 | ./build 12 | fi 13 | -------------------------------------------------------------------------------- /csharp/csharp.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | netcoreapp2.0 6 | 2.0.0-beta-001509-00 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /rust/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dropbox-content-hasher" 3 | version = "0.1.0" 4 | 5 | [dependencies] 6 | digest = "^0.4" 7 | sha2 = "^0.4" 8 | generic-array = "^0.6" 9 | rand = "^0.3" 10 | 11 | [lib] 12 | name = "dropbox_content_hasher" 13 | path = "src/lib.rs" 14 | 15 | [[bin]] 16 | name = "hash-file" 17 | path = "src/hash_file.rs" 18 | 19 | [[bin]] 20 | name = "run-tests" 21 | path = "src/run_tests.rs" 22 | -------------------------------------------------------------------------------- /License.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Dropbox, Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | -------------------------------------------------------------------------------- /python/hash_file.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function, unicode_literals 2 | 3 | # A command-line program that computes the Dropbox-Content-Hash of the given file. 4 | 5 | import sys 6 | 7 | from dropbox_content_hasher import DropboxContentHasher 8 | 9 | def main(): 10 | prog_name, args = sys.argv[0], sys.argv[1:] 11 | if len(args) != 1: 12 | sys.stderr.write("Expecting exactly one argument, got {}.\n".format(len(args))) 13 | sys.exit(1) 14 | 15 | fn = args[0] 16 | 17 | hasher = DropboxContentHasher() 18 | with open(fn, 'rb') as f: 19 | while True: 20 | chunk = f.read(1024) # or whatever chunk size you want 21 | if len(chunk) == 0: 22 | break 23 | hasher.update(chunk) 24 | print(hasher.hexdigest()) 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /js-node/hash-file.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // A command-line program that computes the Dropbox-Content-Hash of the given file. 
4 | 5 | const fs = require('fs'); 6 | const dch = require('./dropbox-content-hasher'); 7 | 8 | function main() { 9 | const args = process.argv.slice(2); 10 | 11 | if (args.length != 1) { 12 | console.error("Expecting exactly one argument; got " + args.length + "."); 13 | process.exit(1); 14 | } 15 | 16 | const fn = args[0]; 17 | 18 | const hasher = dch.create(); 19 | const f = fs.createReadStream(fn); 20 | f.on('data', function(buf) { 21 | hasher.update(buf); 22 | }); 23 | f.on('end', function(err) { 24 | const hexDigest = hasher.digest('hex'); 25 | console.log(hexDigest); 26 | }); 27 | f.on('error', function(err) { 28 | console.error("Error reading from file: " + err); 29 | process.exit(1); 30 | }); 31 | } 32 | 33 | main(); 34 | -------------------------------------------------------------------------------- /csharp/src/CLI.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | /// 4 | /// The entrypoint for both RunTests and HashFile. Using a shared entrypoint 5 | /// because I can't figure out how to build two executables in a single project. 6 | /// 7 | public class CLI 8 | { 9 | public static void Main(string[] args) 10 | { 11 | if (args.Length == 0) { 12 | Console.WriteLine("Usage:"); 13 | Console.WriteLine(" COMMAND run-tests"); 14 | Console.WriteLine(" COMMAND hash-file "); 15 | return; 16 | } 17 | 18 | var sub = args[0]; 19 | var subArgs = new string[args.Length-1]; 20 | Array.Copy(args, 1, subArgs, 0, subArgs.Length); 21 | 22 | if (sub == "run-tests") { 23 | RunTests.SubMain(subArgs); 24 | } else if (sub == "hash-file") { 25 | HashFile.SubMain(subArgs); 26 | } else { 27 | Console.Error.WriteLine("Unknown sub-command: \"{0}\".", sub); 28 | Environment.Exit(1); return; 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /rust/src/hash_file.rs: -------------------------------------------------------------------------------- 1 | //! 
A command-line tool that computes the Dropbox-Content-Hash of the given file. 2 | 3 | extern crate digest; 4 | extern crate dropbox_content_hasher; 5 | 6 | use std::io::Write as io_Write; 7 | use std::io::Read as io_Read; 8 | use digest::Digest; 9 | use dropbox_content_hasher::DropboxContentHasher; 10 | 11 | fn main() { 12 | let mut args = std::env::args(); 13 | args.next().unwrap(); // Remove name of binary. 14 | if args.len() != 1 { 15 | writeln!(&mut std::io::stderr(), "Expecting exactly one argument, got {}.", args.len()).unwrap(); 16 | std::process::exit(1); 17 | } 18 | 19 | let file_name = args.next().unwrap(); 20 | 21 | let mut hasher = DropboxContentHasher::new(); 22 | let mut buf: [u8; 4096] = [0; 4096]; 23 | let mut f = std::fs::File::open(file_name).unwrap(); 24 | loop { 25 | let len = f.read(&mut buf).unwrap(); 26 | if len == 0 { break; } 27 | hasher.input(&buf[..len]) 28 | } 29 | drop(f); 30 | 31 | let hex_hash = format!("{:x}", hasher.result()); 32 | println!("{}", hex_hash); 33 | } -------------------------------------------------------------------------------- /java/src/HashFile.java: -------------------------------------------------------------------------------- 1 | import java.io.FileInputStream; 2 | import java.io.IOException; 3 | import java.io.InputStream; 4 | import java.security.MessageDigest; 5 | 6 | /** 7 | * A command-line tool that computes the Dropbox-Content-Hash of the given file. 
8 | */ 9 | public class HashFile 10 | { 11 | public static void main(String[] args) 12 | throws IOException 13 | { 14 | if (args.length != 1) { 15 | System.err.println("Expecting exactly one argument, got " + args.length + "."); 16 | System.exit(1); return; 17 | } 18 | 19 | String fn = args[0]; 20 | 21 | MessageDigest hasher = new DropboxContentHasher(); 22 | byte[] buf = new byte[1024]; 23 | // try-with-resources closes the stream on every path and, unlike a 24 | // finally-block close(), never masks an exception thrown by read(). 25 | try (InputStream in = new FileInputStream(fn)) { 26 | while (true) { 27 | int n = in.read(buf); 28 | if (n < 0) break; // EOF 29 | hasher.update(buf, 0, n); 30 | } 31 | } 32 | 33 | System.out.println(RunTests.hex(hasher.digest())); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /csharp/src/HashFile.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.IO; 3 | 4 | /// 5 | /// A command-line tool that computes the Dropbox-Content-Hash of the given file. 
6 | /// 7 | public class HashFile 8 | { 9 | public static void SubMain(string[] args) 10 | { 11 | if (args.Length != 1) { 12 | Console.Error.WriteLine("The hash-file sub-command expects exactly one argument, got " + args.Length + "."); 13 | Environment.Exit(1); return; 14 | } 15 | 16 | string fn = args[0]; 17 | 18 | var hasher = new DropboxContentHasher(); 19 | byte[] buf = new byte[1024]; 20 | using (var file = File.OpenRead(fn)) 21 | { 22 | while (true) 23 | { 24 | int n = file.Read(buf, 0, buf.Length); 25 | if (n <= 0) break; // EOF 26 | hasher.TransformBlock(buf, 0, n, buf, 0); 27 | } 28 | } 29 | 30 | // Finalize with an empty block, then print exactly one line: the hex-encoded content hash. 31 | hasher.TransformFinalBlock(Array.Empty<byte>(), 0, 0); 32 | string hexHash = DropboxContentHasher.ToHex(hasher.Hash); 33 | Console.WriteLine(hexHash); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /js-node/run-tests.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // A command-line program that runs the dropbox-content-hasher tests. 
4 | 5 | const crypto = require('crypto'); 6 | const dch = require('./dropbox-content-hasher'); 7 | 8 | function referenceHasher(data) { 9 | const overallHasher = crypto.createHash('sha256'); 10 | for (let pos = 0; pos < data.length; pos += dch.BLOCK_SIZE) { 11 | const chunk = data.slice(pos, pos+dch.BLOCK_SIZE); 12 | const blockHasher = crypto.createHash('sha256'); 13 | blockHasher.update(chunk); 14 | overallHasher.update(blockHasher.digest()); 15 | } 16 | return overallHasher.digest('hex'); 17 | } 18 | 19 | function check(data, chunkSizes) { 20 | console.log("checking " + JSON.stringify(chunkSizes)); 21 | 22 | const hasher = dch.create(); 23 | 24 | let pos = 0; 25 | for (const chunkSize of chunkSizes) { 26 | const chunk = data.slice(pos, pos+chunkSize); 27 | pos += chunkSize; 28 | 29 | hasher.update(chunk); 30 | } 31 | 32 | const result = hasher.digest('hex'); 33 | const reference = referenceHasher(data.slice(0, sum(chunkSizes))) 34 | 35 | const passed = (result === reference); 36 | if (!passed) { 37 | console.log("- FAILED: " + JSON.stringify(reference) + ", " + JSON.stringify(result)); 38 | } 39 | return passed; 40 | } 41 | 42 | function sum(arr) { 43 | let r = 0; 44 | for (const n of arr) { 45 | r += n; 46 | } 47 | return r; 48 | } 49 | 50 | function main() { 51 | const args = process.argv.slice(2); 52 | if (args.length > 0) { 53 | console.error("No arguments expected; got " + args.length + "."); 54 | process.exit(1); 55 | } 56 | 57 | const B = dch.BLOCK_SIZE; 58 | 59 | const tests = [ 60 | [0], 61 | [100], 62 | [100, 10], 63 | [B-1], 64 | [B], 65 | [B+1], 66 | 67 | [B-2, 1], 68 | [B-2, 2], 69 | [B-2, 3], 70 | 71 | [B-2, B+1], 72 | [B-2, B+2], 73 | [B-2, B+3], 74 | 75 | [5, 5, 5], 76 | [5, 5, 5, B], 77 | [5, 5, 5, 3*B], 78 | [5, 5, 5, 3*B, 5, 5, 5, 3*B], 79 | ]; 80 | 81 | let longestLength = 0; 82 | for (const test of tests) { 83 | longestLength = Math.max(longestLength, sum(test)); 84 | } 85 | 86 | console.log("generating random data"); 87 | const data = 
crypto.randomBytes(longestLength); 88 | 89 | for (const test of tests) { 90 | const passed = check(data, test); 91 | if (!passed) { 92 | process.exit(2); 93 | } 94 | } 95 | console.log("all passed"); 96 | } 97 | 98 | main(); 99 | -------------------------------------------------------------------------------- /rust/src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate digest; 2 | extern crate sha2; 3 | extern crate generic_array; 4 | 5 | use digest::Digest; 6 | use sha2::Sha256; 7 | use generic_array::{GenericArray}; 8 | 9 | pub const BLOCK_SIZE: usize = 4 * 1024 * 1024; 10 | 11 | /// Computes a hash using the same algorithm that the Dropbox API uses for the 12 | /// the "content_hash" metadata field. 13 | /// 14 | /// Implements the `digest::Digest` trait, whose `result()` function returns a 15 | /// raw binary representation of the hash. The "content_hash" field in the 16 | /// Dropbox API is a hexadecimal-encoded version of this value. 
17 | /// 18 | /// Example: 19 | /// 20 | /// ``` 21 | /// use dropbox_content_hasher::{DropboxContentHasher, hex}; 22 | /// 23 | /// let mut hasher = DropboxContentHasher::new(); 24 | /// let mut buf: [u8; 4096] = [0; 4096]; 25 | /// let mut f = std::fs::File::open("some-file").unwrap(); 26 | /// loop { 27 | /// let len = f.read(&mut buf).unwrap(); 28 | /// if len == 0 { break; } 29 | /// hasher.input(&buf[..len]) 30 | /// } 31 | /// drop(f); 32 | /// 33 | /// let hex_hash = format!("{:x}", hasher.result()); 34 | /// println!("{}", hex_hash); 35 | /// ``` 36 | 37 | #[derive(Clone, Copy)] 38 | pub struct DropboxContentHasher { 39 | overall_hasher: Sha256, 40 | block_hasher: Sha256, 41 | block_pos: usize, 42 | } 43 | 44 | impl DropboxContentHasher { 45 | pub fn new() -> Self { 46 | DropboxContentHasher { 47 | overall_hasher: Sha256::new(), 48 | block_hasher: Sha256::new(), 49 | block_pos: 0, 50 | } 51 | } 52 | } 53 | 54 | impl Default for DropboxContentHasher { 55 | fn default() -> Self { Self::new() } 56 | } 57 | 58 | impl Digest for DropboxContentHasher { 59 | type OutputSize = ::OutputSize; 60 | type BlockSize = ::BlockSize; 61 | 62 | fn input(&mut self, mut input: &[u8]) { 63 | while input.len() > 0 { 64 | if self.block_pos == BLOCK_SIZE { 65 | self.overall_hasher.input(self.block_hasher.result().as_slice()); 66 | self.block_hasher = Sha256::new(); 67 | self.block_pos = 0; 68 | } 69 | 70 | let space_in_block = BLOCK_SIZE - self.block_pos; 71 | let (head, rest) = input.split_at(std::cmp::min(input.len(), space_in_block)); 72 | self.block_hasher.input(head); 73 | 74 | self.block_pos += head.len(); 75 | input = rest; 76 | } 77 | } 78 | 79 | fn result(mut self) -> GenericArray { 80 | if self.block_pos > 0 { 81 | self.overall_hasher.input(self.block_hasher.result().as_slice()); 82 | } 83 | self.overall_hasher.result() 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /rust/src/run_tests.rs: 
-------------------------------------------------------------------------------- 1 | //! A command-line tool that runs the `DropboxContentHasher` tests. 2 | 3 | extern crate digest; 4 | extern crate rand; 5 | extern crate dropbox_content_hasher; 6 | extern crate sha2; 7 | 8 | use digest::Digest; 9 | use std::io::Write as io_Write; 10 | use rand::Rng; 11 | use dropbox_content_hasher::{DropboxContentHasher, BLOCK_SIZE}; 12 | 13 | fn main() { 14 | let mut args = std::env::args(); 15 | args.next().unwrap(); // Remove name of binary. 16 | if args.len() != 0 { 17 | writeln!(&mut std::io::stderr(), "No arguments expected; got {}.", args.len()).unwrap(); 18 | std::process::exit(1); 19 | } 20 | 21 | let b = BLOCK_SIZE; 22 | 23 | let tests: &[&[usize]] = &[ 24 | &[0], 25 | &[100], 26 | &[100, 10], 27 | &[b-1], 28 | &[b], 29 | &[b+1], 30 | 31 | &[b-2, 1], 32 | &[b-2, 2], 33 | &[b-2, 3], 34 | 35 | &[b-2, b+1], 36 | &[b-2, b+2], 37 | &[b-2, b+3], 38 | 39 | &[5, 5, 5], 40 | &[5, 5, 5, b], 41 | &[5, 5, 5, 3*b], 42 | &[5, 5, 5, 3*b, 5, 5, 5, 3*b], 43 | ]; 44 | 45 | let longest_length = tests.iter().fold(0, |m, x| std::cmp::max(m, x.iter().sum())); 46 | 47 | println!("generating random data"); 48 | let mut data: Box<[u8]> = vec![0; longest_length].into_boxed_slice(); 49 | rand::ChaChaRng::new_unseeded().fill_bytes(data.as_mut()); 50 | 51 | for &test in tests { 52 | let passed = check(data.as_ref(), test); 53 | if !passed { 54 | std::process::exit(2); 55 | } 56 | } 57 | println!("all passed"); 58 | } 59 | 60 | fn reference_hasher(data: &[u8]) -> String { 61 | let mut overall_hasher = sha2::Sha256::new(); 62 | for chunk in data.chunks(BLOCK_SIZE) { 63 | let mut block_hasher = sha2::Sha256::new(); 64 | block_hasher.input(chunk); 65 | overall_hasher.input(block_hasher.result().as_slice()); 66 | } 67 | return format!("{:x}", overall_hasher.result()); 68 | } 69 | 70 | fn check(data: &[u8], chunk_sizes: &[usize]) -> bool { 71 | println!("checking {:?}", chunk_sizes); 72 | 73 | let mut 
hasher = DropboxContentHasher::new(); 74 | 75 | let mut input = data; 76 | let mut total_length = 0; 77 | for chunk_size in chunk_sizes.iter().cloned() { 78 | let (chunk, rest) = input.split_at(chunk_size); 79 | input = rest; 80 | hasher.input(chunk); 81 | total_length += chunk_size; 82 | } 83 | 84 | let result = format!("{:x}", hasher.result()); 85 | let reference = reference_hasher(data.split_at(total_length).0); 86 | 87 | let passed = result == reference; 88 | if !passed { 89 | println!("- FAILED: {:?}, {:?}", reference, result) 90 | } 91 | passed 92 | } 93 | -------------------------------------------------------------------------------- /python/run_tests.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function, unicode_literals 2 | 3 | # A command-line program that runs the dropbox_content_hasher tests. 4 | 5 | import hashlib 6 | import os 7 | import six 8 | import sys 9 | 10 | from dropbox_content_hasher import DropboxContentHasher, StreamHasher 11 | 12 | def reference_hasher(data): 13 | """ 14 | A simpler implementation, used solely to test the more complicated one. 
15 | """ 16 | assert isinstance(data, six.binary_type), ( 17 | "Expecting a byte string, got {!r}".format(data)) 18 | block_hashes = (hashlib.sha256(data[i:i+DropboxContentHasher.BLOCK_SIZE]).digest() 19 | for i in six.moves.xrange(0, len(data), DropboxContentHasher.BLOCK_SIZE)) 20 | return hashlib.sha256(b''.join(block_hashes)).hexdigest() 21 | 22 | def check(data, chunk_sizes): 23 | print("checking {!r}".format(chunk_sizes)) 24 | hashers = [DropboxContentHasher()] 25 | 26 | read_hasher = DropboxContentHasher() 27 | read_stream = StreamHasher(six.BytesIO(data), read_hasher) 28 | 29 | write_hasher = DropboxContentHasher() 30 | write_target = six.BytesIO() 31 | write_stream = StreamHasher(write_target, write_hasher) 32 | 33 | pos = 0 34 | for chunk_size in chunk_sizes: 35 | chunk = data[pos:pos+chunk_size] 36 | pos += chunk_size 37 | 38 | hashers.append(hashers[0].copy()) 39 | for hasher in hashers: 40 | hasher.update(chunk) 41 | 42 | write_stream.write(chunk) 43 | read_chunk = read_stream.read(len(chunk)) 44 | assert read_chunk == chunk 45 | 46 | written = write_target.getvalue() 47 | assert written == data, (len(written), len(data)) 48 | 49 | results = [hasher.hexdigest() for hasher in hashers + [read_hasher, write_hasher]] 50 | reference = reference_hasher(data) 51 | 52 | passed = all(result == reference for result in results) 53 | if not passed: 54 | print("- FAILED: {!r}, {!r}".format(reference, results)) 55 | return passed 56 | 57 | def main(): 58 | args = sys.argv[1:] 59 | assert len(args) == 0, "No arguments expected; got {!r}.".format(args) 60 | 61 | B = DropboxContentHasher.BLOCK_SIZE 62 | 63 | tests = [ 64 | [0], 65 | [100], 66 | [100, 10], 67 | [B-1], 68 | [B], 69 | [B+1], 70 | 71 | [B-2, 1], 72 | [B-2, 2], 73 | [B-2, 3], 74 | 75 | [B-2, B+1], 76 | [B-2, B+2], 77 | [B-2, B+3], 78 | 79 | [5, 5, 5], 80 | [5, 5, 5, B], 81 | [5, 5, 5, 3*B], 82 | [5, 5, 5, 3*B, 5, 5, 5, 3*B], 83 | ] 84 | 85 | longest_length = 0 86 | for test in tests: 87 | longest_length = 
max(longest_length, sum(test)) 88 | 89 | print("generating random data") 90 | data = os.urandom(longest_length) 91 | 92 | for test in tests: 93 | passed = check(data[:sum(test)], test) 94 | if not passed: 95 | sys.exit(2) 96 | 97 | print("all passed") 98 | 99 | if __name__ == '__main__': 100 | main() 101 | -------------------------------------------------------------------------------- /js-node/dropbox-content-hasher.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | /** 4 | * Computes a hash using the same algorithm that the Dropbox API uses for the 5 | * the "content_hash" metadata field. 6 | * 7 | * The `digest()` method returns a raw binary representation of the hash. 8 | * The "content_hash" field in the Dropbox API is a hexadecimal-encoded version 9 | * of the digest. 10 | * 11 | * Example: 12 | * 13 | * const fs = require('fs'); 14 | * const dch = require('dropbox-content-hasher'); 15 | * 16 | * const hasher = dch.create(); 17 | * const f = fs.createReadStream('some-file'); 18 | * f.on('data', function(buf) { 19 | * hasher.update(buf); 20 | * }); 21 | * f.on('end', function(err) { 22 | * const hexDigest = hasher.digest('hex'); 23 | * console.log(hexDigest); 24 | * }); 25 | * f.on('error', function(err) { 26 | * console.error("Error reading from file: " + err); 27 | * process.exit(1); 28 | * }); 29 | */ 30 | 31 | const crypto = require('crypto'); 32 | 33 | const BLOCK_SIZE = 4 * 1024 * 1024; 34 | 35 | function DropboxContentHasher(overallHasher, blockHasher, blockPos) { 36 | this._overallHasher = overallHasher 37 | this._blockHasher = blockHasher 38 | this._blockPos = blockPos 39 | } 40 | 41 | DropboxContentHasher.prototype.update = function(data, inputEncoding) { 42 | if (this._overallHasher === null) { 43 | throw new AssertionError( 44 | "can't use this object anymore; you already called digest()"); 45 | } 46 | 47 | if (!Buffer.isBuffer(data)) { 48 | if (inputEncoding !== undefined && 49 | 
inputEncoding !== 'utf8' && inputEncoding !== 'ascii' && inputEncoding !== 'latin1') { 50 | // The docs for the standard hashers say they only accept these three encodings. 51 | throw new Error("Invalid 'inputEncoding': " + JSON.stringify(inputEncoding)); 52 | } 53 | data = Buffer.from(data, inputEncoding); 54 | } 55 | 56 | let offset = 0; 57 | while (offset < data.length) { 58 | if (this._blockPos === BLOCK_SIZE) { 59 | this._overallHasher.update(this._blockHasher.digest()); 60 | this._blockHasher = crypto.createHash('sha256'); 61 | this._blockPos = 0; 62 | } 63 | 64 | let spaceInBlock = BLOCK_SIZE - this._blockPos; 65 | let inputPartEnd = Math.min(data.length, offset+spaceInBlock); 66 | let inputPartLength = inputPartEnd - offset; 67 | this._blockHasher.update(data.slice(offset, inputPartEnd)); 68 | 69 | this._blockPos += inputPartLength; 70 | offset = inputPartEnd; 71 | } 72 | } 73 | 74 | DropboxContentHasher.prototype.digest = function(encoding) { 75 | if (this._overallHasher === null) { 76 | throw new AssertionError( 77 | "can't use this object anymore; you already called digest()"); 78 | } 79 | 80 | if (this._blockPos > 0) { 81 | this._overallHasher.update(this._blockHasher.digest()); 82 | this._blockHasher = null; 83 | } 84 | let r = this._overallHasher.digest(encoding); 85 | this._overallHasher = null; // Make sure we can't use this object anymore. 86 | return r; 87 | } 88 | 89 | exports.BLOCK_SIZE = BLOCK_SIZE; 90 | exports.create = function() { 91 | return new DropboxContentHasher(crypto.createHash('sha256'), crypto.createHash('sha256'), 0); 92 | } 93 | -------------------------------------------------------------------------------- /csharp/src/DropboxContentHasher.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Security.Cryptography; 3 | 4 | /// 5 | /// Computes a hash using the same algorithm that the Dropbox API uses for the 6 | /// the "content_hash" metadata field. 
7 | /// 8 | /// 9 | /// 10 | /// The {@link #digest()} method returns a raw binary representation of the hash. 11 | /// The "content_hash" field in the Dropbox API is a hexadecimal-encoded version 12 | /// of the digest. 13 | /// 14 | /// 15 | /// 16 | /// var hasher = new DropboxContentHasher(); 17 | /// byte[] buf = new byte[1024]; 18 | /// using (var file = File.OpenRead("some-file")) 19 | /// { 20 | /// while (true) 21 | /// { 22 | /// int n = file.Read(buf, 0, buf.Length); 23 | /// if (n <= 0) break; // EOF 24 | /// hasher.TransformBlock(buf, 0, n, buf, 0); 25 | /// } 26 | /// } 27 | /// 28 | /// hasher.TransformFinalBlock(Array.Empty(), 0, 0); 29 | /// string hexHash = DropboxContentHasher.ToHex(hasher.Hash); 30 | /// Console.WriteLine(hexHash); 31 | /// 32 | public class DropboxContentHasher : HashAlgorithm 33 | { 34 | private SHA256 overallHasher; 35 | private SHA256 blockHasher; 36 | private int blockPos = 0; 37 | 38 | public const int BLOCK_SIZE = 4 * 1024 * 1024; 39 | 40 | public DropboxContentHasher() : this(SHA256.Create(), SHA256.Create(), 0) {} 41 | 42 | public DropboxContentHasher(SHA256 overallHasher, SHA256 blockHasher, int blockPos) 43 | { 44 | this.overallHasher = overallHasher; 45 | this.blockHasher = blockHasher; 46 | this.blockPos = blockPos; 47 | } 48 | 49 | public override int HashSize { get { return overallHasher.HashSize; } } 50 | 51 | protected override void HashCore(byte[] input, int offset, int len) 52 | { 53 | int inputEnd = offset + len; 54 | while (offset < inputEnd) { 55 | if (blockPos == BLOCK_SIZE) { 56 | FinishBlock(); 57 | } 58 | 59 | int spaceInBlock = BLOCK_SIZE - this.blockPos; 60 | int inputPartEnd = Math.Min(inputEnd, offset+spaceInBlock); 61 | int inputPartLength = inputPartEnd - offset; 62 | blockHasher.TransformBlock(input, offset, inputPartLength, input, offset); 63 | 64 | blockPos += inputPartLength; 65 | offset += inputPartLength; 66 | } 67 | } 68 | 69 | protected override byte[] HashFinal() 70 | { 71 | if (blockPos 
> 0) { 72 | FinishBlock(); 73 | } 74 | overallHasher.TransformFinalBlock(Array.Empty(), 0, 0); 75 | return overallHasher.Hash; 76 | } 77 | 78 | public override void Initialize() 79 | { 80 | blockHasher.Initialize(); 81 | overallHasher.Initialize(); 82 | blockPos = 0; 83 | } 84 | 85 | private void FinishBlock() 86 | { 87 | blockHasher.TransformFinalBlock(Array.Empty(), 0, 0); 88 | byte[] blockHash = blockHasher.Hash; 89 | blockHasher.Initialize(); 90 | 91 | overallHasher.TransformBlock(blockHash, 0, blockHash.Length, blockHash, 0); 92 | blockPos = 0; 93 | } 94 | 95 | private const string HEX_DIGITS = "0123456789abcdef"; 96 | 97 | /// 98 | /// A convenience method to convert a byte array into a hexadecimal string. 99 | /// 100 | public static string ToHex(byte[] data) 101 | { 102 | var r = new System.Text.StringBuilder(); 103 | foreach (byte b in data) { 104 | r.Append(HEX_DIGITS[(b >> 4)]); 105 | r.Append(HEX_DIGITS[(b & 0xF)]); 106 | } 107 | return r.ToString(); 108 | } 109 | } -------------------------------------------------------------------------------- /csharp/src/RunTests.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Security.Cryptography; 3 | 4 | /// 5 | /// A command-line tool that runs the DropboxContentHasher tests. 
6 | /// 7 | public class RunTests 8 | { 9 | public static void SubMain(String[] args) 10 | { 11 | if (args.Length > 0) { 12 | Console.WriteLine("The run-tests sub-command expects zero arguments; got " + args.Length + "."); 13 | Environment.Exit(1); return; 14 | } 15 | 16 | int B = DropboxContentHasher.BLOCK_SIZE; 17 | 18 | int[][] tests = { 19 | new[] {0}, 20 | new[] {100}, 21 | new[] {100, 10}, 22 | new[] {B-1}, 23 | new[] {B}, 24 | new[] {B+1}, 25 | 26 | new[] {B-2, 1}, 27 | new[] {B-2, 2}, 28 | new[] {B-2, 3}, 29 | 30 | new[] {B-2, B+1}, 31 | new[] {B-2, B+2}, 32 | new[] {B-2, B+3}, 33 | 34 | new[] {5, 5, 5}, 35 | new[] {5, 5, 5, B}, 36 | new[] {5, 5, 5, 3*B}, 37 | new[] {5, 5, 5, 3*B, 5, 5, 5, 3*B}, 38 | }; 39 | 40 | int longestLength = 0; 41 | foreach (var test in tests) { 42 | longestLength = Math.Max(longestLength, Sum(test)); 43 | } 44 | 45 | Console.WriteLine("generating random data"); 46 | byte[] data = new byte[longestLength]; 47 | new Random(0).NextBytes(data); 48 | 49 | foreach (var test in tests) { 50 | bool passed = Check(data, test); 51 | if (!passed) { 52 | Environment.Exit(1); return; 53 | } 54 | } 55 | 56 | Console.WriteLine("all passed"); 57 | } 58 | 59 | 60 | /// 61 | /// A simple implementation, used solely to test the more complicated one. 
62 | /// 63 | public static byte[] ReferenceHasher(byte[] input, int length) 64 | { 65 | int offset = 0; 66 | int remaining = length; 67 | 68 | var overallHasher = SHA256.Create(); 69 | var blockHasher = SHA256.Create(); 70 | 71 | while (remaining > 0) { 72 | int partSize = Math.Min(DropboxContentHasher.BLOCK_SIZE, remaining); 73 | blockHasher.TransformFinalBlock((byte[])input.Clone(), offset, partSize); 74 | byte[] d = blockHasher.Hash; 75 | blockHasher.Initialize(); 76 | overallHasher.TransformBlock(d, 0, d.Length, d, 0); 77 | 78 | remaining -= partSize; 79 | offset += partSize; 80 | } 81 | 82 | overallHasher.TransformFinalBlock(Array.Empty(), 0, 0); 83 | return overallHasher.Hash; 84 | } 85 | 86 | public static bool Check(byte[] data, int[] chunkLengths) 87 | { 88 | Console.WriteLine("checking [{0}]", string.Join(", ", chunkLengths)); 89 | 90 | var hasher = new DropboxContentHasher(); 91 | int totalLength = Sum(chunkLengths); 92 | 93 | int offset = 0; 94 | foreach (int chunkLength in chunkLengths) { 95 | hasher.TransformBlock(data, offset, chunkLength, data, offset); 96 | offset += chunkLength; 97 | } 98 | 99 | hasher.TransformFinalBlock(Array.Empty(), 0, 0); 100 | var result = DropboxContentHasher.ToHex(hasher.Hash); 101 | var reference = DropboxContentHasher.ToHex(ReferenceHasher(data, totalLength)); 102 | 103 | bool passed = (result == reference); 104 | if (!passed) { 105 | Console.WriteLine("- FAILED: " + reference + ", " + result); 106 | } 107 | return passed; 108 | } 109 | 110 | private static int Sum(int[] a) 111 | { 112 | int sum = 0; 113 | foreach (int i in a) { 114 | sum += i; 115 | } 116 | return sum; 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /rust/Cargo.lock: -------------------------------------------------------------------------------- 1 | [root] 2 | name = "dropbox-content-hasher" 3 | version = "0.1.0" 4 | dependencies = [ 5 | "digest 0.4.0 
(registry+https://github.com/rust-lang/crates.io-index)", 6 | "generic-array 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", 7 | "rand 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", 8 | "sha2 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", 9 | ] 10 | 11 | [[package]] 12 | name = "byte-tools" 13 | version = "0.1.3" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | 16 | [[package]] 17 | name = "digest" 18 | version = "0.4.0" 19 | source = "registry+https://github.com/rust-lang/crates.io-index" 20 | dependencies = [ 21 | "generic-array 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", 22 | ] 23 | 24 | [[package]] 25 | name = "digest-buffer" 26 | version = "0.2.0" 27 | source = "registry+https://github.com/rust-lang/crates.io-index" 28 | dependencies = [ 29 | "byte-tools 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", 30 | "generic-array 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", 31 | ] 32 | 33 | [[package]] 34 | name = "fake-simd" 35 | version = "0.1.2" 36 | source = "registry+https://github.com/rust-lang/crates.io-index" 37 | 38 | [[package]] 39 | name = "generic-array" 40 | version = "0.6.0" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | dependencies = [ 43 | "nodrop 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", 44 | "typenum 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)", 45 | ] 46 | 47 | [[package]] 48 | name = "libc" 49 | version = "0.2.20" 50 | source = "registry+https://github.com/rust-lang/crates.io-index" 51 | 52 | [[package]] 53 | name = "nodrop" 54 | version = "0.1.8" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | dependencies = [ 57 | "odds 0.2.25 (registry+https://github.com/rust-lang/crates.io-index)", 58 | ] 59 | 60 | [[package]] 61 | name = "odds" 62 | version = "0.2.25" 63 | source = "registry+https://github.com/rust-lang/crates.io-index" 
64 | 65 | [[package]] 66 | name = "rand" 67 | version = "0.3.15" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | dependencies = [ 70 | "libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)", 71 | ] 72 | 73 | [[package]] 74 | name = "sha2" 75 | version = "0.4.2" 76 | source = "registry+https://github.com/rust-lang/crates.io-index" 77 | dependencies = [ 78 | "byte-tools 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", 79 | "digest 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 80 | "digest-buffer 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", 81 | "fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", 82 | "generic-array 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", 83 | ] 84 | 85 | [[package]] 86 | name = "typenum" 87 | version = "1.5.2" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | 90 | [metadata] 91 | "checksum byte-tools 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0919189ba800c7ffe8778278116b7e0de3905ab81c72abb69c85cbfef7991279" 92 | "checksum digest 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "41a0f307b67d9f0e57edc00804d3146f9f889fe8b2422825566c8e8dd2b5733c" 93 | "checksum digest-buffer 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "79472b4b47364a1f1c23122d5b5e481b4657714c61617ea91daf6f57549b5f00" 94 | "checksum fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" 95 | "checksum generic-array 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7229d82657e79be00d5f2a110a973ab5340681b945cf1bc022be7cfebf2dc00c" 96 | "checksum libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)" = "684f330624d8c3784fb9558ca46c4ce488073a8d22450415c5eb4f4cfb0d11b5" 97 | "checksum nodrop 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = 
from __future__ import absolute_import, division, print_function, unicode_literals

import hashlib


class DropboxContentHasher(object):
    """
    Computes a hash using the same algorithm that the Dropbox API uses for
    the "content_hash" metadata field.

    The data is split into blocks of BLOCK_SIZE bytes, each block is hashed
    with SHA-256, and the concatenation of the block digests is hashed with
    SHA-256 again.

    The digest() method returns a raw binary representation of the hash.  The
    hexdigest() convenience method returns a hexadecimal-encoded version, which
    is what the "content_hash" metadata field uses.

    This class has the same interface as the hashers in the standard 'hashlib'
    package, except that the object can no longer be used once digest() or
    hexdigest() has been called.

    Example:

        hasher = DropboxContentHasher()
        with open('some-file', 'rb') as f:
            while True:
                chunk = f.read(1024)  # or whatever chunk size you want
                if len(chunk) == 0:
                    break
                hasher.update(chunk)
        print(hasher.hexdigest())
    """

    BLOCK_SIZE = 4 * 1024 * 1024

    def __init__(self):
        self._overall_hasher = hashlib.sha256()
        self._block_hasher = hashlib.sha256()
        # Number of bytes fed into the current (unfinished) block.
        self._block_pos = 0

        self.digest_size = self._overall_hasher.digest_size
        # hashlib classes also define 'block_size', but I don't know how people use that value

    def update(self, new_data):
        """
        Feeds more data into the hash.

        'new_data' must be a byte string of any length; it is split across
        block boundaries as needed.
        """
        if self._overall_hasher is None:
            raise AssertionError(
                "can't use this object anymore; you already called digest() or hexdigest()")

        # 'bytes' is the binary string type on Python 3 and an alias of 'str'
        # on Python 2, so no compatibility library is needed for this check.
        assert isinstance(new_data, bytes), (
            "Expecting a byte string, got {!r}".format(new_data))

        new_data_pos = 0
        while new_data_pos < len(new_data):
            # A full block is folded into the overall hash lazily, only once
            # more data arrives, so an input that is an exact multiple of
            # BLOCK_SIZE does not produce an extra empty block.
            if self._block_pos == self.BLOCK_SIZE:
                self._overall_hasher.update(self._block_hasher.digest())
                self._block_hasher = hashlib.sha256()
                self._block_pos = 0

            space_in_block = self.BLOCK_SIZE - self._block_pos
            part = new_data[new_data_pos:(new_data_pos+space_in_block)]
            self._block_hasher.update(part)

            self._block_pos += len(part)
            new_data_pos += len(part)

    def _finish(self):
        """
        Folds the last partial block into the overall hasher, invalidates this
        object and returns the finished overall hashlib hasher.
        """
        if self._overall_hasher is None:
            raise AssertionError(
                "can't use this object anymore; you already called digest() or hexdigest()")

        if self._block_pos > 0:
            self._overall_hasher.update(self._block_hasher.digest())
            self._block_hasher = None
        h = self._overall_hasher
        self._overall_hasher = None  # Make sure we can't use this object anymore.
        return h

    def digest(self):
        """Returns the raw binary hash.  The object is unusable afterwards."""
        return self._finish().digest()

    def hexdigest(self):
        """Returns the hex-encoded hash.  The object is unusable afterwards."""
        return self._finish().hexdigest()

    def copy(self):
        """
        Returns an independent copy of this hasher in its current state, so
        that data sharing a common prefix can be hashed without re-feeding it.
        """
        if self._overall_hasher is None:
            raise AssertionError(
                "can't use this object anymore; you already called digest() or hexdigest()")

        # __init__ is bypassed to avoid creating hashers that would be thrown
        # away immediately, so every attribute it sets must be set here too.
        c = DropboxContentHasher.__new__(DropboxContentHasher)
        c._overall_hasher = self._overall_hasher.copy()
        c._block_hasher = self._block_hasher.copy()
        c._block_pos = self._block_pos
        c.digest_size = self.digest_size
        return c


class StreamHasher(object):
    """
    A wrapper around a file-like object (either for reading or writing)
    that hashes everything that passes through it.  Can be used with
    DropboxContentHasher or any 'hashlib' hasher.

    Example:

        hasher = DropboxContentHasher()
        with open('some-file', 'rb') as f:
            wrapped_f = StreamHasher(f, hasher)
            response = some_api_client.upload(wrapped_f)

        locally_computed = hasher.hexdigest()
        assert response.content_hash == locally_computed
    """

    def __init__(self, f, hasher):
        self._f = f
        self._hasher = hasher

    def close(self):
        return self._f.close()

    def flush(self):
        return self._f.flush()

    def fileno(self):
        return self._f.fileno()

    def tell(self):
        return self._f.tell()

    def read(self, *args):
        b = self._f.read(*args)
        self._hasher.update(b)
        return b

    def write(self, b):
        self._hasher.update(b)
        return self._f.write(b)

    def __iter__(self):
        return self

    def next(self):
        # The builtin next() works with both the Python 2 ('next') and the
        # Python 3 ('__next__') iterator protocol of the wrapped file.
        b = next(self._f)
        self._hasher.update(b)
        return b

    __next__ = next

    def readline(self, *args):
        b = self._f.readline(*args)
        self._hasher.update(b)
        return b

    def readlines(self, *args):
        bs = self._f.readlines(*args)
        for b in bs:
            self._hasher.update(b)
        # Return the whole list of lines, not just the last one.
        return bs
-------------------------------------------------------------------------------- 1 | import java.nio.ByteBuffer; 2 | import java.security.MessageDigest; 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.Random; 6 | 7 | /** 8 | * A command-line tool that runs the DropboxContentHasher tests. 9 | */ 10 | public class RunTests 11 | { 12 | /** 13 | * A simple implementation, used solely to test the more complicated one. 14 | */ 15 | public static byte[] referenceHasher(byte[] input, int length) 16 | { 17 | int offset = 0; 18 | int remaining = length; 19 | 20 | MessageDigest overallHasher = DropboxContentHasher.newSha256Hasher(); 21 | MessageDigest blockHasher = DropboxContentHasher.newSha256Hasher(); 22 | 23 | while (remaining > 0) { 24 | int partSize = Math.min(DropboxContentHasher.BLOCK_SIZE, remaining); 25 | blockHasher.update(input, offset, partSize); 26 | byte[] d = blockHasher.digest(); 27 | overallHasher.update(d); 28 | 29 | remaining -= partSize; 30 | offset += partSize; 31 | } 32 | 33 | return overallHasher.digest(); 34 | } 35 | 36 | public static boolean check(byte[] data, int[] chunkLengths) 37 | { 38 | System.out.println("checking " + Arrays.toString(chunkLengths)); 39 | 40 | MessageDigest byteArrayHasher = new DropboxContentHasher(); 41 | MessageDigest byteBufferHasher = new DropboxContentHasher(); 42 | MessageDigest byteHasher = new DropboxContentHasher(); 43 | 44 | ArrayList clones = new ArrayList(); 45 | 46 | int totalLength = 0; 47 | for (int chunkLength : chunkLengths) { 48 | totalLength += chunkLength; 49 | } 50 | 51 | int offset = 0; 52 | for (int chunkLength : chunkLengths) { 53 | clones.add(clone(byteArrayHasher)); 54 | 55 | byteArrayHasher.update(data, offset, chunkLength); 56 | byteBufferHasher.update(ByteBuffer.wrap(data, offset, chunkLength)); 57 | for (int i = 0; i < chunkLength; i++) { 58 | byteHasher.update(data[offset+i]); 59 | } 60 | 61 | for (MessageDigest clone : clones) { 62 | clone.update(data, offset, 
chunkLength); 63 | } 64 | 65 | offset += chunkLength; 66 | } 67 | 68 | ArrayList allDigests = new ArrayList(); 69 | allDigests.add(byteArrayHasher); 70 | allDigests.add(byteBufferHasher); 71 | allDigests.add(byteHasher); 72 | allDigests.addAll(clones); 73 | 74 | String reference = hex(referenceHasher(data, totalLength)); 75 | boolean passed = true; 76 | 77 | ArrayList results = new ArrayList(); 78 | for (MessageDigest digest : allDigests) { 79 | String result = hex(digest.digest()); 80 | results.add(result); 81 | if (!result.equals(reference)) { 82 | passed = false; 83 | } 84 | } 85 | 86 | if (!passed) { 87 | System.out.println("- FAILED: " + reference + ", " + results); 88 | } 89 | 90 | return passed; 91 | } 92 | 93 | public static void main(String[] args) 94 | { 95 | if (args.length > 0) { 96 | System.err.println("No arguments expected; got " + args.length + "."); 97 | System.exit(1); return; 98 | } 99 | 100 | int B = DropboxContentHasher.BLOCK_SIZE; 101 | 102 | int[][] tests = { 103 | {0}, 104 | {100}, 105 | {100, 10}, 106 | {B-1}, 107 | {B}, 108 | {B+1}, 109 | 110 | {B-2, 1}, 111 | {B-2, 2}, 112 | {B-2, 3}, 113 | 114 | {B-2, B+1}, 115 | {B-2, B+2}, 116 | {B-2, B+3}, 117 | 118 | {5, 5, 5}, 119 | {5, 5, 5, B}, 120 | {5, 5, 5, 3*B}, 121 | {5, 5, 5, 3*B, 5, 5, 5, 3*B}, 122 | }; 123 | 124 | int longestLength = 0; 125 | for (int[] test : tests) { 126 | longestLength = Math.max(longestLength, sum(test)); 127 | } 128 | 129 | System.out.println("generating random data"); 130 | byte[] data = new byte[longestLength]; 131 | new Random(0).nextBytes(data); 132 | 133 | for (int[] test : tests) { 134 | boolean passed = check(data, test); 135 | if (!passed) { 136 | System.exit(1); return; 137 | } 138 | } 139 | 140 | System.out.println("all passed"); 141 | } 142 | 143 | private static int sum(int[] a) 144 | { 145 | int sum = 0; 146 | for (int i : a) { 147 | sum += i; 148 | } 149 | return sum; 150 | } 151 | 152 | public static MessageDigest clone(MessageDigest v) 153 | { 154 | 
try { 155 | return (MessageDigest) v.clone(); 156 | } catch (CloneNotSupportedException ex) { 157 | throw new AssertionError("Couldn't clone()", ex); 158 | } 159 | } 160 | 161 | static final char[] HEX_DIGITS = new char[]{ 162 | '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 163 | 'a', 'b', 'c', 'd', 'e', 'f'}; 164 | 165 | public static String hex(byte[] data) 166 | { 167 | char[] buf = new char[2*data.length]; 168 | int i = 0; 169 | for (byte b : data) { 170 | buf[i++] = HEX_DIGITS[(b & 0xf0) >>> 4]; 171 | buf[i++] = HEX_DIGITS[b & 0x0f]; 172 | } 173 | return new String(buf); 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /java/src/DropboxContentHasher.java: -------------------------------------------------------------------------------- 1 | import java.nio.ByteBuffer; 2 | import java.security.DigestException; 3 | import java.security.MessageDigest; 4 | import java.security.NoSuchAlgorithmException; 5 | 6 | /** 7 | * Computes a hash using the same algorithm that the Dropbox API uses for the 8 | * the "content_hash" metadata field. 9 | * 10 | *

11 | * The {@link #digest()} method returns a raw binary representation of the hash. 12 | * The "content_hash" field in the Dropbox API is a hexadecimal-encoded version 13 | * of the digest. 14 | *

15 | * 16 | *

17 | * Example: 18 | *

19 | * 20 | *
 21 |  * MessageDigest hasher = new DropboxContentHasher();
 22 |  * byte[] buf = new byte[1024];
 23 |  * InputStream in = new FileInputStream("some-file");
 24 |  * try {
 25 |  *     while (true) {
 26 |  *         int n = in.read(buf);
 27 |  *         if (n < 0) break;  // EOF
 28 |  *         hasher.update(buf, 0, n);
 29 |  *     }
 30 |  * }
 31 |  * finally {
 32 |  *     in.close();
 33 |  * }
 34 |  *
 35 |  * byte[] rawHash = hasher.digest();
 36 |  * System.out.println(hex(rawHash));
 37 |  *     // Assuming 'hex' is a method that converts a byte[] to
 38 |  *     // a hexadecimal-encoded String
 39 |  * 
40 | * 41 | *

42 | * If you need to hash something as it passes through a stream, you can use the 43 | * {@link java.security.DigestInputStream} or {@code java.security.DigestOutputStream} helpers. 44 | *

45 | * 46 | *
 47 |  * MessageDigest hasher = new DropboxContentHasher();
 48 |  * InputStream in = new FileInputStream("some-file");
 49 |  * UploadResponse r;
 50 |  * try {
 51 |  *     r = someApiClient.upload(new DigestInputStream(in, hasher)));
 52 |  * }
 53 |  * finally {
 54 |  *     in.close();
 55 |  * }
 56 |  *
 57 |  * String locallyComputed = hex(hasher.digest());
 58 |  * assert r.contentHash.equals(locallyComputed);
 59 |  * 
60 | */ 61 | public final class DropboxContentHasher extends MessageDigest implements Cloneable 62 | { 63 | private MessageDigest overallHasher; 64 | private MessageDigest blockHasher; 65 | private int blockPos = 0; 66 | 67 | public static final int BLOCK_SIZE = 4 * 1024 * 1024; 68 | 69 | public DropboxContentHasher() 70 | { 71 | this(newSha256Hasher(), newSha256Hasher(), 0); 72 | } 73 | 74 | private DropboxContentHasher(MessageDigest overallHasher, MessageDigest blockHasher, int blockPos) 75 | { 76 | super("Dropbox-Content-Hash"); 77 | this.overallHasher = overallHasher; 78 | this.blockHasher = blockHasher; 79 | this.blockPos = blockPos; 80 | } 81 | 82 | @Override 83 | protected void engineUpdate(byte input) 84 | { 85 | finishBlockIfFull(); 86 | 87 | blockHasher.update(input); 88 | blockPos += 1; 89 | } 90 | 91 | @Override 92 | protected int engineGetDigestLength() 93 | { 94 | return overallHasher.getDigestLength(); 95 | } 96 | 97 | @Override 98 | protected void engineUpdate(byte[] input, int offset, int len) 99 | { 100 | int inputEnd = offset + len; 101 | while (offset < inputEnd) { 102 | finishBlockIfFull(); 103 | 104 | int spaceInBlock = BLOCK_SIZE - this.blockPos; 105 | int inputPartEnd = Math.min(inputEnd, offset+spaceInBlock); 106 | int inputPartLength = inputPartEnd - offset; 107 | blockHasher.update(input, offset, inputPartLength); 108 | 109 | blockPos += inputPartLength; 110 | offset += inputPartLength; 111 | } 112 | } 113 | 114 | @Override 115 | protected void engineUpdate(ByteBuffer input) 116 | { 117 | int inputEnd = input.limit(); 118 | while (input.position() < inputEnd) { 119 | finishBlockIfFull(); 120 | 121 | int spaceInBlock = BLOCK_SIZE - this.blockPos; 122 | int inputPartEnd = Math.min(inputEnd, input.position()+spaceInBlock); 123 | int inputPartLength = inputPartEnd - input.position(); 124 | input.limit(inputPartEnd); 125 | blockHasher.update(input); 126 | 127 | blockPos += inputPartLength; 128 | input.position(inputPartEnd); 129 | } 130 | } 
131 | 132 | @Override 133 | protected byte[] engineDigest() 134 | { 135 | finishBlockIfNonEmpty(); 136 | return overallHasher.digest(); 137 | } 138 | 139 | @Override 140 | protected int engineDigest(byte[] buf, int offset, int len) 141 | throws DigestException 142 | { 143 | finishBlockIfNonEmpty(); 144 | return overallHasher.digest(buf, offset, len); 145 | } 146 | 147 | @Override 148 | protected void engineReset() 149 | { 150 | this.overallHasher.reset(); 151 | this.blockHasher.reset(); 152 | this.blockPos = 0; 153 | } 154 | 155 | @Override 156 | public DropboxContentHasher clone() 157 | throws CloneNotSupportedException 158 | { 159 | DropboxContentHasher clone = (DropboxContentHasher) super.clone(); 160 | clone.overallHasher = (MessageDigest) clone.overallHasher.clone(); 161 | clone.blockHasher = (MessageDigest) clone.blockHasher.clone(); 162 | return clone; 163 | } 164 | 165 | private void finishBlock() 166 | { 167 | overallHasher.update(blockHasher.digest()); 168 | blockPos = 0; 169 | } 170 | 171 | private void finishBlockIfFull() 172 | { 173 | if (blockPos == BLOCK_SIZE) { 174 | finishBlock(); 175 | } 176 | } 177 | 178 | private void finishBlockIfNonEmpty() 179 | { 180 | if (blockPos > 0) { 181 | finishBlock(); 182 | } 183 | } 184 | 185 | static MessageDigest newSha256Hasher() 186 | { 187 | try { 188 | return MessageDigest.getInstance("SHA-256"); 189 | } 190 | catch (NoSuchAlgorithmException ex) { 191 | throw new AssertionError("Couldn't create SHA-256 hasher"); 192 | } 193 | } 194 | } 195 | --------------------------------------------------------------------------------