├── .gitignore ├── .travis.yml ├── README.md ├── dedoop.py ├── setup.cfg ├── setup.py ├── test-data ├── .keep ├── a.jpg ├── a.png ├── b.jpg └── c │ ├── a.jpg │ └── b.jpg └── test_dedoop.py /.gitignore: -------------------------------------------------------------------------------- 1 | test-data-deduped 2 | *.pyc 3 | .pytest_cache 4 | *.log 5 | .eggs 6 | dedoop.egg-info 7 | .env 8 | Pipfile* 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | branches: 3 | only: 4 | - master 5 | python: 6 | - '3.7' 7 | install: 8 | - python setup.py install 9 | script: 10 | - python setup.py test 11 | after_failure: 12 | - cat test.log 13 | env: 14 | global: 15 | - secure: Eav7dkMkbtXMC/1zC1PprJRlmiJI8wdXXUjIptUNPFDNJJuHsZowcMQilJNYrnkpuKkNRcB7djg3Y9d3cfR7HMp/d+AXouKzhMKHIWlIfTeb9p9RWtgwZudY7eilaE/gW90KUE3ePYZNNTMlcFOfAohFlCJspXx9yEC/N35KyxiCgoqq3Lo/5qi1+Cr3/LigCMfPmGgBL9hfVm7QPO6mOwq27MoNpY+kdpJfSzK+PpvszQYzpaTm1/Ld/yPjzDhT/nvlXNglCdVNKSC2+SgFB7mHDZMYVXnUr4V51PXqV4dpMngdSY6y+Ls1xv5V/R3uEcGuZme42stPpLHFg78oS6o2zMKiT6fNDGgM+O8yAc6uE14yoICHsBhjQ5vwt64LHYLVXQNtoZ91MGXSPTIe9j0CileBD59Z4eJyMO8y4NcHQSmpN+fu9DNa8Ibs9zveyd8OtoOOXSvt/+f/AodDbGGrXObdO7nmZKoRPdOas3qPDzv3hJcmgIt4HPRfUgffcjuFz8c1UllVUfKxWarwNl2+t3p7Livw/H9evq4B8Ni6wN7WGrxkjB5lacgCDT/kW66FRSiU61HXwuBmMYpMEugZbd/9PXQwlHJH1wMeYDJO1101NP+9PUOGLOuqxe1ahWoAV1FS1//oMfWWOMuRDIAaxBYZpWbPcYVlgrjkifw= 16 | - secure: 
adCFz/oIQPfSDniqqJkmtf/NULezEiTsjYHXnC51dHrczJ1DAUwgF27bSNyU7Ld3gIKISzPldgpW8aqyErAmz5O5q/D/s+PZMZYHiG0WGnT6Q+KV6w6QPjlJtr1qetQQXHcK2QAuSQC53UJNrYGhBliCQw8ZR4M3SAj4brFqOXgAWNo1Mxp3EaqxhoN6D+fscXzxylj6ykbX9kxbiRgq/d31+o+99P/t1vv5GuJzIPJQAvbEaDyz7kzU7LK//85SePrWADa4Xe0kYOSU7QSEM6m0kSB5/oX9izBJoM1tP6QlIszJs/IGEXGjVRCqM4Mrw94MFSpJPrJsGueSXc2qvS0ZzXmkh1c6vE5uubpl+clMC2+MSFx4Na6VCKszVeUPa6jlxMd3Xo1336DzfnWn2LDlJy0a7Fz/pFzG38ohRNgLymOBEVp6XykxMEcyiNbQLDuvl7Wdu4DO6FijKe+EkT7Iz+1C4d2GcnEzgAcINEWqT1odKB8syR2z2FJTpRtFOP8jMAH0ToQ/STgf6hB1tuaDi3AqDLdu/wlDtfeX3LsMvEJOi7NsB3T0F1jOE00LdbhfQxJG+VmAIT2te/B0VpyMZV8prrlAWshZKgfM6U4/TvLHR1CsEaBSDh5pome1GpkP7nSQJGG/UGtrX/HeQQFfmxtxr2w/9IBjf1oLRpk= 17 | - secure: dnNWkgQBkBWacGkOmgtrFOaBqotyylXX4TyRRn+JkDHOszZhDerupyKWhUFCnPTicE9cIeplJLq27cPR7yW31Jg6p61V+m6Pdoz2aPw01hZk5IHnOHF5jTr0SD9EcgHxPPGBAytDpWjVE1DpK7UvBohCtiIBEeh1QE6Ny6CdDd4tZ9wo3YTZCPW36vUUg9yEpPyxargx9jRaqpGNjDhbTHCH3D2nrjbUwn+SkMuBrwZ1lPxpXK++uLufeH0VAEetKxvj6fmqXQhhtgf9L72IYPUeK41uMqgzGrY5z74PaWzWHThlIHTAVbaB9rVadDl7Di9Mkdi6qEQ1FYEmFVicpkwR9iutddwnFtgOD+bacihe3iYBb0weQIMELWOMGpWP69WVRIwSH7a+ZVDPpqHQ4mTVD+S0DrFm3OzDUlRR5elV0n73SGmC9v4l6PIKO8hKci8Xm4vS9cNnY4om9+I6AA/Ce4CKi5R/Ul91MfxG+lRCx3TG2Qaa7unE7zNhYLWO7qXpdbbBdYtrwT/dFkdqJCvBmTnwHqhM3iOzeTKrwPeUtcAdztSPqKP1vw9UlGDh4B+bW0jC2AyU/ZBTD5RltSswARXKxFdmLLVQUYMP9jARjUZL9MgcicDxD90CnChsI11OhtjRvOPqg2rQibmeHxBZVPCtl7BtQ+Qhgw2uJLw= 18 | - secure: 
D5xIYYjRfK27J6e4eZ5vEHqt99fx5yOZIbkpTlB5ZP05qm8tUem2lOeMAh/Y2rLqwA8fSz1rpYVUeJYQMTosQqIKxeh+rWtg9OJVM0EoGXx1Sz99aAwbJ2ME3TPPe8OmCZ5mGDDCyf+0WmcvcUQfYJPu72PdyskYC8c9lEeQ3vtDk5J5YMhbbrs/7I3BGo4Few0/fJwClO2YqKZSzLrKddVKhn/HUPXdIRSIRa3nhyjM28gZEAr9q3kTG7OOGUHRAG29+SkMbYZxePzrVrnJ83B4DYUVIRZiYworc2cTFdCor7IcE4M3YiUK2PchCDybzjJkyMnRqzBI//dlddkioomfxUHpqGnfB7xwB6wfcbMFQTMJukX5OFpI9FSI8tMmAqhmciEShjacrqL5clahsds8TEB4CnbPBlFUDig08nl/zLL89LAUf/Awz7hVtla8wfXWfzt2WtjojsSLpzGhHVt9/88EfndMYLUuxYkLU0rNdM2L7ndJeseAlqIX94Lc1Kfy1Suika7N2oK9IJUUa75nrvopeW7Af76KC7nLKk+29kbysql2YTxrOaQNufDoyeNy8nMupTzMrdGFWTXc8AvN2qi6PjPVnTGZqDaC2Frsv4bgqDH5m8adxPCLz0C8RH40ZffpAfEMQY93pwp3UzTZjOOFm8vHX+k92o19/e8= 19 | - secure: A66JyEkLJVAMejFMH5G+PJaVfAHoZHDyPNoEebWpyudi2ok+dx2c2fnnnUS1c7DzZfh5MN6SyETsTtdkgWt4kbTmZZe1Q+7iOaZO6eXGgjsMJiLKZk4uZTt6c325W+mTQgpiFWJcW39qckR+NFCt+qPSy2BD8Fpq5zxLeNGJj814lOO/kT/pZntpMCJl9DXMLnPQlbpJET9RZ4rzvvzXyPa5xzk1ZhgPUsGjlxIfsc+lrNat4eMMCLKqxFo+jHaQ7qK3nsSlA7Jss19pP0ulvSvQ9YUuc30y/gomT31xUgS3PLS7p5EVGrdAadU7C9th1YPkBikgFd2RJ2Vay0bJktKdgVvL2TahDWB7cte/f45W0EX7Fxsyu3f7n2zGalEuF7hO6lW3p2bp7m5qx90tsR1Ye7l47XNSEy79xPNn1ZnwvwljeNbFrcrsqGi5+EiV+uNNztXLxza7Gdxm/6FQ/ihcbwM2cdvxpVGqm1RiqsYeAD1ZzAUskozmuEop62dRm79u2lMPmdpFxu/LjOIEianSm5U6PJydeeZmQGVBdE7/X7GPYYvKTEMYA2uXaSumz8bRGv37q85ycg1Zdq9mvmfNTSi/x38fXIT8mAVavEz7tF+hxhBM3t5RhmivAdC543aM9yNDns1e5/9UcglEEzr+/VgLdliuKn3uyC3l740= 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## dedoop 2 | 3 | [![Build Status](https://secure.travis-ci.org/edsu/dedoop.png)](http://travis-ci.org/edsu/dedoop) 4 | 5 | In [digital preservation] work you sometimes may find yourself accepting a disk 6 | or random assortment of files, and want to examine all of them looking for 7 | duplicates and copy them to a new location in a uniform way, while preserving 8 | the original paths as metadata to help you process the data. 
Ok, maybe this is a 9 | bit of a niche use case, but this is what *dedoop* was created for. 10 | 11 | *dedoop* will recursively read a source directory of files and write them out to 12 | a new target directory or bucket in the cloud using the file's SHA256 checksum 13 | as the filename. If a given file occurs more than once in the source 14 | directory it will only be written once to the target location. File metadata 15 | such as the media type and original file name will be persisted in a JSON file 16 | that is output at the end of the process. In the case of writing to the cloud, 17 | object metadata will be used to store this information. 18 | 19 | ## Install 20 | 21 | Install Python 3 and: 22 | 23 | ``` 24 | % pip3 install dedoop 25 | ``` 26 | 27 | ## Usage 28 | 29 | ### Add to Storage 30 | 31 | To add a directory of data to the storage location you can: 32 | 33 | % dedoop add path/to/source path/to/target 34 | 35 | So for example if the source directory looks like this: 36 | 37 | source 38 | ├── a.jpg 39 | ├── a.png 40 | ├── b.jpg 41 | └── c 42 |    ├── a.jpg 43 |    └── b.jpg 44 | 45 | The resulting target could look like this (assuming the files of the same name 46 | had the same contents that hashed to these values): 47 | 48 | target 49 | ├── 1e89b90b5973baad2e6c3294ffe648ff53ab0b9d75188e9fbb8b38deb9ba3341.png 50 | ├── 45d257c93e59ec35187c6a34c8e62e72c3e9cfbb548984d6f6e8deb84bac41f4.jpg 51 | └── b6df8058fa818acfd91759edffa27e473f2308d5a6fca1e07a79189b95879953.jpg 52 | 53 | ## Add to the Cloud 54 | 55 | You can also write files to any cloud storage provider that is [supported] by [libcloud], 56 | such as Amazon S3, Google Cloud Storage, etc. 57 | 58 | ## Limit by File Extension 59 | 60 | If you like you can limit the types of files that are added by using the 61 | *--extensions* command line option and giving it a comma-separated list of file 62 | extensions to include. All non-matching files (case insensitive) will be 63 | ignored. 
64 | 65 | % dedoop add --extensions jpg,png path/to/source path/to/target 66 | 67 | ## List Cloud Files 68 | 69 | Its easy to list files on the file system. But its more difficult to see what's 70 | in the cloud--especially with the metadata dedoop has attached to each object. 71 | The *list* command will do that for you. 72 | 73 | % dedoop ls s3://my-storage-location/ 74 | 75 | ## Logging 76 | 77 | If you use *--verbose* you will see log messages on the console about what is 78 | happening. You can optionally send these messages to a log file of your choosing 79 | using the *--log* option. 80 | 81 | [digital preservation]: https://en.wikipedia.org/wiki/Digital_preservation 82 | [libcloud]: https://libcloud.readthedocs.io 83 | [supported]: https://libcloud.readthedocs.io/en/stable/storage/supported_providers.html 84 | -------------------------------------------------------------------------------- /dedoop.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import re 5 | import csv 6 | import json 7 | import click 8 | import shutil 9 | import hashlib 10 | import logging 11 | import optparse 12 | 13 | from urllib.parse import urlparse 14 | from libcloud.storage.types import Provider, ContainerDoesNotExistError 15 | from libcloud.storage.providers import get_driver 16 | 17 | STORAGE_PROVIDERS = { 18 | 's3': Provider.S3, 19 | 'gs': Provider.GOOGLE_STORAGE 20 | } 21 | 22 | @click.group() 23 | @click.option('--debug/--no-debug', default=False) 24 | def cli(debug): 25 | click.echo('Debug mode is %s' % ('on' if debug else 'off')) 26 | 27 | @cli.command() 28 | def add(input_dir, output_dir): 29 | input_dir, output_dir = args 30 | db = Deduper() 31 | db.read(input_dir, extensions=opts.extensions, dotfiles=opts.dotfiles) 32 | db.write(output_dir) 33 | 34 | 35 | class Deduper(): 36 | 37 | def __init__(self, key=None, secret=None): 38 | self.db = {} 39 | self.key = key 40 | self.secret = secret 41 
| 42 | def read(self, in_dir, extensions=[], dotfiles=False): 43 | self.db = {} 44 | extensions = [e.lower().strip('.') for e in extensions] 45 | for dirpath, dirnames, filenames in os.walk(in_dir): 46 | for filename in filenames: 47 | path = os.path.join(dirpath, filename) 48 | 49 | if filename.startswith('.') and not dotfiles: 50 | logging.info('ignoring dot file: %s', path) 51 | continue 52 | 53 | name, ext = os.path.splitext(path) 54 | if extensions and ext.lower().strip('.') not in extensions: 55 | logging.info('ignoring %s', path) 56 | continue 57 | 58 | self.add(path) 59 | 60 | def write(self, dest): 61 | uri = urlparse(dest) 62 | if uri.scheme in STORAGE_PROVIDERS.keys(): 63 | self.write_cloud(dest) 64 | else: 65 | self.write_fs(dest) 66 | 67 | def write_fs(self, out_dir): 68 | if not os.path.isdir(out_dir): 69 | logging.info('creating output directory %s', out_dir) 70 | os.makedirs(out_dir) 71 | 72 | num_digits = len(str(len(self.db.keys()))) 73 | 74 | for sha256, meta in self.items(): 75 | src = meta['paths'][0] 76 | filename, ext = os.path.splitext(src) 77 | ext = ext.lower() 78 | 79 | # if it doesn't look like an extension don't use it 80 | if not re.match(r'^[.][a-z0-9]+$', ext): 81 | ext = '' 82 | 83 | dst = os.path.join(out_dir, sha256 + ext) 84 | shutil.copyfile(src, dst) 85 | meta['path'] = dst.replace(out_dir + os.sep, '') 86 | logging.info('copied %s to %s', src, dst) 87 | 88 | def write_cloud(self, container_uri): 89 | container = self.get_container(container_uri) 90 | storage = container.driver 91 | 92 | for sha256, meta in self.items(): 93 | src = meta['paths'][0] 94 | object_name = sha256 95 | storage.upload_object(src, container, object_name) 96 | logging.info('copied %s to %s/%s', src, container, object_name) 97 | 98 | def add(self, path): 99 | sha256 = get_sha256(path) 100 | if sha256 in self.db: 101 | logging.warning('found duplicate %s', path) 102 | self.db[sha256]['paths'].append(path) 103 | else: 104 | self.db[sha256] = {'paths': 
[path], 'sha256': sha256} 105 | 106 | def items(self): 107 | keys = sorted(self.db.keys()) 108 | for key in keys: 109 | yield key, self.db[key] 110 | 111 | def json(self): 112 | data = {'items': []} 113 | for sha256, meta in self.items(): 114 | data['items'].append({ 115 | 'path': meta['path'], 116 | 'sha256': meta['sha256'], 117 | 'original_paths': meta['paths'], 118 | }) 119 | return data 120 | 121 | def get_container(self, container_uri): 122 | uri = urlparse(container_uri) 123 | provider = STORAGE_PROVIDERS.get(uri.scheme) 124 | container_name = uri.netloc 125 | 126 | if provider == None: 127 | raise Exception('unknown storage provider {}'.format(container_name)) 128 | else: 129 | storage = get_driver(provider)(self.key, self.secret) 130 | 131 | try: 132 | container = storage.get_container(container_name) 133 | except ContainerDoesNotExistError: 134 | container = storage.create_container(container_name) 135 | 136 | return container 137 | 138 | def get_sha256(path): 139 | h = hashlib.sha256() 140 | with open(path, 'rb') as fh: 141 | buff = None 142 | while buff != b'': 143 | buff = fh.read(1024) 144 | h.update(buff) 145 | sha256 = h.hexdigest() 146 | logging.info('sha256 %s %s', path, sha256) 147 | return sha256 148 | 149 | def split_option(option, opt_str, value, parser): 150 | parser.values.extensions = value.split(',') 151 | 152 | cli = click.CommandCollection(sources=[add]) 153 | 154 | if __name__ == "__main__": 155 | cli() 156 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest 3 | 4 | [tool:pytest] 5 | addopts = --verbose 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | with open("README.md") as f: 4 | long_description = f.read() 5 | 6 | 
setup( 7 | name = 'dedoop', 8 | version = '0.0.4', 9 | author = 'Ed Summers', 10 | author_email = 'ehs@pobox.com', 11 | url = 'https://github.com/edsu/dedoop', 12 | py_modules = ['dedoop',], 13 | description = 'dedupe files and send them to the cloud', 14 | long_description=long_description, 15 | long_description_content_type='text/markdown', 16 | install_requires = ['apache-libcloud', 'click'], 17 | setup_requires=['pytest-runner'], 18 | tests_require = ['pytest', 'python-dotenv'], 19 | entry_points = {'console_scripts': ['dedoop = dedoop:main']}, 20 | ) 21 | -------------------------------------------------------------------------------- /test-data/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edsu/dedoop/f5217e4eb142c1adca7d4f8e544dede0c7481340/test-data/.keep -------------------------------------------------------------------------------- /test-data/a.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edsu/dedoop/f5217e4eb142c1adca7d4f8e544dede0c7481340/test-data/a.jpg -------------------------------------------------------------------------------- /test-data/a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edsu/dedoop/f5217e4eb142c1adca7d4f8e544dede0c7481340/test-data/a.png -------------------------------------------------------------------------------- /test-data/b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edsu/dedoop/f5217e4eb142c1adca7d4f8e544dede0c7481340/test-data/b.jpg -------------------------------------------------------------------------------- /test-data/c/a.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edsu/dedoop/f5217e4eb142c1adca7d4f8e544dede0c7481340/test-data/c/a.jpg 
-------------------------------------------------------------------------------- /test-data/c/b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edsu/dedoop/f5217e4eb142c1adca7d4f8e544dede0c7481340/test-data/c/b.jpg -------------------------------------------------------------------------------- /test_dedoop.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import json 4 | import pytest 5 | import dedoop 6 | import dotenv 7 | import shutil 8 | import logging 9 | import libcloud 10 | 11 | 12 | dotenv.load_dotenv() 13 | logging.basicConfig(filename='test.log', level=logging.DEBUG) 14 | 15 | input_dir = 'test-data' 16 | output_dir = 'test-data-deduped' 17 | 18 | def setup(): 19 | if os.path.isdir(output_dir): 20 | shutil.rmtree(output_dir) 21 | 22 | def test_sha256(): 23 | assert dedoop.get_sha256(os.path.join(input_dir, 'a.jpg')) == 'b6df8058fa818acfd91759edffa27e473f2308d5a6fca1e07a79189b95879953' 24 | 25 | def test_read(): 26 | m = dedoop.Deduper() 27 | m.read(input_dir) 28 | assert len(list(m.items())) == 3 29 | 30 | def test_write(): 31 | if os.path.isdir(output_dir): 32 | shutil.rmtree(output_dir) 33 | 34 | m = dedoop.Deduper() 35 | m.read(input_dir) 36 | m.write(output_dir) 37 | 38 | files = os.listdir(output_dir) 39 | files.sort() 40 | assert len(files) == 3 41 | assert files[0] == '1e89b90b5973baad2e6c3294ffe648ff53ab0b9d75188e9fbb8b38deb9ba3341.png' 42 | assert files[1] == '45d257c93e59ec35187c6a34c8e62e72c3e9cfbb548984d6f6e8deb84bac41f4.jpg' 43 | assert files[2] == 'b6df8058fa818acfd91759edffa27e473f2308d5a6fca1e07a79189b95879953.jpg' 44 | 45 | def test_extensions(): 46 | m = dedoop.Deduper() 47 | m.read(input_dir, extensions=['jpg']) 48 | assert len(list(m.items())) == 2 49 | 50 | def test_dotfiles(): 51 | m = dedoop.Deduper() 52 | m.read(input_dir, dotfiles=True) 53 | assert len(list(m.items())) == 4 54 | 55 | 56 | def 
test_json(): 57 | m = dedoop.Deduper() 58 | m.read(input_dir) 59 | m.write(output_dir) 60 | data = m.json() 61 | 62 | assert len(data['items']) == 3 63 | 64 | assert data['items'][0]['path'] == '1e89b90b5973baad2e6c3294ffe648ff53ab0b9d75188e9fbb8b38deb9ba3341.png' 65 | assert data['items'][0]['sha256'] == '1e89b90b5973baad2e6c3294ffe648ff53ab0b9d75188e9fbb8b38deb9ba3341' 66 | assert data['items'][0]['original_paths'] == ['test-data/a.png'] 67 | 68 | assert data['items'][1]['path'] == '45d257c93e59ec35187c6a34c8e62e72c3e9cfbb548984d6f6e8deb84bac41f4.jpg' 69 | assert data['items'][1]['sha256'] == '45d257c93e59ec35187c6a34c8e62e72c3e9cfbb548984d6f6e8deb84bac41f4' 70 | assert data['items'][1]['original_paths'] == ['test-data/b.jpg', 'test-data/c/b.jpg'] 71 | 72 | def test_write_s3(): 73 | user = os.environ.get('DEDOOP_USER') 74 | assert user 75 | 76 | access_key = os.environ.get('DEDOOP_S3_ACCESS_KEY') 77 | assert access_key 78 | 79 | access_secret = os.environ.get('DEDOOP_S3_ACCESS_SECRET') 80 | assert access_secret 81 | 82 | m = dedoop.Deduper(access_key, access_secret) 83 | 84 | container_name = 's3://{}-dedoop-test'.format(user) 85 | container = get_test_container(m, container_name) 86 | 87 | m.read(input_dir) 88 | m.write(container_name) 89 | 90 | o = container.get_object('1e89b90b5973baad2e6c3294ffe648ff53ab0b9d75188e9fbb8b38deb9ba3341') 91 | assert o 92 | 93 | o = container.get_object('45d257c93e59ec35187c6a34c8e62e72c3e9cfbb548984d6f6e8deb84bac41f4') 94 | assert o 95 | 96 | o = container.get_object('b6df8058fa818acfd91759edffa27e473f2308d5a6fca1e07a79189b95879953') 97 | assert o 98 | 99 | def test_write_gs(): 100 | user = os.environ.get('DEDOOP_USER') 101 | assert user 102 | 103 | access_key = os.environ.get('DEDOOP_GS_ACCESS_KEY') 104 | assert access_key 105 | 106 | access_secret = os.environ.get('DEDOOP_GS_ACCESS_SECRET') 107 | assert access_secret 108 | 109 | m = dedoop.Deduper(access_key, access_secret) 110 | 111 | container_name = 
'gs://{}-dedoop-test'.format(user) 112 | container = get_test_container(m, container_name) 113 | 114 | m.read(input_dir) 115 | m.write(container_name) 116 | 117 | o = container.get_object('1e89b90b5973baad2e6c3294ffe648ff53ab0b9d75188e9fbb8b38deb9ba3341') 118 | assert o 119 | 120 | o = container.get_object('45d257c93e59ec35187c6a34c8e62e72c3e9cfbb548984d6f6e8deb84bac41f4') 121 | assert o 122 | 123 | o = container.get_object('b6df8058fa818acfd91759edffa27e473f2308d5a6fca1e07a79189b95879953') 124 | assert o 125 | 126 | 127 | def get_test_container(deduper, container_name): 128 | container = deduper.get_container(container_name) 129 | 130 | # if the test container isn't empty remove all its contents 131 | if len(container.list_objects()) != 0: 132 | storage = container.driver 133 | for o in container.list_objects(): 134 | storage.delete_object(o) 135 | 136 | return container 137 | --------------------------------------------------------------------------------