├── hfilesize ├── __init__.py ├── test │ └── test_hfilesize.py └── hfilesize.py ├── MANIFEST.in ├── pdm.lock ├── .gitignore ├── pyproject.toml └── readme.md /hfilesize/__init__.py: -------------------------------------------------------------------------------- 1 | from .hfilesize import FileSize, Format 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | include *.py 3 | recursive-exclude .git * 4 | -------------------------------------------------------------------------------- /pdm.lock: -------------------------------------------------------------------------------- 1 | 2 | [metadata] 3 | lock_version = "4.0" 4 | content_hash = "sha256:283d196fda874e154c8cb9a8d9c42e4fbb7d4740fea54df382447ec3c49bb5ce" 5 | 6 | [metadata.files] 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | 3 | # *PyCharm* 4 | /.idea/ 5 | 6 | # *PDM* 7 | /.pdm.toml 8 | /__pypackages__/ 9 | 10 | # Distribution / packaging 11 | dist/ 12 | *.egg-info/ 13 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "hfilesize" 3 | version = "0.1.0" 4 | description = "Human readable file sizes" 5 | authors = [ 6 | {name = "simonzack", email = "simonzack@gmail.com"}, 7 | ] 8 | dependencies = [] 9 | requires-python = ">=3.10" 10 | readme = "readme.md" 11 | license = {text = "MIT"} 12 | classifiers = [ 13 | "Development Status :: 4 - Beta", 14 | "License :: OSI Approved :: MIT License", 15 | "Programming Language :: Python :: 3", 16 | "Topic :: Software Development :: Libraries :: Python Modules", 17 | ] 18 | 19 | [project.urls] 20 | Homepage = "https://github.com/simonzack/hfilesize" 21 | 22 | [build-system] 23 | requires = [ 24 | "pdm-pep517>=1.0.0", 25 | ] 26 | build-backend = "pdm.pep517.api" 27 | 28 | [tool.pdm] 29 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Human Readable File Sizes 2 | Parses & Formats integer file sizes to human readable file sizes. 3 | 4 | ## Development Setup 5 | To setup the project for development, run: 6 | 7 | $ cd hfilesize/ 8 | $ pdm install 9 | 10 | ## Example Usage 11 | ### Parsing 12 | 13 | >>> from hfilesize import Format, FileSize 14 | >>> FileSize('1k') 15 | ... 1000 16 | >>> FileSize('1K') 17 | ... 1024 18 | >>> FileSize('1kib') 19 | ... 1024 20 | >>> FileSize('1K', default_binary=False, case_sensitive=False) 21 | ... 1000 22 | >>> FileSize('1 kibibyte') 23 | ... 1024 24 | 25 | ### Formatting 26 | 27 | >>> '{:d}'.format(FileSize(1024)) 28 | ... '1024' 29 | >>> '{:.02fH}'.format(FileSize(1024)) 30 | ... '1 KB' 31 | >>> '{:.02fHcv}'.format(FileSize(1024)) 32 | ... '1 kilobyte' 33 | >>> '{:.02fhs}'.format(FileSize(1000)) 34 | ... '1 KB' 35 | >>> '{:.02fhs^0}'.format(FileSize(1000)) 36 | ... '1000 B' 37 | >>> '{: >10.02fH}'.format(FileSize(1024)) 38 | ... ' 1 KB' 39 | 40 | ## Documentation 41 | ### Parsing Options 42 | - `case_sensitive`: 43 | Use 1024 for upper case and 1000 for lower case if casing exists, as is common in unix utilities, e.g. dd 44 | 45 | - `default_binary`: 46 | Default base if it is not clear what the unit is (i.e. if it is not 'mib' or 'mebibytes') 47 | 48 | ### Formatting Options 49 | - format type: `[hH][size_format][^exponent]` 50 | - `h`: Base 1000 51 | - `H`: Base 1024 52 | - `size_format`: `c | cs | cv | e | ev | s | sv` 53 | - `c`: Commonly used case-sensitive suffixes 54 | - `cs`: Commonly used abbreviated case-sensitive suffixes 55 | - `cv`: Commonly used verbose case-sensitive suffixes 56 | - `e`: IEC suffixes 57 | - `ev`: IEC verbose suffixes 58 | - `s`: SI suffixes 59 | - `sv`: SI verbose suffixes 60 | - `exponent`: `integer` 61 | 62 | ## References 63 | Inspired by: 64 | 65 | - [`hurry.filesize`](https://pypi.python.org/pypi/hurry.filesize) 66 | - [Human readable file/memory sizes v2 (Python recipe)](http://code.activestate.com/recipes/578323-human-readable-filememory-sizes-v2/) 67 | 68 | ## License 69 | Licensed under MIT. 70 | -------------------------------------------------------------------------------- /hfilesize/test/test_hfilesize.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from hfilesize import FileSize, Format 4 | 5 | 6 | class TestHFileSize(unittest.TestCase): 7 | def test_parse(self): 8 | # basic tests on integers with no units 9 | self.assertEqual(FileSize(), 0) 10 | self.assertEqual(FileSize(1), 1) 11 | self.assertEqual(FileSize('1'), 1) 12 | self.assertEqual(FileSize('-1'), -1) 13 | 14 | # int base 15 | self.assertEqual(FileSize('0x12', 16), 0x12) 16 | 17 | # case sensitive 18 | self.assertEqual(FileSize('1k'), 1000) 19 | self.assertEqual(FileSize('1K'), 1024) 20 | self.assertEqual(FileSize('1kib'), 1024) 21 | self.assertEqual(FileSize('1kIB'), 1024) 22 | self.assertEqual(FileSize('1kb'), 1000) 23 | self.assertEqual(FileSize('1kB'), 1000) 24 | self.assertEqual(FileSize('1Kb'), 1024) 25 | self.assertEqual(FileSize('1KB'), 1024) 26 | 27 | self.assertEqual(FileSize('1k'), 1000**1) 28 | self.assertEqual(FileSize('1m'), 1000**2) 29 | self.assertEqual(FileSize('1g'), 1000**3) 30 | self.assertEqual(FileSize('1t'), 1000**4) 31 | self.assertEqual(FileSize('1p'), 1000**5) 32 | self.assertEqual(FileSize('1e'), 1000**6) 33 | self.assertEqual(FileSize('1z'), 1000**7) 34 | self.assertEqual(FileSize('1y'), 1000**8) 35 | 36 | self.assertEqual(FileSize('1K'), 1024**1) 37 | self.assertEqual(FileSize('1M'), 1024**2) 38 | self.assertEqual(FileSize('1G'), 1024**3) 39 | self.assertEqual(FileSize('1T'), 1024**4) 40 | self.assertEqual(FileSize('1P'), 1024**5) 41 | self.assertEqual(FileSize('1E'), 1024**6) 42 | self.assertEqual(FileSize('1Z'), 1024**7) 43 | self.assertEqual(FileSize('1Y'), 1024**8) 44 | 45 | # case insensitive 46 | self.assertEqual(FileSize('1K', default_binary=False, case_sensitive=False), 1000**1) 47 | self.assertEqual(FileSize('1K', default_binary=True, case_sensitive=False), 1024**1) 48 | 49 | # spacing 50 | self.assertEqual(FileSize('1 k'), 1000) 51 | 52 | # invalid values 53 | with self.assertRaises(ValueError): 54 | FileSize(1.1) 55 | with self.assertRaises(ValueError): 56 | FileSize('1.1') 57 | with self.assertRaises(ValueError): 58 | FileSize('1kibb') 59 | 60 | def test_format(self): 61 | # base guessing 62 | self.assertEqual(FileSize(1024).format(size_fmt=Format.casing), '1 KB') 63 | with self.assertRaises(ValueError): 64 | self.assertEqual(FileSize(1024).format(size_fmt=Format.si), '1 KB') 65 | 66 | # plural 67 | self.assertEqual(FileSize(0).format(base=1024, size_fmt=Format.casing), '0 bytes') 68 | self.assertEqual(FileSize(1).format(base=1024, size_fmt=Format.casing), '1 byte') 69 | self.assertEqual(FileSize(2).format(base=1024, size_fmt=Format.casing), '2 bytes') 70 | self.assertEqual(FileSize(1024).format(base=1024, size_fmt=Format.casing_verbose), '1 kilobyte') 71 | self.assertEqual(FileSize(1025).format(base=1024, size_fmt=Format.casing_verbose), '1.00 kilobytes') 72 | self.assertEqual(FileSize(2048).format(base=1024, size_fmt=Format.casing_verbose), '2 kilobytes') 73 | 74 | # float formatting 75 | self.assertEqual(FileSize(1024).format(base=1024, size_fmt=Format.casing), '1 KB') 76 | self.assertEqual(FileSize(1025).format(base=1024, size_fmt=Format.casing), '1.00 KB') 77 | self.assertEqual(FileSize(2048).format(base=1024, size_fmt=Format.casing), '2 KB') 78 | 79 | # exponent 80 | self.assertEqual(FileSize(1024).format(base=1024, size_fmt=Format.casing, exponent=0), '1024 bytes') 81 | 82 | # exponent bounds check 83 | with self.assertRaises(ValueError): 84 | FileSize(1024).format(base=1024, size_fmt=Format.casing, exponent=100) 85 | with self.assertRaises(ValueError): 86 | FileSize(1024).format(base=1024, size_fmt=Format.casing, exponent=-1) 87 | 88 | def test_str_format(self): 89 | self.assertEqual('{:d}'.format(FileSize(1024)), '1024') 90 | self.assertEqual('{:.02fH}'.format(FileSize(1024)), '1 KB') 91 | self.assertEqual('{:.02fhs}'.format(FileSize(1000)), '1 KB') 92 | self.assertEqual('{:.02fhs^0}'.format(FileSize(1000)), '1000 B') 93 | self.assertEqual('{:.02fHcv}'.format(FileSize(1024)), '1 kilobyte') 94 | self.assertEqual('{: >10.02fH}'.format(FileSize(1024)), '1 KB'.rjust(10)) 95 | -------------------------------------------------------------------------------- /hfilesize/hfilesize.py: -------------------------------------------------------------------------------- 1 | import math 2 | import re 3 | 4 | 5 | class Format: 6 | # We do not provide a lower case 1024 format to minimize ambiguity. 7 | casing = { 8 | 1024: [ 9 | (' byte', ' bytes'), 10 | ' KB', 11 | ' MB', 12 | ' GB', 13 | ' TB', 14 | ' PB', 15 | ' EB', 16 | ' ZB', 17 | ' YB', 18 | ], 19 | 1000: [ 20 | (' byte', ' bytes'), 21 | ' kb', 22 | ' mb', 23 | ' gb', 24 | ' tb', 25 | ' pb', 26 | ' eb', 27 | ' zb', 28 | ' yb', 29 | ], 30 | } 31 | 32 | casing_short = { 33 | 1024: [ 34 | '', 35 | 'K', 36 | 'M', 37 | 'G', 38 | 'T', 39 | 'P', 40 | 'E', 41 | 'Z', 42 | 'Y', 43 | ], 44 | 1000: [ 45 | '', 46 | ' kb', 47 | ' mb', 48 | ' gb', 49 | ' tb', 50 | ' pb', 51 | ' eb', 52 | ' zb', 53 | ' yb', 54 | ], 55 | } 56 | 57 | casing_verbose = { 58 | 1024: [ 59 | (' byte', ' bytes'), 60 | (' kilobyte', ' kilobytes'), 61 | (' megabyte', ' megabytes'), 62 | (' gigabyte', ' gigabytes'), 63 | (' terabyte', ' terabytes'), 64 | (' petabyte', ' petabytes'), 65 | (' exabyte', ' exabytes'), 66 | (' zettabyte', ' zettabytes'), 67 | (' yottabyte', ' yottabytes'), 68 | ] 69 | } 70 | 71 | iec = { 72 | 1024: [ 73 | '', 74 | ' KiB', 75 | ' MiB', 76 | ' GiB', 77 | ' TiB', 78 | ' PiB', 79 | ' EiB', 80 | ' ZiB', 81 | ' YiB', 82 | ] 83 | } 84 | 85 | iec_verbose = { 86 | 1024: [ 87 | (' byte', ' bytes'), 88 | (' kibibyte', 'kibibytes'), 89 | (' mebibyte', 'mebibytes'), 90 | (' gibibyte', 'gibibytes'), 91 | (' tebibyte', 'tebibytes'), 92 | (' pebibyte', 'pebibytes'), 93 | (' exbibyte', 'exbibytes'), 94 | (' zebibyte', 'zebibytes'), 95 | (' yobibyte', 'yobibytes'), 96 | ] 97 | } 98 | 99 | si = { 100 | 1000: [ 101 | ' B', 102 | ' KB', 103 | ' MB', 104 | ' GB', 105 | ' TB', 106 | ' PB', 107 | ' EB', 108 | ' ZB', 109 | ' YB', 110 | ] 111 | } 112 | 113 | si_verbose = { 114 | 1000: [ 115 | (' byte', ' bytes'), 116 | (' kilobyte', ' kilobytes'), 117 | (' megabyte', ' megabytes'), 118 | (' gigabyte', ' gigabytes'), 119 | (' terabyte', ' terabytes'), 120 | (' petabyte', ' petabytes'), 121 | (' exabyte', ' exabytes'), 122 | (' zettabyte', ' zettabytes'), 123 | (' yottabyte', ' yottabytes'), 124 | ] 125 | } 126 | 127 | 128 | parse_dict = { 129 | # `(exponent, case_char, base_if_certain)`. 130 | # base doesn't matter for bytes. 131 | '': (0, None, 1), 132 | 'b': (0, None, 1), 133 | 'byte': (0, None, 1), 134 | 'bytes': (0, None, 1), 135 | 136 | 'k': (1, 0, None), 137 | 'm': (2, 0, None), 138 | 'g': (3, 0, None), 139 | 't': (4, 0, None), 140 | 'p': (5, 0, None), 141 | 'e': (6, 0, None), 142 | 'z': (7, 0, None), 143 | 'y': (8, 0, None), 144 | 145 | 'kb': (1, 0, None), 146 | 'mb': (2, 0, None), 147 | 'gb': (3, 0, None), 148 | 'tb': (4, 0, None), 149 | 'pb': (5, 0, None), 150 | 'eb': (6, 0, None), 151 | 'zb': (7, 0, None), 152 | 'yb': (8, 0, None), 153 | 154 | 'kib': (1, None, 1024), 155 | 'mib': (2, None, 1024), 156 | 'gib': (3, None, 1024), 157 | 'tib': (4, None, 1024), 158 | 'pib': (5, None, 1024), 159 | 'eib': (6, None, 1024), 160 | 'zib': (7, None, 1024), 161 | 'yib': (8, None, 1024), 162 | 163 | 'kilobyte': (1, None, None), 164 | 'megabyte': (2, None, None), 165 | 'gigabyte': (3, None, None), 166 | 'terabyte': (4, None, None), 167 | 'petabyte': (5, None, None), 168 | 'exabyte': (6, None, None), 169 | 'zettabyte': (7, None, None), 170 | 'yottabyte': (8, None, None), 171 | 172 | 'kilobytes': (1, None, None), 173 | 'megabytes': (2, None, None), 174 | 'gigabytes': (3, None, None), 175 | 'terabytes': (4, None, None), 176 | 'petabytes': (5, None, None), 177 | 'exabytes': (6, None, None), 178 | 'zettabytes': (7, None, None), 179 | 'yottabytes': (8, None, None), 180 | 181 | 'kibibyte': (1, None, 1024), 182 | 'mebibyte': (2, None, 1024), 183 | 'gibibyte': (3, None, 1024), 184 | 'tebibyte': (4, None, 1024), 185 | 'pebibyte': (5, None, 1024), 186 | 'exbibyte': (6, None, 1024), 187 | 'zebibyte': (7, None, 1024), 188 | 'yobibyte': (8, None, 1024), 189 | 190 | 'kibibytes': (1, None, 1024), 191 | 'mebibytes': (2, None, 1024), 192 | 'gibibytes': (3, None, 1024), 193 | 'tebibytes': (4, None, 1024), 194 | 'pebibytes': (5, None, 1024), 195 | 'exbibytes': (6, None, 1024), 196 | 'zebibytes': (7, None, 1024), 197 | 'yobibytes': (8, None, 1024), 198 | } 199 | 200 | 201 | class FileSize(int): 202 | ''' 203 | Subclass of int to allow parsing & custom file size formatting. 204 | ''' 205 | 206 | def __new__(cls, value=0, base=10, default_binary=True, case_sensitive=True): 207 | ''' 208 | Parse file size, only accept ints as float has loss of precision, and using it is usually a user error. 209 | Otherwise allow any string int() allows. 210 | Bits are not used in file size descriptions hence ignored. 211 | 212 | args: 213 | case_sensitive: 214 | use 1024 for upper case and 1000 for lower case if casing exists, as is common in unix utilities, 215 | e.g. `dd` 216 | 217 | default_binary: 218 | default base if it is not clear what the unit is (i.e. if it is not 'mib' or 'mebibytes') 219 | ''' 220 | if isinstance(value, str): 221 | matches = re.match(r'^(.*\d)\s*([a-zA-Z]*)$', value) 222 | if not matches: 223 | raise ValueError 224 | size_str, unit_str = matches.groups() 225 | size = int(size_str, base) 226 | try: 227 | exponent, case_char, base_if_certain = parse_dict[unit_str.lower()] 228 | except KeyError: 229 | raise ValueError 230 | if base_if_certain is not None: 231 | is_binary = base_if_certain 232 | elif case_sensitive and case_char is not None: 233 | is_binary = unit_str[case_char].isupper() 234 | else: 235 | is_binary = default_binary 236 | size_base = 1024 if is_binary else 1000 237 | size *= size_base ** exponent 238 | return super(FileSize, cls).__new__(cls, size) 239 | elif isinstance(value, int): 240 | return super(FileSize, cls).__new__(cls, value) 241 | else: 242 | raise ValueError 243 | 244 | def format(self, base=1024, exponent=None, float_fmt='.2f', size_fmt=Format.casing): 245 | # base 246 | if base is None: 247 | # Try to infer the base from the format if it only has one format. 248 | if len(size_fmt) == 1: 249 | base = next(iter(size_fmt)) 250 | else: 251 | raise ValueError('base must be specified as it cannot be inferred') 252 | try: 253 | date_suffixes = size_fmt[base] 254 | except KeyError: 255 | raise ValueError('base') 256 | # exponent 257 | if exponent is None: 258 | # Get exponent if not specified. 259 | if self == 0: 260 | exponent = 0 261 | else: 262 | exponent = int(math.log(self, base)) 263 | exponent = max(exponent, 0) 264 | exponent = min(exponent, len(date_suffixes)-1) 265 | elif not 0 <= exponent < len(date_suffixes): 266 | raise ValueError('exponent out of range') 267 | # suffix 268 | suffix = date_suffixes[exponent] 269 | if isinstance(suffix, tuple): 270 | suffix = suffix[0] if self == base ** exponent else suffix[1] 271 | # size 272 | if self % (base ** exponent) == 0: 273 | float_fmt = '' 274 | size = self//(base ** exponent) 275 | else: 276 | size = self/(base ** exponent) 277 | # format final result 278 | return '{size:{float_fmt}}{suffix}'.format(size=size, float_fmt=float_fmt, suffix=suffix) 279 | 280 | def __format__(self, fmt): 281 | ''' 282 | format specification: 283 | format type: [hH][size_format][^exponent] 284 | size_format: c | cs | cv | e | ev | s | sv 285 | exponent: integer 286 | 287 | base is required sometimes if no exponent is specified 288 | always specifying the base gives a shorter format specification 289 | ''' 290 | # is it an empty format or not a special format for the size class 291 | matches = re.search(r'([hH])(?:(c|cs|cv|e|ev|s|sv)?(?:\^(\d+))?)?$', fmt) 292 | if not matches: 293 | return int(self).__format__(fmt) 294 | fmt_type, size_fmt, exponent = matches.groups() 295 | size_fmt = { 296 | None: Format.casing, 297 | 'c': Format.casing, 298 | 'cs': Format.casing_short, 299 | 'cv': Format.casing_verbose, 300 | 'e': Format.iec, 301 | 'ev': Format.iec_verbose, 302 | 's': Format.si, 303 | 'sv': Format.si_verbose, 304 | }[size_fmt] 305 | if fmt_type == 'h': 306 | base = 1000 307 | elif fmt_type == 'H': 308 | base = 1024 309 | else: 310 | assert False 311 | if exponent is not None: 312 | exponent = int(exponent) 313 | fmt = fmt[:matches.start(0)] 314 | # Get the non-float part. 315 | float_fmt_matches = re.search(r'(\.\d+)?(.)?$', fmt) 316 | float_res = self.format(base=base, exponent=exponent, float_fmt=float_fmt_matches.group(), size_fmt=size_fmt) 317 | fmt = fmt[:float_fmt_matches.start(0)] 318 | res = '{float_res:{other_fmt}}'.format(float_res=float_res, other_fmt=fmt) 319 | return res 320 | --------------------------------------------------------------------------------