├── hfilesize
    ├── __init__.py
    ├── test
    │   └── test_hfilesize.py
    └── hfilesize.py
├── MANIFEST.in
├── pdm.lock
├── .gitignore
├── pyproject.toml
└── readme.md


/hfilesize/__init__.py:
--------------------------------------------------------------------------------
1 | from .hfilesize import FileSize, Format
2 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.md
2 | include *.py
3 | recursive-exclude .git *
4 | 


--------------------------------------------------------------------------------
/pdm.lock:
--------------------------------------------------------------------------------
1 | 
2 | [metadata]
3 | lock_version = "4.0"
4 | content_hash = "sha256:283d196fda874e154c8cb9a8d9c42e4fbb7d4740fea54df382447ec3c49bb5ce"
5 | 
6 | [metadata.files]
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | 
 3 | # *PyCharm*
 4 | /.idea/
 5 | 
 6 | # *PDM*
 7 | /.pdm.toml
 8 | /__pypackages__/
 9 | 
10 | # Distribution / packaging
11 | dist/
12 | *.egg-info/
13 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "hfilesize"
 3 | version = "0.1.0"
 4 | description = "Human readable file sizes"
 5 | authors = [
 6 |     {name = "simonzack", email = "simonzack@gmail.com"},
 7 | ]
 8 | dependencies = []
 9 | requires-python = ">=3.10"
10 | readme = "readme.md"
11 | license = {text = "MIT"}
12 | classifiers = [
13 |     "Development Status :: 4 - Beta",
14 |     "License :: OSI Approved :: MIT License",
15 |     "Programming Language :: Python :: 3",
16 |     "Topic :: Software Development :: Libraries :: Python Modules",
17 | ]
18 | 
19 | [project.urls]
20 | Homepage = "https://github.com/simonzack/hfilesize"
21 | 
22 | [build-system]
23 | requires = [
24 |     "pdm-pep517>=1.0.0",
25 | ]
26 | build-backend = "pdm.pep517.api"
27 | 
28 | [tool.pdm]
29 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | # Human Readable File Sizes
 2 | Parses human-readable file size strings into integers, and formats integer file sizes as human-readable strings.
 3 | 
 4 | ## Development Setup
 5 | To set up the project for development, run:
 6 | 
 7 |     $ cd hfilesize/
 8 |     $ pdm install
 9 | 
10 | ## Example Usage
11 | ### Parsing
12 | 
13 |     >>> from hfilesize import Format, FileSize
14 |     >>> FileSize('1k')
15 |     ... 1000
16 |     >>> FileSize('1K')
17 |     ... 1024
18 |     >>> FileSize('1kib')
19 |     ... 1024
20 |     >>> FileSize('1K', default_binary=False, case_sensitive=False)
21 |     ... 1000
22 |     >>> FileSize('1 kibibyte')
23 |     ... 1024
24 | 
25 | ### Formatting
26 | 
27 |     >>> '{:d}'.format(FileSize(1024))
28 |     ... '1024'
29 |     >>> '{:.02fH}'.format(FileSize(1024))
30 |     ... '1 KB'
31 |     >>> '{:.02fHcv}'.format(FileSize(1024))
32 |     ... '1 kilobyte'
33 |     >>> '{:.02fhs}'.format(FileSize(1000))
34 |     ... '1 KB'
35 |     >>> '{:.02fhs^0}'.format(FileSize(1000))
36 |     ... '1000 B'
37 |     >>> '{: >10.02fH}'.format(FileSize(1024))
38 |     ... '      1 KB'
39 | 
40 | ## Documentation
41 | ### Parsing Options
42 | - `case_sensitive`:
43 | Use 1024 for upper case and 1000 for lower case if casing exists, as is common in Unix utilities, e.g. `dd`.
44 | 
45 | - `default_binary`:
46 | Whether to default to base 1024 (rather than 1000) when the unit does not determine the base (e.g. if it is not 'mib' or 'mebibytes').
47 | 
48 | ### Formatting Options
49 | - format type:      `[hH][size_format][^exponent]`
50 |     - `h`:              Base 1000
51 |     - `H`:              Base 1024
52 | - `size_format`:    `c | cs | cv | e | ev | s | sv`
53 |     - `c`:              Commonly used case-sensitive suffixes 
54 |     - `cs`:             Commonly used abbreviated case-sensitive suffixes
55 |     - `cv`:             Commonly used verbose case-sensitive suffixes
56 |     - `e`:              IEC suffixes
57 |     - `ev`:             IEC verbose suffixes
58 |     - `s`:              SI suffixes
59 |     - `sv`:             SI verbose suffixes
60 | - `exponent`:       `integer`
61 | 
62 | ## References
63 | Inspired by:
64 | 
65 | - [`hurry.filesize`](https://pypi.python.org/pypi/hurry.filesize)
66 | - [Human readable file/memory sizes v2 (Python recipe)](http://code.activestate.com/recipes/578323-human-readable-filememory-sizes-v2/)
67 | 
68 | ## License
69 | Licensed under MIT.
70 | 


--------------------------------------------------------------------------------
/hfilesize/test/test_hfilesize.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from hfilesize import FileSize, Format
 4 | 
 5 | 
 6 | class TestHFileSize(unittest.TestCase):
 7 |     def test_parse(self):
 8 |         # basic tests on integers with no units
 9 |         self.assertEqual(FileSize(), 0)
10 |         self.assertEqual(FileSize(1), 1)
11 |         self.assertEqual(FileSize('1'), 1)
12 |         self.assertEqual(FileSize('-1'), -1)
13 | 
14 |         # int base
15 |         self.assertEqual(FileSize('0x12', 16), 0x12)
16 | 
17 |         # case sensitive
18 |         self.assertEqual(FileSize('1k'), 1000)
19 |         self.assertEqual(FileSize('1K'), 1024)
20 |         self.assertEqual(FileSize('1kib'), 1024)
21 |         self.assertEqual(FileSize('1kIB'), 1024)
22 |         self.assertEqual(FileSize('1kb'), 1000)
23 |         self.assertEqual(FileSize('1kB'), 1000)
24 |         self.assertEqual(FileSize('1Kb'), 1024)
25 |         self.assertEqual(FileSize('1KB'), 1024)
26 | 
27 |         self.assertEqual(FileSize('1k'), 1000**1)
28 |         self.assertEqual(FileSize('1m'), 1000**2)
29 |         self.assertEqual(FileSize('1g'), 1000**3)
30 |         self.assertEqual(FileSize('1t'), 1000**4)
31 |         self.assertEqual(FileSize('1p'), 1000**5)
32 |         self.assertEqual(FileSize('1e'), 1000**6)
33 |         self.assertEqual(FileSize('1z'), 1000**7)
34 |         self.assertEqual(FileSize('1y'), 1000**8)
35 | 
36 |         self.assertEqual(FileSize('1K'), 1024**1)
37 |         self.assertEqual(FileSize('1M'), 1024**2)
38 |         self.assertEqual(FileSize('1G'), 1024**3)
39 |         self.assertEqual(FileSize('1T'), 1024**4)
40 |         self.assertEqual(FileSize('1P'), 1024**5)
41 |         self.assertEqual(FileSize('1E'), 1024**6)
42 |         self.assertEqual(FileSize('1Z'), 1024**7)
43 |         self.assertEqual(FileSize('1Y'), 1024**8)
44 | 
45 |         # case insensitive
46 |         self.assertEqual(FileSize('1K', default_binary=False, case_sensitive=False), 1000**1)
47 |         self.assertEqual(FileSize('1K', default_binary=True, case_sensitive=False), 1024**1)
48 | 
49 |         # spacing
50 |         self.assertEqual(FileSize('1 k'), 1000)
51 | 
52 |         # invalid values
53 |         with self.assertRaises(ValueError):
54 |             FileSize(1.1)
55 |         with self.assertRaises(ValueError):
56 |             FileSize('1.1')
57 |         with self.assertRaises(ValueError):
58 |             FileSize('1kibb')
59 | 
60 |     def test_format(self):
61 |         # base guessing
62 |         self.assertEqual(FileSize(1024).format(size_fmt=Format.casing), '1 KB')
63 |         with self.assertRaises(ValueError):
64 |             self.assertEqual(FileSize(1024).format(size_fmt=Format.si), '1 KB')
65 | 
66 |         # plural
67 |         self.assertEqual(FileSize(0).format(base=1024, size_fmt=Format.casing), '0 bytes')
68 |         self.assertEqual(FileSize(1).format(base=1024, size_fmt=Format.casing), '1 byte')
69 |         self.assertEqual(FileSize(2).format(base=1024, size_fmt=Format.casing), '2 bytes')
70 |         self.assertEqual(FileSize(1024).format(base=1024, size_fmt=Format.casing_verbose), '1 kilobyte')
71 |         self.assertEqual(FileSize(1025).format(base=1024, size_fmt=Format.casing_verbose), '1.00 kilobytes')
72 |         self.assertEqual(FileSize(2048).format(base=1024, size_fmt=Format.casing_verbose), '2 kilobytes')
73 | 
74 |         # float formatting
75 |         self.assertEqual(FileSize(1024).format(base=1024, size_fmt=Format.casing), '1 KB')
76 |         self.assertEqual(FileSize(1025).format(base=1024, size_fmt=Format.casing), '1.00 KB')
77 |         self.assertEqual(FileSize(2048).format(base=1024, size_fmt=Format.casing), '2 KB')
78 | 
79 |         # exponent
80 |         self.assertEqual(FileSize(1024).format(base=1024, size_fmt=Format.casing, exponent=0), '1024 bytes')
81 | 
82 |         # exponent bounds check
83 |         with self.assertRaises(ValueError):
84 |             FileSize(1024).format(base=1024, size_fmt=Format.casing, exponent=100)
85 |         with self.assertRaises(ValueError):
86 |             FileSize(1024).format(base=1024, size_fmt=Format.casing, exponent=-1)
87 | 
88 |     def test_str_format(self):
89 |         self.assertEqual('{:d}'.format(FileSize(1024)), '1024')
90 |         self.assertEqual('{:.02fH}'.format(FileSize(1024)), '1 KB')
91 |         self.assertEqual('{:.02fhs}'.format(FileSize(1000)), '1 KB')
92 |         self.assertEqual('{:.02fhs^0}'.format(FileSize(1000)), '1000 B')
93 |         self.assertEqual('{:.02fHcv}'.format(FileSize(1024)), '1 kilobyte')
94 |         self.assertEqual('{: >10.02fH}'.format(FileSize(1024)), '1 KB'.rjust(10))
95 | 


--------------------------------------------------------------------------------
/hfilesize/hfilesize.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import re
  3 | 
  4 | 
  5 | class Format:
  6 |     # We do not provide a lower case 1024 format to minimize ambiguity.
  7 |     casing = {
  8 |         1024: [
  9 |             (' byte', ' bytes'),
 10 |             ' KB',
 11 |             ' MB',
 12 |             ' GB',
 13 |             ' TB',
 14 |             ' PB',
 15 |             ' EB',
 16 |             ' ZB',
 17 |             ' YB',
 18 |         ],
 19 |         1000: [
 20 |             (' byte', ' bytes'),
 21 |             ' kb',
 22 |             ' mb',
 23 |             ' gb',
 24 |             ' tb',
 25 |             ' pb',
 26 |             ' eb',
 27 |             ' zb',
 28 |             ' yb',
 29 |         ],
 30 |     }
 31 | 
 32 |     casing_short = {
 33 |         1024: [
 34 |             '',
 35 |             'K',
 36 |             'M',
 37 |             'G',
 38 |             'T',
 39 |             'P',
 40 |             'E',
 41 |             'Z',
 42 |             'Y',
 43 |         ],
 44 |         1000: [
 45 |             '',
 46 |             ' kb',
 47 |             ' mb',
 48 |             ' gb',
 49 |             ' tb',
 50 |             ' pb',
 51 |             ' eb',
 52 |             ' zb',
 53 |             ' yb',
 54 |         ],
 55 |     }
 56 | 
 57 |     casing_verbose = {
 58 |         1024: [
 59 |             (' byte', ' bytes'),
 60 |             (' kilobyte', ' kilobytes'),
 61 |             (' megabyte', ' megabytes'),
 62 |             (' gigabyte', ' gigabytes'),
 63 |             (' terabyte', ' terabytes'),
 64 |             (' petabyte', ' petabytes'),
 65 |             (' exabyte', ' exabytes'),
 66 |             (' zettabyte', ' zettabytes'),
 67 |             (' yottabyte', ' yottabytes'),
 68 |         ]
 69 |     }
 70 | 
 71 |     iec = {
 72 |         1024: [
 73 |             '',
 74 |             ' KiB',
 75 |             ' MiB',
 76 |             ' GiB',
 77 |             ' TiB',
 78 |             ' PiB',
 79 |             ' EiB',
 80 |             ' ZiB',
 81 |             ' YiB',
 82 |         ]
 83 |     }
 84 | 
 85 |     iec_verbose = {
 86 |         1024: [
 87 |             (' byte', ' bytes'),
 88 |             (' kibibyte', 'kibibytes'),
 89 |             (' mebibyte', 'mebibytes'),
 90 |             (' gibibyte', 'gibibytes'),
 91 |             (' tebibyte', 'tebibytes'),
 92 |             (' pebibyte', 'pebibytes'),
 93 |             (' exbibyte', 'exbibytes'),
 94 |             (' zebibyte', 'zebibytes'),
 95 |             (' yobibyte', 'yobibytes'),
 96 |         ]
 97 |     }
 98 | 
 99 |     si = {
100 |         1000: [
101 |             ' B',
102 |             ' KB',
103 |             ' MB',
104 |             ' GB',
105 |             ' TB',
106 |             ' PB',
107 |             ' EB',
108 |             ' ZB',
109 |             ' YB',
110 |         ]
111 |     }
112 | 
113 |     si_verbose = {
114 |         1000: [
115 |             (' byte', ' bytes'),
116 |             (' kilobyte', ' kilobytes'),
117 |             (' megabyte', ' megabytes'),
118 |             (' gigabyte', ' gigabytes'),
119 |             (' terabyte', ' terabytes'),
120 |             (' petabyte', ' petabytes'),
121 |             (' exabyte', ' exabytes'),
122 |             (' zettabyte', ' zettabytes'),
123 |             (' yottabyte', ' yottabytes'),
124 |         ]
125 |     }
126 | 
127 | 
def _build_parse_dict():
    '''
    Build the table that maps a lower-cased unit string to `(exponent, case_char, base_if_certain)`.

    - `exponent`: power the base is raised to for this unit.
    - `case_char`: index of the character in the unit whose case may select the base, or `None` when
      the case of the unit carries no information.
    - `base_if_certain`: the base when the unit itself determines it, otherwise `None`.
    '''
    si_stems = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zetta', 'yotta')
    iec_stems = ('kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi', 'zebi', 'yobi')
    # The one-letter prefixes are the initials of the SI stems: k, m, g, t, p, e, z, y.
    letters = tuple(stem[0] for stem in si_stems)

    # The base doesn't matter for bytes.
    table = {unit: (0, None, 1) for unit in ('', 'b', 'byte', 'bytes')}

    # `(prefixes, ending, case_char, base_if_certain)` for every family of units.
    families = (
        (letters, '', 0, None),
        (letters, 'b', 0, None),
        (letters, 'ib', None, 1024),
        (si_stems, 'byte', None, None),
        (si_stems, 'bytes', None, None),
        (iec_stems, 'byte', None, 1024),
        (iec_stems, 'bytes', None, 1024),
    )
    for prefixes, ending, case_char, base_if_certain in families:
        for exponent, prefix in enumerate(prefixes, start=1):
            table[prefix + ending] = (exponent, case_char, base_if_certain)
    return table


parse_dict = _build_parse_dict()
199 | 
200 | 
class FileSize(int):
    '''
    Subclass of int to allow parsing & custom file size formatting.
    '''

    # Trailing `[hH][size_format][^exponent]` part of a format specification.
    _size_spec_re = re.compile(r'([hH])(?:(c|cs|cv|e|ev|s|sv)?(?:\^(\d+))?)?$')
    # Trailing precision and/or float presentation type. Only genuine float presentation types are
    # accepted, so the last digit of a width (as in '>10') is not mistaken for one.
    _float_spec_re = re.compile(r'(?:\.\d+)?[eEfFgGn%]?$')
    # Suffix table selected by each `size_format` code; no code means the common casing table.
    _size_formats = {
        None:   Format.casing,
        'c':    Format.casing,
        'cs':   Format.casing_short,
        'cv':   Format.casing_verbose,
        'e':    Format.iec,
        'ev':   Format.iec_verbose,
        's':    Format.si,
        'sv':   Format.si_verbose,
    }

    def __new__(cls, value=0, base=10, default_binary=True, case_sensitive=True):
        '''
        Parse file size, only accept ints as float has loss of precision, and using it is usually a user error.
        Otherwise allow any string int() allows.
        Bits are not used in file size descriptions hence ignored.

        args:
            value:
                an int (number of bytes), or a string made of an integer optionally followed by a unit

            base:
                radix passed to `int()` when parsing the numeric part of a string; ignored for ints

            case_sensitive:
                use 1024 for upper case and 1000 for lower case if casing exists, as is common in unix utilities,
                e.g. `dd`

            default_binary:
                default base if it is not clear what the unit is (i.e. if it is not 'mib' or 'mebibytes')

        raises:
            ValueError:
                if `value` is neither an int nor a str, or the string has no valid number or unit
        '''
        if isinstance(value, int):
            return super().__new__(cls, value)
        if not isinstance(value, str):
            raise ValueError('file size must be an int or a str, not {}'.format(type(value).__name__))

        # Everything up to the last digit is the number, the trailing letters are the unit.
        matches = re.match(r'^(.*\d)\s*([a-zA-Z]*)$', value)
        if not matches:
            raise ValueError('invalid file size: {!r}'.format(value))
        size_str, unit_str = matches.groups()
        size = int(size_str, base)
        try:
            exponent, case_char, base_if_certain = parse_dict[unit_str.lower()]
        except KeyError:
            raise ValueError('unknown file size unit: {!r}'.format(unit_str)) from None

        if base_if_certain is not None:
            # The unit itself fixes the base (e.g. 'kib'); for plain bytes the exponent is 0,
            # so whatever base is recorded there has no effect on the result.
            size_base = base_if_certain
        elif case_sensitive and case_char is not None:
            size_base = 1024 if unit_str[case_char].isupper() else 1000
        else:
            size_base = 1024 if default_binary else 1000
        return super().__new__(cls, size * size_base ** exponent)

    def format(self, base=1024, exponent=None, float_fmt='.2f', size_fmt=Format.casing):
        '''
        Render the size as a number followed by a unit suffix.

        args:
            base:
                base of the units, which must be a key of `size_fmt`; if `None` it is taken from `size_fmt`
                when that table only has one base

            exponent:
                index of the unit to use; if `None` the largest unit not exceeding the size is used

            float_fmt:
                format specification applied to the number when it is not a whole multiple of the unit

            size_fmt:
                suffix table, see `Format`

        raises:
            ValueError:
                if the base cannot be inferred or is not in `size_fmt`, or if `exponent` is out of range
        '''
        # base
        if base is None:
            # Try to infer the base from the format if it only has one format.
            if len(size_fmt) != 1:
                raise ValueError('base must be specified as it cannot be inferred')
            base = next(iter(size_fmt))
        try:
            suffixes = size_fmt[base]
        except KeyError:
            raise ValueError('size format does not support base {!r}'.format(base)) from None

        # exponent
        max_exponent = len(suffixes) - 1
        if exponent is None:
            # Count whole divisions by the base using integers only: a float logarithm can round
            # across an exact power of the base and is undefined for negative sizes.
            exponent = 0
            magnitude = abs(self)
            while exponent < max_exponent and magnitude >= base:
                magnitude //= base
                exponent += 1
        elif not 0 <= exponent <= max_exponent:
            raise ValueError('exponent out of range')
        unit = base ** exponent

        # suffix
        suffix = suffixes[exponent]
        if isinstance(suffix, tuple):
            # `(singular, plural)`: the singular is only used for exactly one unit.
            suffix = suffix[0] if self == unit else suffix[1]

        # size
        if self % unit == 0:
            # A whole number of units is shown without any decimals.
            return '{}{}'.format(self // unit, suffix)
        return '{size:{float_fmt}}{suffix}'.format(size=self / unit, float_fmt=float_fmt, suffix=suffix)

    def __format__(self, fmt):
        '''
        format specification:
            format type:    [hH][size_format][^exponent]
            size_format:    c | cs | cv | e | ev | s | sv
            exponent:       integer

        `h` selects base 1000 and `H` base 1024.
        A precision and float presentation type directly before the format type (e.g. `.2f`) are applied to
        the number; whatever precedes them (fill, alignment, width) is applied to the resulting string.
        A specification that does not end in a format type is handled as for a plain int.
        '''
        # is it an empty format or not a special format for the size class
        matches = self._size_spec_re.search(fmt)
        if not matches:
            return int(self).__format__(fmt)
        fmt_type, size_fmt_code, exponent = matches.groups()
        if exponent is not None:
            exponent = int(exponent)

        # Split what is left into the string part (fill, alignment, width) and the float part.
        remaining = fmt[:matches.start(0)]
        float_matches = self._float_spec_re.search(remaining)
        formatted = self.format(
            base=1000 if fmt_type == 'h' else 1024,
            exponent=exponent,
            float_fmt=float_matches.group(),
            size_fmt=self._size_formats[size_fmt_code],
        )
        return format(formatted, remaining[:float_matches.start(0)])
320 | 


--------------------------------------------------------------------------------