├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── docs ├── zmq.png └── zmq.svg ├── partd ├── __init__.py ├── _version.py ├── buffer.py ├── compressed.py ├── core.py ├── dict.py ├── encode.py ├── file.py ├── numpy.py ├── pandas.py ├── pickle.py ├── python.py ├── tests │ ├── test_buffer.py │ ├── test_compressed.py │ ├── test_dict.py │ ├── test_encode.py │ ├── test_file.py │ ├── test_numpy.py │ ├── test_pandas.py │ ├── test_partd.py │ ├── test_pickle.py │ ├── test_python.py │ ├── test_utils.py │ └── test_zmq.py ├── utils.py └── zmq.py ├── pyproject.toml ├── requirements.txt └── setup.py /.gitattributes: -------------------------------------------------------------------------------- 1 | partd/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | name: Python ${{ matrix.python-version }} 8 | runs-on: ubuntu-latest 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | python-version: ["3.9", "3.10", "3.11", "3.12"] 13 | 14 | steps: 15 | - name: Checkout source 16 | uses: actions/checkout@v2 17 | 18 | - name: Setup Conda Environment 19 | uses: conda-incubator/setup-miniconda@v2.2.0 20 | with: 21 | miniforge-variant: Mambaforge 22 | miniforge-version: latest 23 | use-mamba: true 24 | channel-priority: strict 25 | python-version: ${{ matrix.python-version }} 26 | auto-activate-base: false 27 | 28 | - name: Install dependencies 29 | shell: bash -l {0} 30 | run: mamba install pytest locket numpy toolz pandas blosc pyzmq pyarrow -c conda-forge 31 | 32 | - name: Install 33 | shell: bash -l {0} 34 | run: pip install . 35 | 36 | - name: Run Tests 37 | shell: bash -l {0} 38 | run: pytest partd --doctest-modules --verbose 39 | env: 40 | PYTHON_VERSION: ${{ matrix.python-version }} 41 | 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | 3 | *.egg-info/ 4 | build/ 5 | dist/ 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Dask is a community maintained project. We welcome contributions in the form of bug reports, documentation, code, design proposals, and more. 2 | 3 | For general information on how to contribute see https://docs.dask.org/en/latest/develop.html. 4 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Continuum Analytics, Inc. and contributors 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 
13 | 
14 | Neither the name of Continuum Analytics nor the names of any contributors
15 | may be used to endorse or promote products derived from this software
16 | without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28 | THE POSSIBILITY OF SUCH DAMAGE.
29 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include partd *.py
2 | 
3 | include setup.py
4 | include README.rst
5 | include LICENSE.txt
6 | include MANIFEST.in
7 | include versioneer.py
8 | include partd/_version.py
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | PartD
2 | =====
3 | 
4 | |Build Status| |Version Status|
5 | 
6 | Key-value byte store with appendable values
7 | 
8 | Partd stores key-value pairs.
9 | Values are raw bytes.
10 | New values are appended onto old values.
11 | 
12 | Partd excels at shuffling operations.
13 | 
14 | Operations
15 | ----------
16 | 
17 | PartD has two main operations, ``append`` and ``get``.
18 | 
19 | 
20 | Example
21 | -------
22 | 
23 | 1. Create a Partd backed by a directory::
24 | 
25 |     >>> import partd
26 |     >>> p = partd.File('/path/to/new/dataset/')
27 | 
28 | 2. Append key-byte pairs to the dataset::
29 | 
30 |     >>> p.append({'x': b'Hello ', 'y': b'123'})
31 |     >>> p.append({'x': b'world!', 'y': b'456'})
32 | 
33 | 3. Get bytes associated with keys::
34 | 
35 |     >>> p.get('x')         # One key
36 |     b'Hello world!'
37 | 
38 |     >>> p.get(['y', 'x'])  # List of keys
39 |     [b'123456', b'Hello world!']
40 | 
41 | 4. Destroy the partd dataset::
42 | 
43 |     >>> p.drop()
44 | 
45 | That's it.
46 | 
47 | 
48 | Implementations
49 | ---------------
50 | 
51 | We can back a partd by an in-memory dictionary::
52 | 
53 |     >>> p = Dict()
54 | 
55 | For larger amounts of data, or to share data between processes, we back a partd
56 | by a directory of files.  This uses file-based locks for consistency::
57 | 
58 |     >>> p = File('/path/to/dataset/')
59 | 
60 | However, this can fail for many small writes.  In these cases you may wish to buffer one partd with another, keeping a fixed maximum amount of data in the buffering partd.  This writes the larger elements of the first partd to the second partd when space runs low::
61 | 
62 |     >>> p = Buffer(Dict(), File(), available_memory=2e9)  # 2GB memory buffer
63 | 
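A quick sketch of the buffering behavior, mirroring ``partd/tests/test_buffer.py``
below (exactly which keys spill to the slow store is chosen by an internal
heuristic, so treat the placement shown in the comments as illustrative)::

    >>> fast, slow = Dict(), Dict()
    >>> p = Buffer(fast, slow, available_memory=10)  # tiny limit, for demonstration
    >>> p.append({'x': b'Hello', 'y': b'abc'})       # 8 bytes buffered in fast
    >>> p.append({'x': b'World!', 'y': b'def'})      # over the limit: 'x' spills to slow
    >>> p.get('x')                                   # get() joins fast and slow transparently
    b'HelloWorld!'
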
64 | You might also want to have many distributed processes write to a single
65 | partd consistently.  This can be done with a server
66 | 
67 | *  Server Process::
68 | 
69 |     >>> p = Buffer(Dict(), File(), available_memory=2e9)  # 2GB memory buffer
70 |     >>> s = Server(p, address='ipc://server')
71 | 
72 | *  Worker processes::
73 | 
74 |     >>> p = Client('ipc://server')  # Client machine talks to remote server
75 | 
76 | 
77 | Encodings and Compression
78 | -------------------------
79 | 
80 | Once we can robustly and efficiently append bytes to a partd, we consider
81 | compression and encodings.  This is generally available with the ``Encode``
82 | partd, which accepts three functions: one to apply to bytes as they are
83 | written, one to apply to bytes as they are read, and one to join bytestreams.
84 | Common configurations already exist for common data and compression formats.
85 | 
86 | We may wish to compress and decompress data transparently as we interact with a
87 | partd.  Objects like ``BZ2``, ``Blosc``, ``ZLib`` and ``Snappy`` exist and take
88 | another partd as an argument::
89 | 
90 |     >>> p = File(...)
91 |     >>> p = ZLib(p)
92 | 
93 | These work exactly as before; the (de)compression happens automatically.
94 | 
95 | Common data formats like Python lists, numpy arrays, and pandas
96 | dataframes are also supported out of the box::
97 | 
98 |     >>> p = File(...)
99 |     >>> p = Numpy(p)
100 |     >>> p.append({'x': np.array([...])})
101 | 
102 | This lets us forget about bytes and think instead in our normal data types.
103 | 
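``Encode`` can also host your own serialization.  A minimal sketch (the JSON
codec here is illustrative, not a partd built-in; ``Encode`` itself is the
real class and takes exactly the three functions described above)::

    >>> import json
    >>> from partd import Encode, Dict

    >>> def concat(lists):                # join decoded frames from separate appends
    ...     return sum(lists, [])

    >>> p = Encode(lambda l: json.dumps(l).encode(),  # encode one value to bytes
    ...            lambda b: json.loads(b.decode()),  # decode one frame back
    ...            concat,
    ...            Dict())
    >>> p.append({'x': [1, 2]})
    >>> p.append({'x': [3]})
    >>> p.get('x')
    [1, 2, 3]
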
104 | Composition
105 | -----------
106 | 
107 | In principle we want to compose all of these choices together:
108 | 
109 | 1. Write policy:  ``Dict``, ``File``, ``Buffer``, ``Client``
110 | 2. Encoding:  ``Pickle``, ``Numpy``, ``Pandas``, ...
111 | 3. Compression:  ``Blosc``, ``Snappy``, ...
112 | 
113 | Partd objects compose by nesting.  Here we make a partd that writes
114 | pickle-encoded, BZ2-compressed bytes directly to disk::
115 | 
116 |     >>> p = Pickle(BZ2(File('foo')))
117 | 
118 | We could construct more complex systems that include compression,
119 | serialization, buffering, and remote access::
120 | 
121 |     >>> server = Server(Buffer(Dict(), File(), available_memory=2e9))
122 | 
123 |     >>> client = Pickle(Snappy(Client(server.address)))
124 |     >>> client.append({'x': [1, 2, 3]})
125 | 
126 | .. |Build Status| image:: https://github.com/dask/partd/workflows/CI/badge.svg
127 |    :target: https://github.com/dask/partd/actions?query=workflow%3ACI
128 | .. |Version Status| image:: https://img.shields.io/pypi/v/partd.svg
129 |    :target: https://pypi.python.org/pypi/partd/
130 | 
--------------------------------------------------------------------------------
/docs/zmq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dask/partd/e832b655606342dc742ec1c564b07abf1ad58383/docs/zmq.png
--------------------------------------------------------------------------------
/docs/zmq.svg:
--------------------------------------------------------------------------------
[SVG diagram: "in memory" groups send to files; text labels "File system" and "ZeroMQ sockets"]
--------------------------------------------------------------------------------
/partd/__init__.py:
--------------------------------------------------------------------------------
1 | from contextlib import suppress
2 | 
3 | from .file import File
4 | from .dict import Dict
5 | from .buffer import Buffer
6 | from .encode import Encode
7 | from .pickle import Pickle
8 | from .python import Python
9 | from .compressed import *
10 | with suppress(ImportError):
11 |     from .numpy import Numpy
12 | with suppress(ImportError):
13 |     from .pandas import PandasColumns, PandasBlocks
14 | with suppress(ImportError):
15 |     from .zmq import Client, Server
16 | 
17 | from . import _version
18 | __version__ = _version.get_versions()['version']
--------------------------------------------------------------------------------
/partd/_version.py:
--------------------------------------------------------------------------------
1 | 
2 | # This file helps to compute a version number in source trees obtained from
3 | # git-archive tarball (such as those provided by githubs download-from-tag
4 | # feature). Distribution tarballs (built by setup.py sdist) and build
5 | # directories (produced by setup.py build) will contain a much shorter file
6 | # that just contains the computed version number.
7 | 
8 | # This file is released into the public domain.
9 | # Generated by versioneer-0.29
10 | # https://github.com/python-versioneer/python-versioneer
11 | 
12 | """Git implementation of _version.py."""
13 | 
14 | import errno
15 | import os
16 | import re
17 | import subprocess
18 | import sys
19 | from typing import Any, Callable, Dict, List, Optional, Tuple
20 | import functools
21 | 
22 | 
23 | def get_keywords() -> Dict[str, str]:
24 |     """Get the keywords needed to look up the version information."""
25 |     # these strings will be replaced by git during git-archive.
26 |     # setup.py/versioneer.py will grep for the variable names, so they must
27 |     # each be defined on a line of their own. _version.py will just call
28 |     # get_keywords().
29 | git_refnames = " (HEAD -> main)" 30 | git_full = "e832b655606342dc742ec1c564b07abf1ad58383" 31 | git_date = "2024-07-15 16:21:10 -0500" 32 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 33 | return keywords 34 | 35 | 36 | class VersioneerConfig: 37 | """Container for Versioneer configuration parameters.""" 38 | 39 | VCS: str 40 | style: str 41 | tag_prefix: str 42 | parentdir_prefix: str 43 | versionfile_source: str 44 | verbose: bool 45 | 46 | 47 | def get_config() -> VersioneerConfig: 48 | """Create, populate and return the VersioneerConfig() object.""" 49 | # these strings are filled in when 'setup.py versioneer' creates 50 | # _version.py 51 | cfg = VersioneerConfig() 52 | cfg.VCS = "git" 53 | cfg.style = "pep440" 54 | cfg.tag_prefix = "" 55 | cfg.parentdir_prefix = "partd-" 56 | cfg.versionfile_source = "partd/_version.py" 57 | cfg.verbose = False 58 | return cfg 59 | 60 | 61 | class NotThisMethod(Exception): 62 | """Exception raised if a method is not valid for the current scenario.""" 63 | 64 | 65 | LONG_VERSION_PY: Dict[str, str] = {} 66 | HANDLERS: Dict[str, Dict[str, Callable]] = {} 67 | 68 | 69 | def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator 70 | """Create decorator to mark a method as the handler of a VCS.""" 71 | def decorate(f: Callable) -> Callable: 72 | """Store f in HANDLERS[vcs][method].""" 73 | if vcs not in HANDLERS: 74 | HANDLERS[vcs] = {} 75 | HANDLERS[vcs][method] = f 76 | return f 77 | return decorate 78 | 79 | 80 | def run_command( 81 | commands: List[str], 82 | args: List[str], 83 | cwd: Optional[str] = None, 84 | verbose: bool = False, 85 | hide_stderr: bool = False, 86 | env: Optional[Dict[str, str]] = None, 87 | ) -> Tuple[Optional[str], Optional[int]]: 88 | """Call the given command(s).""" 89 | assert isinstance(commands, list) 90 | process = None 91 | 92 | popen_kwargs: Dict[str, Any] = {} 93 | if sys.platform == "win32": 94 | # This hides the console window if pythonw.exe is used 95 | startupinfo = subprocess.STARTUPINFO() 96 | startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW 97 | popen_kwargs["startupinfo"] = startupinfo 98 | 99 | for command in commands: 100 | try: 101 | dispcmd = str([command] + args) 102 | # remember shell=False, so use git.cmd on windows, not just git 103 | process = subprocess.Popen([command] + args, cwd=cwd, env=env, 104 | stdout=subprocess.PIPE, 105 | stderr=(subprocess.PIPE if hide_stderr 106 | else None), **popen_kwargs) 107 | break 108 | except OSError as e: 109 | if e.errno == errno.ENOENT: 110 | continue 111 | if verbose: 112 | print("unable to run %s" % dispcmd) 113 | print(e) 114 | return None, None 115 | else: 116 | if verbose: 117 | print("unable to find command, tried %s" % (commands,)) 118 | return None, None 119 | stdout = process.communicate()[0].strip().decode() 120 | if process.returncode != 0: 121 | if verbose: 122 | print("unable to run %s (error)" % dispcmd) 123 | print("stdout was %s" % stdout) 124 | return None, process.returncode 125 | return stdout, process.returncode 126 | 127 | 128 | def versions_from_parentdir( 129 | parentdir_prefix: str, 130 | root: str, 131 | verbose: bool, 132 | ) -> Dict[str, Any]: 133 | """Try to determine the version from the parent directory name. 134 | 135 | Source tarballs conventionally unpack into a directory that includes both 136 | the project name and a version string. 
We will also support searching up 137 | two directory levels for an appropriately named parent directory 138 | """ 139 | rootdirs = [] 140 | 141 | for _ in range(3): 142 | dirname = os.path.basename(root) 143 | if dirname.startswith(parentdir_prefix): 144 | return {"version": dirname[len(parentdir_prefix):], 145 | "full-revisionid": None, 146 | "dirty": False, "error": None, "date": None} 147 | rootdirs.append(root) 148 | root = os.path.dirname(root) # up a level 149 | 150 | if verbose: 151 | print("Tried directories %s but none started with prefix %s" % 152 | (str(rootdirs), parentdir_prefix)) 153 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 154 | 155 | 156 | @register_vcs_handler("git", "get_keywords") 157 | def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: 158 | """Extract version information from the given file.""" 159 | # the code embedded in _version.py can just fetch the value of these 160 | # keywords. When used from setup.py, we don't want to import _version.py, 161 | # so we do it with a regexp instead. This function is not used from 162 | # _version.py. 163 | keywords: Dict[str, str] = {} 164 | try: 165 | with open(versionfile_abs, "r") as fobj: 166 | for line in fobj: 167 | if line.strip().startswith("git_refnames ="): 168 | mo = re.search(r'=\s*"(.*)"', line) 169 | if mo: 170 | keywords["refnames"] = mo.group(1) 171 | if line.strip().startswith("git_full ="): 172 | mo = re.search(r'=\s*"(.*)"', line) 173 | if mo: 174 | keywords["full"] = mo.group(1) 175 | if line.strip().startswith("git_date ="): 176 | mo = re.search(r'=\s*"(.*)"', line) 177 | if mo: 178 | keywords["date"] = mo.group(1) 179 | except OSError: 180 | pass 181 | return keywords 182 | 183 | 184 | @register_vcs_handler("git", "keywords") 185 | def git_versions_from_keywords( 186 | keywords: Dict[str, str], 187 | tag_prefix: str, 188 | verbose: bool, 189 | ) -> Dict[str, Any]: 190 | """Get version information from git keywords.""" 191 | if "refnames" not in keywords: 192 | raise NotThisMethod("Short version file found") 193 | date = keywords.get("date") 194 | if date is not None: 195 | # Use only the last line. Previous lines may contain GPG signature 196 | # information. 197 | date = date.splitlines()[-1] 198 | 199 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 200 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 201 | # -like" string, which we must then edit to make compliant), because 202 | # it's been around since git-1.5.3, and it's too difficult to 203 | # discover which version we're using, or to work around using an 204 | # older one. 205 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 206 | refnames = keywords["refnames"].strip() 207 | if refnames.startswith("$Format"): 208 | if verbose: 209 | print("keywords are unexpanded, not using") 210 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 211 | refs = {r.strip() for r in refnames.strip("()").split(",")} 212 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 213 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 214 | TAG = "tag: " 215 | tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} 216 | if not tags: 217 | # Either we're using git < 1.8.3, or there really are no tags. We use 218 | # a heuristic: assume all version tags have a digit. 
The old git %d 219 | # expansion behaves like git log --decorate=short and strips out the 220 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 221 | # between branches and tags. By ignoring refnames without digits, we 222 | # filter out many common branch names like "release" and 223 | # "stabilization", as well as "HEAD" and "master". 224 | tags = {r for r in refs if re.search(r'\d', r)} 225 | if verbose: 226 | print("discarding '%s', no digits" % ",".join(refs - tags)) 227 | if verbose: 228 | print("likely tags: %s" % ",".join(sorted(tags))) 229 | for ref in sorted(tags): 230 | # sorting will prefer e.g. "2.0" over "2.0rc1" 231 | if ref.startswith(tag_prefix): 232 | r = ref[len(tag_prefix):] 233 | # Filter out refs that exactly match prefix or that don't start 234 | # with a number once the prefix is stripped (mostly a concern 235 | # when prefix is '') 236 | if not re.match(r'\d', r): 237 | continue 238 | if verbose: 239 | print("picking %s" % r) 240 | return {"version": r, 241 | "full-revisionid": keywords["full"].strip(), 242 | "dirty": False, "error": None, 243 | "date": date} 244 | # no suitable tags, so version is "0+unknown", but full hex is still there 245 | if verbose: 246 | print("no suitable tags, using unknown + full revision id") 247 | return {"version": "0+unknown", 248 | "full-revisionid": keywords["full"].strip(), 249 | "dirty": False, "error": "no suitable tags", "date": None} 250 | 251 | 252 | @register_vcs_handler("git", "pieces_from_vcs") 253 | def git_pieces_from_vcs( 254 | tag_prefix: str, 255 | root: str, 256 | verbose: bool, 257 | runner: Callable = run_command 258 | ) -> Dict[str, Any]: 259 | """Get version from 'git describe' in the root of the source tree. 260 | 261 | This only gets called if the git-archive 'subst' keywords were *not* 262 | expanded, and _version.py hasn't already been rewritten with a short 263 | version string, meaning we're inside a checked out source tree. 264 | """ 265 | GITS = ["git"] 266 | if sys.platform == "win32": 267 | GITS = ["git.cmd", "git.exe"] 268 | 269 | # GIT_DIR can interfere with correct operation of Versioneer. 270 | # It may be intended to be passed to the Versioneer-versioned project, 271 | # but that should not change where we get our version from. 
272 | env = os.environ.copy() 273 | env.pop("GIT_DIR", None) 274 | runner = functools.partial(runner, env=env) 275 | 276 | _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, 277 | hide_stderr=not verbose) 278 | if rc != 0: 279 | if verbose: 280 | print("Directory %s not under git control" % root) 281 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 282 | 283 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 284 | # if there isn't one, this yields HEX[-dirty] (no NUM) 285 | describe_out, rc = runner(GITS, [ 286 | "describe", "--tags", "--dirty", "--always", "--long", 287 | "--match", f"{tag_prefix}[[:digit:]]*" 288 | ], cwd=root) 289 | # --long was added in git-1.5.5 290 | if describe_out is None: 291 | raise NotThisMethod("'git describe' failed") 292 | describe_out = describe_out.strip() 293 | full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) 294 | if full_out is None: 295 | raise NotThisMethod("'git rev-parse' failed") 296 | full_out = full_out.strip() 297 | 298 | pieces: Dict[str, Any] = {} 299 | pieces["long"] = full_out 300 | pieces["short"] = full_out[:7] # maybe improved later 301 | pieces["error"] = None 302 | 303 | branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], 304 | cwd=root) 305 | # --abbrev-ref was added in git-1.6.3 306 | if rc != 0 or branch_name is None: 307 | raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") 308 | branch_name = branch_name.strip() 309 | 310 | if branch_name == "HEAD": 311 | # If we aren't exactly on a branch, pick a branch which represents 312 | # the current commit. If all else fails, we are on a branchless 313 | # commit. 314 | branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) 315 | # --contains was added in git-1.5.4 316 | if rc != 0 or branches is None: 317 | raise NotThisMethod("'git branch --contains' returned error") 318 | branches = branches.split("\n") 319 | 320 | # Remove the first line if we're running detached 321 | if "(" in branches[0]: 322 | branches.pop(0) 323 | 324 | # Strip off the leading "* " from the list of branches. 325 | branches = [branch[2:] for branch in branches] 326 | if "master" in branches: 327 | branch_name = "master" 328 | elif not branches: 329 | branch_name = None 330 | else: 331 | # Pick the first branch that is returned. Good or bad. 332 | branch_name = branches[0] 333 | 334 | pieces["branch"] = branch_name 335 | 336 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 337 | # TAG might have hyphens. 338 | git_describe = describe_out 339 | 340 | # look for -dirty suffix 341 | dirty = git_describe.endswith("-dirty") 342 | pieces["dirty"] = dirty 343 | if dirty: 344 | git_describe = git_describe[:git_describe.rindex("-dirty")] 345 | 346 | # now we have TAG-NUM-gHEX or HEX 347 | 348 | if "-" in git_describe: 349 | # TAG-NUM-gHEX 350 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 351 | if not mo: 352 | # unparsable. Maybe git-describe is misbehaving? 
353 | pieces["error"] = ("unable to parse git-describe output: '%s'" 354 | % describe_out) 355 | return pieces 356 | 357 | # tag 358 | full_tag = mo.group(1) 359 | if not full_tag.startswith(tag_prefix): 360 | if verbose: 361 | fmt = "tag '%s' doesn't start with prefix '%s'" 362 | print(fmt % (full_tag, tag_prefix)) 363 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 364 | % (full_tag, tag_prefix)) 365 | return pieces 366 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 367 | 368 | # distance: number of commits since tag 369 | pieces["distance"] = int(mo.group(2)) 370 | 371 | # commit: short hex revision ID 372 | pieces["short"] = mo.group(3) 373 | 374 | else: 375 | # HEX: no tags 376 | pieces["closest-tag"] = None 377 | out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) 378 | pieces["distance"] = len(out.split()) # total number of commits 379 | 380 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 381 | date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() 382 | # Use only the last line. Previous lines may contain GPG signature 383 | # information. 384 | date = date.splitlines()[-1] 385 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 386 | 387 | return pieces 388 | 389 | 390 | def plus_or_dot(pieces: Dict[str, Any]) -> str: 391 | """Return a + if we don't already have one, else return a .""" 392 | if "+" in pieces.get("closest-tag", ""): 393 | return "." 394 | return "+" 395 | 396 | 397 | def render_pep440(pieces: Dict[str, Any]) -> str: 398 | """Build up version string, with post-release "local version identifier". 399 | 400 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 401 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 402 | 403 | Exceptions: 404 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 405 | """ 406 | if pieces["closest-tag"]: 407 | rendered = pieces["closest-tag"] 408 | if pieces["distance"] or pieces["dirty"]: 409 | rendered += plus_or_dot(pieces) 410 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 411 | if pieces["dirty"]: 412 | rendered += ".dirty" 413 | else: 414 | # exception #1 415 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 416 | pieces["short"]) 417 | if pieces["dirty"]: 418 | rendered += ".dirty" 419 | return rendered 420 | 421 | 422 | def render_pep440_branch(pieces: Dict[str, Any]) -> str: 423 | """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . 424 | 425 | The ".dev0" means not master branch. Note that .dev0 sorts backwards 426 | (a feature branch will appear "older" than the master branch). 427 | 428 | Exceptions: 429 | 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] 430 | """ 431 | if pieces["closest-tag"]: 432 | rendered = pieces["closest-tag"] 433 | if pieces["distance"] or pieces["dirty"]: 434 | if pieces["branch"] != "master": 435 | rendered += ".dev0" 436 | rendered += plus_or_dot(pieces) 437 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 438 | if pieces["dirty"]: 439 | rendered += ".dirty" 440 | else: 441 | # exception #1 442 | rendered = "0" 443 | if pieces["branch"] != "master": 444 | rendered += ".dev0" 445 | rendered += "+untagged.%d.g%s" % (pieces["distance"], 446 | pieces["short"]) 447 | if pieces["dirty"]: 448 | rendered += ".dirty" 449 | return rendered 450 | 451 | 452 | def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: 453 | """Split pep440 version string at the post-release segment. 
454 | 455 | Returns the release segments before the post-release and the 456 | post-release version number (or -1 if no post-release segment is present). 457 | """ 458 | vc = str.split(ver, ".post") 459 | return vc[0], int(vc[1] or 0) if len(vc) == 2 else None 460 | 461 | 462 | def render_pep440_pre(pieces: Dict[str, Any]) -> str: 463 | """TAG[.postN.devDISTANCE] -- No -dirty. 464 | 465 | Exceptions: 466 | 1: no tags. 0.post0.devDISTANCE 467 | """ 468 | if pieces["closest-tag"]: 469 | if pieces["distance"]: 470 | # update the post release segment 471 | tag_version, post_version = pep440_split_post(pieces["closest-tag"]) 472 | rendered = tag_version 473 | if post_version is not None: 474 | rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) 475 | else: 476 | rendered += ".post0.dev%d" % (pieces["distance"]) 477 | else: 478 | # no commits, use the tag as the version 479 | rendered = pieces["closest-tag"] 480 | else: 481 | # exception #1 482 | rendered = "0.post0.dev%d" % pieces["distance"] 483 | return rendered 484 | 485 | 486 | def render_pep440_post(pieces: Dict[str, Any]) -> str: 487 | """TAG[.postDISTANCE[.dev0]+gHEX] . 488 | 489 | The ".dev0" means dirty. Note that .dev0 sorts backwards 490 | (a dirty tree will appear "older" than the corresponding clean one), 491 | but you shouldn't be releasing software with -dirty anyways. 492 | 493 | Exceptions: 494 | 1: no tags. 0.postDISTANCE[.dev0] 495 | """ 496 | if pieces["closest-tag"]: 497 | rendered = pieces["closest-tag"] 498 | if pieces["distance"] or pieces["dirty"]: 499 | rendered += ".post%d" % pieces["distance"] 500 | if pieces["dirty"]: 501 | rendered += ".dev0" 502 | rendered += plus_or_dot(pieces) 503 | rendered += "g%s" % pieces["short"] 504 | else: 505 | # exception #1 506 | rendered = "0.post%d" % pieces["distance"] 507 | if pieces["dirty"]: 508 | rendered += ".dev0" 509 | rendered += "+g%s" % pieces["short"] 510 | return rendered 511 | 512 | 513 | def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: 514 | """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . 515 | 516 | The ".dev0" means not master branch. 517 | 518 | Exceptions: 519 | 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] 520 | """ 521 | if pieces["closest-tag"]: 522 | rendered = pieces["closest-tag"] 523 | if pieces["distance"] or pieces["dirty"]: 524 | rendered += ".post%d" % pieces["distance"] 525 | if pieces["branch"] != "master": 526 | rendered += ".dev0" 527 | rendered += plus_or_dot(pieces) 528 | rendered += "g%s" % pieces["short"] 529 | if pieces["dirty"]: 530 | rendered += ".dirty" 531 | else: 532 | # exception #1 533 | rendered = "0.post%d" % pieces["distance"] 534 | if pieces["branch"] != "master": 535 | rendered += ".dev0" 536 | rendered += "+g%s" % pieces["short"] 537 | if pieces["dirty"]: 538 | rendered += ".dirty" 539 | return rendered 540 | 541 | 542 | def render_pep440_old(pieces: Dict[str, Any]) -> str: 543 | """TAG[.postDISTANCE[.dev0]] . 544 | 545 | The ".dev0" means dirty. 546 | 547 | Exceptions: 548 | 1: no tags. 0.postDISTANCE[.dev0] 549 | """ 550 | if pieces["closest-tag"]: 551 | rendered = pieces["closest-tag"] 552 | if pieces["distance"] or pieces["dirty"]: 553 | rendered += ".post%d" % pieces["distance"] 554 | if pieces["dirty"]: 555 | rendered += ".dev0" 556 | else: 557 | # exception #1 558 | rendered = "0.post%d" % pieces["distance"] 559 | if pieces["dirty"]: 560 | rendered += ".dev0" 561 | return rendered 562 | 563 | 564 | def render_git_describe(pieces: Dict[str, Any]) -> str: 565 | """TAG[-DISTANCE-gHEX][-dirty]. 
566 | 567 | Like 'git describe --tags --dirty --always'. 568 | 569 | Exceptions: 570 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 571 | """ 572 | if pieces["closest-tag"]: 573 | rendered = pieces["closest-tag"] 574 | if pieces["distance"]: 575 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 576 | else: 577 | # exception #1 578 | rendered = pieces["short"] 579 | if pieces["dirty"]: 580 | rendered += "-dirty" 581 | return rendered 582 | 583 | 584 | def render_git_describe_long(pieces: Dict[str, Any]) -> str: 585 | """TAG-DISTANCE-gHEX[-dirty]. 586 | 587 | Like 'git describe --tags --dirty --always -long'. 588 | The distance/hash is unconditional. 589 | 590 | Exceptions: 591 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 592 | """ 593 | if pieces["closest-tag"]: 594 | rendered = pieces["closest-tag"] 595 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 596 | else: 597 | # exception #1 598 | rendered = pieces["short"] 599 | if pieces["dirty"]: 600 | rendered += "-dirty" 601 | return rendered 602 | 603 | 604 | def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: 605 | """Render the given version pieces into the requested style.""" 606 | if pieces["error"]: 607 | return {"version": "unknown", 608 | "full-revisionid": pieces.get("long"), 609 | "dirty": None, 610 | "error": pieces["error"], 611 | "date": None} 612 | 613 | if not style or style == "default": 614 | style = "pep440" # the default 615 | 616 | if style == "pep440": 617 | rendered = render_pep440(pieces) 618 | elif style == "pep440-branch": 619 | rendered = render_pep440_branch(pieces) 620 | elif style == "pep440-pre": 621 | rendered = render_pep440_pre(pieces) 622 | elif style == "pep440-post": 623 | rendered = render_pep440_post(pieces) 624 | elif style == "pep440-post-branch": 625 | rendered = render_pep440_post_branch(pieces) 626 | elif style == "pep440-old": 627 | rendered = render_pep440_old(pieces) 628 | elif style == "git-describe": 629 | rendered = render_git_describe(pieces) 630 | elif style == "git-describe-long": 631 | rendered = render_git_describe_long(pieces) 632 | else: 633 | raise ValueError("unknown style '%s'" % style) 634 | 635 | return {"version": rendered, "full-revisionid": pieces["long"], 636 | "dirty": pieces["dirty"], "error": None, 637 | "date": pieces.get("date")} 638 | 639 | 640 | def get_versions() -> Dict[str, Any]: 641 | """Get version information or return default if unable to do so.""" 642 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 643 | # __file__, we can work backwards from there to the root. Some 644 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 645 | # case we can only use expanded keywords. 646 | 647 | cfg = get_config() 648 | verbose = cfg.verbose 649 | 650 | try: 651 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 652 | verbose) 653 | except NotThisMethod: 654 | pass 655 | 656 | try: 657 | root = os.path.realpath(__file__) 658 | # versionfile_source is the relative path from the top of the source 659 | # tree (where the .git directory might live) to this file. Invert 660 | # this to find the root from __file__. 
661 | for _ in cfg.versionfile_source.split('/'): 662 | root = os.path.dirname(root) 663 | except NameError: 664 | return {"version": "0+unknown", "full-revisionid": None, 665 | "dirty": None, 666 | "error": "unable to find root of source tree", 667 | "date": None} 668 | 669 | try: 670 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 671 | return render(pieces, cfg.style) 672 | except NotThisMethod: 673 | pass 674 | 675 | try: 676 | if cfg.parentdir_prefix: 677 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 678 | except NotThisMethod: 679 | pass 680 | 681 | return {"version": "0+unknown", "full-revisionid": None, 682 | "dirty": None, 683 | "error": "unable to compute version", "date": None} 684 | -------------------------------------------------------------------------------- /partd/buffer.py: -------------------------------------------------------------------------------- 1 | from .core import Interface 2 | from threading import Lock 3 | from toolz import merge_with, topk, accumulate, pluck 4 | from operator import add 5 | from bisect import bisect 6 | from collections import defaultdict 7 | from queue import Queue, Empty 8 | 9 | 10 | def zero(): 11 | return 0 12 | 13 | class Buffer(Interface): 14 | def __init__(self, fast, slow, available_memory=1e9): 15 | self.lock = Lock() 16 | self.fast = fast 17 | self.slow = slow 18 | self.available_memory = available_memory 19 | self.lengths = defaultdict(zero) 20 | self.memory_usage = 0 21 | Interface.__init__(self) 22 | 23 | def __getstate__(self): 24 | return {'fast': self.fast, 25 | 'slow': self.slow, 26 | 'memory_usage': self.memory_usage, 27 | 'lengths': self.lengths, 28 | 'available_memory': self.available_memory} 29 | 30 | def __setstate__(self, state): 31 | Interface.__setstate__(self, state) 32 | self.lock = Lock() 33 | self.__dict__.update(state) 34 | 35 | def append(self, data, lock=True, **kwargs): 36 | if lock: self.lock.acquire() 37 | try: 38 | for k, v in data.items(): 39 | self.lengths[k] += len(v) 40 | self.memory_usage += len(v) 41 | self.fast.append(data, lock=False, **kwargs) 42 | 43 | while self.memory_usage > self.available_memory: 44 | keys = keys_to_flush(self.lengths, 0.1, maxcount=20) 45 | self.flush(keys) 46 | 47 | finally: 48 | if lock: self.lock.release() 49 | 50 | def _get(self, keys, lock=True, **kwargs): 51 | if lock: self.lock.acquire() 52 | try: 53 | result = list(map(add, self.fast.get(keys, lock=False), 54 | self.slow.get(keys, lock=False))) 55 | finally: 56 | if lock: self.lock.release() 57 | return result 58 | 59 | def _iset(self, key, value, lock=True): 60 | """ Idempotent set """ 61 | if lock: self.lock.acquire() 62 | try: 63 | self.fast.iset(key, value, lock=False) 64 | finally: 65 | if lock: self.lock.release() 66 | 67 | def _delete(self, keys, lock=True): 68 | if lock: self.lock.acquire() 69 | try: 70 | self.fast.delete(keys, lock=False) 71 | self.slow.delete(keys, lock=False) 72 | finally: 73 | if lock: self.lock.release() 74 | 75 | def drop(self): 76 | self._iset_seen.clear() 77 | self.fast.drop() 78 | self.slow.drop() 79 | 80 | def __exit__(self, *args): 81 | self.drop() 82 | 83 | def flush(self, keys=None, block=None): 84 | """ Flush keys to disk 85 | 86 | Parameters 87 | ---------- 88 | 89 | keys: list or None 90 | list of keys to flush 91 | block: bool (defaults to None) 92 | Whether or not to block until all writing is complete 93 | 94 | If no keys are given then flush all keys 95 | """ 96 | if keys is None: 97 | keys = list(self.lengths) 98 | 99 | 
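        # Move the buffered bytes for the chosen keys from the fast store into
        # the slow store, then release them from fast and from the memory accounting.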
self.slow.append(dict(zip(keys, self.fast.get(keys)))) 100 | self.fast.delete(keys) 101 | 102 | for key in keys: 103 | self.memory_usage -= self.lengths[key] 104 | del self.lengths[key] 105 | 106 | 107 | def keys_to_flush(lengths, fraction=0.1, maxcount=100000): 108 | """ Which keys to remove 109 | 110 | >>> lengths = {'a': 20, 'b': 10, 'c': 15, 'd': 15, 111 | ... 'e': 10, 'f': 25, 'g': 5} 112 | >>> keys_to_flush(lengths, 0.5) 113 | ['f', 'a'] 114 | """ 115 | top = topk(max(len(lengths) // 2, 1), 116 | lengths.items(), 117 | key=1) 118 | total = sum(lengths.values()) 119 | cutoff = min(maxcount, max(1, 120 | bisect(list(accumulate(add, pluck(1, top))), 121 | total * fraction))) 122 | result = [k for k, v in top[:cutoff]] 123 | assert result 124 | return result 125 | -------------------------------------------------------------------------------- /partd/compressed.py: -------------------------------------------------------------------------------- 1 | from contextlib import suppress 2 | from functools import partial 3 | 4 | from .encode import Encode 5 | 6 | __all__ = [] 7 | 8 | 9 | def bytes_concat(L): 10 | return b''.join(L) 11 | 12 | 13 | with suppress(ImportError, AttributeError): 14 | # In case snappy is not installed, or another package called snappy that does not implement compress / decompress. 15 | # For example, SnapPy (https://pypi.org/project/snappy/) 16 | import snappy 17 | Snappy = partial(Encode, 18 | snappy.compress, 19 | snappy.decompress, 20 | bytes_concat) 21 | __all__.append('Snappy') 22 | 23 | 24 | with suppress(ImportError): 25 | import zlib 26 | ZLib = partial(Encode, 27 | zlib.compress, 28 | zlib.decompress, 29 | bytes_concat) 30 | __all__.append('ZLib') 31 | 32 | 33 | with suppress(ImportError): 34 | import bz2 35 | BZ2 = partial(Encode, 36 | bz2.compress, 37 | bz2.decompress, 38 | bytes_concat) 39 | __all__.append('BZ2') 40 | 41 | 42 | with suppress(ImportError): 43 | import blosc 44 | Blosc = partial(Encode, 45 | blosc.compress, 46 | blosc.decompress, 47 | bytes_concat) 48 | __all__.append('Blosc') 49 | -------------------------------------------------------------------------------- /partd/core.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import locket 4 | import string 5 | from toolz import memoize 6 | from contextlib import contextmanager 7 | from .utils import nested_get, flatten 8 | 9 | 10 | 11 | # http://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename-in-python 12 | valid_chars = "-_.() " + string.ascii_letters + string.digits + os.path.sep 13 | 14 | 15 | def escape_filename(fn): 16 | """ Escape text so that it is a valid filename 17 | 18 | >>> escape_filename('Foo!bar?') 19 | 'Foobar' 20 | 21 | """ 22 | return ''.join(filter(valid_chars.__contains__, fn)) 23 | 24 | 25 | def filename(path, key): 26 | return os.path.join(path, escape_filename(token(key))) 27 | 28 | 29 | def token(key): 30 | """ 31 | 32 | >>> token('hello') 33 | 'hello' 34 | >>> token(('hello', 'world')) # doctest: +SKIP 35 | 'hello/world' 36 | """ 37 | if isinstance(key, str): 38 | return key 39 | elif isinstance(key, tuple): 40 | return os.path.join(*map(token, key)) 41 | else: 42 | return str(key) 43 | 44 | 45 | class Interface: 46 | def __init__(self): 47 | self._iset_seen = set() 48 | 49 | def __setstate__(self, state): 50 | self.__dict__.update(state) 51 | self._iset_seen = set() 52 | 53 | def iset(self, key, value, **kwargs): 54 | if key in self._iset_seen: 55 | return 56 | else: 57 | 
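            # First write wins: perform the set once and remember the key, so
            # repeated iset() calls for the same key become no-ops.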
self._iset(key, value, **kwargs) 58 | self._iset_seen.add(key) 59 | 60 | def __enter__(self): 61 | return self 62 | 63 | def __exit__(self, type, value, traceback): 64 | self.drop() 65 | 66 | def iget(self, key): 67 | return self._get([key], lock=False)[0] 68 | 69 | def get(self, keys, **kwargs): 70 | if not isinstance(keys, list): 71 | return self.get([keys], **kwargs)[0] 72 | elif any(isinstance(key, list) for key in keys): # nested case 73 | flatkeys = list(flatten(keys)) 74 | result = self.get(flatkeys, **kwargs) 75 | return nested_get(keys, dict(zip(flatkeys, result))) 76 | else: 77 | return self._get(keys, **kwargs) 78 | 79 | def delete(self, keys, **kwargs): 80 | if not isinstance(keys, list): 81 | return self._delete([keys], **kwargs) 82 | else: 83 | return self._delete(keys, **kwargs) 84 | 85 | def pop(self, keys, **kwargs): 86 | with self.partd.lock: 87 | result = self.partd.get(keys, lock=False) 88 | self.partd.delete(keys, lock=False) 89 | return result 90 | 91 | -------------------------------------------------------------------------------- /partd/dict.py: -------------------------------------------------------------------------------- 1 | from .core import Interface 2 | from threading import Lock 3 | 4 | 5 | class Dict(Interface): 6 | def __init__(self): 7 | self.lock = Lock() 8 | self.data = dict() 9 | Interface.__init__(self) 10 | 11 | def __getstate__(self): 12 | return {'data': self.data} 13 | 14 | def __setstate__(self, state): 15 | Interface.__setstate__(self, state) 16 | Dict.__init__(self) 17 | self.data = state['data'] 18 | 19 | def append(self, data, lock=True, **kwargs): 20 | if lock: self.lock.acquire() 21 | try: 22 | for k, v in data.items(): 23 | if k not in self.data: 24 | self.data[k] = [] 25 | self.data[k].append(v) 26 | finally: 27 | if lock: self.lock.release() 28 | 29 | def _get(self, keys, lock=True, **kwargs): 30 | assert isinstance(keys, (list, tuple, set)) 31 | if lock: 32 | self.lock.acquire() 33 | try: 34 | result = [b''.join(self.data.get(key, [])) for key in keys] 35 | finally: 36 | if lock: 37 | self.lock.release() 38 | return result 39 | 40 | def _iset(self, key, value, lock=True): 41 | """ Idempotent set """ 42 | if lock: 43 | self.lock.acquire() 44 | try: 45 | self.data[key] = [value] 46 | finally: 47 | if lock: 48 | self.lock.release() 49 | 50 | def _delete(self, keys, lock=True): 51 | if lock: 52 | self.lock.acquire() 53 | try: 54 | for key in keys: 55 | if key in self.data: 56 | del self.data[key] 57 | finally: 58 | if lock: 59 | self.lock.release() 60 | 61 | def drop(self): 62 | self._iset_seen.clear() 63 | self.data.clear() 64 | 65 | def __exit__(self, *args): 66 | self.drop() 67 | -------------------------------------------------------------------------------- /partd/encode.py: -------------------------------------------------------------------------------- 1 | from .core import Interface 2 | from .file import File 3 | from toolz import valmap 4 | from .utils import frame, framesplit 5 | 6 | 7 | class Encode(Interface): 8 | def __init__(self, encode, decode, join, partd=None): 9 | if not partd or isinstance(partd, str): 10 | partd = File(partd) 11 | self.partd = partd 12 | self.encode = encode 13 | self.decode = decode 14 | self.join = join 15 | Interface.__init__(self) 16 | 17 | def __getstate__(self): 18 | return self.__dict__ 19 | 20 | __setstate__ = Interface.__setstate__ 21 | 22 | def append(self, data, **kwargs): 23 | data = valmap(self.encode, data) 24 | data = valmap(frame, data) 25 | self.partd.append(data, **kwargs) 26 | 27 | 
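    # append() above framed each encoded value (frame() length-prefixes it);
    # _get() below reverses that: framesplit() yields the individual frames,
    # decode() deserializes each, and join() merges them into a single value.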
def _get(self, keys, **kwargs): 28 | raw = self.partd._get(keys, **kwargs) 29 | return [self.join([self.decode(frame) for frame in framesplit(chunk)]) 30 | for chunk in raw] 31 | 32 | def delete(self, keys, **kwargs): 33 | return self.partd.delete(keys, **kwargs) 34 | 35 | def _iset(self, key, value, **kwargs): 36 | return self.partd.iset(key, frame(self.encode(value)), **kwargs) 37 | 38 | def drop(self): 39 | return self.partd.drop() 40 | 41 | @property 42 | def lock(self): 43 | return self.partd.lock 44 | 45 | def __exit__(self, *args): 46 | self.drop() 47 | self.partd.__exit__(*args) 48 | -------------------------------------------------------------------------------- /partd/file.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | from contextlib import suppress 3 | import os 4 | import shutil 5 | import string 6 | import tempfile 7 | 8 | from .core import Interface 9 | import locket 10 | 11 | 12 | class File(Interface): 13 | def __init__(self, path=None, dir=None): 14 | if not path: 15 | path = tempfile.mkdtemp(suffix='.partd', dir=dir) 16 | cleanup_files.append(path) 17 | self._explicitly_given_path = False 18 | else: 19 | self._explicitly_given_path = True 20 | self.path = path 21 | if not os.path.exists(path): 22 | with suppress(OSError): 23 | os.makedirs(path) 24 | self.lock = locket.lock_file(self.filename('.lock')) 25 | Interface.__init__(self) 26 | 27 | def __getstate__(self): 28 | return {'path': self.path} 29 | 30 | def __setstate__(self, state): 31 | Interface.__setstate__(self, state) 32 | File.__init__(self, state['path']) 33 | 34 | def append(self, data, lock=True, fsync=False, **kwargs): 35 | if lock: self.lock.acquire() 36 | try: 37 | for k, v in data.items(): 38 | fn = self.filename(k) 39 | if not os.path.exists(os.path.dirname(fn)): 40 | os.makedirs(os.path.dirname(fn)) 41 | with open(fn, 'ab') as f: 42 | f.write(v) 43 | if fsync: 44 | os.fsync(f) 45 | finally: 46 | if lock: self.lock.release() 47 | 48 | def _get(self, keys, lock=True, **kwargs): 49 | assert isinstance(keys, (list, tuple, set)) 50 | if lock: 51 | self.lock.acquire() 52 | try: 53 | result = [] 54 | for key in keys: 55 | try: 56 | with open(self.filename(key), 'rb') as f: 57 | result.append(f.read()) 58 | except OSError: 59 | result.append(b'') 60 | finally: 61 | if lock: 62 | self.lock.release() 63 | return result 64 | 65 | def _iset(self, key, value, lock=True): 66 | """ Idempotent set """ 67 | fn = self.filename(key) 68 | if not os.path.exists(os.path.dirname(fn)): 69 | os.makedirs(os.path.dirname(fn)) 70 | if lock: 71 | self.lock.acquire() 72 | try: 73 | with open(self.filename(key), 'wb') as f: 74 | f.write(value) 75 | finally: 76 | if lock: 77 | self.lock.release() 78 | 79 | def _delete(self, keys, lock=True): 80 | if lock: 81 | self.lock.acquire() 82 | try: 83 | for key in keys: 84 | path = filename(self.path, key) 85 | if os.path.exists(path): 86 | os.remove(path) 87 | finally: 88 | if lock: 89 | self.lock.release() 90 | 91 | def drop(self): 92 | if os.path.exists(self.path): 93 | shutil.rmtree(self.path) 94 | self._iset_seen.clear() 95 | os.mkdir(self.path) 96 | 97 | def filename(self, key): 98 | return filename(self.path, key) 99 | 100 | def __exit__(self, *args): 101 | self.drop() 102 | os.rmdir(self.path) 103 | 104 | def __del__(self): 105 | if not self._explicitly_given_path: 106 | self.drop() 107 | os.rmdir(self.path) 108 | 109 | 110 | def filename(path, key): 111 | return os.path.join(path, escape_filename(token(key))) 112 | 113 | 114 
| # http://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename-in-python
115 | valid_chars = "-_.() " + string.ascii_letters + string.digits + os.path.sep
116 | 
117 | 
118 | def escape_filename(fn):
119 |     """ Escape text so that it is a valid filename
120 | 
121 |     >>> escape_filename('Foo!bar?')
122 |     'Foobar'
123 | 
124 |     """
125 |     return ''.join(filter(valid_chars.__contains__, fn))
126 | 
127 | 
128 | 
129 | def token(key):
130 |     """
131 | 
132 |     >>> token('hello')
133 |     'hello'
134 |     >>> token(('hello', 'world'))  # doctest: +SKIP
135 |     'hello/world'
136 |     """
137 |     if isinstance(key, str):
138 |         return key
139 |     elif isinstance(key, tuple):
140 |         return os.path.join(*map(token, key))
141 |     else:
142 |         return str(key)
143 | 
144 | 
145 | cleanup_files = list()
146 | 
147 | @atexit.register
148 | def cleanup():
149 |     for fn in cleanup_files:
150 |         if os.path.exists(fn):
151 |             shutil.rmtree(fn)
152 | 
--------------------------------------------------------------------------------
/partd/numpy.py:
--------------------------------------------------------------------------------
1 | """ Store arrays
2 | 
3 | We put arrays on disk as raw bytes, extending along the first dimension.
4 | Alongside each array x we ensure the value x.dtype which stores the string
5 | description of the array's dtype.
6 | """
7 | from contextlib import suppress
8 | import pickle
9 | 
10 | import numpy as np
11 | from toolz import valmap, identity, partial
12 | from .core import Interface
13 | from .file import File
14 | from .utils import frame, framesplit, suffix
15 | 
16 | 
17 | def serialize_dtype(dt):
18 |     """ Serialize dtype to bytes
19 | 
20 |     >>> serialize_dtype(np.dtype('i4'))
21 |     b'<i4'
22 |     >>> serialize_dtype(np.dtype('M8[us]'))
23 |     b'<M8[us]'
24 |     """
25 |     return dt.str.encode()
26 | 
27 | 
28 | def parse_dtype(s):
29 |     """ Parse text as numpy dtype
30 | 
31 |     >>> parse_dtype(b'i4')
32 |     dtype('int32')
33 | 
34 |     >>> parse_dtype(b"[('a', 'i4')]")
35 |     dtype([('a', '<i4')])
36 |     """
37 |     if s.startswith(b'['):
38 |         return np.dtype(eval(s))  # Dangerous!
39 |     else:
40 |         return np.dtype(s)

[lines 41-107 are not recoverable from this dump: the Numpy wrapper class, the msgpack import fallback, serialize(), and the opening of deserialize(bytes, dtype, copy=False), whose body resumes below]

108 |             if msgpack.version >= (0, 5, 2):
109 |                 unpack_kwargs = {'raw': False}
110 |             else:
111 |                 unpack_kwargs = {'encoding': 'utf-8'}
112 | 
113 |             blocks = [msgpack.unpackb(f, **unpack_kwargs)
114 |                       for f in framesplit(bytes)]
115 |         except Exception:
116 |             blocks = [pickle.loads(f) for f in framesplit(bytes)]
117 | 
118 |         result = np.empty(sum(map(len, blocks)), dtype='O')
119 |         i = 0
120 |         for block in blocks:
121 |             result[i:i + len(block)] = block
122 |             i += len(block)
123 |         return result
124 |     else:
125 |         result = np.frombuffer(bytes, dtype)
126 |         if copy:
127 |             result = result.copy()
128 |         return result
129 | 
130 | 
131 | compress_text = identity
132 | decompress_text = identity
133 | compress_bytes = lambda bytes, itemsize: bytes
134 | decompress_bytes = identity
135 | 
136 | with suppress(ImportError):
137 |     import blosc
138 |     blosc.set_nthreads(1)
139 | 
140 |     compress_bytes = blosc.compress
141 |     decompress_bytes = blosc.decompress
142 | 
143 |     compress_text = partial(blosc.compress, typesize=1)
144 |     decompress_text = blosc.decompress
145 | 
146 | with suppress(ImportError):
147 |     from snappy import compress as compress_text
148 |     from snappy import decompress as decompress_text
149 | 
150 | 
151 | def compress(bytes, dtype):
152 |     if dtype == 'O':
153 |         return compress_text(bytes)
154 |     else:
155 |         return compress_bytes(bytes, dtype.itemsize)
156 | 
157 | 
158 | def decompress(bytes, dtype):
159 |     if dtype == 'O':
160 |         return decompress_text(bytes)
161 |     else:
162 |         return decompress_bytes(bytes)
163 | 
--------------------------------------------------------------------------------
/partd/pandas.py:
--------------------------------------------------------------------------------
1 | from functools
import partial 2 | import pickle 3 | 4 | import pandas as pd 5 | from packaging.version import Version 6 | 7 | PANDAS_GE_210 = Version(pd.__version__).release >= (2, 1, 0) 8 | PANDAS_GE_300 = Version(pd.__version__).major >= 3 9 | 10 | if PANDAS_GE_300: 11 | from pandas.api.internals import create_dataframe_from_blocks 12 | create_block_manager_from_blocks = None 13 | make_block = None 14 | else: 15 | create_dataframe_from_blocks = None 16 | try: 17 | from pandas.core.internals.managers import create_block_manager_from_blocks 18 | except ImportError: 19 | from pandas.core.internals import create_block_manager_from_blocks 20 | 21 | from pandas.core.internals import make_block 22 | 23 | from . import numpy as pnp 24 | from .core import Interface 25 | from .encode import Encode 26 | from .utils import extend, framesplit, frame 27 | from pandas.api.types import is_extension_array_dtype 28 | from pandas.api.extensions import ExtensionArray 29 | 30 | def is_extension_array(x): 31 | return isinstance(x, ExtensionArray) 32 | 33 | 34 | dumps = partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL) 35 | 36 | 37 | 38 | class PandasColumns(Interface): 39 | def __init__(self, partd=None): 40 | self.partd = pnp.Numpy(partd) 41 | Interface.__init__(self) 42 | 43 | def append(self, data, **kwargs): 44 | for k, df in data.items(): 45 | self.iset(extend(k, '.columns'), dumps(list(df.columns))) 46 | self.iset(extend(k, '.index-name'), dumps(df.index.name)) 47 | 48 | # TODO: don't use values, it does some work. Look at _blocks instead 49 | # pframe/cframe do this well 50 | arrays = {extend(k, col): df[col].values 51 | for k, df in data.items() 52 | for col in df.columns} 53 | arrays.update({extend(k, '.index'): df.index.values 54 | for k, df in data.items()}) 55 | # TODO: handle categoricals 56 | self.partd.append(arrays, **kwargs) 57 | 58 | def _get(self, keys, columns=None, **kwargs): 59 | if columns is None: 60 | columns = self.partd.partd.get([extend(k, '.columns') for k in keys], 61 | **kwargs) 62 | columns = list(map(pickle.loads, columns)) 63 | else: 64 | columns = [columns] * len(keys) 65 | index_names = self.partd.partd.get([extend(k, '.index-name') 66 | for k in keys], **kwargs) 67 | index_names = map(pickle.loads, index_names) 68 | 69 | keys = [[extend(k, '.index'), [extend(k, col) for col in cols]] 70 | for k, cols in zip(keys, columns)] 71 | 72 | arrays = self.partd.get(keys, **kwargs) 73 | 74 | return [pd.DataFrame(dict(zip(cols, arrs)), columns=cols, 75 | index=pd.Index(index, name=iname)) 76 | for iname, (index, arrs), cols in zip(index_names, arrays, columns)] 77 | 78 | def __getstate__(self): 79 | return {'partd': self.partd} 80 | 81 | def _iset(self, key, value): 82 | return self.partd._iset(key, value) 83 | 84 | def drop(self): 85 | return self.partd.drop() 86 | 87 | @property 88 | def lock(self): 89 | return self.partd.partd.lock 90 | 91 | def __exit__(self, *args): 92 | self.drop() 93 | self.partd.__exit__(self, *args) 94 | 95 | def __del__(self): 96 | self.partd.__del__() 97 | 98 | 99 | def index_to_header_bytes(ind): 100 | # These have special `__reduce__` methods, just use pickle 101 | if isinstance(ind, (pd.DatetimeIndex, 102 | pd.MultiIndex, 103 | pd.RangeIndex)): 104 | return None, dumps(ind) 105 | 106 | if isinstance(ind, pd.CategoricalIndex): 107 | cat = (ind.ordered, ind.categories) 108 | values = ind.codes 109 | else: 110 | cat = None 111 | values = ind.values 112 | 113 | if is_extension_array_dtype(ind): 114 | return None, dumps(ind) 115 | 116 | header = (type(ind), {k: 
getattr(ind, k, None) for k in ind._attributes}, values.dtype, cat) 117 | bytes = pnp.compress(pnp.serialize(values), values.dtype) 118 | return header, bytes 119 | 120 | 121 | def index_from_header_bytes(header, bytes): 122 | if header is None: 123 | return pickle.loads(bytes) 124 | 125 | typ, attr, dtype, cat = header 126 | data = pnp.deserialize(pnp.decompress(bytes, dtype), dtype, copy=True) 127 | if cat: 128 | data = pd.Categorical.from_codes(data, cat[1], ordered=cat[0]) 129 | return typ.__new__(typ, data=data, **attr) 130 | 131 | 132 | def block_to_header_bytes(block): 133 | values = block.values 134 | if isinstance(values, pd.Categorical): 135 | extension = ('categorical_type', (values.ordered, values.categories)) 136 | values = values.codes 137 | elif isinstance(block, pd.DatetimeTZDtype): 138 | extension = ('datetime64_tz_type', (block.values.tzinfo,)) 139 | values = values.view('i8') 140 | elif is_extension_array_dtype(block.dtype) or is_extension_array(values): 141 | extension = ("other", ()) 142 | else: 143 | extension = ('numpy_type', ()) 144 | 145 | header = (block.mgr_locs.as_array, values.dtype, values.shape, extension) 146 | if extension == ("other", ()): 147 | bytes = pickle.dumps(values) 148 | else: 149 | bytes = pnp.compress(pnp.serialize(values), values.dtype) 150 | return header, bytes 151 | 152 | 153 | def block_from_header_bytes(header, bytes, create_block: bool): 154 | placement, dtype, shape, (extension_type, extension_values) = header 155 | 156 | if extension_type == "other": 157 | values = pickle.loads(bytes) 158 | else: 159 | values = pnp.deserialize(pnp.decompress(bytes, dtype), dtype, 160 | copy=True).reshape(shape) 161 | if extension_type == 'categorical_type': 162 | values = pd.Categorical.from_codes(values, 163 | extension_values[1], 164 | ordered=extension_values[0]) 165 | elif extension_type == 'datetime64_tz_type': 166 | tz_info = extension_values[0] 167 | values = pd.DatetimeIndex(values).tz_localize('utc').tz_convert( 168 | tz_info) 169 | if create_block: 170 | return make_block(values, placement=placement) 171 | return values, placement 172 | 173 | 174 | def serialize(df): 175 | """ Serialize and compress a Pandas DataFrame 176 | 177 | Uses Pandas blocks, snappy, and blosc to deconstruct an array into bytes 178 | """ 179 | col_header, col_bytes = index_to_header_bytes(df.columns) 180 | ind_header, ind_bytes = index_to_header_bytes(df.index) 181 | headers = [col_header, ind_header] 182 | bytes = [col_bytes, ind_bytes] 183 | 184 | for block in df._mgr.blocks: 185 | h, b = block_to_header_bytes(block) 186 | headers.append(h) 187 | bytes.append(b) 188 | 189 | frames = [dumps(headers)] + bytes 190 | return b''.join(map(frame, frames)) 191 | 192 | 193 | def deserialize(bytes): 194 | """ Deserialize and decompress bytes back to a pandas DataFrame """ 195 | frames = list(framesplit(bytes)) 196 | headers = pickle.loads(frames[0]) 197 | bytes = frames[1:] 198 | axes = [index_from_header_bytes(headers[0], bytes[0]), 199 | index_from_header_bytes(headers[1], bytes[1])] 200 | blocks = [block_from_header_bytes(h, b, create_block=not PANDAS_GE_300) 201 | for (h, b) in zip(headers[2:], bytes[2:])] 202 | if PANDAS_GE_300: 203 | return pd.api.internals.create_dataframe_from_blocks(blocks, axes[1], axes[0]) 204 | elif PANDAS_GE_210: 205 | return pd.DataFrame._from_mgr(create_block_manager_from_blocks(blocks, axes), axes=axes) 206 | else: 207 | return pd.DataFrame(create_block_manager_from_blocks(blocks, axes)) 208 | 209 | 210 | def join(dfs): 211 | if not dfs: 212 | 
217 | def join(dfs):
218 |     if not dfs:
219 |         return pd.DataFrame()
220 |     else:
221 |         result = pd.concat(dfs)
222 |         dtypes = {
223 |             col: "category"
224 |             for col in result.columns
225 |             if (
226 |                 isinstance(dfs[0][col].dtype, pd.CategoricalDtype)
227 |                 and not isinstance(result[col].dtype, pd.CategoricalDtype)
228 |             )
229 |         }
230 |         if dtypes:
231 |             result = result.astype(dtypes)
232 |         return result
233 | 
234 | PandasBlocks = partial(Encode, serialize, deserialize, join)
235 | --------------------------------------------------------------------------------
/partd/pickle.py:
--------------------------------------------------------------------------------
1 | """
2 | get/put functions that consume/produce Python lists using Pickle to serialize
3 | """
4 | import pickle
5 | 
6 | from .encode import Encode
7 | from functools import partial
8 | 
9 | def concat(lists):
10 |     return sum(lists, [])
11 | 
12 | Pickle = partial(Encode,
13 |                  partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL),
14 |                  pickle.loads,
15 |                  concat)
16 | --------------------------------------------------------------------------------
/partd/python.py:
--------------------------------------------------------------------------------
1 | """
2 | get/put functions that consume/produce Python lists using msgpack or pickle
3 | to serialize.
4 | 
5 | First we try msgpack (it's faster).  If that fails then we default to pickle.
6 | """
7 | import pickle
8 | 
9 | try:
10 |     from pandas import msgpack
11 | except ImportError:
12 |     try:
13 |         import msgpack
14 |     except ImportError:
15 |         msgpack = False
16 | 
17 | 
18 | from .encode import Encode
19 | from functools import partial
20 | 
21 | 
22 | def dumps(x):
23 |     try:
24 |         return msgpack.packb(x, use_bin_type=True)
25 |     except Exception:  # msgpack missing, or data not msgpack-serializable
26 |         return pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
27 | 
28 | def loads(x):
29 |     try:
30 |         if msgpack.version >= (0, 5, 2):
31 |             unpack_kwargs = {'raw': False}
32 |         else:
33 |             unpack_kwargs = {'encoding': 'utf-8'}
34 |         return msgpack.unpackb(x, **unpack_kwargs)
35 |     except Exception:  # fall back to pickle, matching dumps above
36 |         return pickle.loads(x)
37 | 
38 | 
39 | def concat(lists):
40 |     return sum(lists, [])
41 | 
42 | 
43 | Python = partial(Encode, dumps, loads, concat)
44 | --------------------------------------------------------------------------------
/partd/tests/test_buffer.py:
--------------------------------------------------------------------------------
1 | from partd.dict import Dict
2 | from partd.file import File
3 | from partd.buffer import Buffer, keys_to_flush
4 | import pickle
5 | 
6 | import shutil
7 | import os
8 | 
9 | 
10 | def test_partd():
11 |     a = Dict()
12 |     b = Dict()
13 |     with Buffer(a, b, available_memory=10) as p:
14 |         p.append({'x': b'Hello', 'y': b'abc'})
15 |         assert a.get(['x', 'y']) == [b'Hello', b'abc']
16 | 
17 |         p.append({'x': b'World!', 'y': b'def'})
18 |         assert a.get(['x', 'y']) == [b'', b'abcdef']
19 |         assert b.get(['x', 'y']) == [b'HelloWorld!', b'']
20 | 
21 |         result = p.get(['y', 'x'])
22 |         assert result == [b'abcdef', b'HelloWorld!']
23 | 
24 |         assert p.get('z') == b''
25 | 
26 |         with p.lock:  # uh oh, possible deadlock
27 |             result = p.get(['x'], lock=False)
28 | 
29 | 
30 | def test_keys_to_flush():
31 |     lengths = {'a': 20, 'b': 10, 'c': 15, 'd': 15, 'e': 10, 'f': 25, 'g': 5}
32 |     assert keys_to_flush(lengths, 0.5) == ['f', 'a']
33 | 
34 | 
35 | def test_pickle():
36 |     with Dict() as a:
37 |         with File() as b:
38 |             c = Buffer(a, b)
39 | 
40 |             c.append({'x': b'123'})
41 | 
42 |             d = pickle.loads(pickle.dumps(c))
43 | 
44 |             assert d.get('x') == c.get('x')
45 | 
46 |             pickled_attrs = ('memory_usage', 'lengths', 'available_memory')
47 | for attr in pickled_attrs: 48 | assert hasattr(d, attr) 49 | assert getattr(d, attr) == getattr(c, attr) 50 | # special case Dict and File -- some attrs do not pickle 51 | assert hasattr(d, 'fast') 52 | assert d.fast.data == c.fast.data 53 | assert hasattr(d, 'slow') 54 | assert d.slow.path == c.slow.path 55 | -------------------------------------------------------------------------------- /partd/tests/test_compressed.py: -------------------------------------------------------------------------------- 1 | from partd.compressed import ZLib 2 | 3 | 4 | import shutil 5 | import os 6 | import pickle 7 | 8 | 9 | def test_partd(): 10 | with ZLib() as p: 11 | p.append({'x': b'Hello', 'y': b'abc'}) 12 | p.append({'x': b'World!', 'y': b'def'}) 13 | assert os.path.exists(p.partd.filename('x')) 14 | assert os.path.exists(p.partd.filename('y')) 15 | 16 | result = p.get(['y', 'x']) 17 | assert result == [b'abcdef', b'HelloWorld!'] 18 | 19 | assert p.get('z') == b'' 20 | 21 | with p.lock: # uh oh, possible deadlock 22 | result = p.get(['x'], lock=False) 23 | 24 | assert not os.path.exists(p.partd.path) 25 | 26 | 27 | def test_pickle(): 28 | with ZLib() as p: 29 | p.append({'x': b'123'}) 30 | q = pickle.loads(pickle.dumps(p)) 31 | assert q.get('x') == b'123' 32 | -------------------------------------------------------------------------------- /partd/tests/test_dict.py: -------------------------------------------------------------------------------- 1 | from partd.dict import Dict 2 | 3 | import shutil 4 | import os 5 | 6 | 7 | def test_partd(): 8 | with Dict() as p: 9 | p.append({'x': b'Hello', 'y': b'abc'}) 10 | p.append({'x': b'World!', 'y': b'def'}) 11 | 12 | result = p.get(['y', 'x']) 13 | assert result == [b'abcdef', b'HelloWorld!'] 14 | 15 | assert p.get('z') == b'' 16 | 17 | with p.lock: # uh oh, possible deadlock 18 | result = p.get(['x'], lock=False) 19 | 20 | 21 | def test_key_tuple(): 22 | with Dict() as p: 23 | p.append({('a', 'b'): b'123'}) 24 | assert p.get(('a', 'b')) == b'123' 25 | 26 | 27 | def test_iset(): 28 | with Dict() as p: 29 | p.iset('x', b'123') 30 | assert 'x' in p._iset_seen 31 | assert 'y' not in p._iset_seen 32 | p.iset('x', b'123') 33 | p.iset('x', b'123') 34 | assert p.get('x') == b'123' 35 | 36 | 37 | def test_delete_non_existent_key(): 38 | with Dict() as p: 39 | p.append({'x': b'123'}) 40 | p.delete(['x', 'y']) 41 | assert p.get(['x', 'y']) == [b'', b''] 42 | -------------------------------------------------------------------------------- /partd/tests/test_encode.py: -------------------------------------------------------------------------------- 1 | from partd.file import File 2 | from partd.encode import Encode 3 | 4 | import zlib 5 | import shutil 6 | import os 7 | 8 | 9 | def test_partd(): 10 | with Encode(zlib.compress, zlib.decompress, b''.join) as p: 11 | p.append({'x': b'Hello', 'y': b'abc'}) 12 | p.append({'x': b'World!', 'y': b'def'}) 13 | 14 | result = p.get(['y', 'x']) 15 | assert result == [b'abcdef', b'HelloWorld!'] 16 | 17 | assert p.get('z') == b'' 18 | 19 | with p.lock: # uh oh, possible deadlock 20 | result = p.get(['x'], lock=False) 21 | 22 | 23 | def test_ensure(): 24 | with Encode(zlib.compress, zlib.decompress, b''.join) as p: 25 | p.iset('x', b'123') 26 | p.iset('x', b'123') 27 | p.iset('x', b'123') 28 | assert p.get('x') == b'123' 29 | -------------------------------------------------------------------------------- /partd/tests/test_file.py: -------------------------------------------------------------------------------- 1 | from partd.file 
import File 2 | 3 | import shutil 4 | import os 5 | 6 | 7 | def test_partd(): 8 | with File() as p: 9 | p.append({'x': b'Hello', 'y': b'abc'}) 10 | p.append({'x': b'World!', 'y': b'def'}) 11 | assert os.path.exists(p.filename('x')) 12 | assert os.path.exists(p.filename('y')) 13 | 14 | result = p.get(['y', 'x']) 15 | assert result == [b'abcdef', b'HelloWorld!'] 16 | 17 | assert p.get('z') == b'' 18 | 19 | with p.lock: # uh oh, possible deadlock 20 | result = p.get(['x'], lock=False) 21 | 22 | assert not os.path.exists(p.path) 23 | 24 | 25 | def test_key_tuple(): 26 | with File() as p: 27 | p.append({('a', 'b'): b'123'}) 28 | assert os.path.exists(p.filename(('a', 'b'))) 29 | 30 | 31 | def test_iset(): 32 | with File() as p: 33 | p.iset('x', b'123') 34 | assert 'x' in p._iset_seen 35 | assert 'y' not in p._iset_seen 36 | p.iset('x', b'123') 37 | p.iset('x', b'123') 38 | assert p.get('x') == b'123' 39 | 40 | 41 | def test_nested_get(): 42 | with File() as p: 43 | p.append({'x': b'1', 'y': b'2', 'z': b'3'}) 44 | assert p.get(['x', ['y', 'z']]) == [b'1', [b'2', b'3']] 45 | 46 | 47 | def test_drop(): 48 | with File() as p: 49 | p.append({'x': b'123'}) 50 | p.iset('y', b'abc') 51 | assert p.get('x') == b'123' 52 | assert p.get('y') == b'abc' 53 | 54 | p.drop() 55 | assert p.get('x') == b'' 56 | assert p.get('y') == b'' 57 | 58 | p.append({'x': b'123'}) 59 | p.iset('y', b'def') 60 | assert p.get('x') == b'123' 61 | assert p.get('y') == b'def' 62 | 63 | 64 | def test_del(): 65 | f = File() 66 | 67 | assert f.path 68 | assert os.path.exists(f.path) 69 | 70 | f.__del__() 71 | assert not os.path.exists(f.path) 72 | 73 | with File('Foo') as p: 74 | p.__del__() 75 | assert os.path.exists(p.path) 76 | 77 | 78 | def test_specify_dirname(): 79 | with File(dir=os.getcwd()) as f: 80 | assert os.getcwd() in f.path 81 | -------------------------------------------------------------------------------- /partd/tests/test_numpy.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | np = pytest.importorskip('numpy') # noqa 3 | 4 | import pickle 5 | 6 | import partd 7 | from partd.numpy import Numpy 8 | 9 | 10 | def test_numpy(): 11 | dt = np.dtype([('a', 'i4'), ('b', 'i2'), ('c', 'f8')]) 12 | with Numpy() as p: 13 | p.append({'a': np.array([10, 20, 30], dtype=dt['a']), 14 | 'b': np.array([ 1, 2, 3], dtype=dt['b']), 15 | 'c': np.array([.1, .2, .3], dtype=dt['c'])}) 16 | p.append({'a': np.array([70, 80, 90], dtype=dt['a']), 17 | 'b': np.array([ 7, 8, 9], dtype=dt['b']), 18 | 'c': np.array([.7, .8, .9], dtype=dt['c'])}) 19 | 20 | result = p.get(['a', 'c']) 21 | assert (result[0] == np.array([10, 20, 30, 70, 80, 90],dtype=dt['a'])).all() 22 | assert (result[1] == np.array([.1, .2, .3, .7, .8, .9],dtype=dt['c'])).all() 23 | 24 | with p.lock: # uh oh, possible deadlock 25 | result = p.get(['a'], lock=False) 26 | 27 | 28 | def test_nested(): 29 | with Numpy() as p: 30 | p.append({'x': np.array([1, 2, 3]), 31 | ('y', 1): np.array([4, 5, 6]), 32 | ('z', 'a', 3): np.array([.1, .2, .3])}) 33 | assert (p.get(('z', 'a', 3)) == np.array([.1, .2, .3])).all() 34 | 35 | 36 | def test_serialization(): 37 | with Numpy() as p: 38 | p.append({'x': np.array([1, 2, 3])}) 39 | q = pickle.loads(pickle.dumps(p)) 40 | assert (q.get('x') == [1, 2, 3]).all() 41 | 42 | 43 | array_of_lists = np.empty(3, dtype='O') 44 | array_of_lists[:] = [[1, 2], [3, 4], [5, 6]] 45 | 46 | 47 | @pytest.mark.parametrize('x', [np.array(['Alice', 'Bob', 'Charlie'], dtype='O'), 48 | array_of_lists]) 49 | def 
test_object_dtype(x): 50 | with Numpy() as p: 51 | p.append({'x': x}) 52 | p.append({'x': x}) 53 | assert isinstance(p.get('x'), np.ndarray) 54 | assert (p.get('x') == np.concatenate([x, x])).all() 55 | 56 | 57 | def test_datetime_types(): 58 | x = np.array(['2014-01-01T12:00:00'], dtype='M8[us]') 59 | y = np.array(['2014-01-01T12:00:00'], dtype='M8[s]') 60 | with Numpy() as p: 61 | p.append({'x': x, 'y': y}) 62 | assert p.get('x').dtype == x.dtype 63 | assert p.get('y').dtype == y.dtype 64 | 65 | 66 | def test_non_utf8_bytes(): 67 | a = np.array([b'\xc3\x28', b'\xa0\xa1', b'\xe2\x28\xa1', b'\xe2\x82\x28', 68 | b'\xf0\x28\x8c\xbc'], dtype='O') 69 | s = partd.numpy.serialize(a) 70 | assert (partd.numpy.deserialize(s, 'O') == a).all() 71 | -------------------------------------------------------------------------------- /partd/tests/test_pandas.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | pytest.importorskip('pandas') # noqa 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import pandas.testing as tm 7 | import os 8 | 9 | try: 10 | import pyarrow as pa 11 | except ImportError: 12 | pa = None 13 | 14 | from partd.pandas import PandasColumns, PandasBlocks, serialize, deserialize 15 | 16 | 17 | df1 = pd.DataFrame({'a': [1, 2, 3], 18 | 'b': [1., 2., 3.], 19 | 'c': ['x', 'y', 'x']}, columns=['a', 'b', 'c'], 20 | index=pd.Index([1, 2, 3], name='myindex')) 21 | 22 | df2 = pd.DataFrame({'a': [10, 20, 30], 23 | 'b': [10., 20., 30.], 24 | 'c': ['X', 'Y', 'X']}, columns=['a', 'b', 'c'], 25 | index=pd.Index([10, 20, 30], name='myindex')) 26 | 27 | 28 | def test_PandasColumns(): 29 | with PandasColumns() as p: 30 | assert os.path.exists(p.partd.partd.path) 31 | 32 | p.append({'x': df1, 'y': df2}) 33 | p.append({'x': df2, 'y': df1}) 34 | assert os.path.exists(p.partd.partd.filename('x')) 35 | assert os.path.exists(p.partd.partd.filename(('x', 'a'))) 36 | assert os.path.exists(p.partd.partd.filename(('x', '.index'))) 37 | assert os.path.exists(p.partd.partd.filename('y')) 38 | 39 | result = p.get(['y', 'x']) 40 | tm.assert_frame_equal(result[0], pd.concat([df2, df1])) 41 | tm.assert_frame_equal(result[1], pd.concat([df1, df2])) 42 | 43 | with p.lock: # uh oh, possible deadlock 44 | result = p.get(['x'], lock=False) 45 | 46 | assert not os.path.exists(p.partd.partd.path) 47 | 48 | 49 | def test_column_selection(): 50 | with PandasColumns('foo') as p: 51 | p.append({'x': df1, 'y': df2}) 52 | p.append({'x': df2, 'y': df1}) 53 | result = p.get('x', columns=['c', 'b']) 54 | tm.assert_frame_equal(result, pd.concat([df1, df2])[['c', 'b']]) 55 | 56 | 57 | def test_PandasBlocks(): 58 | with PandasBlocks() as p: 59 | assert os.path.exists(p.partd.path) 60 | 61 | p.append({'x': df1, 'y': df2}) 62 | p.append({'x': df2, 'y': df1}) 63 | assert os.path.exists(p.partd.filename('x')) 64 | assert os.path.exists(p.partd.filename('y')) 65 | 66 | result = p.get(['y', 'x']) 67 | tm.assert_frame_equal(result[0], pd.concat([df2, df1])) 68 | tm.assert_frame_equal(result[1], pd.concat([df1, df2])) 69 | 70 | with p.lock: # uh oh, possible deadlock 71 | result = p.get(['x'], lock=False) 72 | 73 | assert not os.path.exists(p.partd.path) 74 | 75 | 76 | @pytest.mark.parametrize('ordered', [False, True]) 77 | def test_serialize_categoricals(ordered): 78 | frame = pd.DataFrame({'x': [1, 2, 3, 4], 79 | 'y': pd.Categorical(['c', 'a', 'b', 'a'], 80 | ordered=ordered)}, 81 | index=pd.Categorical(['x', 'y', 'z', 'x'], 82 | ordered=ordered)) 83 | frame.index.name = 'foo' 84 | 
frame.columns.name = 'bar' 85 | 86 | for ind, df in [(0, frame), (1, frame.T)]: 87 | df2 = deserialize(serialize(df)) 88 | tm.assert_frame_equal(df, df2) 89 | 90 | 91 | def test_serialize_multi_index(): 92 | df = pd.DataFrame({'x': ['a', 'b', 'c', 'a', 'b', 'c'], 93 | 'y': [1, 2, 3, 4, 5, 6], 94 | 'z': [7., 8, 9, 10, 11, 12]}) 95 | df = df.groupby([df.x, df.y]).sum() 96 | df.index.name = 'foo' 97 | df.columns.name = 'bar' 98 | 99 | df2 = deserialize(serialize(df)) 100 | tm.assert_frame_equal(df, df2) 101 | 102 | 103 | @pytest.mark.parametrize('base', [ 104 | pd.Timestamp('1987-03-3T01:01:01+0001'), 105 | pd.Timestamp('1987-03-03 01:01:01-0600', tz='US/Central'), 106 | ]) 107 | def test_serialize(base): 108 | df = pd.DataFrame({'x': [ 109 | base + pd.Timedelta(seconds=i) 110 | for i in np.random.randint(0, 1000, size=10)], 111 | 'y': list(range(10)), 112 | 'z': pd.date_range('2017', periods=10)}) 113 | df2 = deserialize(serialize(df)) 114 | tm.assert_frame_equal(df, df2) 115 | 116 | 117 | def test_other_extension_types(): 118 | pytest.importorskip("pandas", minversion="0.25.0") 119 | a = pd.array([pd.Period("2000"), pd.Period("2001")]) 120 | df = pd.DataFrame({"A": a}) 121 | df2 = deserialize(serialize(df)) 122 | tm.assert_frame_equal(df, df2) 123 | 124 | @pytest.mark.parametrize("dtype", ["Int64", "Int32", "Float64", "Float32"]) 125 | def test_index_numeric_extension_types(dtype): 126 | pytest.importorskip("pandas", minversion="1.4.0") 127 | 128 | df = pd.DataFrame({"x": [1, 2, 3]}, index=[4, 5, 6]) 129 | df.index = df.index.astype(dtype) 130 | df2 = deserialize(serialize(df)) 131 | tm.assert_frame_equal(df, df2) 132 | 133 | @pytest.mark.parametrize( 134 | "dtype", 135 | [ 136 | "string[python]", 137 | pytest.param( 138 | "string[pyarrow]", 139 | marks=pytest.mark.skipif(pa is None, reason="Requires pyarrow"), 140 | ), 141 | ], 142 | ) 143 | def test_index_non_numeric_extension_types(dtype): 144 | pytest.importorskip("pandas", minversion="1.4.0") 145 | df = pd.DataFrame({"x": [1, 2, 3]}, index=["a", "b", "c"]) 146 | df.index = df.index.astype(dtype) 147 | df2 = deserialize(serialize(df)) 148 | tm.assert_frame_equal(df, df2) 149 | 150 | 151 | def test_categorical_concat(): 152 | pytest.importorskip("pandas", minversion="2") 153 | 154 | df1 = pd.DataFrame({"a": ["x", "y"]}, dtype="category") 155 | df2 = pd.DataFrame({"a": ["y", "z"]}, dtype="category") 156 | 157 | with PandasBlocks() as p: 158 | p.append({'x': df1}) 159 | p.append({'x': df2}) 160 | 161 | result = p.get(["x"]) 162 | pd.testing.assert_frame_equal(result[0], pd.concat([df1, df2]).astype("category")) 163 | -------------------------------------------------------------------------------- /partd/tests/test_partd.py: -------------------------------------------------------------------------------- 1 | from partd import File 2 | from partd.core import token, escape_filename, filename 3 | from partd import core 4 | import os 5 | import shutil 6 | from contextlib import contextmanager 7 | 8 | 9 | def test_partd(): 10 | path = 'tmp.partd' 11 | 12 | with File(path) as p: 13 | p.append({'x': b'Hello', 'y': b'abc'}) 14 | p.append({'x': b'World!', 'y': b'def'}) 15 | assert os.path.exists(p.filename('x')) 16 | assert os.path.exists(p.filename('y')) 17 | 18 | result = p.get(['y', 'x']) 19 | assert result == [b'abcdef', b'HelloWorld!'] 20 | 21 | assert p.get('z') == b'' 22 | 23 | with p.lock: # uh oh, possible deadlock 24 | result = p.get(['x'], lock=False) 25 | 26 | assert not os.path.exists(path) 27 | 28 | 29 | def test_key_tuple(): 30 | with 
File('foo') as p: 31 | p.append({('a', 'b'): b'123'}) 32 | assert os.path.exists(os.path.join(p.path, 'a', 'b')) 33 | 34 | 35 | def test_ensure(): 36 | with File('foo') as p: 37 | p.iset('x', b'123') 38 | p.iset('x', b'123') 39 | p.iset('x', b'123') 40 | 41 | assert p.get('x') == b'123' 42 | 43 | 44 | def test_filenames(): 45 | assert token('hello') == 'hello' 46 | assert token(('hello', 'world')) == os.path.join('hello', 'world') 47 | assert escape_filename(os.path.join('a', 'b')) == os.path.join('a', 'b') 48 | assert filename('dir', ('a', 'b')) == os.path.join('dir', 'a', 'b') 49 | -------------------------------------------------------------------------------- /partd/tests/test_pickle.py: -------------------------------------------------------------------------------- 1 | from partd.pickle import Pickle 2 | 3 | 4 | import os 5 | import shutil 6 | 7 | def test_pickle(): 8 | with Pickle() as p: 9 | p.append({'x': ['Hello', 'World!'], 'y': [1, 2, 3]}) 10 | p.append({'x': ['Alice', 'Bob!'], 'y': [4, 5, 6]}) 11 | assert os.path.exists(p.partd.filename('x')) 12 | assert os.path.exists(p.partd.filename('y')) 13 | 14 | result = p.get(['y', 'x']) 15 | assert result == [[1, 2, 3, 4, 5, 6], 16 | ['Hello', 'World!', 'Alice', 'Bob!']] 17 | 18 | with p.lock: # uh oh, possible deadlock 19 | result = p.get(['x'], lock=False) 20 | 21 | assert not os.path.exists(p.partd.path) 22 | 23 | 24 | def test_ensure(): 25 | with Pickle() as p: 26 | p.iset('x', [1, 2, 3]) 27 | p.iset('x', [1, 2, 3]) 28 | 29 | assert p.get('x') == [1, 2, 3] 30 | -------------------------------------------------------------------------------- /partd/tests/test_python.py: -------------------------------------------------------------------------------- 1 | from partd.python import dumps, loads 2 | 3 | 4 | import os 5 | import shutil 6 | from math import sin 7 | 8 | 9 | def test_pack_unpack(): 10 | data = [1, 2, b'Hello', 'Hello'] 11 | assert loads(dumps(data)) == data 12 | 13 | data = [1, 2, sin] 14 | assert loads(dumps(data)) == data 15 | -------------------------------------------------------------------------------- /partd/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from partd.utils import frame, framesplit 2 | import struct 3 | 4 | 5 | def test_frame(): 6 | assert frame(b'Hello') == struct.pack('Q', 5) + b'Hello' 7 | 8 | 9 | def test_framesplit(): 10 | L = [b'Hello', b'World!', b'123'] 11 | assert list(framesplit(b''.join(map(frame, L)))) == L 12 | -------------------------------------------------------------------------------- /partd/tests/test_zmq.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | pytest.importorskip('zmq') 3 | 4 | from partd.zmq import Server, keys_to_flush, File, Client 5 | from partd import core, Dict 6 | from threading import Thread 7 | from time import sleep 8 | from contextlib import contextmanager 9 | import pickle 10 | 11 | import os 12 | import shutil 13 | 14 | 15 | def test_server(): 16 | s = Server() 17 | try: 18 | s.start() 19 | s.append({'x': b'abc', 'y': b'1234'}) 20 | s.append({'x': b'def', 'y': b'5678'}) 21 | 22 | assert s.get(['x']) == [b'abcdef'] 23 | assert s.get(['x', 'y']) == [b'abcdef', b'12345678'] 24 | 25 | assert s.get(['x']) == [b'abcdef'] 26 | finally: 27 | s.close() 28 | 29 | 30 | def dont_test_flow_control(): 31 | path = 'bar' 32 | if os.path.exists('bar'): 33 | shutil.rmtree('bar') 34 | s = Server('bar', available_memory=1, n_outstanding_writes=3, start=False) 35 | p 
= Client(s.address) 36 | try: 37 | listen_thread = Thread(target=s.listen) 38 | listen_thread.start() 39 | """ Don't start these threads 40 | self._write_to_disk_thread = Thread(target=self._write_to_disk) 41 | self._write_to_disk_thread.start() 42 | self._free_frozen_sockets_thread = Thread(target=self._free_frozen_sockets) 43 | self._free_frozen_sockets_thread.start() 44 | """ 45 | p.append({'x': b'12345'}) 46 | sleep(0.1) 47 | assert s._out_disk_buffer.qsize() == 1 48 | p.append({'x': b'12345'}) 49 | p.append({'x': b'12345'}) 50 | sleep(0.1) 51 | assert s._out_disk_buffer.qsize() == 3 52 | 53 | held_append = Thread(target=p.append, args=({'x': b'123'},)) 54 | held_append.start() 55 | 56 | sleep(0.1) 57 | assert held_append.is_alive() # held! 58 | 59 | assert not s._frozen_sockets.empty() 60 | 61 | write_to_disk_thread = Thread(target=s._write_to_disk) 62 | write_to_disk_thread.start() 63 | free_frozen_sockets_thread = Thread(target=s._free_frozen_sockets) 64 | free_frozen_sockets_thread.start() 65 | 66 | sleep(0.2) 67 | assert not held_append.is_alive() 68 | assert s._frozen_sockets.empty() 69 | finally: 70 | s.close() 71 | 72 | 73 | @contextmanager 74 | def partd_server(**kwargs): 75 | with Server(**kwargs) as server: 76 | with Client(server.address) as p: 77 | yield (p, server) 78 | 79 | 80 | def test_partd_object(): 81 | with partd_server() as (p, server): 82 | p.append({'x': b'Hello', 'y': b'abc'}) 83 | p.append({'x': b'World!', 'y': b'def'}) 84 | 85 | result = p.get(['y', 'x']) 86 | assert result == [b'abcdef', b'HelloWorld!'] 87 | 88 | 89 | def test_delete(): 90 | with partd_server() as (p, server): 91 | p.append({'x': b'Hello'}) 92 | assert p.get('x') == b'Hello' 93 | p.delete(['x']) 94 | assert p.get('x') == b'' 95 | 96 | 97 | def test_iset(): 98 | with partd_server() as (p, server): 99 | p.iset('x', b'111') 100 | p.iset('x', b'111') 101 | assert p.get('x') == b'111' 102 | 103 | 104 | def test_tuple_keys(): 105 | with partd_server() as (p, server): 106 | p.append({('x', 'y'): b'123'}) 107 | assert p.get(('x', 'y')) == b'123' 108 | 109 | 110 | def test_serialization(): 111 | with partd_server() as (p, server): 112 | p.append({'x': b'123'}) 113 | q = pickle.loads(pickle.dumps(p)) 114 | assert q.get('x') == b'123' 115 | 116 | 117 | def test_drop(): 118 | with partd_server() as (p, server): 119 | p.append({'x': b'123'}) 120 | p.drop() 121 | assert p.get('x') == b'' 122 | 123 | 124 | def dont_test_server_autocreation(): 125 | with Client() as p: 126 | p.append({'x': b'123'}) 127 | assert p.get('x') == b'123' 128 | -------------------------------------------------------------------------------- /partd/utils.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | import os 3 | import shutil 4 | import tempfile 5 | import struct 6 | 7 | 8 | def raises(exc, lamda): 9 | try: 10 | lamda() 11 | return False 12 | except exc: 13 | return True 14 | 15 | 16 | @contextmanager 17 | def tmpfile(extension=''): 18 | extension = '.' + extension.lstrip('.') 19 | handle, filename = tempfile.mkstemp(extension) 20 | os.close(handle) 21 | os.remove(filename) 22 | 23 | try: 24 | yield filename 25 | finally: 26 | if os.path.exists(filename): 27 | if os.path.isdir(filename): 28 | shutil.rmtree(filename) 29 | else: 30 | os.remove(filename) 31 | 32 | 33 | def frame(bytes): 34 | """ Pack the length of the bytes in front of the bytes 35 | 36 | TODO: This does a full copy. 
This should maybe be inlined somehow
37 |     wherever this gets used instead.  My laptop shows a data bandwidth of
38 |     2GB/s
39 |     """
40 |     return struct.pack('Q', len(bytes)) + bytes
41 | 
42 | 
43 | def framesplit(bytes):
44 |     """ Split buffer into frames of concatenated chunks
45 | 
46 |     >>> data = frame(b'Hello') + frame(b'World')
47 |     >>> list(framesplit(data))  # doctest: +SKIP
48 |     [b'Hello', b'World']
49 |     """
50 |     i = 0; n = len(bytes)
51 |     # read an 8-byte length prefix, then yield that many bytes, repeatedly
52 |     while i < n:
53 |         nbytes = struct.unpack('Q', bytes[i:i+8])[0]
54 |         i += 8
55 |         yield bytes[i: i + nbytes]
56 |         i += nbytes
57 | 
58 | 
59 | def partition_all(n, bytes):
60 |     """ Partition bytes into evenly sized blocks
61 | 
62 |     The final block holds the remainder and so may not be of equal size
63 | 
64 |     >>> list(partition_all(2, b'Hello'))
65 |     [b'He', b'll', b'o']
66 | 
67 |     See Also:
68 |         toolz.partition_all
69 |     """
70 |     if len(bytes) < n:  # zero copy fast common case
71 |         yield bytes
72 |     else:
73 |         for i in range(0, len(bytes), n):
74 |             yield bytes[i: i+n]
75 | 
76 | 
77 | def nested_get(ind, coll, lazy=False):
78 |     """ Get nested index from collection
79 | 
80 |     Examples
81 |     --------
82 | 
83 |     >>> nested_get(1, 'abc')
84 |     'b'
85 |     >>> nested_get([1, 0], 'abc')
86 |     ['b', 'a']
87 |     >>> nested_get([[1, 0], [0, 1]], 'abc')
88 |     [['b', 'a'], ['a', 'b']]
89 |     """
90 |     if isinstance(ind, list):
91 |         if lazy:
92 |             return (nested_get(i, coll, lazy=lazy) for i in ind)
93 |         else:
94 |             return [nested_get(i, coll, lazy=lazy) for i in ind]
95 |     else:
96 |         return coll[ind]
97 | 
98 | 
99 | def flatten(seq):
100 |     """
101 | 
102 |     >>> list(flatten([1]))
103 |     [1]
104 | 
105 |     >>> list(flatten([[1, 2], [1, 2]]))
106 |     [1, 2, 1, 2]
107 | 
108 |     >>> list(flatten([[[1], [2]], [[1], [2]]]))
109 |     [1, 2, 1, 2]
110 | 
111 |     >>> list(flatten(((1, 2), (1, 2))))  # Don't flatten tuples
112 |     [(1, 2), (1, 2)]
113 | 
114 |     >>> list(flatten((1, 2, [3, 4])))  # support heterogeneous
115 |     [1, 2, 3, 4]
116 |     """
117 |     for item in seq:
118 |         if isinstance(item, list):
119 |             yield from flatten(item)
120 |         else:
121 |             yield item
122 | 
123 | 
124 | def suffix(key, term):
125 |     """ suffix a key with a suffix
126 | 
127 |     Works if the key is a string or a tuple
128 | 
129 |     >>> suffix('x', '.dtype')
130 |     'x.dtype'
131 |     >>> suffix(('a', 'b', 'c'), '.dtype')
132 |     ('a', 'b', 'c.dtype')
133 |     """
134 |     if isinstance(key, str):
135 |         return key + term
136 |     elif isinstance(key, tuple):
137 |         return key[:-1] + (suffix(key[-1], term),)
138 |     else:
139 |         return suffix(str(key), term)
140 | 
141 | 
142 | def extend(key, term):
143 |     """ extend a key with another element in a tuple
144 | 
145 |     Works if the key is a string or a tuple
146 | 
147 |     >>> extend('x', '.dtype')
148 |     ('x', '.dtype')
149 |     >>> extend(('a', 'b', 'c'), '.dtype')
150 |     ('a', 'b', 'c', '.dtype')
151 |     """
152 |     if isinstance(term, tuple):
153 |         pass
154 |     elif isinstance(term, str):
155 |         term = (term,)
156 |     else:
157 |         term = (str(term),)
158 | 
159 |     if not isinstance(key, tuple):
160 |         key = (key,)
161 | 
162 |     return key + term
163 | --------------------------------------------------------------------------------
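The zmq module below exposes a partd over a ZeroMQ ROUTER/DEALER socket pair, with a small ack-based protocol for flow control. As orientation before the source, a minimal usage sketch pieced together from partd/tests/test_zmq.py earlier in this tree (illustrative only; the server defaults to a Buffer-over-Dict-and-File partd, and both ends work as context managers):

    from partd.zmq import Server, Client

    with Server() as server:
        with Client(server.address) as p:
            p.append({'x': b'Hello'})
            assert p.get('x') == b'Hello'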
/partd/zmq.py:
--------------------------------------------------------------------------------
1 | import zmq
2 | import logging
3 | from itertools import chain
4 | from bisect import bisect
5 | import socket
6 | from operator import add
7 | from time import sleep, time
8 | from toolz import accumulate, topk, pluck, merge, keymap
9 | import uuid
10 | from collections import defaultdict
11 | from contextlib import contextmanager, suppress
12 | from threading import Thread, Lock
13 | from datetime import datetime
14 | from multiprocessing import Process
15 | import traceback
16 | import sys
17 | from .dict import Dict
18 | from .file import File
19 | from .buffer import Buffer
20 | from . import core
21 | 
22 | 
23 | tuple_sep = b'-|-'
24 | 
25 | logger = logging.getLogger(__name__)
26 | 
27 | 
28 | @contextmanager
29 | def logerrors():
30 |     try:
31 |         yield
32 |     except Exception as e:
33 |         logger.exception(e)
34 |         raise
35 | 
36 | 
37 | class Server:
38 |     def __init__(self, partd=None, bind=None, start=True, block=False,
39 |                  hostname=None):
40 |         self.context = zmq.Context()
41 |         if partd is None:
42 |             partd = Buffer(Dict(), File())
43 |         self.partd = partd
44 | 
45 |         self.socket = self.context.socket(zmq.ROUTER)
46 | 
47 |         if hostname is None:
48 |             hostname = socket.gethostname()
49 |         if isinstance(bind, str):
50 |             bind = bind.encode()
51 |         if bind is None:
52 |             port = self.socket.bind_to_random_port('tcp://*')
53 |         else:
54 |             self.socket.bind(bind)
55 |             port = int(bind.decode().split(':')[-1].rstrip('/'))  # bind is bytes here
56 |         self.address = ('tcp://%s:%d' % (hostname, port)).encode()
57 | 
58 |         self.status = 'created'
59 | 
60 |         self.partd.lock.acquire()
61 |         self._lock = Lock()
62 |         self._socket_lock = Lock()
63 | 
64 |         if start:
65 |             self.start()
66 | 
67 |         if block:
68 |             self.block()
69 | 
70 |     def start(self):
71 |         if self.status != 'run':
72 |             self.status = 'run'
73 |             self._listen_thread = Thread(target=self.listen)
74 |             self._listen_thread.start()
75 |             logger.debug('Start server at %s', self.address)
76 | 
77 |     def block(self):
78 |         """ Block until all threads close """
79 |         try:
80 |             self._listen_thread.join()
81 |         except AttributeError:
82 |             pass
83 | 
84 |     def listen(self):
85 |         with logerrors():
86 |             logger.debug('Start listening %s', self.address)
87 |             while self.status != 'closed':
88 |                 if not self.socket.poll(100):
89 |                     continue
90 | 
91 |                 with self._socket_lock:
92 |                     payload = self.socket.recv_multipart()
93 | 
94 |                 address, command, payload = payload[0], payload[1], payload[2:]
95 |                 logger.debug('Server receives %s %s', address, command)
96 |                 if command == b'close':
97 |                     logger.debug('Server closes')
98 |                     self.ack(address)
99 |                     self.status = 'closed'
100 |                     break
101 |                     # self.close()
102 | 
103 |                 elif command == b'append':
104 |                     keys, values = payload[::2], payload[1::2]
105 |                     keys = list(map(deserialize_key, keys))
106 |                     data = dict(zip(keys, values))
107 |                     self.partd.append(data, lock=False)
108 |                     logger.debug('Server appends %d keys', len(data))
109 |                     self.ack(address)
110 | 
111 |                 elif command == b'iset':
112 |                     key, value = payload
113 |                     key = deserialize_key(key)
114 |                     self.partd.iset(key, value, lock=False)
115 |                     self.ack(address)
116 | 
117 |                 elif command == b'get':
118 |                     keys = list(map(deserialize_key, payload))
119 |                     logger.debug('get %s', keys)
120 |                     result = self.get(keys)
121 |                     self.send_to_client(address, result)
122 |                     self.ack(address, flow_control=False)
123 | 
124 |                 elif command == b'delete':
125 |                     keys = list(map(deserialize_key, payload))
126 |                     logger.debug('delete %s', keys)
127 |                     self.partd.delete(keys, lock=False)
128 |                     self.ack(address, flow_control=False)
129 | 
130 |                 elif command == b'syn':
131 |                     self.ack(address)
132 | 
133 |                 elif command == b'drop':
134 |                     self.drop()
135 |                     self.ack(address)
136 | 
137 |                 else:
138 |                     logger.debug("Unknown command: %s", command)
139 |                     raise ValueError("Unknown command: " + str(command))
140 | 
141 |     def send_to_client(self, address, result):
142 |         with logerrors():
143 | if not isinstance(result, list): 144 | result = [result] 145 | with self._socket_lock: 146 | self.socket.send_multipart([address] + result) 147 | 148 | def ack(self, address, flow_control=True): 149 | with logerrors(): 150 | logger.debug('Server sends ack') 151 | self.send_to_client(address, b'ack') 152 | 153 | def append(self, data): 154 | self.partd.append(data, lock=False) 155 | logger.debug('Server appends %d keys', len(data)) 156 | 157 | def drop(self): 158 | with logerrors(): 159 | self.partd.drop() 160 | 161 | def get(self, keys): 162 | with logerrors(): 163 | logger.debug('Server gets keys: %s', keys) 164 | with self._lock: 165 | result = self.partd.get(keys, lock=False) 166 | return result 167 | 168 | def close(self): 169 | logger.debug('Server closes') 170 | self.status = 'closed' 171 | self.block() 172 | with suppress(zmq.error.ZMQError): 173 | self.socket.close(1) 174 | with suppress(zmq.error.ZMQError): 175 | self.context.destroy(3) 176 | self.partd.lock.release() 177 | 178 | def __enter__(self): 179 | self.start() 180 | return self 181 | 182 | def __exit__(self, *args): 183 | self.close() 184 | self.partd.__exit__(*args) 185 | 186 | 187 | def keys_to_flush(lengths, fraction=0.1, maxcount=100000): 188 | """ Which keys to remove 189 | 190 | >>> lengths = {'a': 20, 'b': 10, 'c': 15, 'd': 15, 191 | ... 'e': 10, 'f': 25, 'g': 5} 192 | >>> keys_to_flush(lengths, 0.5) 193 | ['f', 'a'] 194 | """ 195 | top = topk(max(len(lengths) // 2, 1), 196 | lengths.items(), 197 | key=1) 198 | total = sum(lengths.values()) 199 | cutoff = min(maxcount, max(1, 200 | bisect(list(accumulate(add, pluck(1, top))), 201 | total * fraction))) 202 | result = [k for k, v in top[:cutoff]] 203 | assert result 204 | return result 205 | 206 | 207 | def serialize_key(key): 208 | """ 209 | 210 | >>> serialize_key('x') 211 | b'x' 212 | >>> serialize_key(('a', 'b', 1)) 213 | b'a-|-b-|-1' 214 | """ 215 | if isinstance(key, tuple): 216 | return tuple_sep.join(map(serialize_key, key)) 217 | if isinstance(key, bytes): 218 | return key 219 | if isinstance(key, str): 220 | return key.encode() 221 | return str(key).encode() 222 | 223 | 224 | def deserialize_key(text): 225 | """ 226 | 227 | >>> deserialize_key(b'x') 228 | b'x' 229 | >>> deserialize_key(b'a-|-b-|-1') 230 | (b'a', b'b', b'1') 231 | """ 232 | if tuple_sep in text: 233 | return tuple(text.split(tuple_sep)) 234 | else: 235 | return text 236 | 237 | 238 | from .core import Interface 239 | from .file import File 240 | 241 | 242 | class Client(Interface): 243 | def __init__(self, address=None, create_server=False, **kwargs): 244 | self.address = address 245 | self.context = zmq.Context() 246 | self.socket = self.context.socket(zmq.DEALER) 247 | logger.debug('Client connects to %s', address) 248 | self.socket.connect(address) 249 | self.send(b'syn', [], ack_required=False) 250 | self.lock = NotALock() # Server sequentializes everything 251 | Interface.__init__(self) 252 | 253 | def __getstate__(self): 254 | return {'address': self.address} 255 | 256 | def __setstate__(self, state): 257 | self.__init__(state['address']) 258 | logger.debug('Reconstruct client from pickled state') 259 | 260 | def send(self, command, payload, recv=False, ack_required=True): 261 | if ack_required: 262 | ack = self.socket.recv_multipart() 263 | assert ack == [b'ack'] 264 | logger.debug('Client sends command: %s', command) 265 | self.socket.send_multipart([command] + payload) 266 | if recv: 267 | result = self.socket.recv_multipart() 268 | else: 269 | result = None 270 | return 
result 271 | 272 | def _get(self, keys, lock=None): 273 | """ 274 | 275 | Lock argument is ignored. Everything is sequential (I think) 276 | """ 277 | logger.debug('Client gets %s %s', self.address, keys) 278 | keys = list(map(serialize_key, keys)) 279 | return self.send(b'get', keys, recv=True) 280 | 281 | def append(self, data, lock=None): 282 | logger.debug('Client appends %s %s', self.address, str(len(data)) + ' keys') 283 | data = keymap(serialize_key, data) 284 | payload = list(chain.from_iterable(data.items())) 285 | self.send(b'append', payload) 286 | 287 | def _delete(self, keys, lock=None): 288 | logger.debug('Client deletes %s %s', self.address, str(len(keys)) + ' keys') 289 | keys = list(map(serialize_key, keys)) 290 | self.send(b'delete', keys) 291 | 292 | def _iset(self, key, value): 293 | self.send(b'iset', [serialize_key(key), value]) 294 | 295 | def drop(self): 296 | self.send(b'drop', []) 297 | sleep(0.05) 298 | 299 | def close_server(self): 300 | self.send(b'close', []) 301 | 302 | def close(self): 303 | if hasattr(self, 'server_process'): 304 | with suppress(zmq.error.ZMQError): 305 | self.close_server() 306 | self.server_process.join() 307 | with suppress(zmq.error.ZMQError): 308 | self.socket.close(1) 309 | with suppress(zmq.error.ZMQError): 310 | self.context.destroy(1) 311 | 312 | def __exit__(self, type, value, traceback): 313 | self.drop() 314 | self.close() 315 | 316 | def __del__(self): 317 | self.close() 318 | 319 | 320 | class NotALock: 321 | def acquire(self): pass 322 | def release(self): pass 323 | 324 | def __enter__(self): 325 | return self 326 | 327 | def __exit__(self, *args): 328 | pass 329 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.2", "versioneer[toml]==0.29"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "partd" 7 | description = "Appendable key-value storage" 8 | maintainers = [{name = "Matthew Rocklin", email = "mrocklin@gmail.com"}] 9 | license = {text = "BSD"} 10 | keywords = [] 11 | classifiers = [ 12 | "Programming Language :: Python :: 3", 13 | "Programming Language :: Python :: 3.9", 14 | "Programming Language :: Python :: 3.10", 15 | "Programming Language :: Python :: 3.11", 16 | "Programming Language :: Python :: 3.12", 17 | ] 18 | readme = "README.rst" 19 | urls = {Homepage = "http://github.com/dask/partd/"} 20 | requires-python = ">=3.9" 21 | dynamic = ["version"] 22 | dependencies = [ 23 | "locket", 24 | "toolz", 25 | ] 26 | 27 | [project.optional-dependencies] 28 | complete = [ 29 | "numpy >= 1.20.0", 30 | "pandas >=1.3", 31 | "pyzmq", 32 | "blosc", 33 | ] 34 | 35 | [tool.setuptools] 36 | packages = ["partd"] 37 | zip-safe = false 38 | include-package-data = false 39 | 40 | [tool.versioneer] 41 | VCS = "git" 42 | style = "pep440" 43 | versionfile_source = "partd/_version.py" 44 | versionfile_build = "partd/_version.py" 45 | tag_prefix = "" 46 | parentdir_prefix = "partd-" 47 | 48 | [tool.pytest.ini_options] 49 | addopts = "--strict-markers --strict-config" 50 | filterwarnings = ["error"] 51 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | locket 2 | toolz 3 | -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import annotations 4 | 5 | import versioneer 6 | from setuptools import setup 7 | 8 | setup( 9 | version=versioneer.get_version(), 10 | cmdclass=versioneer.get_cmdclass(), 11 | ) 12 | --------------------------------------------------------------------------------
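A closing sketch of the pandas path, assembled from partd/pandas.py and partd/tests/test_pandas.py above (illustrative only; PandasBlocks composes Encode with the serialize, deserialize, and join functions, and its tests assume a temporary File-backed partd by default):

    import pandas as pd
    from partd.pandas import PandasBlocks

    df1 = pd.DataFrame({'a': [1, 2]})
    df2 = pd.DataFrame({'a': [3, 4]})

    with PandasBlocks() as p:
        p.append({'x': df1})
        p.append({'x': df2})
        result = p.get('x')  # the concatenation, pd.concat([df1, df2])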