├── .gitignore
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── doc
    ├── RELEASE_NOTES.md
    ├── code_overview.rst
    └── writing_codelets.rst
├── examples
    ├── Water diffusion.ipynb
    ├── group_data_items.py
    ├── import_modules.py
    ├── internal_files.py
    ├── internal_modules.py
    ├── plotting.py
    ├── ref_to_library.py
    ├── ref_to_simple.py
    ├── simple.py
    ├── snapshot.py
    └── test_internal_files.py
├── lib
    └── activepapers
    │   ├── __init__.py
    │   ├── builtins2.py
    │   ├── builtins3.py
    │   ├── cli.py
    │   ├── contents.py
    │   ├── execution.py
    │   ├── exploration.py
    │   ├── library.py
    │   ├── standardlib.py
    │   ├── standardlib2.py
    │   ├── standardlib3.py
    │   ├── storage.py
    │   ├── url.py
    │   ├── url2.py
    │   ├── url3.py
    │   ├── utility.py
    │   ├── utility2.py
    │   ├── utility3.py
    │   └── version.py
├── scripts
    └── aptool
├── setup.py
└── tests
    ├── foo
        ├── __init__.py
        └── bar.py
    ├── run_all_tests.sh
    ├── test_basics.py
    ├── test_exploration.py
    ├── test_features.py
    ├── test_library.py
    ├── test_python_modules.py
    └── test_references.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | MANIFEST
 2 | build/
 3 | dist/
 4 | doc/build/
 5 | *.pyc
 6 | *pycache*
 7 | examples/*.ap
 8 | tests/*.ap
 9 | lib/*.egg-info
10 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | ==================================
 2 |  The ActivePapers licensing terms
 3 | ==================================
 4 | 
 5 | ActivePapers is licensed under the terms of the Modified BSD License
 6 | (also known as New or Revised BSD), as follows:
 7 | 
 8 | Copyright (c) 2013, ActivePapers Development Team
 9 | 
10 | All rights reserved.
11 | 
12 | Redistribution and use in source and binary forms, with or without
13 | modification, are permitted provided that the following conditions are met:
14 | 
15 | Redistributions of source code must retain the above copyright notice, this
16 | list of conditions and the following disclaimer.
17 | 
18 | Redistributions in binary form must reproduce the above copyright notice, this
19 | list of conditions and the following disclaimer in the documentation and/or
20 | other materials provided with the distribution.
21 | 
22 | Neither the name of the ActivePapers Development Team nor the names of its
23 | contributors may be used to endorse or promote products derived from this
24 | software without specific prior written permission.
25 | 
26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
27 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
28 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29 | DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
30 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
34 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 | 
37 | 
38 | About the ActivePapers Development Team
39 | ---------------------------------------
40 | 
41 | The ActivePapers project was started by Konrad Hinsen (CNRS, France).
42 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md LICENSE.txt
2 | recursive-include tests *.py
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | As of 2024, this project is archived and unmaintained. While it has achieved its
 2 | mission of demonstrating that unifying computational reproducibility and
 3 | provenance tracking is doable and useful, it has also demonstrated that Python
 4 | is not a suitable platform to build on for reproducible research. Breaking
 5 | changes at all layers of the software stack are too frequent. The ActivePapers
 6 | framework itself (this project) uses an API that was removed in Python 3.9,
 7 | and while it can be updated with reasonable effort, there is little point
 8 | in doing so: Published ActivePapers cannot be expected to work with a current
 9 | Python stack for more than a year.
10 | 
11 | If you came here because you wish to re-run a published ActivePaper, the best
12 | advice I can give is to use [Guix](https://guix.gnu.org/) with its
13 | [time-machine](https://guix.gnu.org/manual/en/html_node/Invoking-guix-time_002dmachine.html)
14 | feature to re-create a Python stack close in time to the paper you are working with.
15 | The ActivePapers infrastructure is packaged in Guix as `python-activepapers`.
16 | 
17 | If you came here to learn about reproducible research practices, the best advice
18 | I can give is not to use Python.
19 | 
20 | The following text is the README from 2018.
21 | 
22 | <br>
23 | 
24 | ActivePapers is a tool for working with executable papers, which
25 | combine data, code, and documentation in single-file packages,
26 | suitable for publication as supplementary material or on sites such as
27 | [figshare](http://figshare.com).
28 | 
29 | The ActivePapers Python edition requires Python 2.7 or Python 3.3 to 3.5.
30 | It also relies on the following libraries:
31 | 
32 |   - NumPy 1.6 or later (http://numpy.scipy.org/)
33 |   - HDF5 1.8.7 or later (http://www.hdfgroup.org/HDF5/)
34 |   - h5py 2.2 or later (http://www.h5py.org/)
35 |   - tempdir 0.6 or later (http://pypi.python.org/pypi/tempdir/)
36 | 
37 | Installation of ActivePapers.Py:
38 | 
39 |     python setup.py install
40 | 
41 | This installs the ActivePapers Python library and the command-line
42 | tool "aptool" for managing ActivePapers.
43 | 
44 | For documentation, see the
45 | [ActivePapers Web site](http://www.activepapers.org/python-edition/).
46 | 
47 | ActivePapers development takes place
48 | [on Github](http://github.com/activepapers/activepapers-python).
49 | 
50 | Running the tests also requires the [tempdir](https://pypi.python.org/pypi/tempdir/) library and either the 
51 | [nose](http://pypi.python.org/pypi/nose/) or the [pytest](http://pytest.org) testing framework. The recommended way to run the tests is
52 | 
53 | ```
54 | cd tests
55 | ./run_all_tests.sh nosetests
56 | ```
57 | or
58 | ```
59 | cd tests
60 | ./run_all_tests.sh py.test
61 | ```
62 | 
63 | This launches the test runner on each test script individually. The simpler approach of simply running `nosetests` or `py.test` in directory `tests` leads to a few test failures because the testing framework's import handling conflicts with the implementation of internal modules in ActivePapers.
64 | 


--------------------------------------------------------------------------------
/doc/RELEASE_NOTES.md:
--------------------------------------------------------------------------------
 1 | Release 0.2.2
 2 | -------------
 3 | 
 4 | Improvements:
 5 | 
 6 |  - Provide a way to skip network-dependent
 7 |    tests in restricted environments
 8 |    (environment variable NO_NETWORK_ACCESS=1)
 9 | 
10 | Bug fixes:
11 | 
12 |  - Prevent a crash when no home directory is defined
13 | 
14 |  - Fix a bug in accesses to nested data groups.
15 | 
16 | Release 0.2.1
17 | -------------
18 | 
19 | Improvements:
20 | 
21 |  - Internal text files are opened as utf8 rather than ascii.
22 | 
23 | Bug fixes:
24 | 
25 |  - Prevent crashes when using Python modules in ActivePapers
26 |    from scripts not managed by ActivePapers (using activepapers.exploration).
27 | 
28 | Release 0.2
29 | -----------
30 | 
31 | New features:
32 | 
33 |  - Read-only access to code and data from an ActivePaper in plain
34 |    Python scripts. This facilitates developing and testing code
35 |    that will later be integrated into an ActivePaper.
36 | 
37 |  - Calclets have read-only access to code and to stack traces,
38 |    allowing limited forms of introspection.
39 | 
40 |  - Internal files can be opened in binary mode.
41 | 
42 | Bug fixes:
43 | 
44 |  - Improved compatibility with recent versions of Python and h5py.
45 | 
46 | Release 0.1.4
47 | -------------
48 | 
49 | New features:
50 | 
51 |   - Python scripts are stored using UTF-8 encoding rather than ASCII.
52 | 
53 |   - Internal files can be opened using an optional "encoding" argument.
54 |     If this is used, strings read from and written to such files
55 |     are unicode strings.
56 | 
57 | Bug fixes:
58 | 
59 |  - A change in importlib in Python 3.4 broke the import of modules
60 |    stored in an ActivePaper.
61 | 
62 | Release 0.1.3
63 | -------------
64 | 
65 | New feature:
66 | 
67 |  - There is now a generic module activepapers.contents that can be
68 |    imported from any Python script in order to provide read-only
69 |    access to the contents of the ActivePaper that is located in the
70 |    current directory. This is meant as an aid to codelet development.
71 | 
72 | Bug fixes:
73 | 
74 |  - Broken downloads from Zenodo, following a modification of the contents
75 |    of the Zenodo landing pages. Actually, Zenodo went back to the
76 |    landing page format it had before ActivePapers release 0.1.2,
77 |    so ActivePapers also went back to how it downloaded files before.
78 | 
79 | 
80 | Release 0.1.2
81 | -------------
82 | 
83 | This is a bugfix release, fixing the following issues:
84 | 
85 |  - A compatibility problem with h5py 2.3
86 | 
87 |  - Broken downloads from Zenodo, following a modification of the contents
88 |    of the Zenodo landing pages.
89 | 
90 |  - Syntax errors in codelets were not reported correctly.
91 | 


--------------------------------------------------------------------------------
/doc/code_overview.rst:
--------------------------------------------------------------------------------
 1 | Overview of the ActivePapers implementation
 2 | ===========================================
 3 | 
 4 | There is currently little documentation in the code. Don't worry,
 5 | this will change.
 6 | 
 7 | The command-line tool is in ``scripts/aptool``. It contains just
 8 | the user interface, based on ``argparse``. The code that actually
 9 | implements the commands is in the module ``activepapers.cli``.
10 | 
11 | The ActivePapers Python library is in ``lib``. The main modules
12 | are:
13 | 
14 | ``activepapers.storage``
15 |   Takes care of storing and retrieving data (both the contents of an
16 |   ActivePaper and bookkeeping information) in an HDF5 file. Most
17 |   of this module consists of the large class ``ActivePaper``.
18 |   The class ``InternalFile`` handles the file interface to datasets
19 |   (``activepapers.contents.open``). The class ``APNode`` handles
20 |   references to contents in other ActivePapers.
21 | 
22 | ``activepapers.execution``
23 | 
24 |   Manages the execution of codelets (classes ``Codelet``, ``Calclet``,
25 |   and ``Importlet``), which includes restricted rights for calclets
26 |   and access to modules stored inside an ActivePaper for both
27 |   calclets and importlets. Tracing of dependencies during the
28 |   execution of a codelet is also handled here (classes
29 |   ``AttrWrapper``, ``DatasetWrapper``, and ``DataGroup``).
30 | 
31 | ``activepapers.library``
32 |   Manages the local library of ActivePapers. Downloads
33 |   DOI references automatically if possible (which currently
34 |   means DOIs from figshare).
35 | 
36 | ``activepapers.cli``
37 |   Contains the implementation of the subcommands of ``aptool``.
38 | 
39 | The remaining modules provide support code. Several of them are
40 | divided into three parts: ``activepapers.X``, ``activepapers.X2``, and
41 | ``activepapers.X3``. The modules ending in ``2`` or ``3`` contain code
42 | specific to Python 2 or Python 3. The generic one imports the right
43 | language-specific module and perhaps adds some code that works with
44 | both dialects.
45 | 
46 | ``activepapers.url``
47 |   A thin wrapper around the URL-related libraries, which differ between
48 |   Python 2 and Python 3.
49 | 
50 | ``activepapers.standardlib``
51 |   Defines the subset of the standard library that is accessible from
52 |   codelets.
53 | 
54 | ``activepapers.builtins``
55 |   Defines the subset of the builtin definitions that is accessible from
56 |   codelets.
57 | 
58 | ``activepapers.utility``
59 |   Small functions that are used a lot in both ``activepapers.storage``
60 |   and ``activepapers.execution``.
61 | 
62 | ``activepapers.version``
63 |   The version number of the library, stored in a single place.
64 | 
65 | 
66 | Note the absence of ``activepapers.contents``, which is the module
67 | through which codelets access the contents of an ActivePaper.  It is
68 | created dynamically each time a codelet is run, see the class
69 | ``activepapers.execution.Codelet``.
70 | 


--------------------------------------------------------------------------------
/doc/writing_codelets.rst:
--------------------------------------------------------------------------------
  1 | Writing codelets
  2 | ================
  3 | 
  4 | Scripts inside an ActivePaper are called "codelets", which come in two
  5 | varieties: calclets and importlets. As their names indicate, they are
  6 | ideally small, using code from modules to do most of the work. The
  7 | only difference between calclets and importlets is that calclets run
  8 | in a restricted environment, whereas importlets have full access to
  9 | the computer's resources: files, installed Python modules, network,
 10 | etc. Calclets represent the reproducible part of an ActivePaper's
 11 | computations.  Importlets most probably don't work on anyone else's
 12 | computer, and thus should be used only when absolutely necessary. The
 13 | main reason for using an importlet, as its name suggests, is importing
 14 | data from the outside world into an ActivePaper.
 15 | 
 16 | Restricted environment execution
 17 | --------------------------------
 18 | 
 19 | Calclets are run in a modified Python environment, which includes a
 20 | subset of the Python standard library, the NumPy library, the
 21 | ActivePapers library, and all Python modules stored inside the
 22 | ActivePaper, directly or through references. The subset of the
 23 | standard library includes everything needed for computation, but no
 24 | I/O, network access, or platform-specific
 25 | modules. ActivePapers-compliant I/O is provided through the
 26 | ActivePapers library, as explained below.
 27 | 
 28 | Since Python does not provide secure restricted environments, the
 29 | restrictions are really no more than encouragements to respect the
 30 | rules. You can get around all of them with some ingenuity, but this
 31 | documentation won't tell you how. Keep this in mind when running other
 32 | people's code: if you have reasons to suspect malicious intent, look
 33 | at the code before running it.
 34 | 
 35 | Importlets are run in an augmented environment. They have access to
 36 | everything a standard Python script can use, but they can (and must,
 37 | in order to be useful) also use the I/O functionality from the
 38 | ActivePaper library to write data to the ActivePaper.
 39 | 
 40 | Accessing additional Python modules
 41 | -----------------------------------
 42 | 
 43 | When a calclet tries to use a module that is not part of the restricted
 44 | environment described above, ActivePapers aborts with an error message.
 45 | The right solution for that problem is to include that module's source
 46 | code in the ActivePaper, or to package it as a separate ActivePaper and
 47 | access it through a reference.
 48 | 
 49 | Unfortunately, this is not always possible. The most common technical
 50 | obstacles are extension modules, which are not allowed in an
 51 | ActivePaper. Licensing restrictions can also prevent re-publication in
 52 | an ActivePaper. For such situations, ActivePapers provides a way to
 53 | extend the restricted execution environment by additional modules and
 54 | packages. This is done when the ActivePaper is created using ``aptool``,
 55 | using the ``-d`` option to ``aptool create``.
 56 | 
 57 | Note that adding a module to the restricted execution environment
 58 | means that anyone working with your ActivePaper will have to
 59 | install the additional modules and packages, in versions compatible with
 60 | the ones you used.
 61 | 
 62 | 
 63 | I/O in ActivePapers
 64 | -------------------
 65 | 
 66 | The module ``activepapers.contents`` provides two ways to read and write
 67 | data: a file-like approach, and direct dataset access using the
 68 | `h5py <http://www.h5py.org/>`_ library.
 69 | 
 70 | File-like I/O is the easiest to use, and since it is highly compatible
 71 | with the Python library's file protocol, it can be used with many
 72 | existing Python libraries. Here is a simple example:
 73 | 
 74 |     from activepapers.contents import open
 75 | 
 76 |     with open('numbers', 'w') as f:
 77 |         for i in range(10):
 78 |             f.write(str(i)+'\\n')
 79 | 
 80 | You can use the ``open`` function just like you would use the standard
 81 | Python ``open`` function, the only difference being that you pass it a
 82 | dataset name rather than a filename. The above example creates the
 83 | dataset ``/data/numbers``, i.e. the dataset names are relative to the
 84 | ActivePaper's `data` group.
 85 | 
 86 | There is also ``open_documentation``, which works in the same way but
 87 | accesses datasets relative to the top-level ``documentation`` group.
 88 | Datasets in this group are meant for human consumption, not for
 89 | input to other calclets.
 90 | 
 91 | Direct use of HDF5 datasets through ``h5py`` provides much more
 92 | powerful data management options, in particular for large binary
 93 | datasets.  The following example stores the same data as the preceding
 94 | one, but as a binary dataset:
 95 | 
 96 |     from activepapers.contents import data
 97 |     import numpy as np
 98 |     data['numbers'] = np.arange(10)
 99 | 
100 | The ``data`` object in the module ``activepapers.contents`` behaves
101 | much like a group object from ``h5py``, the only difference being that
102 | all data accesses are tracked for creating the dependency graph in the
103 | ActivePaper. Most code based on ``h5py`` should work in an
104 | ActivePaper, with the exception of code that tests objects for being
105 | instances of specific h5py classes.
106 | 


--------------------------------------------------------------------------------
/examples/group_data_items.py:
--------------------------------------------------------------------------------
 1 | # This example illustrates how to turn a group withh everything it
 2 | # contains into a single data item for the purpose of dependency
 3 | # tracking.
 4 | 
 5 | from activepapers.storage import ActivePaper
 6 | import numpy as np
 7 | 
 8 | paper = ActivePaper("group_data_items.ap", "w")
 9 | 
10 | script = paper.create_calclet("script1",
11 | """
12 | from activepapers.contents import data
13 | import numpy as np
14 | 
15 | numbers = data.create_group("numbers")
16 | numbers.mark_as_data_item()
17 | numbers.create_dataset("pi", data=np.pi)
18 | numbers.create_dataset("e", data=np.e)
19 | """)
20 | script.run()
21 | 
22 | script = paper.create_calclet("script2",
23 | """
24 | from activepapers.contents import data
25 | import numpy as np
26 | 
27 | numbers = data["numbers"]
28 | data.create_dataset("result", data=numbers["pi"][...]*numbers["e"][...])
29 | """)
30 | script.run()
31 | 
32 | # Check that only /data/numbers is tracked, not
33 | # /data/numbers/pi or /data/numbers/e
34 | for level in paper.dependency_hierarchy():
35 |     print [item.name for item in level]
36 | 
37 | paper.close()
38 | 


--------------------------------------------------------------------------------
/examples/import_modules.py:
--------------------------------------------------------------------------------
 1 | from activepapers.storage import ActivePaper
 2 | import numpy as np
 3 | import os, sys
 4 | # Example: import modules from the file system into an ActivePaper.
 5 | # The modules imported here are located in ../tests.
 6 | script_path = os.path.dirname(sys.argv[0])
 7 | tests_path = os.path.join(script_path, '..', 'tests')
 8 | module_path = [os.path.abspath(tests_path)]
 9 | 
10 | paper = ActivePaper("import_modules.ap", "w")
11 | 
12 | # The source code of imported modules is embedded into the paper. Only
13 | # Python source code modules can be imported, i.e. neither extension
14 | # modules nor bytecode modules (.pyc).
15 | # The module_path parameter is a list of directories that can contain
16 | # modules. If not specified, it defaults to sys.path
17 | paper.import_module('foo', module_path)
18 | paper.import_module('foo.bar', module_path)
19 | # The calclet imports both modules and checks the result of frobnicate().
20 | script = paper.create_calclet("test",
21 | """
22 | import foo
23 | from foo.bar import frobnicate
24 | assert frobnicate(foo.__version__) == '42'
25 | """)
26 | script.run()
27 | 
28 | paper.close()
29 | 


--------------------------------------------------------------------------------
/examples/internal_files.py:
--------------------------------------------------------------------------------
 1 | from activepapers.storage import ActivePaper
 2 | import numpy as np
 3 | # Example: file-like text and binary I/O on datasets from within calclets.
 4 | paper = ActivePaper("internal_files.ap", "w")
 5 | # Write the integers 0..9, one per line, to the dataset 'numbers'.
 6 | script = paper.create_calclet("write",
 7 | """
 8 | from activepapers.contents import open
 9 | 
10 | with open('numbers', 'w') as f:
11 |     for i in range(10):
12 |         f.write(str(i)+'\\n')
13 | """)
14 | script.run()
15 | # Read the lines back one at a time using readline().
16 | script = paper.create_calclet("read1",
17 | """
18 | from activepapers.contents import open
19 | 
20 | f = open('numbers')
21 | for i in range(10):
22 |     assert f.readline().strip() == str(i)
23 | f.close()
24 | """)
25 | script.run()
26 | # Read the same dataset by iterating over the file object.
27 | script = paper.create_calclet("read2",
28 | """
29 | from activepapers.contents import open
30 | 
31 | f = open('numbers')
32 | data = [int(line.strip()) for line in f]
33 | f.close()
34 | assert data == list(range(10))
35 | """)
36 | script.run()
37 | # Pack the numbers with struct format 'h' and write them in binary mode.
38 | script = paper.create_calclet("convert_to_binary",
39 | """
40 | from activepapers.contents import open
41 | import struct
42 | 
43 | with open('numbers') as f:
44 |     data = [int(line.strip()) for line in f]
45 | f = open('binary_numbers', 'wb')
46 | f.write(struct.pack(len(data)*'h', *data))
47 | f.close()
48 | """)
49 | script.run()
50 | # Read the binary dataset back and check that it unpacks to 0..9.
51 | script = paper.create_calclet("read_binary",
52 | """
53 | from activepapers.contents import open
54 | import struct
55 | 
56 | f = open('binary_numbers', 'rb')
57 | assert struct.unpack(10*'h', f.read()) == tuple(range(10))
58 | f.close()
59 | """)
60 | script.run()
61 | 
62 | paper.close()
63 | 


--------------------------------------------------------------------------------
/examples/internal_modules.py:
--------------------------------------------------------------------------------
 1 | from activepapers.storage import ActivePaper
 2 | import numpy as np
 3 | # Example: a calclet that imports a module added to the same ActivePaper.
 4 | paper = ActivePaper("internal_modules.ap", "w")
 5 | # Add the module my_math to the paper, given as a source code string.
 6 | paper.add_module("my_math",
 7 | """
 8 | import numpy as np
 9 | 
10 | def my_func(x):
11 |     return np.sin(x)
12 | """)
13 | 
14 | # Input datasets that the calclet below reads.
15 | paper.data.create_dataset("frequency", data = 0.2)
16 | paper.data.create_dataset("time", data=0.1*np.arange(100))
17 | # The calclet imports my_func from my_math and stores its result as "sine".
18 | calc_sine = paper.create_calclet("calc_sine",
19 | """
20 | from activepapers.contents import data
21 | import numpy as np
22 | from my_math import my_func
23 | 
24 | frequency = data['frequency'][...]
25 | time = data['time'][...]
26 | data.create_dataset("sine", data=my_func(2.*np.pi*frequency*time))
27 | """)
28 | calc_sine.run()
29 | 
30 | paper.close()
31 | 


--------------------------------------------------------------------------------
/examples/plotting.py:
--------------------------------------------------------------------------------
 1 | from activepapers.storage import ActivePaper
 2 | import numpy as np
 3 | # Example: a calclet that plots with matplotlib and saves the figure.
 4 | paper = ActivePaper("plotting.ap", "w",
 5 |                     dependencies = ["matplotlib"])
 6 | # NOTE(review): presumably "dependencies" lets calclets import matplotlib.
 7 | paper.data.create_dataset("frequency", data = 0.2)
 8 | paper.data.create_dataset("time", data=0.1*np.arange(100))
 9 | # The calclet plots sin(2*pi*frequency*time) and saves it via open().
10 | plot_sine = paper.create_calclet("plot_sine",
11 | """
12 | from activepapers.contents import open, data
13 | import matplotlib
14 | # Make matplotlib ignore the user's .matplotlibrc
15 | matplotlib.rcdefaults()
16 | # Use the SVG backend. Must be done *before* importing pyplot.
17 | matplotlib.use('SVG')
18 | import matplotlib.pyplot as plt
19 | 
20 | import numpy as np
21 | 
22 | frequency = data['frequency'][...]
23 | time = data['time'][...]
24 | sine = np.sin(2.*np.pi*frequency*time)
25 | 
26 | plt.plot(time, sine)
27 | # Save plot to a file, which is simulated by a HDF5 byte array
28 | with open('sine_plot.svg', 'w') as output:
29 |     plt.savefig(output)
30 | """)
31 | plot_sine.run()
32 | 
33 | paper.close()
34 | 


--------------------------------------------------------------------------------
/examples/ref_to_library.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | os.environ['ACTIVEPAPERS_LIBRARY'] = os.getcwd()
 3 | # Set before importing activepapers (presumably read at import; confirm).
 4 | from activepapers.storage import ActivePaper
 5 | import numpy as np
 6 | 
 7 | paper = ActivePaper("ref_to_library.ap", "w")
 8 | 
 9 | paper.data.create_dataset("frequency", data = 0.2)
10 | paper.data.create_dataset("time", data=0.1*np.arange(100))
11 | # Reference my_math in "local:internal_modules" instead of adding its source.
12 | paper.create_module_ref("my_math", "local:internal_modules")
13 | # The calclet imports my_func from the referenced module.
14 | calc_sine = paper.create_calclet("calc_sine",
15 | """
16 | from activepapers.contents import data
17 | import numpy as np
18 | from my_math import my_func
19 | 
20 | frequency = data['frequency'][...]
21 | time = data['time'][...]
22 | data.create_dataset("sine", data=my_func(2.*np.pi*frequency*time))
23 | """)
24 | calc_sine.run()
25 | 
26 | paper.close()
27 | 


--------------------------------------------------------------------------------
/examples/ref_to_simple.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | os.environ['ACTIVEPAPERS_LIBRARY'] = os.getcwd()
 3 | # Set before importing activepapers (presumably read at import; confirm).
 4 | from activepapers.storage import ActivePaper
 5 | import numpy as np
 6 | 
 7 | paper = ActivePaper("ref_to_simple.ap", "w")
 8 | # Create references to two datasets of "local:simple".
 9 | paper.create_data_ref("frequency", "local:simple")
10 | paper.create_data_ref("time", "local:simple", "time")
11 | # Create a reference to the codelet calc_sine, then run it from this paper.
12 | paper.create_code_ref("calc_sine", "local:simple", "calc_sine")
13 | paper.run_codelet('calc_sine')
14 | 
15 | paper.close()
16 | 


--------------------------------------------------------------------------------
/examples/simple.py:
--------------------------------------------------------------------------------
 1 | from activepapers.storage import ActivePaper
 2 | import numpy as np
 3 | # Example: a minimal paper with two datasets and one calclet.
 4 | paper = ActivePaper("simple.ap", "w")
 5 | # Input datasets that the calclet below reads.
 6 | paper.data.create_dataset("frequency", data = 0.2)
 7 | paper.data.create_dataset("time", data=0.1*np.arange(100))
 8 | # The calclet stores sin(2*pi*frequency*time) as the dataset "sine".
 9 | calc_sine = paper.create_calclet("calc_sine",
10 | """
11 | from activepapers.contents import data
12 | import numpy as np
13 | 
14 | frequency = data['frequency'][...]
15 | time = data['time'][...]
16 | data.create_dataset("sine", data=np.sin(2.*np.pi*frequency*time))
17 | """)
18 | calc_sine.run()
19 | 
20 | paper.close()
21 | 


--------------------------------------------------------------------------------
/examples/snapshot.py:
--------------------------------------------------------------------------------
 1 | from activepapers.storage import ActivePaper
 2 | import numpy as np
 3 | # Example: calling snapshot() at intermediate points of a calclet.
 4 | paper = ActivePaper("snapshot.ap", "w")
 5 | # Input datasets that the calclet below reads.
 6 | paper.data.create_dataset("frequency", data = 0.2)
 7 | paper.data.create_dataset("time", data=0.1*np.arange(100))
 8 | # snapshot() is called after "sine" and again after "cosine" is created.
 9 | calc_angular = paper.create_calclet("calc_angular",
10 | """
11 | from activepapers.contents import data, snapshot
12 | import numpy as np
13 | 
14 | frequency = data['frequency'][...]
15 | time = data['time'][...]
16 | data.create_dataset("sine", data=np.sin(2.*np.pi*frequency*time))
17 | snapshot('snapshot_1.ap')
18 | data.create_dataset("cosine", data=np.cos(2.*np.pi*frequency*time))
19 | snapshot('snapshot_2.ap')
20 | data.create_dataset("tangent", data=np.tan(2.*np.pi*frequency*time))
21 | """)
22 | calc_angular.run()
23 | 
24 | paper.close()
25 | 


--------------------------------------------------------------------------------
/examples/test_internal_files.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import unittest
  4 | import itertools
  5 | import time
  6 | from array import array
  7 | from weakref import proxy
  8 | try:
  9 |     import threading
 10 | except ImportError:
 11 |     threading = None
 12 | 
 13 | from test import test_support
 14 | from test.test_support import TESTFN, run_unittest
 15 | from UserList import UserList
 16 | 
 17 | class AutoFileTests(unittest.TestCase):
 18 |     # file tests for which a test file is automatically set up
 19 | 
 20 |     def setUp(self):
 21 |         self.f = open(TESTFN, 'wb')
 22 | 
 23 |     def tearDown(self):
 24 |         if self.f:
 25 |             self.f.close()
 26 |         os.remove(TESTFN)
 27 | 
 28 |     def testWeakRefs(self):
 29 |         # verify weak references
 30 |         p = proxy(self.f)
 31 |         p.write('teststring')
 32 |         self.assertEqual(self.f.tell(), p.tell())
 33 |         self.f.close()
 34 |         self.f = None
 35 |         self.assertRaises(ReferenceError, getattr, p, 'tell')
 36 | 
 37 |     def testAttributes(self):
 38 |         # verify expected attributes exist
 39 |         f = self.f
 40 |         with test_support.check_py3k_warnings():
 41 |             softspace = f.softspace
 42 |         f.name     # merely shouldn't blow up
 43 |         f.mode     # ditto
 44 |         f.closed   # ditto
 45 | 
 46 |         with test_support.check_py3k_warnings():
 47 |             # verify softspace is writable
 48 |             f.softspace = softspace    # merely shouldn't blow up
 49 | 
 50 |         # verify the others aren't
 51 |         for attr in 'name', 'mode', 'closed':
 52 |             self.assertRaises((AttributeError, TypeError), setattr, f, attr, 'oops')
 53 | 
 54 |     def testReadinto(self):
 55 |         # verify readinto
 56 |         self.f.write('12')
 57 |         self.f.close()
 58 |         a = array('c', 'x'*10)
 59 |         self.f = open(TESTFN, 'rb')
 60 |         n = self.f.readinto(a)
 61 |         self.assertEqual('12', a.tostring()[:n])
 62 | 
 63 |     def testWritelinesUserList(self):
 64 |         # verify writelines with instance sequence
 65 |         l = UserList(['1', '2'])
 66 |         self.f.writelines(l)
 67 |         self.f.close()
 68 |         self.f = open(TESTFN, 'rb')
 69 |         buf = self.f.read()
 70 |         self.assertEqual(buf, '12')
 71 | 
 72 |     def testWritelinesIntegers(self):
 73 |         # verify writelines with integers
 74 |         self.assertRaises(TypeError, self.f.writelines, [1, 2, 3])
 75 | 
 76 |     def testWritelinesIntegersUserList(self):
 77 |         # verify writelines with integers in UserList
 78 |         l = UserList([1,2,3])
 79 |         self.assertRaises(TypeError, self.f.writelines, l)
 80 | 
 81 |     def testWritelinesNonString(self):
 82 |         # verify writelines with non-string object
 83 |         class NonString:
 84 |             pass
 85 | 
 86 |         self.assertRaises(TypeError, self.f.writelines,
 87 |                           [NonString(), NonString()])
 88 | 
 89 |     def testRepr(self):
 90 |         # verify repr works
 91 |         self.assertTrue(repr(self.f).startswith("<open file '" + TESTFN))
 92 | 
 93 |     def testErrors(self):
 94 |         self.f.close()
 95 |         self.f = open(TESTFN, 'rb')
 96 |         f = self.f
 97 |         self.assertEqual(f.name, TESTFN)
 98 |         self.assertTrue(not f.isatty())
 99 |         self.assertTrue(not f.closed)
100 | 
101 |         self.assertRaises(TypeError, f.readinto, "")
102 |         f.close()
103 |         self.assertTrue(f.closed)
104 | 
105 |     def testMethods(self):
106 |         methods = ['fileno', 'flush', 'isatty', 'next', 'read', 'readinto',
107 |                    'readline', 'readlines', 'seek', 'tell', 'truncate',
108 |                    'write', '__iter__']
109 |         deprecated_methods = ['xreadlines']
110 |         if sys.platform.startswith('atheos'):
111 |             methods.remove('truncate')
112 | 
113 |         # __exit__ should close the file
114 |         self.f.__exit__(None, None, None)
115 |         self.assertTrue(self.f.closed)
116 | 
117 |         for methodname in methods:
118 |             method = getattr(self.f, methodname)
119 |             # should raise on closed file
120 |             self.assertRaises(ValueError, method)
121 |         with test_support.check_py3k_warnings():
122 |             for methodname in deprecated_methods:
123 |                 method = getattr(self.f, methodname)
124 |                 self.assertRaises(ValueError, method)
125 |         self.assertRaises(ValueError, self.f.writelines, [])
126 | 
127 |         # file is closed, __exit__ shouldn't do anything
128 |         self.assertEqual(self.f.__exit__(None, None, None), None)
129 |         # it must also return None if an exception was given
130 |         try:
131 |             1 // 0
132 |         except:
133 |             self.assertEqual(self.f.__exit__(*sys.exc_info()), None)
134 | 
135 |     def testReadWhenWriting(self):
136 |         self.assertRaises(IOError, self.f.read)
137 | 
138 |     def testNastyWritelinesGenerator(self):
139 |         def nasty():
140 |             for i in range(5):
141 |                 if i == 3:
142 |                     self.f.close()
143 |                 yield str(i)
144 |         self.assertRaises(ValueError, self.f.writelines, nasty())
145 | 
146 |     def testIssue5677(self):
147 |         # Remark: Do not perform more than one test per open file,
148 |         # since that does NOT catch the readline error on Windows.
149 |         data = 'xxx'
150 |         for mode in ['w', 'wb', 'a', 'ab']:
151 |             for attr in ['read', 'readline', 'readlines']:
152 |                 self.f = open(TESTFN, mode)
153 |                 self.f.write(data)
154 |                 self.assertRaises(IOError, getattr(self.f, attr))
155 |                 self.f.close()
156 | 
157 |             self.f = open(TESTFN, mode)
158 |             self.f.write(data)
159 |             self.assertRaises(IOError, lambda: [line for line in self.f])
160 |             self.f.close()
161 | 
162 |             self.f = open(TESTFN, mode)
163 |             self.f.write(data)
164 |             self.assertRaises(IOError, self.f.readinto, bytearray(len(data)))
165 |             self.f.close()
166 | 
167 |         for mode in ['r', 'rb', 'U', 'Ub', 'Ur', 'rU', 'rbU', 'rUb']:
168 |             self.f = open(TESTFN, mode)
169 |             self.assertRaises(IOError, self.f.write, data)
170 |             self.f.close()
171 | 
172 |             self.f = open(TESTFN, mode)
173 |             self.assertRaises(IOError, self.f.writelines, [data, data])
174 |             self.f.close()
175 | 
176 |             self.f = open(TESTFN, mode)
177 |             self.assertRaises(IOError, self.f.truncate)
178 |             self.f.close()
179 | 
class OtherFileTests(unittest.TestCase):
    # These tests create and remove TESTFN themselves; the class defines
    # no setUp()/tearDown().

    def testBadModeArgument(self):
        # verify that we get a sensible error message for bad mode argument
        bad_mode = "qwerty"
        try:
            f = open(TESTFN, bad_mode)
        except ValueError as msg:
            if msg.args[0] != 0:
                s = str(msg)
                # The message must mention the bad mode but not the file.
                if TESTFN in s or bad_mode not in s:
                    self.fail("bad error message for invalid mode: %s" % s)
            # if msg.args[0] == 0, we're probably on Windows where there may
            # be no obvious way to discover why open() failed.
        else:
            f.close()
            self.fail("no error for invalid mode: %s" % bad_mode)

    def testSetBufferSize(self):
        # make sure that explicitly setting the buffer size doesn't cause
        # misbehaviour especially with repeated close() calls
        for s in (-1, 0, 1, 512):
            try:
                f = open(TESTFN, 'w', s)
                f.write(str(s))
                f.close()
                f.close()
                f = open(TESTFN, 'r', s)
                d = int(f.read())
                f.close()
                f.close()
            except IOError as msg:
                self.fail('error setting buffer size %d: %s' % (s, str(msg)))
            # The value written with this buffer size must read back intact.
            self.assertEqual(d, s)

    def testTruncateOnWindows(self):
        # Start from a clean slate.  Only unlink when the file is there so
        # that the test does not depend on an earlier test having left
        # TESTFN behind.
        if os.path.exists(TESTFN):
            os.unlink(TESTFN)

        def bug801631():
            # SF bug <http://www.python.org/sf/801631>
            # "file.truncate fault on windows"
            f = open(TESTFN, 'wb')
            f.write('12345678901')   # 11 bytes
            f.close()

            f = open(TESTFN,'rb+')
            data = f.read(5)
            if data != '12345':
                self.fail("Read on file opened for update failed %r" % data)
            if f.tell() != 5:
                self.fail("File pos after read wrong %d" % f.tell())

            # truncate() without an argument cuts at the current position
            # and must leave that position unchanged.
            f.truncate()
            if f.tell() != 5:
                self.fail("File pos after ftruncate wrong %d" % f.tell())

            f.close()
            size = os.path.getsize(TESTFN)
            if size != 5:
                self.fail("File size after ftruncate wrong %d" % size)

        try:
            bug801631()
        finally:
            os.unlink(TESTFN)

    def testIteration(self):
        # Test the complex interaction when mixing file-iteration and the
        # various read* methods. Ostensibly, the mixture could just be tested
        # to work when it should work according to the Python language,
        # instead of fail when it should fail according to the current CPython
        # implementation.  People don't always program Python the way they
        # should, though, and the implementation might change in subtle ways,
        # so we explicitly test for errors, too; the test will just have to
        # be updated when the implementation changes.
        dataoffset = 16384
        filler = "ham\n"
        assert not dataoffset % len(filler), \
            "dataoffset must be multiple of len(filler)"
        nchunks = dataoffset // len(filler)
        testlines = [
            "spam, spam and eggs\n",
            "eggs, spam, ham and spam\n",
            "saussages, spam, spam and eggs\n",
            "spam, ham, spam and eggs\n",
            "spam, spam, spam, spam, spam, ham, spam\n",
            "wonderful spaaaaaam.\n"
        ]
        methods = [("readline", ()), ("read", ()), ("readlines", ()),
                   ("readinto", (array("c", " "*100),))]

        try:
            # Prepare the testfile
            bag = open(TESTFN, "w")
            bag.write(filler * nchunks)
            bag.writelines(testlines)
            bag.close()
            # Test for appropriate errors mixing read* and iteration
            for methodname, args in methods:
                f = open(TESTFN)
                if f.next() != filler:
                    # This used to be the no-op expression
                    # ``self.fail, "Broken testfile"``.
                    self.fail("Broken testfile")
                meth = getattr(f, methodname)
                try:
                    meth(*args)
                except ValueError:
                    pass
                else:
                    self.fail("%s%r after next() didn't raise ValueError" %
                                     (methodname, args))
                f.close()

            # Test to see if harmless (by accident) mixing of read* and
            # iteration still works. This depends on the size of the internal
            # iteration buffer (currently 8192,) but we can test it in a
            # flexible manner.  Each line in the bag o' ham is 4 bytes
            # ("h", "a", "m", "\n"), so 4096 lines of that should get us
            # exactly on the buffer boundary for any power-of-2 buffersize
            # between 4 and 16384 (inclusive).
            f = open(TESTFN)
            for i in range(nchunks):
                f.next()
            testline = testlines.pop(0)
            try:
                line = f.readline()
            except ValueError:
                self.fail("readline() after next() with supposedly empty "
                          "iteration-buffer failed anyway")
            if line != testline:
                self.fail("readline() after next() with empty buffer "
                          "failed. Got %r, expected %r" % (line, testline))
            testline = testlines.pop(0)
            buf = array("c", "\x00" * len(testline))
            try:
                f.readinto(buf)
            except ValueError:
                self.fail("readinto() after next() with supposedly empty "
                          "iteration-buffer failed anyway")
            line = buf.tostring()
            if line != testline:
                self.fail("readinto() after next() with empty buffer "
                          "failed. Got %r, expected %r" % (line, testline))

            testline = testlines.pop(0)
            try:
                line = f.read(len(testline))
            except ValueError:
                self.fail("read() after next() with supposedly empty "
                          "iteration-buffer failed anyway")
            if line != testline:
                self.fail("read() after next() with empty buffer "
                          "failed. Got %r, expected %r" % (line, testline))
            try:
                lines = f.readlines()
            except ValueError:
                self.fail("readlines() after next() with supposedly empty "
                          "iteration-buffer failed anyway")
            if lines != testlines:
                # Report the lists that were actually compared.
                self.fail("readlines() after next() with empty buffer "
                          "failed. Got %r, expected %r" % (lines, testlines))
            # Release this handle before the name is rebound below.
            f.close()
            # Reading after iteration hit EOF shouldn't hurt either
            f = open(TESTFN)
            try:
                for line in f:
                    pass
                try:
                    f.readline()
                    f.readinto(buf)
                    f.read()
                    f.readlines()
                except ValueError:
                    self.fail("read* failed after next() consumed file")
            finally:
                f.close()
        finally:
            os.unlink(TESTFN)
356 | 
class FileSubclassTests(unittest.TestCase):

    def testExit(self):
        # Leaving a with-block must go through the subclass' close().
        class TrackingFile(file):
            def __init__(self, *args):
                self.subclass_closed = False
                file.__init__(self, *args)

            def close(self):
                # Record the call before delegating to the real close().
                self.subclass_closed = True
                file.close(self)

        with TrackingFile(TESTFN, 'w') as tracked:
            pass
        self.assertTrue(tracked.subclass_closed)
372 | 
373 | 
@unittest.skipUnless(threading, 'Threading required for this test.')
class FileThreadingTests(unittest.TestCase):
    # These tests check the ability to call various methods of file objects
    # (including close()) concurrently without crashing the Python interpreter.
    # See #815646, #595601
    #
    # All worker threads share the single file object self.f; each test
    # supplies one I/O operation that is alternated with close-and-reopen.

    def setUp(self):
        self._threads = test_support.threading_setup()
        self.f = None
        self.filename = TESTFN
        # Seed the file with the digits 0-9 separated by newlines.
        with open(self.filename, "w") as f:
            f.write("\n".join("0123456789"))
        # Guards the two close counters below.
        self._count_lock = threading.Lock()
        self.close_count = 0
        self.close_success_count = 0
        self.use_buffering = False

    def tearDown(self):
        # Best-effort cleanup: errors from close() and remove() are ignored.
        if self.f:
            try:
                self.f.close()
            except (EnvironmentError, ValueError):
                pass
        try:
            os.remove(self.filename)
        except EnvironmentError:
            pass
        test_support.threading_cleanup(*self._threads)

    def _create_file(self):
        # (Re)open the shared file in "w+" mode, with an explicit 16 KiB
        # buffer size when use_buffering is set.
        if self.use_buffering:
            self.f = open(self.filename, "w+", buffering=1024*16)
        else:
            self.f = open(self.filename, "w+")

    def _close_file(self):
        # close_count counts attempted closes; close_success_count is only
        # incremented when close() returned without raising.
        with self._count_lock:
            self.close_count += 1
        self.f.close()
        with self._count_lock:
            self.close_success_count += 1

    def _close_and_reopen_file(self):
        self._close_file()
        # if close raises an exception that's fine, self.f remains valid so
        # we don't need to reopen.
        self._create_file()

    def _run_workers(self, func, nb_workers, duration=0.2):
        # Run func in nb_workers threads, then clear do_continue to ask
        # them to stop and wait for all of them to finish.
        with self._count_lock:
            self.close_count = 0
            self.close_success_count = 0
        self.do_continue = True
        threads = []
        try:
            for i in range(nb_workers):
                t = threading.Thread(target=func)
                t.start()
                threads.append(t)
            # Poll up to 100 times; stop polling early once the number of
            # closes that did not complete exceeds nb_workers + 1.
            for _ in xrange(100):
                time.sleep(duration/100)
                with self._count_lock:
                    if self.close_count-self.close_success_count > nb_workers+1:
                        if test_support.verbose:
                            print 'Q',
                        break
            time.sleep(duration)
        finally:
            self.do_continue = False
            for t in threads:
                t.join()

    def _test_close_open_io(self, io_func, nb_workers=5):
        # Each worker alternates io_func with closing and reopening the
        # shared file until do_continue is cleared.
        def worker():
            self._create_file()
            funcs = itertools.cycle((
                lambda: io_func(),
                lambda: self._close_and_reopen_file(),
            ))
            for f in funcs:
                if not self.do_continue:
                    break
                try:
                    f()
                except (IOError, ValueError):
                    # These two exception types are ignored; anything else
                    # propagates out of the worker.
                    pass
        self._run_workers(worker, nb_workers)
        if test_support.verbose:
            # Useful verbose statistics when tuning this test to take
            # less time to run but still ensuring that it's still useful.
            #
            # the percent of close calls that raised an error
            # NOTE(review): this divides by close_count, which would raise
            # ZeroDivisionError if no worker ever attempted a close --
            # confirm whether that can happen in practice.
            percent = 100. - 100.*self.close_success_count/self.close_count
            print self.close_count, ('%.4f ' % percent),

    # Each test below supplies one I/O operation to race against
    # close()/reopen in the worker threads.

    def test_close_open(self):
        def io_func():
            pass
        self._test_close_open_io(io_func)

    def test_close_open_flush(self):
        def io_func():
            self.f.flush()
        self._test_close_open_io(io_func)

    def test_close_open_iter(self):
        def io_func():
            list(iter(self.f))
        self._test_close_open_io(io_func)

    def test_close_open_isatty(self):
        def io_func():
            self.f.isatty()
        self._test_close_open_io(io_func)

    def test_close_open_print(self):
        def io_func():
            print >> self.f, ''
        self._test_close_open_io(io_func)

    def test_close_open_print_buffered(self):
        # Same as test_close_open_print, but _create_file() opens the file
        # with an explicit buffer size.
        self.use_buffering = True
        def io_func():
            print >> self.f, ''
        self._test_close_open_io(io_func)

    def test_close_open_read(self):
        def io_func():
            self.f.read(0)
        self._test_close_open_io(io_func)

    def test_close_open_readinto(self):
        def io_func():
            a = array('c', 'xxxxx')
            self.f.readinto(a)
        self._test_close_open_io(io_func)

    def test_close_open_readline(self):
        def io_func():
            self.f.readline()
        self._test_close_open_io(io_func)

    def test_close_open_readlines(self):
        def io_func():
            self.f.readlines()
        self._test_close_open_io(io_func)

    def test_close_open_seek(self):
        def io_func():
            self.f.seek(0, 0)
        self._test_close_open_io(io_func)

    def test_close_open_tell(self):
        def io_func():
            self.f.tell()
        self._test_close_open_io(io_func)

    def test_close_open_truncate(self):
        def io_func():
            self.f.truncate()
        self._test_close_open_io(io_func)

    def test_close_open_write(self):
        def io_func():
            self.f.write('')
        self._test_close_open_io(io_func)

    def test_close_open_writelines(self):
        def io_func():
            self.f.writelines('')
        self._test_close_open_io(io_func)
545 | 
546 | 
547 | class StdoutTests(unittest.TestCase):
548 | 
549 |     def test_move_stdout_on_write(self):
550 |         # Issue 3242: sys.stdout can be replaced (and freed) during a
551 |         # print statement; prevent a segfault in this case
552 |         save_stdout = sys.stdout
553 | 
554 |         class File:
555 |             def write(self, data):
556 |                 if '\n' in data:
557 |                     sys.stdout = save_stdout
558 | 
559 |         try:
560 |             sys.stdout = File()
561 |             print "some text"
562 |         finally:
563 |             sys.stdout = save_stdout
564 | 
565 |     def test_del_stdout_before_print(self):
566 |         # Issue 4597: 'print' with no argument wasn't reporting when
567 |         # sys.stdout was deleted.
568 |         save_stdout = sys.stdout
569 |         del sys.stdout
570 |         try:
571 |             print
572 |         except RuntimeError as e:
573 |             self.assertEqual(str(e), "lost sys.stdout")
574 |         else:
575 |             self.fail("Expected RuntimeError")
576 |         finally:
577 |             sys.stdout = save_stdout
578 | 
579 |     def test_unicode(self):
580 |         import subprocess
581 | 
582 |         def get_message(encoding, *code):
583 |             code = '\n'.join(code)
584 |             env = os.environ.copy()
585 |             env['PYTHONIOENCODING'] = encoding
586 |             process = subprocess.Popen([sys.executable, "-c", code],
587 |                                        stdout=subprocess.PIPE, env=env)
588 |             stdout, stderr = process.communicate()
589 |             self.assertEqual(process.returncode, 0)
590 |             return stdout
591 | 
592 |         def check_message(text, encoding, expected):
593 |             stdout = get_message(encoding,
594 |                 "import sys",
595 |                 "sys.stdout.write(%r)" % text,
596 |                 "sys.stdout.flush()")
597 |             self.assertEqual(stdout, expected)
598 | 
599 |         # test the encoding
600 |         check_message(u'15\u20ac', "iso-8859-15", "15\xa4")
601 |         check_message(u'15\u20ac', "utf-8", '15\xe2\x82\xac')
602 |         check_message(u'15\u20ac', "utf-16-le", '1\x005\x00\xac\x20')
603 | 
604 |         # test the error handler
605 |         check_message(u'15\u20ac', "iso-8859-1:ignore", "15")
606 |         check_message(u'15\u20ac', "iso-8859-1:replace", "15?")
607 |         check_message(u'15\u20ac', "iso-8859-1:backslashreplace", "15\\u20ac")
608 | 
609 |         # test the buffer API
610 |         for objtype in ('buffer', 'bytearray'):
611 |             stdout = get_message('ascii',
612 |                 'import sys',
613 |                 r'sys.stdout.write(%s("\xe9"))' % objtype,
614 |                 'sys.stdout.flush()')
615 |             self.assertEqual(stdout, "\xe9")
616 | 
617 | 
def test_main():
    # Historically, these tests have been sloppy about removing TESTFN.
    # So get rid of it no matter what.
    try:
        suites = (AutoFileTests, OtherFileTests, FileSubclassTests,
                  FileThreadingTests, StdoutTests)
        run_unittest(*suites)
    finally:
        leftover = os.path.exists(TESTFN)
        if leftover:
            os.unlink(TESTFN)


if __name__ == '__main__':
    test_main()
630 | 


--------------------------------------------------------------------------------
/lib/activepapers/__init__.py:
--------------------------------------------------------------------------------
1 | from activepapers.version import version as __version__
2 | 
3 | 


--------------------------------------------------------------------------------
/lib/activepapers/builtins2.py:
--------------------------------------------------------------------------------
# Python 2 variant: re-export the names of __builtin__ from this module,
# then remove the names listed below from this module's namespace.
# NOTE(review): presumably this module is used as a restricted set of
# builtins -- confirm against the code that imports it.
from __builtin__ import *
# "import *" does not copy names that start with an underscore, so
# __import__ is imported explicitly.
from __builtin__ import __import__

# Names that are made unavailable through this module.
del execfile
del eval
del file
del open
del raw_input
9 | 


--------------------------------------------------------------------------------
/lib/activepapers/builtins3.py:
--------------------------------------------------------------------------------
# Python 3 variant: re-export the names of builtins from this module,
# then remove the names listed below from this module's namespace.
from builtins import *
# "import *" does not copy names that start with an underscore, so these
# two are imported explicitly.
from builtins import __import__
from builtins import __build_class__

# The "del exec" was removed and replaced by an equivalent operation
# in utility3 to avoid a syntax error when processing builtins by
# Python 2.
#
# del exec
del eval
del input
del open
# quit may be missing from this namespace, in which case del raises
# NameError; that is ignored.
try:
    del quit
except NameError:
    pass
17 | 


--------------------------------------------------------------------------------
/lib/activepapers/cli.py:
--------------------------------------------------------------------------------
  1 | # Command line interface implementation
  2 | 
  3 | import fnmatch
  4 | import itertools as it
  5 | import os
  6 | import re
  7 | import subprocess
  8 | import sys
  9 | import time
 10 | import tempdir
 11 | 
 12 | import numpy
 13 | import h5py
 14 | 
 15 | import activepapers.storage
 16 | from activepapers.utility import ascii, datatype, mod_time, stamp, \
 17 |                                  timestamp, raw_input
 18 | 
 19 | class CLIExit(Exception):
 20 |     pass
 21 | 
 22 | def get_paper(input_filename):
 23 |     if input_filename is not None:
 24 |         return input_filename
 25 |     apfiles = [fn for fn in os.listdir('.') if fn.endswith('.ap')]
 26 |     if len(apfiles) == 1:
 27 |         return apfiles[0]
 28 |     sys.stderr.write("no filename given and ")
 29 |     if apfiles:
 30 |         sys.stderr.write("%d HDF5 files in current directory\n" % len(apfiles))
 31 |     else:
 32 |         sys.stderr.write("no HDF5 file in current directory\n")
 33 |     raise CLIExit
 34 | 
 35 | 
 36 | #
 37 | # Support for checkin/checkout/extract
 38 | #
 39 | 
# Dataset types whose contents extract_to_file() can write to a file.
extractable_types = ['calclet', 'importlet', 'module', 'file', 'text']

# Maps (datatype, language) to the file extension that extract_to_file()
# appends when the item name contains no '.'.  The language is the item's
# ACTIVE_PAPER_LANGUAGE attribute, or None when that attribute is absent.
file_extensions = {('calclet', 'python'): '.py',
                   ('importlet', 'python'): '.py',
                   ('module', 'python'): '.py',
                   ('file', None): '',
                   ('text', 'HTML'): '.html',
                   ('text', 'LaTeX'): '.tex',
                   ('text', 'markdown'): '.md',
                   ('text', 'reStructuredText'): '.rst',
                   ('text', None): '.txt'}

# Reverse lookup from file extension to language, used by
# update_from_file().  Several entries above share the extension '.py';
# they all map to the same language, so the collapse is harmless.
file_languages = dict((_ext, _l)
                      for (_t, _l), _ext in file_extensions.items())
 54 | 
 55 | def extract_to_file(paper, item, file=None, filename=None, directory=None):
 56 |     if file is None:
 57 |         if filename is not None:
 58 |             filename = os.path.abspath(filename)
 59 |         if directory is not None:
 60 |             directory = os.path.abspath(directory)
 61 |         if filename is not None and directory is not None:
 62 |             if not filename.startswith(directory):
 63 |                 raise ValueError("% not in directory %s"
 64 |                                  % (filename, directory))
 65 |         if filename is None:
 66 |             item_name = item.name.split('/')[1:]
 67 |             filename = os.path.join(directory, *item_name)
 68 |             if '.' not in item_name[-1]:
 69 |                 # Add a file extension using some heuristics
 70 |                 language = item.attrs.get('ACTIVE_PAPER_LANGUAGE', None)
 71 |                 filename += file_extensions.get((datatype(item), language), '')
 72 |         directory, _ = os.path.split(filename)
 73 |         if directory and not os.path.exists(directory):
 74 |             os.makedirs(directory)
 75 |         file = open(filename, 'wb')
 76 |         close = True
 77 |     else:
 78 |         # If a file object is given, no other file specification is allowed
 79 |         assert filename is None
 80 |         assert directory is None
 81 |         close = False
 82 |     dt = datatype(item)
 83 |     if dt in ['file', 'text']:
 84 |         internal = activepapers.storage.InternalFile(item, 'rb')
 85 |         file.write(internal.read())
 86 |     elif dt in extractable_types:
 87 |         file.write(item[...].flat[0])
 88 |     else:
 89 |         raise ValueError("cannot extract dataset %s of type %s"
 90 |                          % (item.name, dt))
 91 |     if close:
 92 |         file.close()
 93 |         mtime = mod_time(item)
 94 |         if mtime:
 95 |             os.utime(filename, (mtime, mtime))
 96 |     return filename
 97 | 
 98 | def update_from_file(paper, filename, type=None,
 99 |                      force_update=False, dry_run=False,
100 |                      dataset_name=None, create_new=True):
101 |     if not os.path.exists(filename):
102 |         raise ValueError("File %s not found" % filename)
103 |     mtime = os.path.getmtime(filename)
104 |     basename = filename
105 |     ext = ''
106 |     if dataset_name is not None:
107 |         item = paper.file.get(dataset_name, None)
108 |         if item is not None:
109 |             basename = item.name
110 |     else:
111 |         item = paper.file.get(basename, None)
112 |         if item is None:
113 |             basename, ext = os.path.splitext(filename)
114 |             item = paper.file.get(basename, None)
115 |     language = file_languages.get(ext, None)
116 |     if item is None:
117 |         if not create_new:
118 |             return
119 |         # Create new item
120 |         if type is None:
121 |             raise ValueError("Datatype required to create new item %s"
122 |                              % basename)
123 |         if type in ['calclet', 'importlet', 'module']:
124 |             if not basename.startswith('code/'):
125 |                 raise ValueError("Items of type %s must be"
126 |                                  " in the code section"
127 |                                  % type)
128 |             if language != 'python':
129 |                 raise ValueError("Items of type %s must be Python code"
130 |                                  % type)
131 |             if type == 'module' and \
132 |                not basename.startswith('code/python-packages/'):
133 |                 raise ValueError("Items of type %s must be in"
134 |                                  "code/python-packages"
135 |                                  % type)
136 |         elif type == 'file':
137 |             if not basename.startswith('data/') \
138 |                and not basename.startswith('documentation/'):
139 |                 raise ValueError("Items of type %s must be"
140 |                                  " in the data or documentation section"
141 |                                  % type)
142 |             basename += ext
143 |         elif type == 'text':
144 |             if not basename.startswith('documentation/'):
145 |                 raise ValueError("Items of type %s must be"
146 |                                  " in the documentation section"
147 |                                  % type)
148 |     else:
149 |         # Update existing item
150 |         if mtime <= mod_time(item) and not force_update:
151 |             if dry_run:
152 |                 sys.stdout.write("Skip %s: file %s is not newer\n"
153 |                                  % (item.name, filename))
154 |             return
155 |         if type is not None and type != datatype(item):
156 |             raise ValueError("Cannot change datatype %s to %s"
157 |                               % (datatype(item), type))
158 |         if type is None:
159 |             type = datatype(item)
160 |         if language is None:
161 |             language = item.attrs.get('ACTIVE_PAPER_LANGUAGE', None)
162 |         if dry_run:
163 |             sys.stdout.write("Delete %s\n" % item.name)
164 |         else:
165 |             del item.parent[item.name.split('/')[-1]]
166 |     if dry_run:
167 |         fulltype = type if language is None else '/'.join((type, language))
168 |         sys.stdout.write("Create item %s of type %s from file %s\n"
169 |                          % (basename, fulltype, filename))
170 |     else:
171 |         if type in ['calclet', 'importlet', 'module']:
172 |             code = open(filename, 'rb').read().decode('utf-8')
173 |             item = paper.store_python_code(basename[5:], code)
174 |             stamp(item, type, {})
175 |             timestamp(item, mtime)
176 |         elif type in ['file', 'text']:
177 |             f = paper.open_internal_file(basename, 'w')
178 |             f.write(open(filename, 'rb').read())
179 |             f.close()
180 |             stamp(f._ds, type, {'ACTIVE_PAPER_LANGUAGE': language})
181 |             timestamp(f._ds, mtime)
182 | 
def directory_pattern(pattern):
    """Return the pattern matching everything below *pattern*, or None.

    For a plain name such as "code" this is "code/*".  None is returned
    when the pattern already ends in "?", "*" or "/", and also for an
    empty pattern, which has no last character to inspect.
    """
    if not pattern or pattern.endswith(("?", "*", "/")):
        return None
    return pattern + "/*"
187 | 
def process_patterns(patterns):
    """Compile shell-style patterns into regular expressions.

    Each pattern is followed by its directory companion (as computed by
    directory_pattern) when there is one. None is passed through
    unchanged; otherwise a list of compiled regular expressions is
    returned.
    """
    if patterns is None:
        return None
    compiled = []
    for p in patterns:
        for candidate in (p, directory_pattern(p)):
            if candidate is None:
                continue
            compiled.append(re.compile(fnmatch.translate(candidate)))
    return compiled
196 | 
197 | #
198 | #  Command handlers called from argparse
199 | #
200 | 
def create(paper, d=None):
    """Create a new paper file and close it again.

    *paper* is the name of the file to create; when it is None an error
    message is written to stderr and CLIExit is raised. *d* is handed
    through as the third argument of ActivePaper.
    """
    if paper is None:
        sys.stderr.write("no paper given\n")
        raise CLIExit
    activepapers.storage.ActivePaper(paper, 'w', d).close()
207 | 
def ls(paper, long, type, pattern):
    """Write the names of the items in a paper to standard output.

    *pattern* is a list of shell-style patterns (or None); only items
    matching at least one of them are listed. *type* restricts the
    listing to one data type (None lists all types). With *long*, each
    name is preceded by the item's timestamp, its data type and a '*'
    marker for stale items.
    """
    paper = activepapers.storage.ActivePaper(get_paper(paper), 'r')
    # Close the paper even if listing fails half-way.
    try:
        pattern = process_patterns(pattern)
        field_len = len("importlet ")  # the longest data type name
        for item in paper.iter_items():
            name = item.name[1:]  # remove initial slash
            dtype = datatype(item)
            if item.attrs.get('ACTIVE_PAPER_DUMMY_DATASET', False):
                dtype = 'dummy'
            if pattern and not any(p.match(name) for p in pattern):
                continue
            if type is not None and dtype != type:
                continue
            if long:
                t = item.attrs.get('ACTIVE_PAPER_TIMESTAMP', None)
                if t is None:
                    # Same width as the formatted timestamp below.
                    sys.stdout.write(21*" ")
                else:
                    # The timestamp is divided by 1000 before being
                    # handed to time.localtime().
                    sys.stdout.write(time.strftime("%Y-%m-%d/%H:%M:%S  ",
                                                   time.localtime(t/1000.)))
                sys.stdout.write((dtype + field_len*" ")[:field_len])
                sys.stdout.write('*' if paper.is_stale(item) else ' ')
            sys.stdout.write(name)
            sys.stdout.write('\n')
    finally:
        paper.close()
235 | 
def rm(paper, force, pattern):
    """Delete the items matching *pattern* and everything depending on them.

    The items and groups matching one of the shell-style patterns are
    collected together with all entries reachable from them in the
    paper's dependency graph. Unless *force* is set, the names are
    printed and the user must confirm the deletion with 'y'.
    """
    paper_name = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper_name, 'r')
    # The finally clause also covers the early return below, which
    # used to leave the paper open.
    try:
        deps = paper.dependency_graph()
        pattern = process_patterns(pattern)
        if not pattern:
            return
        names = set()
        for item in it.chain(paper.iter_items(), paper.iter_groups()):
            if any(p.match(item.name[1:]) for p in pattern):
                names.add(item.name)
    finally:
        paper.close()
    if not names:
        return
    # Transitive closure over the dependency graph.
    pending = list(names)
    while pending:
        for dep in deps[pending.pop()]:
            if dep not in names:
                names.add(dep)
                pending.append(dep)
    names = sorted(names)
    if not force:
        for name in names:
            sys.stdout.write(name + '\n')
        # Compare against the two valid answers explicitly: the
        # substring test 'reply in "yn"' also accepted an empty reply,
        # which was then treated as a confirmation.
        reply = None
        while reply not in ('y', 'n'):
            reply = raw_input("Delete ? (y/n) ")
        if reply == 'n':
            return
    paper = activepapers.storage.ActivePaper(paper_name, 'r+')
    try:
        deleted_groups = []
        for name in names:
            # Members of a group that was already handled disappear
            # with it. The trailing slash keeps '/data/foobar' from
            # being mistaken for a member of group '/data/foo'.
            if any(name.startswith(g + '/') for g in deleted_groups):
                continue
            if isinstance(paper.file[name], h5py.Group):
                deleted_groups.append(name)
            try:
                del paper.file[name]
            except Exception:
                sys.stderr.write("Can't delete %s\n" % name)
    finally:
        paper.close()
281 | 
def dummy(paper, force, pattern):
    """Replace the items matching *pattern* by dummy datasets.

    Unless *force* is set, the matching names are printed and the user
    must confirm the replacement with 'y'. A failing replacement is
    reported on stderr and the exception is re-raised.
    """
    paper_name = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper_name, 'r')
    # The finally clause also covers the early return below, which
    # used to leave the paper open.
    try:
        pattern = process_patterns(pattern)
        if not pattern:
            return
        names = set()
        for item in paper.iter_items():
            if any(p.match(item.name[1:]) for p in pattern):
                names.add(item.name)
    finally:
        paper.close()
    if not names:
        return
    names = sorted(names)
    if not force:
        for name in names:
            sys.stdout.write(name + '\n')
        # Compare against the two valid answers explicitly: the
        # substring test 'reply in "yn"' also accepted an empty reply,
        # which was then treated as a confirmation.
        reply = None
        while reply not in ('y', 'n'):
            reply = raw_input("Replace by dummy datasets? (y/n) ")
        if reply == 'n':
            return
    paper = activepapers.storage.ActivePaper(paper_name, 'r+')
    try:
        for name in names:
            try:
                paper.replace_by_dummy(name)
            except Exception:
                sys.stderr.write("Can't replace %s by dummy\n" % name)
                raise
    finally:
        paper.close()
314 | 
def set_(paper, dataset, expr):
    """Store the value of the Python expression *expr* as *dataset*.

    The expression is evaluated with the numpy namespace as globals.
    An existing dataset of the same name is replaced.
    """
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r+')
    # Close the paper even if the expression or the assignment fails;
    # it is open for writing.
    try:
        # NOTE(security): eval() runs arbitrary code. The expression
        # comes from the command line of the user running the tool and
        # must never be taken from an untrusted source.
        value = eval(expr, numpy.__dict__, {})
        try:
            del paper.data[dataset]
        except KeyError:
            pass
        paper.data[dataset] = value
    finally:
        paper.close()
325 | 
def group(paper, group_name):
    """Create the group *group_name* in a paper.

    A leading slash is ignored. The first path element must be 'code',
    'data' or 'documentation'; otherwise a message is written to stderr
    and CLIExit is raised.
    """
    if group_name.startswith('/'):
        group_name = group_name[1:]
    top_level = group_name.split('/')[0]
    if top_level not in ['code', 'data', 'documentation']:
        sys.stderr.write("invalid group name %s\n" % group_name)
        raise CLIExit
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r+')
    # Close the paper even if the group cannot be created.
    try:
        paper.file.create_group(group_name)
    finally:
        paper.close()
337 | 
def extract(paper, dataset, filename):
    """Extract *dataset* from a paper into a file.

    A *filename* of '-' sends the data to standard output. A ValueError
    from extract_to_file is reported on stderr and turned into CLIExit.
    """
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r')
    # The paper used to be left open by this command; close it on
    # every exit path.
    try:
        ds = paper.file[dataset]
        try:
            if filename == '-':
                extract_to_file(paper, ds, file=sys.stdout)
            else:
                extract_to_file(paper, ds, filename=filename)
        except ValueError as exc:
            sys.stderr.write(exc.args[0] + '\n')
            raise CLIExit
    finally:
        paper.close()
350 | 
def _script(paper, dataset, filename, run, create_method):
    """Store the script in *filename* as a codelet and optionally run it.

    *create_method* is the name of the paper method that creates the
    codelet; it is called with *dataset* and the script text. The
    returned codelet is executed when *run* is true.
    """
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r+')
    # Close the paper even if reading the script or running it fails.
    try:
        # The context manager closes the script file, which used to be
        # left to the garbage collector.
        with open(filename) as source:
            script = source.read()
        codelet = getattr(paper, create_method)(dataset, script)
        if run:
            codelet.run()
    finally:
        paper.close()
359 | 
def calclet(paper, dataset, filename, run):
    """Store the script in *filename* as a calclet named *dataset*.

    Delegates to _script() with the paper method "create_calclet".
    """
    _script(paper, dataset, filename, run,
            create_method="create_calclet")
362 | 
def importlet(paper, dataset, filename, run):
    """Store the script in *filename* as an importlet named *dataset*.

    Delegates to _script() with the paper method "create_importlet".
    """
    _script(paper, dataset, filename, run,
            create_method="create_importlet")
365 | 
def import_module(paper, module):
    """Call the paper's import_module method for *module*."""
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r+')
    # Close the paper even if the import fails.
    try:
        paper.import_module(module)
    finally:
        paper.close()
371 | 
def run(paper, codelet, debug, profile, checkin):
    """Run *codelet* from a paper.

    With *checkin*, every file below the local 'code' directory is
    first passed to update_from_file; ValueErrors raised there are
    reported on stderr and otherwise ignored. When *profile* is not
    None, the run is profiled with cProfile and the statistics are
    dumped to the file named by *profile*. A KeyError is reported as a
    missing codelet and turned into CLIExit. Whatever run_codelet
    returns is written to stderr unless it is None.
    """
    paper_name = get_paper(paper)
    with activepapers.storage.ActivePaper(paper_name, 'r+') as paper:
        if checkin:
            for dirpath, _subdirs, entries in os.walk('code'):
                for entry in entries:
                    source_file = os.path.join(dirpath, entry)
                    try:
                        update_from_file(paper, source_file)
                    except ValueError as err:
                        sys.stderr.write(err.args[0] + '\n')
        profiler = None
        try:
            if profile is not None:
                import cProfile, pstats
                profiler = cProfile.Profile()
                profiler.enable()
            error_text = paper.run_codelet(codelet, debug)
            if profiler is not None:
                profiler.disable()
                stats = pstats.Stats(profiler)
                stats.dump_stats(profile)
        except KeyError:
            sys.stderr.write("Codelet %s does not exist\n" % codelet)
            raise CLIExit
        if error_text is not None:
            sys.stderr.write(error_text)
399 | 
def _find_calclet_for_dummy_or_stale_item(paper_name):
    """Find the first dummy or stale item of a paper.

    The paper is opened read-only and the sets produced by its
    dependency_hierarchy() are scanned in order, skipping the first
    one. Returns the tuple (calclet, item_name), where calclet is the
    item's ACTIVE_PAPER_GENERATING_CODELET attribute, or (None, None)
    when no item is dummy or stale.
    """
    paper = activepapers.storage.ActivePaper(paper_name, 'r')
    deps = paper.dependency_hierarchy()
    next(deps) # the first set has no dependencies
    calclet = None
    item_name = None
    for item_set in deps:
        for item in item_set:
            if paper.is_dummy(item) or paper.is_stale(item):
                item_name = item.name
                calclet = item.attrs['ACTIVE_PAPER_GENERATING_CODELET']
                # Only the first hit of this set is needed.
                break
        # We must del item_set to prevent h5py from crashing when the
        # file is closed. Presumably there are HDF5 handles being freed
        # as a consequence of the del.
        del item_set
        if calclet is not None:
            # Stop scanning further sets once a codelet was found.
            break
    paper.close()
    return calclet, item_name
420 | 
def update(paper, verbose):
    """Re-run calclets until no dataset is dummy or stale any more.

    With *verbose*, each dataset that triggers a run is announced on
    standard output. When run_codelet returns something other than
    None, it is written to stderr (as the run command does) and the
    calclet is remembered as failed; the loop stops as soon as a failed
    calclet would have to be run again.
    """
    paper_name = get_paper(paper)
    failed = set()
    while True:
        calclet, item_name = _find_calclet_for_dummy_or_stale_item(paper_name)
        if calclet is None:
            break
        if calclet in failed:
            # Running it again would fail the same way and the loop
            # would never terminate.
            break
        if verbose:
            sys.stdout.write("Dataset %s is stale or dummy, running %s\n"
                             % (item_name, calclet))
            sys.stdout.flush()
        paper = activepapers.storage.ActivePaper(paper_name, 'r+')
        # Close the paper even if the codelet raises.
        try:
            exc = paper.run_codelet(calclet)
        finally:
            paper.close()
        if exc is not None:
            sys.stderr.write(exc)
            failed.add(calclet)
434 | 
def checkin(paper, type, file, force, dry_run):
    """Update a paper from files in the working directory.

    *file* is a list of file or directory names; directories are walked
    recursively. Each file is handed to update_from_file together with
    *type*, *force* and *dry_run*; ValueErrors raised there are
    reported on stderr and the remaining files are still processed.
    A name outside the working directory is reported on stderr and
    CLIExit is raised.
    """
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r+')

    def update(filename):
        try:
            update_from_file(paper, filename, type, force, dry_run)
        except ValueError as exc:
            sys.stderr.write(exc.args[0] + '\n')

    # Close the paper on every exit path, including CLIExit.
    try:
        cwd = os.path.abspath(os.getcwd())
        for filename in file:
            filename = os.path.abspath(filename)
            # Compare path components rather than string prefixes: a
            # plain startswith(cwd) also accepts sibling directories
            # such as '/a/project2' for the directory '/a/project'.
            try:
                relative = os.path.relpath(filename, cwd)
            except ValueError:
                # Raised on Windows for paths on a different drive.
                relative = None
            if relative is None or relative == os.pardir \
               or relative.startswith(os.pardir + os.sep):
                sys.stderr.write("File %s is not in the working directory\n"
                                 % filename)
                raise CLIExit
            if os.path.isdir(relative):
                for root, dirs, files in os.walk(relative):
                    for f in files:
                        # normpath removes the './' prefix produced
                        # when the working directory itself is given.
                        update(os.path.normpath(os.path.join(root, f)))
            else:
                update(relative)
    finally:
        paper.close()
461 | 
def checkout(paper, type, pattern, dry_run):
    """Extract items of a paper into the working directory.

    *pattern* is a list of shell-style patterns (or None); only items
    matching at least one of them are extracted. *type* restricts the
    extraction to one data type (None selects all types). With
    *dry_run*, the items that would be extracted are listed on standard
    output and nothing is written. Items for which extract_to_file
    raises ValueError are reported on stderr and skipped.
    """
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r')
    # Close the paper even if an extraction fails.
    try:
        pattern = process_patterns(pattern)
        directory = os.getcwd()
        for item in paper.iter_items():
            name = item.name[1:]  # remove initial slash
            dtype = datatype(item)
            if pattern and not any(p.match(name) for p in pattern):
                continue
            if type is not None and dtype != type:
                continue
            if dry_run:
                # The flag used to be ignored, so that a dry run wrote
                # the files anyway.
                sys.stdout.write("Extract %s\n" % item.name)
                continue
            try:
                extract_to_file(paper, item, directory=directory)
            except ValueError:
                sys.stderr.write("Skipping %s: data type %s not extractable\n"
                                 % (item.name, dtype))
    finally:
        paper.close()
480 | 
def ln(paper, reference, name):
    """Create a reference called *name* in a paper.

    *reference* must consist of three fields separated by colons; the
    first two, joined by a colon, and the third are passed to the
    paper's create_ref method. An empty third field is passed as None.
    Any other number of fields is reported on stderr and raises CLIExit.
    """
    fields = reference.split(':')
    if len(fields) != 3:
        sys.stderr.write('Invalid reference %s\n' % reference)
        raise CLIExit
    kind, target, target_path = fields
    with activepapers.storage.ActivePaper(get_paper(paper), 'r+') as paper:
        paper.create_ref(name, kind + ':' + target,
                         None if target_path == '' else target_path)
491 |     
def cp(paper, reference, name):
    """Create a copy called *name* in a paper.

    *reference* must consist of three fields separated by colons; the
    first two, joined by a colon, and the third are passed to the
    paper's create_copy method. An empty third field is passed as None.
    Any other number of fields is reported on stderr and raises CLIExit.
    """
    fields = reference.split(':')
    if len(fields) != 3:
        sys.stderr.write('Invalid reference %s\n' % reference)
        raise CLIExit
    kind, target, target_path = fields
    with activepapers.storage.ActivePaper(get_paper(paper), 'r+') as paper:
        paper.create_copy(name, kind + ':' + target,
                          None if target_path == '' else target_path)
502 | 
def refs(paper, verbose):
    """Print the external references of a paper in sorted order.

    With *verbose*, each reference is followed by the non-empty lists
    of its links and copies.
    """
    paper = activepapers.storage.ActivePaper(get_paper(paper), 'r')
    external = paper.external_references()
    paper.close()
    for ref in sorted(external.keys()):
        sys.stdout.write(ref.decode('utf-8') + '\n')
        if not verbose:
            continue
        links, copies = external[ref]
        for heading, entries in (("  links:\n", links),
                                 ("  copies:\n", copies)):
            if entries:
                sys.stdout.write(heading)
                for entry in entries:
                    sys.stdout.write("    %s\n" % entry)
521 | 
def edit(paper, dataset):
    """Edit *dataset* with the editor named by $EDITOR (default: vi).

    The dataset is extracted into a temporary directory while the paper
    is open read-only, the editor is run on the extracted file, and, if
    the editor exits with status 0, the paper is reopened for writing
    and updated from the file. A ValueError during extraction is
    reported on stderr and turned into CLIExit.
    """
    editor = os.getenv("EDITOR", "vi")
    paper_name = get_paper(paper)
    with tempdir.TempDir() as t:
        paper = activepapers.storage.ActivePaper(paper_name, 'r')
        try:
            # The lookup is inside the try block so that the paper is
            # closed as well when the dataset does not exist.
            ds = paper.file[dataset]
            filename = extract_to_file(paper, ds, directory=str(t))
        except ValueError as exc:
            sys.stderr.write(exc.args[0] + '\n')
            raise CLIExit
        finally:
            # The paper is closed before the editor starts.
            paper.close()
        ret = subprocess.call([editor, filename])
        if ret == 0:
            paper = activepapers.storage.ActivePaper(paper_name, 'r+')
            try:
                update_from_file(paper, filename,
                                 dataset_name=dataset, create_new=False)
            finally:
                paper.close()
543 | 
def console(paper, modify):
    """Start an interactive Python console with the paper's data.

    The console namespace contains the single name 'data', bound to the
    paper's data attribute. The paper is opened for writing only when
    *modify* is true.
    """
    import code
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r+' if modify else 'r')
    # Close the paper as well when the session is left through an
    # exception.
    try:
        environment = {'data': paper.data}
        code.interact(banner = "ActivePapers interactive console",
                      local = environment)
    finally:
        paper.close()
553 | 
def ipython(paper, modify):
    """Start an embedded IPython shell for a paper.

    The paper is opened for writing only when *modify* is true.
    """
    import IPython
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r+' if modify else 'r')
    # Close the paper as well when the session is left through an
    # exception.
    try:
        # NOTE(review): 'data' looks unused but is presumably meant to be
        # visible inside the embedded shell, which picks up the caller's
        # local names; keep the local names unchanged.
        data = paper.data
        IPython.embed()
    finally:
        paper.close()
561 | 


--------------------------------------------------------------------------------
/lib/activepapers/contents.py:
--------------------------------------------------------------------------------
 1 | # This module is not used by code running inside an ActivePaper,
 2 | # because the ActivePaper runtime system (execution.py) creates
 3 | # a specific module on the fly. This generic module
 4 | # is used when activepapers.contents is imported from a
 5 | # standard Python script. It is meant to facilitate
 6 | # development of codelets for ActivePaper in a standard
 7 | # Python development environment.
 8 | 
 9 | 
# Locate the (hopefully only) ActivePaper in the current directory.
# This runs when the module is imported: exactly one file whose name
# ends in '.ap' must be present, otherwise IOError is raised.
import os
apfiles = [fn for fn in os.listdir('.') if fn.endswith('.ap')]
if len(apfiles) != 1:
    raise IOError("directory contains %s ActivePapers" % len(apfiles))
# Helper names are deleted again once they are no longer needed, so
# that they do not remain in the module namespace.
del os

# Open the paper read-only
from activepapers.storage import ActivePaper
_paper = ActivePaper(apfiles[0], 'r')
del apfiles
del ActivePaper

# Emulate the internal activepapers.contents module
data = _paper.data
25 | 
def _open(filename, mode, section):
    """Open an internal file of the paper for reading.

    *filename* is converted by path_in_section; a result that does not
    start with a slash is placed below *section*. Only mode 'r' is
    accepted (checked by an assertion).
    """
    from activepapers.utility import path_in_section
    internal_path = path_in_section(filename, section)
    if not internal_path.startswith('/'):
        internal_path = '/'.join((section, internal_path))
    assert mode == 'r'
    return _paper.open_internal_file(internal_path, 'r', None)
33 | 
def open(filename, mode='r'):
    """Open *filename* relative to the '/data' section of the paper."""
    return _open(filename, mode, section='/data')
36 | 
def open_documentation(filename, mode='r'):
    """Open *filename* relative to the '/documentation' section."""
    return _open(filename, mode, section='/documentation')
39 | 
def exception_traceback():
    """Not available in this module; always raises NotImplementedError."""
    raise NotImplementedError
42 | 
# Make the code in the ActivePapers importable
# This replaces activepapers.execution.get_codelet_and_paper by a
# function that reports no codelet (None) together with the paper
# opened by this module.
import activepapers.execution
def _get_codelet_and_paper():
    return None, _paper
activepapers.execution.get_codelet_and_paper = _get_codelet_and_paper
# The function stays reachable through activepapers.execution; only
# the name in this module is removed.
del _get_codelet_and_paper
49 | 


--------------------------------------------------------------------------------
/lib/activepapers/execution.py:
--------------------------------------------------------------------------------
  1 | import imp
  2 | import collections
  3 | import os
  4 | import sys
  5 | import threading
  6 | import traceback
  7 | import weakref
  8 | import logging
  9 | 
 10 | import h5py
 11 | import numpy as np
 12 | 
 13 | import activepapers.utility
 14 | from activepapers.utility import ascii, utf8, isstring, execcode, \
 15 |                                  codepath, datapath, path_in_section, owner, \
 16 |                                  datatype, language, \
 17 |                                  timestamp, stamp, ms_since_epoch
 18 | import activepapers.standardlib
 19 | 
 20 | #
 21 | # A codelet is a Python script inside a paper.
 22 | #
 23 | # Codelets come in several varieties:
 24 | #
 25 | #  - Calclets can only access datasets inside the paper.
 26 | #    Their computations are reproducible.
 27 | #
 28 | #  - Importlets create datasets in the paper based on external resources.
 29 | #    Their results are not reproducible, and in general they are not
 30 | #    executable in a different environment. They are stored as documentation
 31 | #    and for manual re-execution.
 32 | #
 33 | 
 34 | class Codelet(object):
 35 | 
 36 |     def __init__(self, paper, node):
 37 |         self.paper = paper
 38 |         self.node = node
 39 |         self._dependencies = None
 40 |         assert node.name.startswith('/code/')
 41 |         self.path = node.name
 42 | 
 43 |     def dependency_attributes(self):
 44 |         if self._dependencies is None:
 45 |             return {'ACTIVE_PAPER_GENERATING_CODELET': self.path}
 46 |         else:
 47 |             deps = list(self._dependencies)
 48 |             deps.append(ascii(self.path))
 49 |             deps.sort()
 50 |             return {'ACTIVE_PAPER_GENERATING_CODELET': self.path,
 51 |                     'ACTIVE_PAPER_DEPENDENCIES': deps}
 52 | 
 53 |     def add_dependency(self, dependency):
 54 |         pass
 55 | 
 56 |     def owns(self, node):
 57 |         return owner(node) == self.path
 58 | 
 59 |     def _open_file(self, path, mode, encoding, section):
 60 |         if path.startswith(os.path.expanduser('~')):
 61 |             # Catch obvious attempts to access real files
 62 |             # rather than internal ones.
 63 |             raise IOError((13, "Permission denied: '%s'" % path))
 64 |         path = path_in_section(path, section)
 65 |         if not path.startswith('/'):
 66 |             path = section + '/' + path
 67 |         f = self.paper.open_internal_file(path, mode, encoding, self)
 68 |         f._set_attribute_callback(self.dependency_attributes)
 69 |         if mode[0] == 'r':
 70 |             self.add_dependency(f._ds.name)
 71 |         return f
 72 | 
 73 |     def open_data_file(self, path, mode='r', encoding=None):
 74 |         return self._open_file(path, mode, encoding, '/data')
 75 | 
 76 |     def open_documentation_file(self, path, mode='r', encoding=None):
 77 |         return self._open_file(path, mode, encoding, '/documentation')
 78 | 
 79 |     def exception_traceback(self):
 80 |         from traceback import extract_tb, print_exc
 81 |         import sys
 82 |         tb = sys.exc_info()[2]
 83 |         node, line, fn_name, _ = extract_tb(tb, limit=2)[1]
 84 |         paper_id, path = node.split(':')
 85 |         return CodeFile(self.paper, self.paper.file[path]), line, fn_name
 86 | 
 87 |     def _run(self, environment):
 88 |         logging.info("Running %s %s"
 89 |                      % (self.__class__.__name__.lower(), self.path))
 90 |         self.paper.remove_owned_by(self.path)
 91 |         # A string uniquely identifying the paper from which the
 92 |         # calclet is called. Used in Importer.
 93 |         script = utf8(self.node[...].flat[0])
 94 |         script = compile(script, ':'.join([self.paper._id(), self.path]), 'exec')
 95 |         self._contents_module = imp.new_module('activepapers.contents')
 96 |         self._contents_module.data = DataGroup(self.paper, None,
 97 |                                                self.paper.data_group, self)
 98 |         self._contents_module.code = CodeGroup(self.paper,
 99 |                                                self.paper.code_group)
100 |         self._contents_module.open = self.open_data_file
101 |         self._contents_module.open_documentation = self.open_documentation_file
102 |         self._contents_module.snapshot = self.paper.snapshot
103 |         self._contents_module.exception_traceback = self.exception_traceback
104 | 
105 |         # The remaining part of this method is not thread-safe because
106 |         # of the way the global state in sys.modules is modified.
107 |         with codelet_lock:
108 |             try:
109 |                 codelet_registry[(self.paper._id(), self.path)] = self
110 |                 for name, module in self.paper._local_modules.items():
111 |                     assert name not in sys.modules
112 |                     sys.modules[name] = module
113 |                 sys.modules['activepapers.contents'] = self._contents_module
114 |                 execcode(script, environment)
115 |             finally:
116 |                 del codelet_registry[(self.paper._id(), self.path)]
117 |                 self._contents_module = None
118 |                 if 'activepapers.contents' in sys.modules:
119 |                     del sys.modules['activepapers.contents']
120 |                 for name, module in self.paper._local_modules.items():
121 |                     del sys.modules[name]
122 | 
# Held by Codelet._run while it modifies sys.modules and executes the
# script.
codelet_lock = threading.Lock()
124 | 
125 | #
126 | # Importlets are run in the normal Python environment, with in
127 | # addition access to the special module activepapers.contents.
128 | #
129 | # All data generation is traced during importlet execution in order to
130 | # build the dependency graph.
131 | #
132 | # Importlets are allowed to read datasets except those they
133 | # generated themselves. This is not enforced at the moment.
134 | #
135 | 
class Importlet(Codelet):
    """Codelet run with the builtins exported by activepapers.utility."""

    def run(self):
        """Execute the script; only '__builtins__' is predefined."""
        self._run({'__builtins__': activepapers.utility.builtins.__dict__})

    def track_and_check_import(self, module_name):
        """Accept every import without recording or checking it."""
        return None
144 | 
145 | #
146 | # Calclets are run in a restricted execution environment:
147 | #  - many items removed from __builtins__
148 | #  - modified __import__ for tracking and verifying imports
149 | #  - an import hook for accessing modules stored in the paper
150 | #
151 | # All data access and data generation is traced during calclet
152 | # execution in order to build the dependency graph.
153 | #
154 | 
class Calclet(Codelet):
    """Codelet run with restricted builtins and recorded dependencies."""

    def run(self):
        """Start with an empty dependency set and execute the script."""
        self._dependencies = set()
        restricted = activepapers.utility.ap_builtins.__dict__
        self._run({'__builtins__': restricted})

    def add_dependency(self, dependency):
        """Add *dependency*, converted by ascii(), to the dependency set."""
        assert isinstance(self._dependencies, set)
        self._dependencies.add(ascii(dependency))

    def track_and_check_import(self, module_name):
        """Check that *module_name* may be imported and record it.

        A module stored in the paper is recorded as a dependency. Any
        other module is accepted only if its top-level package is listed
        in the paper's dependencies, in the allowed standard library
        modules, or is numpy or h5py; otherwise ImportError is raised.
        """
        if module_name == 'activepapers.contents':
            return
        node = self.paper.get_local_module(module_name)
        if node is None:
            top_level = module_name.split('.')[0]
            permitted = (top_level in self.paper.dependencies
                         or top_level
                            in activepapers.standardlib.allowed_modules
                         or top_level in ['numpy', 'h5py'])
            if not permitted:
                raise ImportError("import of %s not allowed" % module_name)
            return
        if datatype(node) != "module":
            # Not a module node itself: use its "__init__" entry.
            node = node.get("__init__", None)
        if node is not None and node.in_paper(self.paper):
            self.add_dependency(node.name)
182 | 
183 | 
184 | #
185 | # The attrs attribute of datasets and groups is wrapped
186 | # by a class that makes the attributes used by ACTIVE_PAPERS
187 | # invisible to calclet code.
188 | #
189 | 
try:
    # The abstract base classes live in collections.abc; the aliases in
    # the collections namespace were removed in Python 3.10.
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    # Python 2 has no collections.abc module.
    from collections import MutableMapping as _MutableMapping


class AttrWrapper(_MutableMapping):
    """Mapping view of a node's attrs that hides ActivePapers attributes.

    String keys starting with 'ACTIVE_PAPER' are treated as absent when
    reading, iterating or deleting, and are rejected with ValueError
    when writing.
    """

    def __init__(self, node):
        """Wrap the attrs of *node*."""
        self._node = node

    @classmethod
    def forbidden(cls, key):
        """Return True for string keys that start with 'ACTIVE_PAPER'."""
        return isstring(key) and key.startswith('ACTIVE_PAPER')

    def __len__(self):
        return len([k for k in self._node.attrs
                    if not AttrWrapper.forbidden(k)])

    def __iter__(self):
        for k in self._node.attrs:
            if not AttrWrapper.forbidden(k):
                yield k

    def __contains__(self, item):
        if AttrWrapper.forbidden(item):
            return False
        return item in self._node.attrs

    def __getitem__(self, item):
        if AttrWrapper.forbidden(item):
            raise KeyError(item)
        return self._node.attrs[item]

    def __setitem__(self, item, value):
        # Writing a hidden attribute is an error rather than a missing
        # key, hence ValueError instead of KeyError.
        if AttrWrapper.forbidden(item):
            raise ValueError(item)
        self._node.attrs[item] = value

    def __delitem__(self, item):
        if AttrWrapper.forbidden(item):
            raise KeyError(item)
        del self._node.attrs[item]
227 | 
228 | 
229 | #
230 | # Datasets are wrapped by a class that traces all accesses for
231 | # building the dependency graph.
232 | #
233 | 
class DatasetWrapper(object):
    """Wrapper around a dataset that stamps it whenever it is modified.

    Item assignment, resize() and write_direct() are forwarded to the
    wrapped dataset, which is then stamped as "data" with the codelet's
    dependency attributes. Attributes not defined here are looked up on
    the wrapped dataset.
    """

    def __init__(self, parent, ds, codelet):
        """Wrap dataset *ds*, reached through *parent*, for *codelet*."""
        self._parent = parent
        self._node = ds
        self._codelet = codelet
        self.attrs = AttrWrapper(ds)
        self.ref = ds.ref

    @property
    def parent(self):
        return self._parent

    def __len__(self):
        return len(self._node)

    def __getitem__(self, item):
        return self._node[item]

    def __setitem__(self, item, value):
        self._node[item] = value
        stamp(self._node, "data", self._codelet.dependency_attributes())

    def __getattr__(self, attr):
        # Only called for attributes not found the normal way. Without
        # this guard, a lookup on an instance whose _node is not set
        # yet would recurse without end.
        if attr == '_node':
            raise AttributeError(attr)
        return getattr(self._node, attr)

    # 'self' was missing from the signatures of read_direct and
    # write_direct, so that every call failed.
    def read_direct(self, dest, source_sel=None, dest_sel=None):
        """Forward to the wrapped dataset's read_direct."""
        return self._node.read_direct(dest, source_sel, dest_sel)

    def resize(self, size, axis=None):
        """Resize the wrapped dataset and stamp it."""
        self._node.resize(size, axis)
        stamp(self._node, "data", self._codelet.dependency_attributes())

    def write_direct(self, source, source_sel=None, dest_sel=None):
        """Forward to the wrapped dataset's write_direct and stamp it."""
        self._node.write_direct(source, source_sel, dest_sel)
        stamp(self._node, "data", self._codelet.dependency_attributes())

    def __repr__(self):
        codelet = owner(self._node)
        if codelet is None:
            owned = ""
        else:
            owned = " generated by %s" % codelet
        lines = ["Dataset %s%s" % (self._node.name, owned)]
        # np.prod replaces the alias np.product, which was removed in
        # NumPy 2.0.
        nelems = np.prod(self._node.shape)
        # Datasets with fewer than 100 elements are shown in full,
        # larger ones by shape and dtype only.
        if nelems < 100:
            lines.append(str(self._node[...]))
        else:
            lines.append("shape %s, dtype %s"
                         % (repr(self._node.shape), str(self._node.dtype)))
        return "\n".join(lines)
285 | 
286 | #
287 | # DataGroup is a wrapper class for the "data" group in a paper.
288 | # The wrapper traces access and creation of subgroups and datasets
289 | # for building the dependency graph. It also maintains the illusion
290 | # that the data subgroup is all there is in the HDF5 file.
291 | #
292 | 
293 | class DataGroup(object):
294 | 
295 |     def __init__(self, paper, parent, h5group, codelet, data_item=None):
296 |         self._paper = paper
297 |         self._parent = parent if parent is not None else self
298 |         self._node = h5group
299 |         self._codelet = codelet
300 |         self._data_item = data_item
301 |         if self._data_item is None and datatype(h5group) == "data":
302 |             self._data_item = self
303 |         self.attrs = AttrWrapper(h5group)
304 |         self.ref = h5group.ref
305 |         self.name = h5group.name
306 | 
307 |     @property
308 |     def parent(self):
309 |         return self._parent
310 | 
311 |     def _wrap_and_track_dependencies(self, node):
312 |         ap_type = datatype(node)
313 |         if ap_type == 'reference':
314 |             from activepapers.storage import dereference
315 |             paper, node = dereference(node)
316 |             if node.name.startswith('/data/'):
317 |                 node = paper.data[node.name[6:]]
318 |             elif isinstance(node, h5py.Group):
319 |                 node = DataGroup(paper, None, node, None, None)
320 |             else:
321 |                 node = DatasetWrapper(None, node, None)
322 |         else:
323 |             if self._codelet is not None:
324 |                 if ap_type is not None and ap_type != "group":
325 |                     self._codelet.add_dependency(node.name
326 |                                                  if self._data_item is None
327 |                                                  else self._data_item.name)
328 |                 codelet = owner(node)
329 |                 if codelet is not None \
330 |                    and datatype(self._node[codelet]) == "calclet":
331 |                     self._codelet.add_dependency(codelet)
332 |             if isinstance(node, h5py.Group):
333 |                 node = DataGroup(self._paper, self, node,
334 |                                  self._codelet, self._data_item)
335 |             else:
336 |                 node = DatasetWrapper(self, node, self._codelet)
337 |         return node
338 | 
339 |     def _stamp_new_node(self, node, ap_type):
340 |         if self._data_item:
341 |             stamp(self._data_item._node, "data",
342 |                   self._codelet.dependency_attributes())
343 |         else:
344 |             stamp(node, ap_type, self._codelet.dependency_attributes())
345 | 
346 |     def __len__(self):
347 |         return len(self._node)
348 | 
349 |     def __iter__(self):
350 |         for x in self._node:
351 |             yield x
352 | 
353 |     def __getitem__(self, path_or_ref):
354 |         if isstring(path_or_ref):
355 |             path = datapath(path_or_ref)
356 |         else:
357 |             path = self._node[path_or_ref].name
358 |             assert path.startswith('/data')
359 |         path = path.split('/')
360 |         if path[0] == '':
361 |             # datapath() ensures that path must start with
362 |             # ['', 'data'] in this case. Move up the parent
363 |             # chain to the root of the /data hierarchy.
364 |             path = path[2:]
365 |             node = self
366 |             while node is not node.parent:
367 |                 node = node.parent
368 |         else:
369 |             node = self
370 |         for element in path:
371 |             node = node._wrap_and_track_dependencies(node._node[element])
372 |         return node
373 | 
374 |     def get(self, path, default=None):
375 |         try:
376 |             return self[path]
377 |         except KeyError:
378 |             return default
379 | 
380 |     def __setitem__(self, path, value):
381 |         path = datapath(path)
382 |         needs_stamp = False
383 |         if isinstance(value, (DataGroup, DatasetWrapper)):
384 |             value = value._node
385 |         else:
386 |             needs_stamp = True
387 |         self._node[path] = value
388 |         if needs_stamp:
389 |             node = self._node[path]
390 |             stamp(node, "data", self._codelet.dependency_attributes())
391 | 
392 |     def __delitem__(self, path):
393 |         test = self._node[datapath(path)]
394 |         if owner(test) == self._codelet.path:
395 |             del self._node[datapath(path)]
396 |         else:
397 |             raise ValueError("%s trying to remove data created by %s"
398 |                              % (str(self._codelet.path), str(owner(test))))
399 | 
400 |     def create_group(self, path):
401 |         group = self._node.create_group(datapath(path))
402 |         self._stamp_new_node(group, "group")
403 |         return DataGroup(self._paper, self, group,
404 |                          self._codelet, self._data_item)
405 | 
406 |     def require_group(self, path):
407 |         group = self._node.require_group(datapath(path))
408 |         self._stamp_new_node(group, "group")
409 |         return DataGroup(self._paper, self, group,
410 |                          self._codelet, self._data_item)
411 | 
412 |     def mark_as_data_item(self):
413 |         stamp(self._node, "data", self._codelet.dependency_attributes())
414 |         self._data_item = self
415 | 
416 |     def create_dataset(self, path, *args, **kwargs):
417 |         ds = self._node.create_dataset(datapath(path), *args, **kwargs)
418 |         self._stamp_new_node(ds, "data")
419 |         return DatasetWrapper(self, ds, self._codelet)
420 | 
421 |     def require_dataset(self, path, *args, **kwargs):
422 |         ds = self._node.require_dataset(datapath(path), *args, **kwargs)
423 |         self._stamp_new_node(ds, "data")
424 |         return DatasetWrapper(self, ds, self._codelet)
425 | 
426 |     def visit(self, func):
427 |         self._node.visit(func)
428 | 
429 |     def visititems(self, func):
430 |         self._node.visititems(func)
431 | 
432 |     def copy(source, dest, name=None):
433 |         raise NotImplementedError("not yet implemented")
434 | 
435 |     def flush(self):
436 |         self._paper.flush()
437 | 
438 |     def __repr__(self):
439 |         codelet = owner(self._node)
440 |         if codelet is None:
441 |             owned = ""
442 |         else:
443 |             owned = " generated by %s" % codelet
444 |         items = list(self._node)
445 |         if not items:
446 |             lines = ["Empty group %s%s" % (self._node.name, owned)]
447 |         else:
448 |             lines = ["Group %s%s containing" % (self._node.name, owned)]
449 |             lines.extend("   "+i for i in items)
450 |         return "\n".join(lines)
451 | 
452 | #
453 | # CodeGroup is a wrapper class for the "code" group in a paper.
454 | # The wrapper provides read-only access to codelets and modules.
455 | #
456 | 
class CodeGroup(object):

    """
    Wrapper around a group node in the code section of a paper.
    It offers only lookup, iteration, and size queries.
    """

    def __init__(self, paper, node):
        self._paper, self._node = paper, node

    def __len__(self):
        """Return the number of members of the wrapped node."""
        member_count = len(self._node)
        return member_count

    def __iter__(self):
        """Yield whatever iterating over the wrapped node yields."""
        for member_name in self._node:
            yield member_name

    def __getitem__(self, path_or_ref):
        """
        Look up an item and wrap it: groups become CodeGroup objects,
        anything else becomes a CodeFile.

        :param path_or_ref: a path string, or any other key that the
                            wrapped node accepts for indexing
        """
        if isstring(path_or_ref):
            path = codepath(path_or_ref)
        else:
            path = self._node[path_or_ref].name
            assert path.startswith('/code')
        node = self._node[path]
        wrapper_class = CodeGroup if isinstance(node, h5py.Group) \
                        else CodeFile
        return wrapper_class(self._paper, node)

    def __repr__(self):
        return "<CodeGroup %s>" % (self._node.name,)
484 | 
class CodeFile(object):

    """
    Description of a single code item: its type, language, name,
    and source text, all read from the node when the object is
    created.
    """

    def __init__(self, paper, node):
        self._paper, self._node = paper, node
        self.type = datatype(node)
        self.language = language(node)
        self.name = node.name
        # The source text is the first element of the node's contents.
        contents = node[...]
        self.code = utf8(contents.flat[0])

    def __repr__(self):
        fields = (self.type, self.name, self.language)
        return "<%s %s (%s)>" % fields
497 | 
498 | #
499 | # Initialize a paper registry that permits finding a paper
500 | # object through a unique id stored in the codelet names,
501 | # and a codelet registry for retrieving active codelets.
502 | #
503 | 
# Papers register themselves here under the string returned by their
# _id() method. Entries are held weakly, so the registry alone does
# not keep a paper alive.
paper_registry = weakref.WeakValueDictionary()
# Looked up with (paper_id, codelet_path) tuples by
# get_codelet_and_paper(). Entries are held weakly as well.
codelet_registry = weakref.WeakValueDictionary()
506 | 
507 | #
508 | # Identify calls from inside a codelet in order to apply
509 | # the codelet-specific import rules.
510 | #
511 | 
def get_codelet_and_paper():
    """
    :returns: the codelet from which this function was called,
              and the paper containing it. Both values are None
              if there is no codelet in the call chain.
    """
    # The source file of this module, which is also the module
    # containing the Codelet class. Compiled file names are mapped
    # back to the source name by dropping their last character.
    own_source = __file__
    extension = os.path.splitext(own_source)[1]
    if extension in ('.pyc', '.pyo'):
        own_source = own_source[:-1]
    # The call stack without its last entry, which is this
    # function itself.
    frames = traceback.extract_stack()[:-1]
    # A codelet is running if some frame of this module is
    # executing the call that runs codelet scripts.
    running_codelet = any(
        filename == own_source
        and command == "execcode(script, environment)"
        for filename, line_no, fn_name, command in frames)
    if not running_codelet:
        return None, None
    # Code from a paper is compiled under the name
    # "paper_id:codelet_path". Take the first frame whose file name
    # has exactly two ':'-separated parts.
    for frame in frames:
        parts = frame[0].split(':')
        if len(parts) == 2:
            paper_id, codelet = parts
            if not codelet.startswith('/code'):
                # Something other than a paper:codelet combination
                return None, None
            return (codelet_registry.get((paper_id, codelet), None),
                    paper_registry.get(paper_id, None))
        # Otherwise the file name is a real filename; keep looking.
    return None, None
549 | 
550 | #
551 | # Install an importer for accessing Python modules inside papers
552 | #
553 | 
class Importer(object):

    """
    Meta path finder that resolves imports made from inside a paper
    to Python modules stored in that paper.
    """

    def find_module(self, fullname, path=None):
        """
        Return a ModuleLoader for fullname, or None when the import
        is not made from a paper or the paper has no such Python
        module.
        """
        codelet, paper = get_codelet_and_paper()
        if paper is None:
            return None
        node = paper.get_local_module(fullname)
        if node is None:
            # No corresponding node found
            return None
        is_package = bool(node.is_group())
        if is_package:
            # A group is a package only if it contains '__init__',
            # which then provides the code of the package.
            if '__init__' not in node:
                return None
            node = node['__init__']
        is_python_module = \
            datatype(node) == "module" \
            and ascii(node.attrs.get("ACTIVE_PAPER_LANGUAGE", "")) == "python"
        if not is_python_module:
            # Node found but is not a Python module
            return None
        return ModuleLoader(paper, fullname, node, is_package)
577 | 
578 | 
class ModuleLoader(object):

    """
    Loader that executes the code stored in a paper node as a Python
    module.
    """

    def __init__(self, paper, fullname, node, is_package):
        self.paper, self.fullname, self.node = paper, fullname, node
        # Python 3.4 has special treatment for loaders that
        # have an attribute 'is_package', hence the underscore.
        self._is_package = is_package

    def load_module(self, fullname):
        """
        Return the module called fullname, creating and executing it
        first unless it is already in sys.modules.
        """
        assert fullname == self.fullname
        module = sys.modules.get(fullname) if fullname in sys.modules \
                 else None
        if fullname in sys.modules:
            # Already imported. If it came from a paper, it must be
            # the same paper as the one this loader works for.
            previous_loader = getattr(module, '__loader__', None)
            if isinstance(previous_loader, ModuleLoader):
                assert previous_loader.paper is self.paper
            return module
        source = ascii(self.node[...].flat[0])
        # The pseudo file name "paper_id:node_name" is what
        # get_codelet_and_paper() looks for in the call stack.
        pseudo_filename = ':'.join([self.paper._id(), self.node.name])
        code = compile(source, pseudo_filename, 'exec')
        module = imp.new_module(fullname)
        module.__file__ = os.path.abspath(self.node.file.filename) + ':' + \
                          self.node.name
        module.__loader__ = self
        if self._is_package:
            module.__path__ = []
            module.__package__ = fullname
        else:
            module.__package__ = fullname.rpartition('.')[0]
        # Register the module before running its code and undo the
        # registration if the code fails.
        sys.modules[fullname] = module
        self.paper._local_modules[fullname] = module
        try:
            execcode(code, module.__dict__)
        except:
            del sys.modules[fullname]
            del self.paper._local_modules[fullname]
            raise
        return module
618 | 
619 | sys.meta_path.insert(0, Importer())
620 | 
621 | #
622 | # Install an import hook for intercepting imports from codelets
623 | #
624 | 
standard__import__ = __import__
def ap__import__(*args, **kwargs):
    """
    Replacement for __import__ that lets the calling codelet, if
    there is one, track and check the import before it is carried
    out by the standard __import__.

    All arguments are passed on unchanged to the standard __import__,
    whose result is returned.
    """
    codelet, paper = get_codelet_and_paper()
    if codelet is not None:
        # The module name is usually the first positional argument,
        # but __import__ also accepts it as the keyword 'name'.
        name = args[0] if args else kwargs.get('name')
        if name is not None:
            codelet.track_and_check_import(name)
    # Called without a name, the standard function reports the error.
    return standard__import__(*args, **kwargs)
activepapers.utility.ap_builtins.__import__ = ap__import__
632 | 


--------------------------------------------------------------------------------
/lib/activepapers/exploration.py:
--------------------------------------------------------------------------------
 1 | # An API for opening ActivePapers read-only for exploration of their
 2 | # contents, including re-use of the code.
 3 | 
 4 | from activepapers.storage import ActivePaper as ActivePaperStorage
 5 | from activepapers.storage import open_paper_ref
 6 | from activepapers.utility import path_in_section
 7 | 
 8 | class ActivePaper(object):
 9 | 
10 |     def __init__(self, file_or_ref, use_code=True):
11 |         global _paper_for_code
12 |         try:
13 |             self.paper = open_paper_ref(file_or_ref)
14 |         except ValueError:
15 |             self.paper = ActivePaperStorage(file_or_ref, 'r')
16 |         if use_code and ("python-packages" not in self.paper.code_group \
17 |                          or len(self.paper.code_group["python-packages"]) == 0):
18 |             # The paper contains no importable modules or packages.
19 |             use_code = False
20 |         if use_code and _paper_for_code is not None:
21 |             raise IOError("Only one ActivePaper per process can use code.")
22 |         self.data = self.paper.data
23 |         self.documentation = self.paper.documentation_group
24 |         self.code = self.paper.code_group
25 |         try:
26 |             self.__doc__ = self.open_documentation('README').read()
27 |         except KeyError:
28 |             pass
29 |         if use_code:
30 |             _paper_for_code = self.paper
31 | 
32 |     def close(self):
33 |         global _paper_for_code
34 |         if _paper_for_code is self.paper:
35 |             _paper_for_code = None
36 | 
37 |     def _open(self, path, section, mode='r'):
38 |         if mode not in ['r', 'rb']:
39 |             raise ValueError("invalid mode: " + repr(mode))
40 |         path = path_in_section(path, section)
41 |         if not path.startswith('/'):
42 |             path = section + '/' + path
43 |         return self.paper.open_internal_file(path, mode, 'utf8', None)
44 | 
45 |     def open(self, path, mode='r'):
46 |         return self._open(path, '/data', mode)
47 | 
48 |     def open_documentation(self, path, mode='r'):
49 |         return self._open(path, '/documentation', mode)
50 | 
51 |     def read_code(self, file):
52 |         return self.code[file][...].ravel()[0].decode('utf-8')
53 | 
# The paper registered for code use by ActivePaper.__init__,
# or None if there is none.
_paper_for_code = None
# Replacement for activepapers.execution.get_codelet_and_paper:
# it never reports a codelet and always reports the paper stored
# in _paper_for_code.
def _get_codelet_and_paper():
    return None, _paper_for_code
import activepapers.execution
activepapers.execution.get_codelet_and_paper = _get_codelet_and_paper
# Remove the local name; the function stays reachable through the
# attribute assigned above.
del _get_codelet_and_paper
60 | 


--------------------------------------------------------------------------------
/lib/activepapers/library.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | from activepapers import url
  4 | 
  5 | #
  6 | # The ACTIVEPAPERS_LIBRARY environment variable follows the
  7 | # same conventions as PATH under Unix.
  8 | #
  9 | library = os.environ.get('ACTIVEPAPERS_LIBRARY', None)
 10 | if library is None:
 11 |     # This is Unix-only, needs a Windows equivalent
 12 |     home = os.environ.get('HOME', None)
 13 |     if home is None:
 14 |         library = ""
 15 |     else:
 16 |         library = os.path.join(home, '.activepapers')
 17 |         if not os.path.exists(library):
 18 |             try:
 19 |                 os.mkdir(library)
 20 |             except (IOError, OSError):
 21 |                 library = ""
 22 |         if not os.path.exists(library):
 23 |             library = ""
 24 | 
 25 | library = library.split(':')
 26 | 
 27 | def split_paper_ref(paper_ref):
 28 |     index = paper_ref.find(':')
 29 |     if index == -1:
 30 |         raise ValueError("invalid paper reference %s" % paper_ref)
 31 |     return paper_ref[:index].lower(), paper_ref[index+1:]
 32 | 
 33 | 
 34 | #
 35 | # Return the local filename for a paper reference,
 36 | # after downloading the file if required.
 37 | #
 38 | 
 39 | def _get_local_file(label):
 40 |     filename = label + '.ap'
 41 |     for dir in library:
 42 |         full_name = os.path.join(dir, "local", filename)
 43 |         if os.path.exists(full_name):
 44 |             return full_name
 45 |     raise IOError(2, "No such ActivePaper: 'local:%s' (filename: %s)"
 46 |                   % (label, full_name))
 47 | 
 48 | def _get_figshare_doi(label, local_filename):
 49 |     figshare_url = "http://api.figshare.com/v1/articles/%s" % label
 50 |     try:
 51 |         response = url.urlopen(figshare_url)
 52 |         json_data = response.read().decode("utf-8")
 53 |     except url.HTTPError:
 54 |         raise ValueError("Not a figshare DOI: %s" % label)
 55 |     article_details = json.loads(json_data)
 56 |     download_url = article_details['items'][0]['files'][0]['download_url']
 57 |     url.urlretrieve(download_url, local_filename)
 58 |     return local_filename
 59 | 
 60 | def _get_zenodo_doi(label, local_filename):
 61 |     try:
 62 |         # Python 2
 63 |         from HTMLParser import HTMLParser
 64 |         bytes2text = lambda x: x
 65 |     except ImportError:
 66 |         # Python 3
 67 |         from html.parser import HTMLParser
 68 |         def bytes2text(b):
 69 |             return b.decode(encoding="utf8")
 70 |     class ZenodoParser(HTMLParser):
 71 |         def handle_starttag(self, tag, attrs):
 72 |             if tag == "link":
 73 |                 attrs = dict(attrs)
 74 |                 if attrs.get("rel") == "alternate" \
 75 |                    and attrs.get("type") != "application/rss+xml":
 76 |                     self.link_href = attrs.get("href")
 77 |                     self.link_type = attrs.get("type")
 78 | 
 79 |     zenodo_url = "http://dx.doi.org/" + label
 80 |     parser = ZenodoParser()
 81 |     source = url.urlopen(zenodo_url)
 82 |     try:
 83 |         parser.feed(bytes2text(source.read()))
 84 |     finally:
 85 |         source.close()
 86 |     assert parser.link_type == "application/octet-stream"
 87 |     download_url = parser.link_href
 88 |     url.urlretrieve(download_url, local_filename)
 89 |     return local_filename
 90 | 
 91 | def _get_doi(label):
 92 |     local_filename = os.path.join(library[0], label + ".ap")
 93 |     if os.path.exists(local_filename):
 94 |         return local_filename
 95 | 
 96 |     dir_name = os.path.join(library[0], label.split("/")[0])
 97 |     if not os.path.exists(dir_name):
 98 |         os.mkdir(dir_name)
 99 | 
100 |     # There doesn't seem to be a way to download an
101 |     # arbitrary digital object through its DOI. We know
102 |     # know how to do it for figshare and Zenodo, which are
103 |     # each handled by specialized code.
104 | 
105 |     # Figshare
106 |     if 'figshare' in label:
107 |         return _get_figshare_doi(label, local_filename)
108 |     # Zenodo
109 |     elif 'zenodo' in label:
110 |         return _get_zenodo_doi(label, local_filename)
111 |     # Nothing else works for now
112 |     else:
113 |         raise ValueError("Unrecognized DOI: %s" % label)
114 | 
115 | def _get_file_in_cwd(label):
116 |     filename = label + '.ap'
117 |     full_name = os.path.abspath(os.path.join(os.getcwd(), filename))
118 |     if os.path.exists(full_name):
119 |         return full_name
120 |     raise IOError(2, "No such ActivePaper: 'cwd:%s' (filename: %s)"
121 |                   % (label, full_name))
122 | 
# Maps the lower-cased type of a paper reference to the function
# that turns the label of the reference into a local filename.
download_handlers = {'local': _get_local_file,
                     'doi': _get_doi,
                     'cwd': _get_file_in_cwd}
126 | 
def find_in_library(paper_ref):
    """
    Return the local filename for a paper reference.

    :param paper_ref: a string of the form "type:label"
    :returns: the result of the handler registered for the type
              in download_handlers
    :raises ValueError: if paper_ref has no colon or if no handler
                        is registered for its type
    """
    ref_type, label = split_paper_ref(paper_ref)
    handler = download_handlers.get(ref_type)
    # An explicit check rather than an assertion, which would
    # disappear when Python runs with -O.
    if handler is None:
        raise ValueError("invalid paper reference %s" % paper_ref)
    return handler(label)
132 | 


--------------------------------------------------------------------------------
/lib/activepapers/standardlib.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
# Re-export the names of the module list that matches the major
# version of the running interpreter.
if sys.version_info[0] == 2:

    from activepapers.standardlib2 import *

else:

    from activepapers.standardlib3 import *

# Keep the helper import out of this module's namespace.
del sys
12 | 


--------------------------------------------------------------------------------
/lib/activepapers/standardlib2.py:
--------------------------------------------------------------------------------
  1 | # Python 2 standard library modules that are allowed in calclets
  2 | 
  3 | # The following is a complete list of modules in the standard library,
  4 | # obtained from an installation of Python 2.7.3. Only modules starting
  5 | # with an underscore were removed. Forbidden modules are commented out.
  6 | # The selection needs a more careful revision.
  7 | 
  8 | allowed_modules = [
  9 | #"BaseHTTPServer",
 10 | "Bastion",
 11 | #"CGIHTTPServer",
 12 | "ConfigParser",
 13 | "Cookie",
 14 | #"DocXMLRPCServer",
 15 | "HTMLParser",
 16 | "MimeWriter",
 17 | "Queue",
 18 | #"SimpleHTTPServer",
 19 | #"SimpleXMLRPCServer",
 20 | #"SocketServer",
 21 | "StringIO",
 22 | "UserDict",
 23 | "UserList",
 24 | "UserString",
 25 | "abc",
 26 | "aifc",
 27 | #"antigravity",
 28 | #"anydbm",
 29 | #"argparse",
 30 | #"ast",
 31 | "asynchat",
 32 | "asyncore",
 33 | "atexit",
 34 | #"audiodev",
 35 | "base64",
 36 | #"bdb",
 37 | "binhex",
 38 | "bisect",
 39 | #"bsddb",
 40 | #"cProfile",
 41 | "calendar",
 42 | #"cgi",
 43 | #"cgitb",
 44 | "chunk",
 45 | "cmd",
 46 | "code",
 47 | "codecs",
 48 | "codeop",
 49 | "collections",
 50 | "colorsys",
 51 | "commands",
 52 | "compileall",
 53 | "compiler",
 54 | "config",
 55 | "contextlib",
 56 | "cookielib",
 57 | "copy",
 58 | "copy_reg",
 59 | "csv",
 60 | #"ctypes",
 61 | #"curses",
 62 | #"dbhash",
 63 | "decimal",
 64 | "difflib",
 65 | "dircache",
 66 | #"dis",
 67 | #"distutils",
 68 | #"doctest",
 69 | #"dumbdbm",
 70 | #"dummy_thread",
 71 | #"dummy_threading",
 72 | #"email",
 73 | "encodings",
 74 | #"filecmp",
 75 | #"fileinput",
 76 | "fnmatch",
 77 | "formatter",
 78 | "fpformat",
 79 | "fractions",
 80 | #"ftplib",
 81 | "functools",
 82 | "genericpath",
 83 | #"getopt",
 84 | #"getpass",
 85 | "gettext",
 86 | #"glob",
 87 | #"gzip",
 88 | "hashlib",
 89 | "heapq",
 90 | "hmac",
 91 | "hotshot",
 92 | "htmlentitydefs",
 93 | "htmllib",
 94 | #"httplib",
 95 | #"idlelib",
 96 | "ihooks",
 97 | #"imaplib",
 98 | "imghdr",
 99 | #"importlib",
100 | #"imputil",
101 | "inspect",
102 | "io",
103 | "json",
104 | "keyword",
105 | "lib2to3",
106 | "linecache",
107 | "locale",
108 | "logging",
109 | #"mailbox",
110 | #"mailcap",
111 | "markupbase",
112 | "md5",
113 | "mhlib",
114 | "mimetools",
115 | "mimetypes",
116 | "mimify",
117 | #"modulefinder",
118 | "multifile",
119 | #"multiprocessing",
120 | #"mutex",
121 | #"netrc",
122 | "new",
123 | #"nntplib",
124 | #"ntpath",
125 | #"nturl2path",
126 | "numbers",
127 | "opcode",
128 | "optparse",
129 | "os",
130 | #"os2emxpath",
131 | #"pdb.doc",
132 | #"pdb",
133 | "pickle",
134 | "pickletools",
135 | "pipes",
136 | "pkgutil",
137 | "plistlib",
138 | "popen2",
139 | "poplib",
140 | "posixfile",
141 | "posixpath",
142 | "pprint",
143 | "profile",
144 | "pstats",
145 | #"pty",
146 | "py_compile",
147 | "pyclbr",
148 | #"pydoc",
149 | #"pydoc_data",
150 | "quopri",
151 | "random",
152 | "re",
153 | "repr",
154 | "rexec",
155 | "rfc822",
156 | "rlcompleter",
157 | "robotparser",
158 | "runpy",
159 | "sched",
160 | "sets",
161 | "sgmllib",
162 | "sha",
163 | "shelve",
164 | "shlex",
165 | "shutil",
166 | #"site",
167 | #"smtpd",
168 | #"smtplib",
169 | #"sndhdr",
170 | #"socket",
171 | #"sqlite3",
172 | "sre",
173 | "sre_compile",
174 | "sre_constants",
175 | "sre_parse",
176 | #"ssl",
177 | #"stat",
178 | #"statvfs",
179 | "string",
180 | "stringold",
181 | "stringprep",
182 | "struct",
183 | #"subprocess",
184 | #"sunau",
185 | #"sunaudio",
186 | "symbol",
187 | "symtable",
188 | "sysconfig",
189 | "tabnanny",
190 | #"tarfile",
191 | #"telnetlib",
192 | "tempfile",
193 | "test",
194 | "textwrap",
195 | #"this",
196 | #"threading",
197 | "timeit",
198 | "token",
199 | "tokenize",
200 | #"trace",
201 | #"traceback",
202 | "tty",
203 | "types",
204 | #"unittest",
205 | #"urllib",
206 | #"urllib2",
207 | "urlparse",
208 | "user",
209 | "uu",
210 | #"uuid",
211 | "warnings",
212 | #"wave",
213 | "weakref",
214 | #"webbrowser",
215 | #"whichdb",
216 | "wsgiref",
217 | "xdrlib",
218 | "xml",
219 | "xmllib",
220 | "xmlrpclib",
221 | "zipfile",
222 | 
223 | ## extension modules
224 | 
225 | #"OSATerminology",
226 | "array",
227 | #"audioop",
228 | #"autoGIL",
229 | "binascii",
230 | #"bsddb185",
231 | "bz2",
232 | "cPickle",
233 | "cStringIO",
234 | "cmath",
235 | "crypt",
236 | "datetime",
237 | #"dbm",
238 | #"fcntl",
239 | #"future_builtins",
240 | #"gdbm",
241 | #"gestalt",
242 | #"grp",
243 | #"icglue",
244 | "itertools",
245 | "math",
246 | #"mmap",
247 | "nis",
248 | "operator",
249 | "parser",
250 | "pyexpat",
251 | #"readline",
252 | #"resource",
253 | #"select",
254 | "strop",
255 | #"syslog",
256 | #"termios",
257 | "time",
258 | "unicodedata",
259 | "zlib",
260 | ]
261 | 
262 | 
263 | 


--------------------------------------------------------------------------------
/lib/activepapers/standardlib3.py:
--------------------------------------------------------------------------------
  1 | # Python 3 standard library modules that are allowed in calclets
  2 | 
  3 | # The following is a complete list of modules in the standard library,
  4 | # obtained from an installation of Python 3.3. Only modules starting
  5 | # with an underscore were removed. Forbidden modules are commented out.
  6 | # The selection needs a more careful revision.
  7 | 
  8 | allowed_modules = [
  9 | "abc",
 10 | "aifc",
 11 | #"antigravity",
 12 | #"argparse",
 13 | "ast",
 14 | #"asynchat",
 15 | #"asyncore",
 16 | "base64",
 17 | "bdb",
 18 | "binhex",
 19 | "bisect",
 20 | "bz2",
 21 | #"cProfile",
 22 | "calendar",
 23 | #"cgi",
 24 | #"cgitb",
 25 | "chunk",
 26 | "cmd",
 27 | "code",
 28 | "codecs",
 29 | "codeop",
 30 | "collections",
 31 | "colorsys",
 32 | "compileall",
 33 | #"concurrent",
 34 | "configparser",
 35 | "contextlib",
 36 | "copy",
 37 | "copyreg",
 38 | "crypt",
 39 | "csv",
 40 | #"ctypes",
 41 | #"curses",
 42 | "datetime",
 43 | #"dbm",
 44 | "decimal",
 45 | "difflib",
 46 | #"dis",
 47 | #"distutils",
 48 | #"doctest",
 49 | #"dummy_threading",
 50 | #"email",
 51 | "encodings",
 52 | "filecmp",
 53 | "fileinput",
 54 | "fnmatch",
 55 | "formatter",
 56 | "fractions",
 57 | #"ftplib",
 58 | "functools",
 59 | #"genericpath",
 60 | #"getopt",
 61 | #"getpass",
 62 | "gettext",
 63 | #"glob",
 64 | "gzip",
 65 | "hashlib",
 66 | "heapq",
 67 | "hmac",
 68 | "html",
 69 | #"http",
 70 | #"idlelib",
 71 | #"imaplib",
 72 | "imghdr",
 73 | #"imp",
 74 | #"importlib",
 75 | "inspect",
 76 | #"io",
 77 | #"ipaddress",
 78 | "json",
 79 | "keyword",
 80 | "lib2to3",
 81 | "linecache",
 82 | "locale",
 83 | "logging",
 84 | "lzma",
 85 | #"macpath",
 86 | #"macurl2path",
 87 | #"mailbox",
 88 | #"mailcap",
 89 | "mimetypes",
 90 | #"modulefinder",
 91 | #"multiprocessing",
 92 | #"netrc",
 93 | #"nntplib",
 94 | #"ntpath",
 95 | #"nturl2path",
 96 | "numbers",
 97 | "opcode",
 98 | #"optparse",
 99 | "os",
100 | "os2emxpath",
101 | #"pdb",
102 | "pickle",
103 | "pickletools",
104 | "pipes",
105 | #"pkgutil",
106 | "plistlib",
107 | "poplib",
108 | "posixpath",
109 | "pprint",
110 | "profile",
111 | "pstats",
112 | "pty",
113 | "py_compile",
114 | "pyclbr",
115 | "pydoc",
116 | "pydoc_data",
117 | "queue",
118 | "quopri",
119 | "random",
120 | "re",
121 | "reprlib",
122 | "rlcompleter",
123 | "runpy",
124 | "sched",
125 | "shelve",
126 | "shlex",
127 | "shutil",
128 | #"site",
129 | #"smtpd",
130 | #"smtplib",
131 | #"sndhdr",
132 | #"socket",
133 | #"socketserver",
134 | #"sqlite3",
135 | "sre_compile",
136 | "sre_constants",
137 | "sre_parse",
138 | #"ssl",
139 | #"stat",
140 | "string",
141 | "stringprep",
142 | "struct",
143 | #"subprocess",
144 | #"sunau",
145 | "symbol",
146 | "symtable",
147 | "sysconfig",
148 | "tabnanny",
149 | "tarfile",
150 | #"telnetlib",
151 | #"tempfile",
152 | "test",
153 | "textwrap",
154 | #"this",
155 | #"threading",
156 | "timeit",
157 | #"tkinter",
158 | "token",
159 | "tokenize",
160 | "trace",
161 | #"traceback",
162 | "tty",
163 | "turtle",
164 | "turtledemo",
165 | "types",
166 | #"unittest",
167 | #"urllib",
168 | "uu",
169 | #"uuid",
170 | "venv",
171 | "warnings",
172 | #"wave",
173 | "weakref",
174 | #"webbrowser",
175 | #"wsgiref",
176 | "xdrlib",
177 | "xml",
178 | "xmlrpc",
179 | "zipfile",
180 | 
181 | ## extension modules
182 | 
183 | "array",
184 | "atexit",
185 | #"audioop",
186 | "binascii",
187 | "bz2",
188 | "cmath",
189 | "crypt",
190 | #"fcntl",
191 | #"grp",
192 | "math",
193 | #"mmap",
194 | #"nis",
195 | "parser",
196 | "pyexpat",
197 | #"readline",
198 | #"resource",
199 | #"select",
200 | #"syslog",
201 | #"termios",
202 | "time",
203 | "unicodedata",
204 | "zlib",
205 | ]
206 | 


--------------------------------------------------------------------------------
/lib/activepapers/storage.py:
--------------------------------------------------------------------------------
  1 | import collections
  2 | import getpass
  3 | import imp
  4 | import importlib
  5 | import io
  6 | import itertools as it
  7 | import os
  8 | import socket
  9 | import sys
 10 | import weakref
 11 | 
 12 | import numpy as np
 13 | import h5py
 14 | 
 15 | from activepapers.utility import ascii, utf8, h5vstring, isstring, execcode, \
 16 |                                  codepath, datapath, owner, mod_time, \
 17 |                                  datatype, timestamp, stamp, ms_since_epoch
 18 | from activepapers.execution import Calclet, Importlet, DataGroup, paper_registry
 19 | from activepapers.library import find_in_library
 20 | import activepapers.version
 21 | 
# Text written to the top-level "README" dataset when a paper
# is created in 'w' mode.
readme_text = """
This file is an ActivePaper (Python edition).

For more information about ActivePapers see:

  http://www.activepapers.org/
"""
 29 | 
 30 | 
 31 | #
 32 | # The ActivePaper class is the only one in this library
 33 | # meant to be used directly by client code.
 34 | #
 35 | 
 36 | class ActivePaper(object):
 37 | 
 38 |     def __init__(self, filename, mode="r", dependencies=None):
 39 |         self.filename = filename
 40 |         self.file = h5py.File(filename, mode)
 41 |         self.open = True
 42 |         self.writable = False
 43 |         if mode[0] == 'r':
 44 |             assert dependencies is None
 45 |             if ascii(self.file.attrs['DATA_MODEL']) != 'active-papers-py':
 46 |                 raise ValueError("File %s is not an ActivePaper" % filename)
 47 |             self.code_group = self.file["code"]
 48 |             self.data_group = self.file["data"]
 49 |             self.documentation_group = self.file["documentation"]
 50 |             self.writable = '+' in mode
 51 |             self.history = self.file['history']
 52 |             deps = self.file.get('external-dependencies/'
 53 |                                  'python-packages', None)
 54 |             if deps is None:
 55 |                 self.dependencies = []
 56 |             else:
 57 |                 self.dependencies = [ascii(n) for n in deps]
 58 |             for module_name in self.dependencies:
 59 |                 importlib.import_module(module_name)
 60 |         elif mode[0] == 'w':
 61 |             self.file.attrs['DATA_MODEL'] = ascii('active-papers-py')
 62 |             self.file.attrs['DATA_MODEL_MAJOR_VERSION'] = 0
 63 |             self.file.attrs['DATA_MODEL_MINOR_VERSION'] = 1
 64 |             self.code_group = self.file.create_group("code")
 65 |             self.data_group = self.file.create_group("data")
 66 |             self.documentation_group = self.file.create_group("documentation")
 67 |             deps = self.file.create_group('external-dependencies')
 68 |             if dependencies is None:
 69 |                 self.dependencies = []
 70 |             else:
 71 |                 for module_name in dependencies:
 72 |                     assert isstring(module_name)
 73 |                     importlib.import_module(module_name)
 74 |                 self.dependencies = dependencies
 75 |                 ds = deps.create_dataset('python-packages',
 76 |                                          dtype = h5vstring,
 77 |                                          shape = (len(dependencies),))
 78 |                 ds[:] = dependencies
 79 |             htype = np.dtype([('opened', np.int64),
 80 |                               ('closed', np.int64),
 81 |                               ('platform', h5vstring),
 82 |                               ('hostname', h5vstring),
 83 |                               ('username', h5vstring)]
 84 |                              + [(name+"_version", h5vstring)
 85 |                                 for name in ['activepapers','python',
 86 |                                              'numpy', 'h5py', 'hdf5'] 
 87 |                                             + self.dependencies])
 88 |             self.history = self.file.create_dataset("history", shape=(0,),
 89 |                                                     dtype=htype,
 90 |                                                     chunks=(1,),
 91 |                                                     maxshape=(None,))
 92 |             readme = self.file.create_dataset("README",
 93 |                                               dtype=h5vstring, shape = ())
 94 |             readme[...] = readme_text
 95 |             self.writable = True
 96 | 
 97 |         if self.writable:
 98 |             self.update_history(close=False)
 99 | 
100 |         import activepapers.utility
101 |         self.data = DataGroup(self, None, self.data_group, ExternalCode(self))
102 |         self.imported_modules = {}
103 | 
104 |         self._local_modules = {}
105 | 
106 |         paper_registry[self._id()] = self
107 | 
108 |     def _id(self):
109 |         return hex(id(self))[2:]
110 | 
111 |     def update_history(self, close):
112 |         if close:
113 |             entry = tuple(self.history[-1])
114 |             self.history[-1] = (entry[0], ms_since_epoch()) + entry[2:]
115 |         else:
116 |             self.history.resize((1+len(self.history),))
117 |             def getversion(name):
118 |                 if hasattr(sys.modules[name], '__version__'):
119 |                     return getattr(sys.modules[name], '__version__')
120 |                 else:
121 |                     return 'unknown'
122 |             self.history[-1] = (ms_since_epoch(), 0,
123 |                                 sys.platform,
124 |                                 socket.getfqdn(),
125 |                                 getpass.getuser(),
126 |                                 activepapers.__version__,
127 |                                 sys.version.split()[0],
128 |                                 np.__version__,
129 |                                 h5py.version.version,
130 |                                 h5py.version.hdf5_version) \
131 |                                + tuple(getversion(m) for m in self.dependencies)
132 | 
133 |     def close(self):
134 |         if self.open:
135 |             if self.writable:
136 |                 self.update_history(close=True)
137 |             del self._local_modules
138 |             self.open = False
139 |             try:
140 |                 self.file.close()
141 |             except:
142 |                 pass
143 |             paper_id = hex(id(self))[2:]
144 |             try:
145 |                 del paper_registry[paper_id]
146 |             except KeyError:
147 |                 pass
148 | 
149 |     def assert_is_open(self):
150 |         if not self.open:
151 |             raise ValueError("ActivePaper %s has been closed" % self.filename)
152 | 
    def __enter__(self):
        """
        Context manager entry: the paper itself is the managed object.
        """
        return self
155 | 
    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context manager exit: close the paper.
        """
        self.close()
        # Returning False lets an exception raised in the with-block
        # propagate.
        return False
159 | 
    def flush(self):
        """
        Flush the underlying HDF5 file (delegates to self.file.flush()).
        """
        self.file.flush()
162 | 
163 |     def _create_ref(self, path, paper_ref, ref_path, group, prefix):
164 |         if ref_path is None:
165 |             ref_path = path
166 |         if group is None:
167 |             group = 'file'
168 |         if prefix is None:
169 |             prefix = ''
170 |         else:
171 |             prefix += '/'
172 |         paper = open_paper_ref(paper_ref)
173 |         # Access the item to make sure it exists
174 |         item = getattr(paper, group)[ref_path]
175 |         ref_dtype = np.dtype([('paper_ref', h5vstring), ('path', h5vstring)])
176 |         ds = getattr(self, group).require_dataset(path, shape=(),
177 |                                                   dtype=ref_dtype)
178 |         ds[...] = (paper_ref, prefix + ref_path)
179 |         stamp(ds, 'reference', {})
180 |         return ds
181 | 
182 |     def create_ref(self, path, paper_ref, ref_path=None):
183 |         return self._create_ref(path, paper_ref, ref_path, None, None)
184 | 
185 |     def create_data_ref(self, path, paper_ref, ref_path=None):
186 |         return self._create_ref(path, paper_ref, ref_path,
187 |                                 'data_group', '/data')
188 | 
189 |     def create_code_ref(self, path, paper_ref, ref_path=None):
190 |         return self._create_ref(path, paper_ref, ref_path,
191 |                                 'code_group', '/code')
192 | 
193 |     def create_module_ref(self, path, paper_ref, ref_path=None):
194 |         path = "python-packages/" + path
195 |         if ref_path is not None:
196 |             ref_path = "python-packages/" + ref_path
197 |         return self.create_code_ref(path, paper_ref, ref_path)
198 | 
199 |     def create_copy(self, path, paper_ref, ref_path=None):
200 |         if ref_path is None:
201 |             ref_path = path
202 |         paper = open_paper_ref(paper_ref)
203 |         item = paper.file[ref_path]
204 |         self.file.copy(item, path, expand_refs=True)
205 |         copy = self.file[path]
206 |         self._delete_dependency_attributes(copy)
207 |         timestamp(copy, mod_time(item))
208 |         ref_dtype = np.dtype([('paper_ref', h5vstring), ('path', h5vstring)])
209 |         copy.attrs.create('ACTIVE_PAPER_COPIED_FROM',
210 |                           shape=(), dtype=ref_dtype,
211 |                           data=np.array((paper_ref, ref_path), dtype=ref_dtype))
212 |         return copy
213 | 
214 |     def _delete_dependency_attributes(self, node):
215 |         for attr_name in ['ACTIVE_PAPER_GENERATING_CODELET',
216 |                           'ACTIVE_PAPER_DEPENDENCIES']:
217 |             if attr_name in node.attrs:
218 |                 del node.attrs[attr_name]
219 |         if isinstance(node, h5py.Group):
220 |             for item in node:
221 |                 self._delete_dependency_attributes(node[item])
222 | 
223 |     def store_python_code(self, path, code):
224 |         self.assert_is_open()
225 |         if not isstring(code):
226 |             raise TypeError("Python code must be a string (is %s)"
227 |                             % str(type(code)))
228 |         ds = self.code_group.require_dataset(path,
229 |                                              dtype=h5vstring, shape = ())
230 |         ds[...] = code.encode('utf-8')
231 |         ds.attrs['ACTIVE_PAPER_LANGUAGE'] = "python"
232 |         return ds
233 | 
234 |     def add_module(self, name, module_code):
235 |         path = codepath('/'.join(['', 'python-packages'] + name.split('.')))
236 |         ds = self.store_python_code(path, module_code)
237 |         stamp(ds, "module", {})
238 | 
239 |     def import_module(self, name, python_path=sys.path):
240 |         if name in self.imported_modules:
241 |             return self.imported_modules[name]
242 |         if '.' in name:
243 |             # Submodule, add the underlying package first
244 |             package, _, module = name.rpartition('.')
245 |             path = [self.import_module(package, python_path)]
246 |         else:
247 |             module = name
248 |             path = python_path
249 |         file, filename, (suffix, mode, kind) = imp.find_module(module, path)
250 |         if kind == imp.PKG_DIRECTORY:
251 |             package = filename
252 |             file = open(os.path.join(filename, '__init__.py'))
253 |             name = name + '/__init__'
254 |         else:
255 |             package = None
256 |             if file is None:
257 |                 raise ValueError("%s is not a Python module" % name)
258 |             if kind != imp.PY_SOURCE:
259 |                 file.close()
260 |                 raise ValueError("%s is not a Python source code file"
261 |                                  % filename)
262 |         self.add_module(name, ascii(file.read()))
263 |         file.close()
264 |         self.imported_modules[name] = package
265 |         return package
266 | 
267 |     def get_local_module(self, name):
268 |         path = codepath('/'.join(['', 'python-packages'] + name.split('.')))
269 |         return APNode(self.code_group).get(path, None)
270 |         
271 |     def create_calclet(self, path, script):
272 |         path = codepath(path)
273 |         if not path.startswith('/'):
274 |             path = '/'.join([self.code_group.name, path])
275 |         ds = self.store_python_code(path, script)
276 |         stamp(ds, "calclet", {})
277 |         return Calclet(self, ds)
278 | 
279 |     def create_importlet(self, path, script):
280 |         path = codepath(path)
281 |         if not path.startswith('/'):
282 |             path = '/'.join([self.code_group.name, path])
283 |         ds = self.store_python_code(path, script)
284 |         stamp(ds, "importlet", {})
285 |         return Importlet(self, ds)
286 | 
287 |     def run_codelet(self, path, debug=False):
288 |         if path.startswith('/'):
289 |             assert path.startswith('/code/')
290 |             path = path[6:]
291 |         node = APNode(self.code_group)[path]
292 |         class_ = {'calclet': Calclet, 'importlet': Importlet}[datatype(node)]
293 |         try:
294 |             class_(self, node).run()
295 |             return None
296 |         except Exception:
297 |             # TODO: preprocess traceback to show only the stack frames
298 |             #       in the codelet.
299 |             import traceback
300 | 
301 |             type, value, trace = sys.exc_info()
302 |             stack = traceback.extract_tb(trace)
303 |             del trace
304 | 
305 |             while stack:
306 |                 if stack[0][2] == 'execcode':
307 |                     del stack[0]
308 |                     break
309 |                 del stack[0]
310 |             
311 |             fstack = []
312 |             for filename, lineno, fn_name, code in stack:
313 |                 if ':' in filename:
314 |                     paper_id, codelet = filename.split(':')
315 |                     paper = paper_registry.get(paper_id)
316 |                     if paper is None:
317 |                         paper_name = '<ActivePaper>'
318 |                     else:
319 |                         paper_name = '<%s>' % paper.file.filename
320 |                     filename = ':'.join([paper_name, codelet])
321 |                     if code is None and paper is not None:
322 |                         script = utf8(paper.file[codelet][...].flat[0])
323 |                         code = script.split('\n')[lineno-1]
324 |                 fstack.append((filename, lineno, fn_name, code))
325 | 
326 |             tb_text = ''.join(["Traceback (most recent call last):\n"] + \
327 |                               traceback.format_list(fstack) + \
328 |                               traceback.format_exception_only(type, value))
329 |             if debug:
330 |                 sys.stderr.write(tb_text)
331 |                 import pdb
332 |                 pdb.post_mortem()
333 |             else:
334 |                 return tb_text
335 | 
336 |     def calclets(self):
337 |         return dict((item.name,
338 |                      Calclet(self, item))
339 |                     for item in self.iter_items()
340 |                     if datatype(item) == 'calclet')
341 | 
342 |     def remove_owned_by(self, codelet):
343 |         def owned(group):
344 |             nodes = []
345 |             for node in group.values():
346 |                 if owner(node) == codelet:
347 |                     nodes.append(node.name)
348 |                 elif isinstance(node, h5py.Group) \
349 |                    and datatype(node) != 'data':
350 |                     nodes.extend(owned(node))
351 |             return nodes
352 |         for group in [self.code_group,
353 |                       self.data_group,
354 |                       self.documentation_group]:
355 |             for node_name in owned(group):
356 |                 del self.file[node_name]
357 | 
358 |     def replace_by_dummy(self, item_name):
359 |         item = self.file[item_name]
360 |         codelet = owner(item)
361 |         assert codelet is not None
362 |         dtype = datatype(item)
363 |         mtime = mod_time(item)
364 |         deps = item.attrs.get('ACTIVE_PAPER_DEPENDENCIES')
365 |         del self.file[item_name]
366 |         ds = self.file.create_dataset(item_name,
367 |                                       data=np.zeros((), dtype=np.int))
368 |         stamp(ds, dtype,
369 |               dict(ACTIVE_PAPER_GENERATING_CODELET=codelet,
370 |                    ACTIVE_PAPER_DEPENDENCIES=list(deps)))
371 |         timestamp(ds, mtime)
372 |         ds.attrs['ACTIVE_PAPER_DUMMY_DATASET'] = True
373 |         
374 |     def is_dummy(self, item):
375 |         return item.attrs.get('ACTIVE_PAPER_DUMMY_DATASET', False)
376 | 
377 |     def iter_items(self):
378 |         """
379 |         Iterate over the items in a paper.
380 |         """
381 |         def walk(group):
382 |             for node in group.values():
383 |                 if isinstance(node, h5py.Group) \
384 |                    and datatype(node) != 'data':
385 |                     for gnode in walk(node):
386 |                         yield gnode
387 |                 else:
388 |                     yield node
389 |         for group in [self.code_group,
390 |                       self.data_group,
391 |                       self.documentation_group]:
392 |             for node in walk(group):
393 |                 yield node
394 | 
395 |     def iter_groups(self):
396 |         """
397 |         Iterate over the groups in a paper that are not items.
398 |         """
399 |         def walk(group):
400 |             for node in group.values():
401 |                 if isinstance(node, h5py.Group) \
402 |                    and datatype(node) != 'data':
403 |                     yield node
404 |                     for subnode in walk(node):
405 |                         yield subnode
406 |         for group in [self.code_group,
407 |                       self.data_group,
408 |                       self.documentation_group]:
409 |             for node in walk(group):
410 |                 yield node
411 | 
412 |     def iter_dependencies(self, item):
413 |         """
414 |         Iterate over the dependencies of a given item in a paper.
415 |         """
416 |         if 'ACTIVE_PAPER_DEPENDENCIES' in item.attrs:
417 |             for dep in item.attrs['ACTIVE_PAPER_DEPENDENCIES']:
418 |                 yield self.file[dep]
419 | 
420 |     def is_stale(self, item):
421 |         t = mod_time(item)
422 |         for dep in self.iter_dependencies(item):
423 |             if mod_time(dep) > t:
424 |                 return True
425 |         return False
426 | 
427 |     def external_references(self):
428 |         def process(node, refs):
429 |             if datatype(node) == 'reference':
430 |                 paper_ref, ref_path = node[()]
431 |                 refs[paper_ref][0].add(ref_path)
432 |             elif 'ACTIVE_PAPER_COPIED_FROM' in node.attrs:
433 |                 source = node.attrs['ACTIVE_PAPER_COPIED_FROM']
434 |                 paper_ref, ref_path = source
435 |                 if h5py.version.version_tuple[:2] <= (2, 2):
436 |                     # h5py 2.2 returns a wrong dtype
437 |                     paper_ref = paper_ref.flat[0]
438 |                     ref_path = ref_path.flat[0]
439 |                 refs[paper_ref][1].add(ref_path)
440 |             if isinstance(node, h5py.Group):
441 |                 for item in node:
442 |                     process(node[item], refs)
443 |             return refs
444 | 
445 |         refs = collections.defaultdict(lambda: (set(), set()))
446 |         for node in [self.code_group, self.data_group,
447 |                      self.documentation_group]:
448 |             process(node, refs)
449 |         return refs
450 | 
451 |     def has_dependencies(self, item):
452 |         """
453 |         :param item: an item in a paper
454 |         :type item: h5py.Node
455 |         :return: True if the item has any dependencies
456 |         :rtype: bool
457 |         """
458 |         return 'ACTIVE_PAPER_DEPENDENCIES' in item.attrs \
459 |                 and len(item.attrs['ACTIVE_PAPER_DEPENDENCIES']) > 0
460 | 
461 |     def dependency_graph(self):
462 |         """
463 |         :return: a dictionary mapping the name of each item to the
464 |                  set of the names of the items that depend on it
465 |         :rtype: dict
466 |         """
467 |         graph = collections.defaultdict(set)
468 |         for item in it.chain(self.iter_items(), self.iter_groups()):
469 |             for dep in self.iter_dependencies(item):
470 |                 graph[dep.name].add(item.name)
471 |         return graph
472 | 
473 |     def dependency_hierarchy(self):
474 |         """
475 |         Generator yielding a sequence of sets of HDF5 paths
476 |         such that the items in each set depend only on the items
477 |         in the preceding sets.
478 |         """
479 |         known = set()
480 |         unknown = set()
481 |         for item in self.iter_items():
482 |             d = (item.name,
483 |                  frozenset(dep.name for dep in self.iter_dependencies(item)))
484 |             if len(d[1]) > 0:
485 |                 unknown.add(d)
486 |             else:
487 |                 known.add(d[0])
488 |         yield set(self.file[p] for p in known)
489 |         while len(unknown) > 0:
490 |             next = set(p for p, d in unknown if d <= known)
491 |             if len(next) == 0:
492 |                 raise ValueError("cyclic dependencies")
493 |             known |= next
494 |             unknown = set((p, d) for p, d in unknown if p not in next)
495 |             yield set(self.file[p] for p in next)
496 | 
497 |     def rebuild(self, filename):
498 |         """
499 |         Rebuild all the dependent items in the paper in a new file.
500 |         First all items without dependencies are copied to the new
501 |         file, then all the calclets are run in the new file in the
502 |         order determined by the dependency graph in the original file.
503 |         """
504 |         deps = self.dependency_hierarchy()
505 |         with ActivePaper(filename, 'w') as clone:
506 |             for item in next(deps):
507 |                 # Make sure all the groups in the path exist
508 |                 path = item.name.split('/')
509 |                 name = path[-1]
510 |                 groups = path[:-1]
511 |                 dest = clone.file
512 |                 while groups:
513 |                     group_name = groups[0]
514 |                     if len(group_name) > 0:
515 |                         if group_name not in dest:
516 |                             dest.create_group(group_name)
517 |                         dest = dest[group_name]
518 |                     del groups[0]
519 |                 clone.file.copy(item, item.name, expand_refs=True)
520 |                 timestamp(clone.file[item.name])
521 |             for items in deps:
522 |                 calclets = set(item.attrs['ACTIVE_PAPER_GENERATING_CODELET']
523 |                                for item in items)
524 |                 for calclet in calclets:
525 |                     clone.run_codelet(calclet)
526 | 
527 |     def snapshot(self, filename):
528 |         """
529 |         Make a copy of the ActivePaper in its current state.
530 |         This is meant to be used form inside long-running
531 |         codelets in order to permit external monitoring of
532 |         the progress, given that HDF5 files being written cannot
533 |         be read simultaneously.
534 |         """
535 |         self.file.flush()
536 |         clone = h5py.File(filename, 'w')
537 |         for item in self.file:
538 |             clone.copy(self.file[item], item, expand_refs=True)
539 |         for attr_name in self.file.attrs:
540 |             clone.attrs[attr_name] = self.file.attrs[attr_name]
541 |         clone.close()
542 | 
543 |     def open_internal_file(self, path, mode='r', encoding=None, creator=None):
544 |         # path is always relative to the root group
545 |         if path.startswith('/'):
546 |             path = path[1:]
547 |         if not path.startswith('data/') \
548 |            and not path.startswith('documentation/'):
549 |             raise IOError((13, "Permission denied: '%s'" % path))
550 |         if creator is None:
551 |             creator = ExternalCode(self)
552 |         if mode[0] in ['r', 'a']:
553 |             ds = self.file[path]
554 |         elif mode[0] == 'w':
555 |             test = self.file.get(path, None)
556 |             if test is not None:
557 |                 if not creator.owns(test):
558 |                     raise ValueError("%s trying to overwrite data"
559 |                                      " created by %s"
560 |                                      % (creator.path, owner(test)))
561 |                 del self.file[path]
562 |             ds = self.file.create_dataset(
563 |                        path, shape = (0,), dtype = np.uint8,
564 |                        chunks = (100,), maxshape = (None,))
565 |         else:
566 |             raise ValueError("unknown file mode %s" % mode)
567 |         return InternalFile(ds, mode, encoding)
568 | 
569 | 
570 | #
571 | # A dummy replacement that emulates the interface of Calclet.
572 | #
573 | 
class ExternalCode(object):

    """
    Stand-in offering the codelet methods add_dependency,
    dependency_attributes and owns. It records no dependencies
    and claims ownership of every node.
    """

    def __init__(self, paper):
        self.path = None
        self.paper = paper

    def add_dependency(self, dependency):
        """Ignore the dependency; nothing is recorded."""
        return None

    def dependency_attributes(self):
        """Return an empty dictionary of dependency attributes."""
        return dict()

    def owns(self, node):
        """Return True for any node."""
        # Pretend to be the owner of everything
        return True
589 | 
590 | 
591 | #
592 | # A Python file interface for byte array datasets
593 | #
594 | 
class InternalFile(io.IOBase):

    """
    A file-like object whose contents are stored in a
    one-dimensional dataset of bytes.

    Data read from the file is returned unchanged in binary mode
    ('b' in the mode), decoded with `encoding` if one was given, and
    converted by ascii() otherwise. In the modes for which writable()
    is true, the dataset is stamped as "file" when the file is
    opened, written to, truncated and closed.
    """

    def __init__(self, ds, mode, encoding=None):
        """
        :param ds: the dataset holding the bytes
        :param mode: file mode ('r', 'w' or 'a', optionally followed
                     by 'b' and/or '+')
        :param encoding: text encoding, or None
        """
        self._closed = False
        self._ds = ds
        self._mode = mode
        self._encoding = encoding
        # In append mode, writing starts at the end of the existing
        # data instead of overwriting it from the beginning.
        self._position = len(ds) if mode[0] == 'a' else 0
        self._binary = 'b' in mode
        self._get_attributes = lambda: {}
        self._stamp()

    def readable(self):
        return True

    def writable(self):
        return self._mode[0] == 'w' or '+' in self._mode

    @property
    def closed(self):
        return self._closed

    @property
    def mode(self):
        return self._mode

    @property
    def name(self):
        return self._ds.name

    def _check_if_open(self):
        if self._closed:
            raise ValueError("file has been closed")

    def _convert(self, data):
        # Convert bytes read from the dataset to the type returned
        # to the caller.
        if self._binary:
            return data
        elif self._encoding is not None:
            return data.decode(self._encoding)
        else:
            return ascii(data)

    def _set_attribute_callback(self, callback):
        # The callback supplies the attributes passed to stamp().
        self._get_attributes = callback

    def _stamp(self):
        if self.writable():
            stamp(self._ds, "file", self._get_attributes())

    def close(self):
        # Closing twice is harmless: the dataset is stamped only the
        # first time. io.IOBase calls close() again on finalization.
        if self._closed:
            return
        self._closed = True
        self._stamp()

    def flush(self):
        self._check_if_open()

    def isatty(self):
        return False

    def __next__(self):
        self._check_if_open()
        # ">=" because truncate() can leave the position behind the
        # end of the data; readline() then returns empty strings and
        # iteration would never stop.
        if self._position >= len(self._ds):
            raise StopIteration
        return self.readline()
    next = __next__ # for Python 2

    def __iter__(self):
        return self

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return False

    def read(self, size=None):
        """
        Read at most `size` bytes from the current position. If
        `size` is None or negative, everything up to the end of the
        data is read. The position never moves past the end.
        """
        self._check_if_open()
        remaining = max(0, len(self._ds) - self._position)
        if size is None or size < 0 or size > remaining:
            size = remaining
        if size == 0:
            # Convert an empty byte string so that the result has the
            # same type as for non-empty data.
            return self._convert(b'')
        new_position = self._position + size
        data = self._ds[self._position:new_position]
        self._position = new_position
        return self._convert(data.tobytes())

    def readline(self, size=None):
        """
        Read up to and including the next newline byte, or up to the
        end of the data. If `size` is given and not negative, at most
        `size` bytes are read.
        """
        self._check_if_open()
        remaining = len(self._ds) - self._position
        if size is not None and 0 <= size < remaining:
            remaining = size
        if remaining <= 0:
            return self._convert(b'')
        # Search for the newline in a window that grows by 100 bytes
        # per iteration; the last window covers all of `remaining`.
        for l in range(min(100, remaining), remaining+100, 100):
            window = min(l, remaining)
            data = self._ds[self._position:self._position+window]
            eols = np.nonzero(data == 10)[0]
            if len(eols) > 0:
                n = int(eols[0])+1
                self._position += n
                return self._convert(data[:n].tobytes())
        self._position += len(data)
        return self._convert(data.tobytes())

    def readlines(self, sizehint=None):
        self._check_if_open()
        return list(self)

    def seek(self, offset, whence=os.SEEK_SET):
        """
        Move to a new position, which is clipped to the range from 0
        to the length of the data, and return it. An unknown value of
        `whence` leaves the position unchanged.
        """
        self._check_if_open()
        file_length = len(self._ds)
        if whence == os.SEEK_SET:
            self._position = offset
        elif whence == os.SEEK_CUR:
            self._position += offset
        elif whence == os.SEEK_END:
            self._position = file_length + offset
        self._position = max(0, min(file_length, self._position))
        return self._position

    def tell(self):
        self._check_if_open()
        return self._position

    def truncate(self, size=None):
        """
        Resize the dataset to `size` bytes (default: the current
        position) and return the new size.
        """
        self._check_if_open()
        if size is None:
            size = self._position
        self._ds.resize((size,))
        self._stamp()
        return size

    def write(self, string):
        """
        Write `string` at the current position, enlarging the dataset
        if necessary, and return the number of bytes written.

        :raises IOError: if the file was opened for reading only
        """
        self._check_if_open()
        # 'r+' permits writing, in agreement with writable().
        if self._mode[0] == 'r' and '+' not in self._mode:
            raise IOError("File not open for writing")
        if not string:
            # HDF5 crashes when trying to write a zero-length
            # slice, so this must be handled as a special case.
            return 0
        if self._encoding is not None:
            string = string.encode(self._encoding)
        elif hasattr(string, 'encode') and not isinstance(string, bytes):
            # np.frombuffer needs bytes; text written without an
            # explicit encoding is stored as UTF-8.
            string = string.encode('utf-8')
        new_position = self._position + len(string)
        if new_position > len(self._ds):
            self._ds.resize((new_position,))
        self._ds[self._position:new_position] = \
                np.frombuffer(string, dtype=np.uint8)
        self._position = new_position
        self._stamp()
        return len(string)

    def writelines(self, strings):
        self._check_if_open()
        for line in strings:
            self.write(line)
746 | 
747 | 
748 | #
749 | # A wrapper for nodes that works across references
750 | #
751 | 
class APNode(object):

    """
    Wrapper around an HDF5 node whose item access follows
    references to other papers. Attributes not defined by the
    wrapper are taken from the wrapped node.
    """

    def __init__(self, h5node, name = None):
        """
        :param h5node: the HDF5 node to wrap
        :param name: the name under which the node was reached;
                     defaults to the name of `h5node`
        """
        self._h5node = h5node
        self.name = h5node.name if name is None else name

    def is_group(self):
        return isinstance(self._h5node, h5py.Group)

    def __contains__(self, item):
        return item in self._h5node

    def __getitem__(self, item):
        if isinstance(self._h5node, h5py.Group):
            # Walk the path one component at a time so that a
            # reference can be followed at each level.
            path = item.split('/')
            if path[0] == '':
                # Absolute path: start from the root of the file
                node = APNode(self._h5node.file)
                path = path[1:]
            else:
                node = self
            for component in path:
                node = node._getitem(component)
            return node
        else:
            return self._h5node[item]

    def get(self, item, default):
        """
        Return self[item], or `default` if the lookup fails.
        """
        try:
            return self[item]
        except Exception:
            # Any lookup error yields the default, but
            # KeyboardInterrupt and SystemExit are not swallowed.
            return default

    def _getitem(self, item):
        # Look up a single path component, dereferencing both the
        # current node and the result.
        node = self._h5node
        if datatype(node) == 'reference':
            _, node = dereference(node)
        node = node[item]
        if datatype(node) == 'reference':
            _, node = dereference(node)
        name = self.name
        if not name.endswith('/'): name += '/'
        name += item
        return APNode(node, name)

    def __getattr__(self, attrname):
        # Without this guard, looking up any attribute on an instance
        # whose _h5node has not been set recurses without end.
        if attrname == '_h5node':
            raise AttributeError(attrname)
        return getattr(self._h5node, attrname)

    def in_paper(self, paper):
        """
        Return True if the wrapped node is stored in the file of
        `paper`.
        """
        return paper.file.id == self._h5node.file.id
801 | 
802 | #
803 | # A global dictionary mapping paper_refs to papers.
804 | # Each entry disappears when no reference to the paper remains.
805 | #
806 | _papers = weakref.WeakValueDictionary()
807 | 
808 | # # Close all open referenced papers at interpreter exit,
809 | # # in order to prevent "murdered identifiers" in h5py.
810 | # def _cleanup():
811 | #     for paper in activepapers.storage._papers.values():
812 | #         paper.close()
813 | 
814 | # import atexit
815 | # atexit.register(_cleanup)
816 | # del atexit
817 | 
818 | #
819 | # Dereference a reference node
820 | #
def dereference(ref_node):
    """
    Follow a reference node.

    :param ref_node: a node of datatype 'reference'
    :return: the referenced paper and the node the reference points to
    """
    assert datatype(ref_node) == 'reference'
    reference = ref_node[()]
    paper_ref, path = reference
    target_paper = open_paper_ref(ascii(paper_ref))
    return target_paper, target_paper.file[path]
826 | 
827 | #
828 | # Open a paper given its reference
829 | #
def open_paper_ref(paper_ref):
    """
    Return the paper designated by `paper_ref`. A paper that is
    already registered in _papers is reused; otherwise the paper is
    located with find_in_library, opened read-only, and registered.
    """
    try:
        return _papers[paper_ref]
    except KeyError:
        pass
    paper = ActivePaper(find_in_library(paper_ref), "r")
    _papers[paper_ref] = paper
    return paper
836 | 


--------------------------------------------------------------------------------
/lib/activepapers/url.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | # Python 2/3 compatibility issues
 4 | if sys.version_info[0] == 2:
 5 | 
 6 |     from activepapers.url2 import *
 7 | 
 8 | else:
 9 | 
10 |     from activepapers.url3 import *
11 | 


--------------------------------------------------------------------------------
/lib/activepapers/url2.py:
--------------------------------------------------------------------------------
1 | from urllib2 import urlopen, HTTPError
2 | from urllib import urlretrieve
3 | 


--------------------------------------------------------------------------------
/lib/activepapers/url3.py:
--------------------------------------------------------------------------------
1 | from urllib.request import urlopen, urlretrieve, HTTPError
2 | 


--------------------------------------------------------------------------------
/lib/activepapers/utility.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import time
 3 | 
 4 | # Python 2/3 compatibility issues
 5 | if sys.version_info[0] == 2:
 6 | 
 7 |     from activepapers.utility2 import *
 8 | 
 9 | else:
10 | 
11 |     from activepapers.utility3 import *
12 | 
13 | # Various small functions
14 | 
def datatype(node):
    """
    Return the attribute ACTIVE_PAPER_DATATYPE of `node` converted
    by ascii(), or None if the attribute is not set.
    """
    value = node.attrs.get('ACTIVE_PAPER_DATATYPE', None)
    return None if value is None else ascii(value)
21 | 
def owner(node):
    """
    Return the attribute ACTIVE_PAPER_GENERATING_CODELET of `node`
    converted by ascii(), or None if the attribute is not set.
    """
    value = node.attrs.get('ACTIVE_PAPER_GENERATING_CODELET', None)
    return None if value is None else ascii(value)
28 | 
def language(node):
    """Return the node's ACTIVE_PAPER_LANGUAGE attribute.

    The stored value is passed through ascii(); None is returned when
    the attribute is absent.
    """
    value = node.attrs.get('ACTIVE_PAPER_LANGUAGE', None)
    return None if value is None else ascii(value)
35 | 
def mod_time(node):
    """Return the node's ACTIVE_PAPER_TIMESTAMP attribute divided by 1000.

    None is returned when the node carries no timestamp attribute.
    """
    stamp_ms = node.attrs.get('ACTIVE_PAPER_TIMESTAMP', None)
    if stamp_ms is not None:
        return stamp_ms/1000.
    return None
42 | 
def ms_since_epoch():
    """Return 1000 times the current time.time() value as a numpy int64."""
    now = time.time()
    return np.int64(1000.*now)
45 | 
def timestamp(node, time=None):
    """Store a modification time in the node's ACTIVE_PAPER_TIMESTAMP attribute.

    time: value that is multiplied by 1000 before being stored.  When it
          is None, the result of ms_since_epoch() is stored instead.

    Note that the parameter shadows the ``time`` module inside this
    function.
    """
    if time is not None:
        time *= 1000.
    else:
        time = ms_since_epoch()
    node.attrs['ACTIVE_PAPER_TIMESTAMP'] = time
52 | 
def stamp(node, ap_type, attributes):
    """Write ActivePapers metadata attributes to *node* and timestamp it.

    node:       object whose ``attrs`` mapping receives the attributes
    ap_type:    value stored under 'ACTIVE_PAPER_DATATYPE'
    attributes: mapping (or sequence of pairs) of further attributes;
                entries whose value is None are ignored

    String values are written when the attribute does not exist yet.  An
    existing string attribute must keep its value, except that the
    datatype may change along one of the ``allowed_transformations``.
    The only accepted non-string entry is 'ACTIVE_PAPER_DEPENDENCIES',
    which is stored as an array of variable-length strings.

    Raises ValueError for a conflicting string value and for any other
    non-string entry.
    """
    # previous datatype -> the one datatype it may be re-stamped as
    allowed_transformations = {'group': 'data',
                               'data': 'group',
                               'file': 'text'}
    attrs = dict(attributes)
    attrs['ACTIVE_PAPER_DATATYPE'] = ap_type
    for key, value in attrs.items():
        if value is None:
            continue
        if not isstring(value):
            if key != 'ACTIVE_PAPER_DEPENDENCIES':
                raise ValueError("unexpected key %s" % key)
            node.attrs.create(key, np.array(value, dtype=object),
                              shape = (len(value),), dtype=h5vstring)
            continue
        previous = node.attrs.get(key, None)
        if previous is None:
            node.attrs[key] = value
        elif previous != value:
            # String attributes can't change when re-stamping, with the
            # exception of the permitted datatype transformations.
            permitted = key == 'ACTIVE_PAPER_DATATYPE' \
                        and allowed_transformations.get(previous) == value
            if not permitted:
                raise ValueError("%s: %s != %s"
                                 % (key, value, previous))
            node.attrs[key] = value
    timestamp(node)
82 | 
def path_in_section(path, section):
    """Return *path* placed inside *section*.

    A path starting with "/" gets *section* prepended; any other path is
    returned unchanged.  Raises ValueError if *path* is not a string.
    """
    if not isstring(path):
        raise ValueError("type %s where string is expected"
                         % str(type(path)))
    return section + path if path.startswith("/") else path
91 | 
def datapath(path):
    """Return *path* resolved against the "/data" section."""
    return path_in_section(path, section="/data")
94 | 
def codepath(path):
    """Return *path* resolved against the "/code" section."""
    return path_in_section(path, section="/code")
97 | 


--------------------------------------------------------------------------------
/lib/activepapers/utility2.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import h5py
 3 | 
 4 | def ascii(string):
 5 |     return string
 6 | 
 7 | def utf8(string):
 8 |     return string.decode('utf-8')
 9 | 
def py_str(byte_string):
    """Return *byte_string* as a ``str``.

    A numpy array is converted with str(); anything else must already be
    a ``str`` (checked with an assertion) and is returned as is.
    """
    if not isinstance(byte_string, np.ndarray):
        assert isinstance(byte_string, str)
        return byte_string
    return str(byte_string)
16 | 
def isstring(s):
    """Return True if *s* is an instance of ``basestring`` (Python 2)."""
    string_types = basestring
    return isinstance(s, string_types)
19 | 
def execcode(s, globals, locals=None):
    """Execute the code *s* in the given namespaces.

    s:       code accepted by the exec statement
    globals: dictionary used as the global namespace
    locals:  optional dictionary used as the local namespace; when it is
             None, only *globals* is passed to exec

    The tuple form of the exec statement is used; Python 2 treats
    ``exec(expr, globals)`` as ``exec expr in globals`` and
    ``exec(expr, globals, locals)`` as ``exec expr in globals, locals``.
    """
    if locals is not None:
        exec(s, globals, locals)
    else:
        exec(s, globals)
25 | 
26 | h5vstring = h5py.special_dtype(vlen=str)
27 | 
28 | import __builtin__ as builtins
29 | import activepapers.builtins2 as ap_builtins
30 | 
31 | raw_input = builtins.raw_input
32 | 


--------------------------------------------------------------------------------
/lib/activepapers/utility3.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import h5py
 3 | 
 4 | def ascii(string):
 5 |     if isinstance(string, bytes):
 6 |         return bytes.decode(string, 'ASCII')
 7 |     return string
 8 | 
 9 | def utf8(string):
10 |     if isinstance(string, bytes):
11 |         return bytes.decode(string, 'utf-8')
12 |     return string
13 | 
def py_str(byte_string):
    """Return *byte_string* decoded as an ASCII ``str``.

    A numpy array is first converted with bytes().  The value to decode
    must then be ``bytes``; this is checked with an assertion.
    """
    if isinstance(byte_string, np.ndarray):
        raw = bytes(byte_string)
    else:
        raw = byte_string
    assert isinstance(raw, bytes)
    return raw.decode('ASCII')
19 | 
def isstring(s):
    """Return True if *s* is an instance of ``str`` (Python 3)."""
    string_types = str
    return isinstance(s, string_types)
22 | 
def execcode(code, globals, locals=None):
    """Execute *code* with exec() in the given namespaces.

    code:    anything accepted by the built-in exec()
    globals: dictionary used as the global namespace
    locals:  optional mapping used as the local namespace
    """
    # exec() treats locals=None exactly like an omitted locals argument,
    # so a single call covers both cases.
    exec(code, globals, locals)
28 | 
29 | h5vstring = h5py.special_dtype(vlen=bytes)
30 | 
31 | import builtins
32 | import activepapers.builtins3 as ap_builtins
33 | # Replace the "del exec" in builtins3 by something that's not a
34 | # syntax error under Python 2.
35 | del ap_builtins.__dict__['exec']
36 | 
37 | raw_input = builtins.input
38 | 


--------------------------------------------------------------------------------
/lib/activepapers/version.py:
--------------------------------------------------------------------------------
1 | version = '0.2.2'
2 | 


--------------------------------------------------------------------------------
/scripts/aptool:
--------------------------------------------------------------------------------
  1 | #!python
  2 | # -*- python -*-
  3 | 
  4 | import argparse
  5 | import logging
  6 | import os
  7 | import sys
  8 | 
  9 | import activepapers
 10 | import activepapers.cli
 11 | 
 12 | 
 13 | ##################################################
 14 | 
 15 | parser = argparse.ArgumentParser(description="Management of ActivePapers")
 16 | parser.add_argument('-p', '--paper', type=str,
 17 |                     help="name of the HDF5 file containing the ActivePaper")
 18 | parser.add_argument('--log', type=str,
 19 |                     help="logging level (default: WARNING)")
 20 | parser.add_argument('--logfile', type=str,
 21 |                     help="name of the file to which logging "
 22 |                          "information is written")
 23 | parser.add_argument('--version', action='version',
 24 |                     version=activepapers.__version__)
 25 | subparsers = parser.add_subparsers(help="commands")
 26 | 
 27 | ##################################################
 28 | 
 29 | create_parser = subparsers.add_parser('create', help="Create a new ActivePaper")
 30 | create_parser.add_argument('-d', metavar='DEPENDENCY',
 31 |                            type=str, action='append',
 32 |                            help="Python packages that the ActivePaper "
 33 |                                 "depends on")
 34 | create_parser.set_defaults(func=activepapers.cli.create)
 35 | 
 36 | ##################################################
 37 | 
 38 | ls_parser = subparsers.add_parser('ls', help="Show datasets")
 39 | ls_parser.add_argument('--long', '-l', action='store_true',
 40 |                        help="long format")
 41 | ls_parser.add_argument('--type', '-t',
 42 |                        help="show only items of the given type")
 43 | ls_parser.add_argument('pattern', nargs='*',
 44 |                        help="name pattern")
 45 | ls_parser.set_defaults(func=activepapers.cli.ls)
 46 | 
 47 | ##################################################
 48 | 
 49 | rm_parser = subparsers.add_parser('rm', help="Remove datasets and "
 50 |                                              "everything depending on them")
 51 | rm_parser.add_argument('--force', '-f', action='store_true',
 52 |                        help="no confirmation prompt")
 53 | rm_parser.add_argument('pattern', nargs='*',
 54 |                        help="name pattern")
 55 | rm_parser.set_defaults(func=activepapers.cli.rm)
 56 | 
 57 | ##################################################
 58 | 
 59 | dummy_parser = subparsers.add_parser('dummy', help="Replace datasets by "
 60 |                                                    "dummies")
 61 | dummy_parser.add_argument('--force', '-f', action='store_true',
 62 |                        help="no confirmation prompt")
 63 | dummy_parser.add_argument('pattern', nargs='*',
 64 |                        help="name pattern")
 65 | dummy_parser.set_defaults(func=activepapers.cli.dummy)
 66 | 
 67 | ##################################################
 68 | 
 69 | set_parser = subparsers.add_parser('set', help="Set dataset to the value "
 70 |                                                "of a Python expression")
 71 | set_parser.add_argument('dataset', type=str, help="dataset name")
 72 | set_parser.add_argument('expr', type=str, help="expression")
 73 | set_parser.set_defaults(func=activepapers.cli.set_)
 74 | 
 75 | ##################################################
 76 | 
 77 | group_parser = subparsers.add_parser('group', help="Create group")
 78 | group_parser.add_argument('group_name', type=str, help="group name")
 79 | group_parser.set_defaults(func=activepapers.cli.group)
 80 | 
 81 | ##################################################
 82 | 
 83 | extract_parser = subparsers.add_parser('extract',
 84 |                                        help="Copy internal file or "
 85 |                                             " source code item to a file")
 86 | extract_parser.add_argument('dataset', type=str, help="dataset name")
 87 | extract_parser.add_argument('filename',type=str,
 88 |                             help="name of file to extract to")
 89 | extract_parser.set_defaults(func=activepapers.cli.extract)
 90 | 
 91 | ##################################################
 92 | 
 93 | calclet_parser = subparsers.add_parser('calclet',
 94 |                                        help="Store a calclet"
 95 |                                             " inside the ActivePaper")
 96 | calclet_parser.add_argument('dataset', type=str, help="dataset name")
 97 | calclet_parser.add_argument('filename',type=str,
 98 |                             help="name of the Python script")
 99 | calclet_parser.add_argument('--run', '-r', action='store_true',
100 |                             help="run the calclet")
101 | calclet_parser.set_defaults(func=activepapers.cli.calclet)
102 | 
103 | ##################################################
104 | 
105 | importlet_parser = subparsers.add_parser('importlet',
106 |                                          help="Store a importlet"
107 |                                               " inside the ActivePaper")
108 | importlet_parser.add_argument('dataset', type=str, help="dataset name")
109 | importlet_parser.add_argument('filename',type=str,
110 |                               help="name of the Python script")
111 | importlet_parser.add_argument('--run', '-r', action='store_true',
112 |                               help="run the importlet")
113 | importlet_parser.set_defaults(func=activepapers.cli.importlet)
114 | 
115 | ##################################################
116 | 
117 | import_parser = subparsers.add_parser('import',
118 |                                       help="Import a Python module"
119 |                                            " into the ActivePaper")
120 | import_parser.add_argument('module',type=str,
121 |                            help="name of the Python module")
122 | import_parser.set_defaults(func=activepapers.cli.import_module)
123 | 
124 | ##################################################
125 | 
126 | run_parser = subparsers.add_parser('run',
127 |                                 help="Run a calclet or importlet")
128 | run_parser.add_argument('codelet', type=str, help="codelet name")
129 | run_parser.add_argument('--debug', '-d', action='store_true',
130 |                          help="drop into the debugger in case of an exception")
131 | run_parser.add_argument('--profile',
132 |                          help="run under profiler control")
133 | run_parser.add_argument('--checkin', '-c', action='store_true',
134 |                          help="do 'checkin code' before running the codelet")
135 | run_parser.set_defaults(func=activepapers.cli.run)
136 | 
137 | ##################################################
138 | 
139 | update_parser = subparsers.add_parser('update',
140 |                                       help="Update dummy or stale datasets "
141 |                                            "by running the required calclets")
142 | update_parser.add_argument('--verbose', '-v', action='store_true',
143 |                            help="show each step being executed")
144 | update_parser.set_defaults(func=activepapers.cli.update)
145 | 
146 | ##################################################
147 | 
148 | checkin_parser = subparsers.add_parser('checkin',
149 |                                        help="Update files, code, and text"
150 |                                             "from the working directory")
151 | checkin_parser.add_argument('--type', '-t',
152 |                              help="ActivePapers datatype")
153 | checkin_parser.add_argument('file', nargs='*',
154 |                              help="filename")
155 | checkin_parser.add_argument('--force', '-f', action='store_true',
156 |                              help="Update even if replacement is older")
157 | checkin_parser.add_argument('--dry-run', '-n', action='store_true',
158 |                              help="Display actions but don't execute them")
159 | checkin_parser.set_defaults(func=activepapers.cli.checkin)
160 | 
161 | ##################################################
162 | 
163 | checkout_parser = subparsers.add_parser('checkout',
164 |                                         help="Extract all files, code, and"
165 |                                              "text to the working directory")
166 | checkout_parser.add_argument('--type', '-t',
167 |                              help="check out only items of the given type")
168 | checkout_parser.add_argument('pattern', nargs='*',
169 |                              help="name pattern")
170 | checkout_parser.add_argument('--dry-run', '-n', action='store_true',
171 |                              help="Display actions but don't execute them")
172 | checkout_parser.set_defaults(func=activepapers.cli.checkout)
173 | 
174 | ##################################################
175 | 
176 | ln_parser = subparsers.add_parser('ln',
177 |                                   help="Create a link to another ActivePaper")
178 | ln_parser.add_argument('reference', type=str, help="reference to a dataset "
179 |                                                    "in another ActivePaper")
180 | ln_parser.add_argument('name', type=str, help="name of the link")
181 | ln_parser.set_defaults(func=activepapers.cli.ln)
182 | 
183 | ##################################################
184 | 
185 | cp_parser = subparsers.add_parser('cp',
186 |                                   help="Copy a dataset or group from "
187 |                                         "another ActivePaper")
188 | cp_parser.add_argument('reference', type=str, help="reference to a dataset "
189 |                                                    "in another ActivePaper")
190 | cp_parser.add_argument('name', type=str, help="name of the copy")
191 | cp_parser.set_defaults(func=activepapers.cli.cp)
192 | 
193 | ##################################################
194 | 
195 | refs_parser = subparsers.add_parser('refs',
196 |                                   help="Show references to other ActivePapers")
197 | refs_parser.add_argument('--verbose', '-v', action='store_true',
198 |                          help="Display referenced items")
199 | refs_parser.set_defaults(func=activepapers.cli.refs)
200 | 
201 | ##################################################
202 | 
203 | edit_parser = subparsers.add_parser('edit',
204 |                                      help="Edit an extractable dataset")
205 | edit_parser.add_argument('dataset', type=str, help="dataset name")
206 | edit_parser.set_defaults(func=activepapers.cli.edit)
207 | 
208 | ##################################################
209 | 
210 | console_parser = subparsers.add_parser('console',
211 |                                        help="Run a Python interactive console"
212 |                                             " inside the ActivePaper")
213 | console_parser.add_argument('--modify', '-m', action='store_true',
214 |                             help="Permit modifications (use with care)")
215 | console_parser.set_defaults(func=activepapers.cli.console)
216 | 
217 | ##################################################
218 | 
219 | ipython_parser = subparsers.add_parser('ipython',
220 |                                        help="Run an IPython shell"
221 |                                             " inside the ActivePaper")
222 | ipython_parser.add_argument('--modify', '-m', action='store_true',
223 |                             help="Permit modifications (use with care)")
224 | ipython_parser.set_defaults(func=activepapers.cli.ipython)
225 | 
226 | ##################################################
227 | 
def setup_logging(log, logfile):
    """Configure the logging module for this run.

    log:     name of the logging level ("DEBUG", "INFO", "WARNING",
             "ERROR" or "CRITICAL"); None selects "WARNING"
    logfile: name of a file to which log records are appended, or None
             to keep the default destination of logging.basicConfig

    An unknown level name is reported on stderr and replaced by
    "WARNING".  Previously the message was printed but the invalid name
    was still looked up in the logging module, which ended in an
    AttributeError (or in a non-level object being used as the level).
    """
    default_level = "WARNING"
    if log is None:
        log = default_level
    elif log not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
        sys.stderr.write("invalid logging level %s\n" % log)
        log = default_level
    opts = dict(level=getattr(logging, log),
                format="%(asctime)s %(levelname)s: %(message)s",
                datefmt="%Y-%m-%d/%H:%M:%S")
    if logfile is not None:
        opts["filename"] = logfile
        opts["filemode"] = "a"
    logging.basicConfig(**opts)
240 | 
241 | ##################################################
242 | 
# Parse the command line, configure logging, and dispatch to the function
# registered by the selected sub-command.  'func' is missing from the
# parsed arguments when no sub-command was given; nothing is run then.
parsed_args = parser.parse_args()
func = getattr(parsed_args, 'func', None)
args = dict(vars(parsed_args))
setup_logging(args['log'], args['logfile'])
# Only the remaining entries are keyword arguments of the command.
args.pop('func', None)
args.pop('log')
args.pop('logfile')
try:
    if func is not None:
        func(**args)
except activepapers.cli.CLIExit:
    pass
finally:
    logging.shutdown()
263 | 
264 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from setuptools import setup, Command
 4 | import copy
 5 | import os
 6 | import sys
 7 | 
 8 | package_dir = "lib"
 9 | script_dir = "scripts"
10 | 
11 | 
12 | with open('README.md') as file:
13 |     long_description = file.read()
14 |     long_description = long_description[:long_description.find("\n\n")]
15 | 
16 | class Dummy:
17 |     pass
18 | version = Dummy()
19 | exec(open('lib/activepapers/version.py').read(), version.__dict__)
20 | 
21 | setup(name='ActivePapers.Py',
22 |       version=version.version,
23 |       description='Executable papers containing Python code',
24 |       long_description=long_description,
25 |       author='Konrad Hinsen',
26 |       author_email='research@khinsen.fastmail.net',
27 |       url='http://github.com/activepapers/activepapers-python',
28 |       license='BSD',
29 |       package_dir = {'': package_dir},
30 |       packages=['activepapers'],
31 |       scripts=[os.path.join(script_dir, s) for s in os.listdir(script_dir)],
32 |       platforms=['any'],
33 |       install_requires=[
34 |           "numpy>=1.6",
35 |           "h5py>=2.2",
36 |           "tempdir>=0.6"
37 |       ],
38 |       provides=["ActivePapers"],
39 |       classifiers=[
40 |           "Development Status :: 3 - Alpha",
41 |           "Intended Audience :: Science/Research",
42 |           "License :: OSI Approved :: BSD License",
43 |           "Operating System :: OS Independent",
44 |           "Programming Language :: Python :: 2.7",
45 |           "Programming Language :: Python :: 3.4",
46 |           "Programming Language :: Python :: 3.5",
47 |           "Programming Language :: Python :: 3.6",
48 |           "Topic :: Scientific/Engineering",
49 |       ]
50 |   )
51 | 


--------------------------------------------------------------------------------
/tests/foo/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = 42
2 | 


--------------------------------------------------------------------------------
/tests/foo/bar.py:
--------------------------------------------------------------------------------
def frobnicate(x):
    """Return str(x)."""
    text = str(x)
    return text
3 | 


--------------------------------------------------------------------------------
/tests/run_all_tests.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Run every test*.py file of the current directory with the command
# given as first argument.
#
# Usage: run_all_tests.sh COMMAND
#
# The exit status is 2 when COMMAND is missing, 1 when at least one test
# file made COMMAND return a non-zero status, and 0 otherwise.

if [ -z "$1" ]; then
  echo "usage: $0 COMMAND" >&2
  exit 2
fi

status=0
for test in test*.py
do
  # Without any match the pattern itself is left in $test; skip it.
  [ -e "$test" ] || continue
  # $1 stays unquoted on purpose so that a command passed together with
  # options in a single argument is still split into words as before.
  $1 "$test" || status=1
done
exit $status
7 | 


--------------------------------------------------------------------------------
/tests/test_basics.py:
--------------------------------------------------------------------------------
  1 | # Extensive tests on a very simple ActivePaper
  2 | 
  3 | import collections
  4 | import os
  5 | import numpy as np
  6 | import h5py
  7 | import tempdir
  8 | from activepapers.storage import ActivePaper
  9 | from activepapers.utility import ascii
 10 | 
 11 | 
 12 | def make_simple_paper(filename):
 13 | 
 14 |     paper = ActivePaper(filename, "w")
 15 | 
 16 |     #paper.data.create_dataset("frequency", data=0.2)
 17 |     #paper.data.create_dataset("time", data=0.1*np.arange(100))
 18 | 
 19 |     init = paper.create_importlet("initialize",
 20 | """
 21 | from activepapers.contents import data
 22 | import numpy as np
 23 | 
 24 | data['frequency'] = 0.2
 25 | data['time'] = 0.1*np.arange(100)
 26 | """)
 27 |     init.run()
 28 | 
 29 |     calc_sine = paper.create_calclet("calc_sine",
 30 | """
 31 | from activepapers.contents import data
 32 | import numpy as np
 33 | 
 34 | frequency = data['frequency'][...]
 35 | time = data['time'][...]
 36 | data.create_dataset("sine", data=np.sin(2.*np.pi*frequency*time))
 37 | """)
 38 |     calc_sine.run()
 39 | 
 40 |     paper.close()
 41 | 
 42 | 
 43 | def make_paper_with_internal_module(filename):
 44 | 
 45 |     paper = ActivePaper(filename, "w")
 46 | 
 47 |     paper.add_module("my_math",
 48 | """
 49 | import numpy as np
 50 | 
 51 | def my_func(x):
 52 |     return np.sin(x)
 53 | """)
 54 | 
 55 |     paper.data.create_dataset("frequency", data=0.2)
 56 |     paper.data.create_dataset("time", data=0.1*np.arange(100))
 57 | 
 58 |     calc_sine = paper.create_calclet("calc_sine",
 59 | """
 60 | from activepapers.contents import data
 61 | import numpy as np
 62 | from my_math import my_func
 63 | 
 64 | frequency = data['frequency'][...]
 65 | time = data['time'][...]
 66 | data.create_dataset("sine", data=my_func(2.*np.pi*frequency*time))
 67 | """)
 68 |     calc_sine.run()
 69 | 
 70 |     paper.close()
 71 | 
 72 | 
 73 | def assert_almost_equal(x, y, tolerance):
 74 |     assert (np.fabs(np.array(x)-np.array(y)) < tolerance).all()
 75 | 
 76 | 
 77 | def assert_valid_paper(h5file):
 78 |     assert h5file.attrs['DATA_MODEL'] == ascii('active-papers-py')
 79 |     assert h5file.attrs['DATA_MODEL_MAJOR_VERSION'] == 0
 80 |     assert h5file.attrs['DATA_MODEL_MINOR_VERSION'] == 1
 81 | 
 82 |     for group in ['code', 'data', 'documentation']:
 83 |         assert group in h5file
 84 |         assert isinstance(h5file[group], h5py.Group)
 85 | 
 86 |     history = h5file['history']
 87 |     assert history.shape == (1,)
 88 |     opened = history[0]['opened']
 89 |     closed = history[0]['closed']
 90 |     def check_timestamp(name, node):
 91 |         t = node.attrs.get('ACTIVE_PAPER_TIMESTAMP', None)
 92 |         if t is not None:
 93 |             assert t >= opened
 94 |             assert t <= closed
 95 |     h5file.visititems(check_timestamp)
 96 | 
 97 | 
 98 | def check_hdf5_file(filename, ref_all_paths, ref_deps):
 99 |     h5file = h5py.File(filename, "r")
100 |     all_paths = []
101 |     h5file.visit(all_paths.append)
102 |     all_paths.sort()
103 |     assert all_paths == ref_all_paths
104 |     assert_valid_paper(h5file)
105 |     assert_almost_equal(h5file["data/frequency"][...], 0.2, 1.e-15)
106 |     assert_almost_equal(h5file["data/time"][...],
107 |                         0.1*np.arange(100),
108 |                         1.e-15)
109 |     assert_almost_equal(h5file["data/sine"][...],
110 |                         np.sin(0.04*np.pi*np.arange(100)),
111 |                         1.e-10)
112 |     for path in ['data/frequency', 'data/sine', 'data/time']:
113 |         assert h5file[path].attrs['ACTIVE_PAPER_DATATYPE'] == "data"
114 |         assert h5file[path].attrs['ACTIVE_PAPER_TIMESTAMP'] > 1.e9
115 |     for path in ['code/calc_sine']:
116 |         assert h5file[path].attrs['ACTIVE_PAPER_DATATYPE'] == "calclet"
117 |     deps = h5file["data/sine"].attrs['ACTIVE_PAPER_DEPENDENCIES']
118 |     assert list(ascii(p) for p in deps) \
119 |             == [ascii(p) for p in ref_deps]
120 |     assert h5file["data/sine"].attrs['ACTIVE_PAPER_GENERATING_CODELET'] \
121 |            == "/code/calc_sine"
122 |     h5file.close()
123 | 
124 | 
def check_paper(filename, ref_items, ref_deps, ref_hierarchy):
    """Check the paper in *filename* through the ActivePaper interface.

    filename:      name of the paper file, opened read-only
    ref_items:     expected sorted list of item names
    ref_deps:      expected mapping from item name to the sorted list of
                   the names of its dependencies
    ref_hierarchy: expected dependency hierarchy, a list of sorted lists
                   of item names

    The paper is opened in a with statement so that it is closed even
    when an assertion fails.  The loop over *ref_deps* uses its own
    variable names instead of re-binding ``deps``.
    """
    with ActivePaper(filename, "r") as paper:
        items = sorted([item.name for item in paper.iter_items()])
        assert items == ref_items
        items_with_deps = sorted([item.name for item in paper.iter_items()
                                  if paper.has_dependencies(item)])
        assert items_with_deps == ['/data/sine']
        deps = dict((ascii(item.name),
                     sorted(list(ascii(dep.name)
                                 for dep in paper.iter_dependencies(item))))
                    for item in paper.iter_items())
        assert deps == ref_deps
        # Invert the reference: dependency -> set of items depending on it.
        graph = collections.defaultdict(set)
        for item_name, item_deps in ref_deps.items():
            for d in item_deps:
                graph[d].add(item_name)
        assert graph == paper.dependency_graph()
        hierarchy = [sorted([ascii(item.name) for item in level])
                     for level in paper.dependency_hierarchy()]
        assert hierarchy == ref_hierarchy
        calclets = paper.calclets()
        assert len(calclets) == 1
        assert ascii(calclets['/code/calc_sine'].path) == '/code/calc_sine'
149 | 
150 | 
def test_simple_paper():
    """Build the simple paper, rebuild it, and check both files."""
    expected_paths = ['README', 'code', 'code/calc_sine', 'code/initialize',
                      'data', 'data/frequency', 'data/sine', 'data/time',
                      'documentation', 'external-dependencies', 'history']
    expected_items = ['/code/calc_sine', '/code/initialize',
                      '/data/frequency', '/data/sine', '/data/time']
    expected_sine_deps = ["/code/calc_sine",
                          "/data/frequency",
                          "/data/time"]
    expected_deps = {'/data/sine': ["/code/calc_sine",
                                    "/data/frequency",
                                    "/data/time"],
                     '/data/time': [],
                     '/data/frequency': [],
                     '/code/calc_sine': [],
                     '/code/initialize': []}
    expected_hierarchy = [['/code/calc_sine', '/code/initialize',
                           '/data/frequency', '/data/time'],
                          ['/data/sine']]

    with tempdir.TempDir() as workdir:
        original = os.path.join(workdir, "simple1.ap")
        rebuilt = os.path.join(workdir, "simple2.ap")

        make_simple_paper(original)
        check_hdf5_file(original, expected_paths, expected_sine_deps)
        check_paper(original, expected_items, expected_deps,
                    expected_hierarchy)

        # The rebuilt paper must pass exactly the same checks.
        with ActivePaper(original, "r") as paper:
            paper.rebuild(rebuilt)
        check_hdf5_file(rebuilt, expected_paths, expected_sine_deps)
        check_paper(rebuilt, expected_items, expected_deps,
                    expected_hierarchy)
180 | 
def test_paper_with_internal_module():
    """Build the paper with an internal module, rebuild it, and check both."""
    expected_paths = ['README', 'code', 'code/calc_sine',
                      'code/python-packages', 'code/python-packages/my_math',
                      'data', 'data/frequency', 'data/sine', 'data/time',
                      'documentation', 'external-dependencies', 'history']
    expected_items = ['/code/calc_sine', '/code/python-packages/my_math',
                      '/data/frequency', '/data/sine', '/data/time']
    expected_sine_deps = ["/code/calc_sine",
                          "/code/python-packages/my_math",
                          "/data/frequency",
                          "/data/time"]
    expected_deps = {'/data/sine': ["/code/calc_sine",
                                    "/code/python-packages/my_math",
                                    "/data/frequency",
                                    "/data/time"],
                     '/data/time': [],
                     '/data/frequency': [],
                     '/code/calc_sine': [],
                     '/code/python-packages/my_math': []}
    expected_hierarchy = [['/code/calc_sine', '/code/python-packages/my_math',
                           '/data/frequency', '/data/time'],
                          ['/data/sine']]

    with tempdir.TempDir() as workdir:
        original = os.path.join(workdir, "im1.ap")
        rebuilt = os.path.join(workdir, "im2.ap")

        make_paper_with_internal_module(original)
        check_hdf5_file(original, expected_paths, expected_sine_deps)
        check_paper(original, expected_items, expected_deps,
                    expected_hierarchy)

        # The rebuilt paper must pass exactly the same checks.
        with ActivePaper(original, "r") as paper:
            paper.rebuild(rebuilt)
        check_hdf5_file(rebuilt, expected_paths, expected_sine_deps)
        check_paper(rebuilt, expected_items, expected_deps,
                    expected_hierarchy)
213 | 


--------------------------------------------------------------------------------
/tests/test_exploration.py:
--------------------------------------------------------------------------------
 1 | # Test the exploration module
 2 | 
 3 | import os
 4 | import numpy as np
 5 | import tempdir
 6 | from activepapers.storage import ActivePaper
 7 | from activepapers import library
 8 | from activepapers.exploration import ActivePaper as ActivePaperExploration
 9 | 
def make_local_paper(filename):
    """Create the ActivePaper *filename* with two datasets and a module.

    The paper receives the datasets 'frequency' and 'time' and the
    module "my_math" defining my_func; it is closed afterwards.
    """
    my_math_source = """
import numpy as np

def my_func(x):
    return np.sin(x)
"""

    paper = ActivePaper(filename, "w")
    paper.data.create_dataset("frequency", data=0.2)
    paper.data.create_dataset("time", data=0.1*np.arange(100))
    paper.add_module("my_math", my_math_source)
    paper.close()
26 | 
def check_local_paper(filename):
    """Open *filename* for exploration and check its module and data.

    my_math is imported only after the paper has been opened; its
    my_func must reproduce np.sin on the data stored in the paper.
    """
    ap = ActivePaperExploration(filename)
    from my_math import my_func
    frequency = ap.data['frequency'][...]
    time = ap.data['time'][...]
    argument = 2.*np.pi*frequency*time
    assert (my_func(argument) == np.sin(argument)).all()
    ap.close()
35 | 
def test_local_paper():
    """Create a paper in a temporary directory and explore it."""
    with tempdir.TempDir() as workdir:
        paper_file = os.path.join(workdir, "test.ap")
        make_local_paper(paper_file)
        check_local_paper(paper_file)
42 | 
# The test is only defined when the environment does not declare, through
# NO_NETWORK_ACCESS, that the network must not be used.
if "NO_NETWORK_ACCESS" not in os.environ:
    def test_published_paper():
        """Explore a paper referenced by DOI, using a temporary library.

        library.library is replaced by a temporary directory for the
        duration of the test and restored afterwards; it used to be left
        pointing at the removed directory.  The paper is closed even when
        the check fails.
        """
        saved_library = library.library
        with tempdir.TempDir() as t:
            library.library = [t]
            try:
                ap = ActivePaperExploration("doi:10.6084/m9.figshare.808595")
                try:
                    import time_series
                    ts = np.arange(10)
                    assert time_series.integral(ts, 1)[-1] == 40.5
                finally:
                    ap.close()
            finally:
                library.library = saved_library
52 | 


--------------------------------------------------------------------------------
/tests/test_features.py:
--------------------------------------------------------------------------------
  1 | # Test specific features of ActivePapers
  2 | # coding: utf-8
  3 | 
  4 | import os
  5 | import numpy as np
  6 | import h5py
  7 | import tempdir
  8 | from nose.tools import raises
  9 | from activepapers.storage import ActivePaper
 10 | from activepapers.utility import ascii
 11 | 
 12 | def test_groups_as_items():
 13 |     with tempdir.TempDir() as t:
 14 |         filename = os.path.join(t, "paper.ap")
 15 |         paper = ActivePaper(filename, 'w')
 16 |         group1 = paper.data.create_group('group1')
 17 |         group1.create_dataset('value', data=42)
 18 |         group2 = paper.data.create_group('group2')
 19 |         group2.mark_as_data_item()
 20 |         group2.create_dataset('array', data=np.arange(10))
 21 |         items = sorted([item.name for item in paper.iter_items()])
 22 |         assert items == ['/data/group1/value', '/data/group2']
 23 |         groups = sorted([group.name for group in paper.iter_groups()])
 24 |         assert groups == ['/data/group1']
 25 |         script = paper.create_calclet("script1",
 26 | """
 27 | from activepapers.contents import data
 28 | x1 = data['group2']['array'][...]
 29 | x2 = data['group1']['value'][...]
 30 | data.create_dataset('sum1', data=x1+x2)
 31 | """)
 32 |         script.run()
 33 |         assert (paper.data['sum1'][...] == np.arange(42,52)).all()
 34 |         script = paper.create_calclet("script2",
 35 | """
 36 | from activepapers.contents import data
 37 | x1 = data['/group2/array'][...]
 38 | g = data['group1']
 39 | x2 = g['/group1/value'][...]
 40 | data.create_dataset('sum2', data=x1+x2)
 41 | """)
 42 |         script.run()
 43 |         assert (paper.data['sum2'][...] == np.arange(42,52)).all()
 44 |         deps = [sorted([ascii(item.name) for item in level])
 45 |                 for level in paper.dependency_hierarchy()]
 46 |         assert deps == [['/code/script1', '/code/script2',
 47 |                          '/data/group1/value', '/data/group2'],
 48 |                         ['/data/sum1', '/data/sum2']]
 49 |         deps = paper.data['sum1']._node.attrs['ACTIVE_PAPER_DEPENDENCIES']
 50 |         deps = sorted(ascii(d) for d in deps)
 51 |         assert deps == ['/code/script1',
 52 |                         '/data/group1/value', '/data/group2']
 53 |         deps = paper.data['sum2']._node.attrs['ACTIVE_PAPER_DEPENDENCIES']
 54 |         deps = sorted(ascii(d) for d in deps)
 55 |         assert deps == ['/code/script2',
 56 |                         '/data/group1/value', '/data/group2']
 57 |         paper.close()
 58 | 
 59 | def test_groups():
 60 |     with tempdir.TempDir() as t:
 61 |         filename = os.path.join(t, "paper.ap")
 62 |         paper = ActivePaper(filename, 'w')
 63 |         group = paper.data.create_group('group')
 64 |         subgroup = group.create_group('subgroup')
 65 |         group['data1'] = np.arange(10)
 66 |         group['data2'] = 42
 67 |         assert sorted([g.name for g in paper.iter_groups()]) \
 68 |                == ['/data/group', '/data/group/subgroup']
 69 |         assert sorted(list(node for node in group)) \
 70 |                == ['data1', 'data2', 'subgroup']
 71 |         assert group['data1'][...].shape == (10,)
 72 |         assert group['data2'][...] == 42
 73 |         assert paper.data.parent is paper.data
 74 |         assert group.parent is paper.data
 75 |         assert group['data1'].parent is group
 76 |         assert group['data2'].parent is group
 77 |         script = paper.create_calclet("script",
 78 | """
 79 | from activepapers.contents import data
 80 | assert data.parent is data
 81 | assert data._codelet is not None
 82 | assert data._codelet.path == '/code/script'
 83 | group = data['group']
 84 | assert group.parent is data
 85 | assert group._codelet is not None
 86 | assert group._codelet.path == '/code/script'
 87 | """)
 88 |         script.run()
 89 |         paper.close()
 90 | 
 91 | def test_datasets():
 92 |     with tempdir.TempDir() as t:
 93 |         filename = os.path.join(t, "paper.ap")
 94 |         paper = ActivePaper(filename, 'w')
 95 |         dset = paper.data.create_dataset("MyDataset", (10,10,10), 'f')
 96 |         assert len(dset) == 10
 97 |         assert dset[0, 0, 0].shape == ()
 98 |         assert dset[0, 2:10, 1:9:3].shape == (8, 3)
 99 |         assert dset[:, ::2, 5].shape == (10, 5)
100 |         assert dset[0].shape == (10, 10)
101 |         assert dset[1, 5].shape == (10,)
102 |         assert dset[0, ...].shape == (10, 10)
103 |         assert dset[..., 6].shape == (10, 10)
104 |         array = np.arange(100)
105 |         dset = paper.data.create_dataset("MyArray", data=array)
106 |         assert len(dset) == 100
107 |         assert (dset[array > 50] == np.arange(51, 100)).all()
108 |         dset[:20] = 42
109 |         assert (dset[...] == np.array(20*[42]+list(range(20, 100)))).all()
110 |         paper.data['a_number'] = 42
111 |         assert paper.data['a_number'][()] == 42
112 |         paper.close()
113 | 
def test_attrs():
    """Check the `attrs` mapping on a group marked as item and on a dataset."""
    with tempdir.TempDir() as workdir:
        ap = ActivePaper(os.path.join(workdir, "paper.ap"), 'w')
        marked_group = ap.data.create_group('group')
        dataset = marked_group.create_dataset('value', data=42)
        marked_group.mark_as_data_item()
        # Both node kinds must start with no visible attributes and then
        # expose exactly the one attribute that was set.
        for node in (marked_group, dataset):
            assert len(node.attrs) == 0
            node.attrs['foo'] = 'bar'
            assert len(node.attrs) == 1
            assert list(node.attrs) == ['foo']
            assert node.attrs['foo'] == 'bar'
        ap.close()
132 | 
def test_dependencies():
    """Check dependency recording and staleness detection for a dataset.

    A codelet reads 'e' and 'pi' and writes 'sum'. The test asserts that
    'sum' lists the codelet and both inputs as dependencies, that it is not
    stale right after the run, and that it is reported stale once 'e' has
    been deleted and re-created.
    """
    with tempdir.TempDir() as t:
        filename = os.path.join(t, "paper.ap")
        paper = ActivePaper(filename, 'w')
        paper.data.create_dataset('e', data = np.e)
        paper.data.create_dataset('pi', data = np.pi)
        # The codelet uses np.float64 rather than the np.float alias, which
        # was deprecated in NumPy 1.20 and removed in 1.24.
        script = paper.create_calclet("script",
"""
from activepapers.contents import data
import numpy as np
e = data['e'][...]
sum = data.create_dataset('sum', shape=(1,), dtype=np.float64)
pi = data['pi'][...]
sum[0] = e+pi
""")
        script.run()
        deps = [ascii(item.name)
                for item in paper.iter_dependencies(paper.data['sum']._node)]
        assert sorted(deps) == ['/code/script', '/data/e', '/data/pi']
        assert not paper.is_stale(paper.data['sum']._node)
        # Replacing an input must make the derived dataset stale.
        del paper.data['e']
        paper.data['e'] = 0.
        assert paper.is_stale(paper.data['sum']._node)
        paper.close()
157 | 
def test_internal_files():
    """Run codelets that write and read internal files, then check the
    resulting dependency hierarchy.

    The codelets are created and run one after another in the order listed:
    text writes, an empty file, a UTF-8 file, the matching reads, a
    text-to-binary conversion, a binary read and a documentation file.
    """
    codelets = [
        ("write1",
"""
from activepapers.contents import open

f = open('numbers1', 'w')
for i in range(10):
    f.write(str(i)+'\\n')
f.close()
"""),
        ("write2",
"""
from activepapers.contents import open

with open('numbers', 'w') as f:
    for i in range(10):
        f.write(str(i)+'\\n')
"""),
        ("write3",
"""
from activepapers.contents import open

with open('empty', 'w') as f:
    pass
"""),
        ("write4",
u"""
from activepapers.contents import open

with open('utf8', 'w', encoding='utf-8') as f:
    f.write(u'déjà')
"""),
        ("read1",
"""
from activepapers.contents import open

f = open('numbers')
for i in range(10):
    assert f.readline().strip() == str(i)
f.close()
"""),
        ("read2",
"""
from activepapers.contents import open

f = open('numbers')
data = [int(line.strip()) for line in f]
f.close()
assert data == list(range(10))
"""),
        ("read3",
"""
from activepapers.contents import open

f = open('empty')
data = f.read()
f.close()
assert len(data) == 0
"""),
        ("read4",
u"""
from activepapers.contents import open

f = open('utf8', encoding='utf-8')
data = f.read()
f.close()
assert data == u'déjà'
"""),
        ("convert_to_binary",
"""
from activepapers.contents import open
import struct

with open('numbers') as f:
    data = [int(line.strip()) for line in f]
f = open('binary_numbers', 'wb')
f.write(struct.pack(len(data)*'h', *data))
f.close()
"""),
        ("read_binary",
"""
from activepapers.contents import open
import struct

f = open('binary_numbers', 'rb')
assert struct.unpack(10*'h', f.read()) == tuple(range(10))
f.close()
"""),
        ("write_documentation",
"""
from activepapers.contents import open_documentation

with open_documentation('hello.txt', 'w') as f:
    f.write('Hello world!\\n')
"""),
    ]

    with tempdir.TempDir() as workdir:
        ap = ActivePaper(os.path.join(workdir, "paper.ap"), 'w')
        # Each codelet runs before the next one is created, so readers find
        # the files produced by the earlier writers.
        for name, source in codelets:
            codelet = ap.create_calclet(name, source)
            codelet.run()

        h = []
        for step in ap.dependency_hierarchy():
            h.append(sorted(list(ascii(item.name) for item in step)))
        print(h)
        assert h == [['/code/convert_to_binary',
                      '/code/read1', '/code/read2', '/code/read3',
                      '/code/read4', '/code/read_binary',
                      '/code/write1', '/code/write2', '/code/write3',
                      '/code/write4', '/code/write_documentation'],
                     ['/data/empty', '/data/numbers', '/data/numbers1',
                      '/data/utf8', '/documentation/hello.txt'],
                     ['/data/binary_numbers']]
        ap.close()
279 | 
@raises(ValueError)
def test_overwrite_internal_file():
    """Two codelets write the same internal file 'numbers'; the test expects
    a ValueError to be raised somewhere in this function.
    """
    first_writer = """
from activepapers.contents import open
f = open('numbers', 'w')
for i in range(10):
    f.write(str(i)+'\\n')
f.close()
"""
    second_writer = """
from activepapers.contents import open

with open('numbers', 'w') as f:
    for i in range(10):
        f.write(str(i)+'\\n')
"""
    with tempdir.TempDir() as workdir:
        ap = ActivePaper(os.path.join(workdir, "paper.ap"), 'w')
        for name, source in (("write1", first_writer),
                             ("write2", second_writer)):
            codelet = ap.create_calclet(name, source)
            codelet.run()
        ap.close()
304 | 
@raises(ImportError)
def test_import_forbidden():
    """A codelet importing `distutils` is expected to raise ImportError."""
    # distutils is a forbidden module from the standard library
    forbidden_import = """
import distutils
"""
    with tempdir.TempDir() as workdir:
        ap = ActivePaper(os.path.join(workdir, "paper.ap"), "w")
        codelet = ap.create_calclet("script", forbidden_import)
        codelet.run()
        ap.close()
317 | 
def test_snapshots():
    """Check that `snapshot()` called inside a codelet writes valid papers
    containing exactly what existed at the time of the call.

    The codelet creates 'sine', takes snapshot 1, creates 'cosine', takes
    snapshot 2, then creates 'tangent'. The snapshots are re-opened both as
    ActivePapers and as plain HDF5 files to inspect their contents.
    """
    with tempdir.TempDir() as t:
        filename = os.path.join(t, "paper.ap")
        snapshot_1 = os.path.join(t, "snapshot_1.ap")
        snapshot_2 = os.path.join(t, "snapshot_2.ap")
        paper = ActivePaper(filename, 'w')
        paper.data.create_dataset("frequency", data = 0.2)
        paper.data.create_dataset("time", data=0.1*np.arange(100))
        calc_angular = paper.create_calclet("calc_angular",
"""
from activepapers.contents import data, snapshot
import numpy as np

frequency = data['frequency'][...]
time = data['time'][...]
angular = data.create_group('angular')
angular.attrs['time'] = data['time'].ref
angular.create_dataset("time", data=data['time'].ref)
angular.create_dataset("sine", data=np.sin(2.*np.pi*frequency*time))
snapshot('%s')
angular.create_dataset("cosine", data=np.cos(2.*np.pi*frequency*time))
snapshot('%s')
angular.create_dataset("tangent", data=np.tan(2.*np.pi*frequency*time))
""" % (snapshot_1, snapshot_2))
        calc_angular.run()
        paper.close()
        # Open the snapshot files to verify they are valid ActivePapers
        ActivePaper(snapshot_1, 'r').close()
        ActivePaper(snapshot_2, 'r').close()
        # Check the contents. The files are opened read-only with an
        # explicit mode (the h5py default mode changed between major
        # versions) and are closed before the temporary directory goes away.
        with h5py.File(filename, 'r') as paper_file, \
             h5py.File(snapshot_1, 'r') as snap1_file, \
             h5py.File(snapshot_2, 'r') as snap2_file:
            for item in ['/data/time', '/data/frequency',
                         '/data/angular/sine', '/code/calc_angular']:
                assert item in paper_file
                assert item in snap1_file
                assert item in snap2_file
            assert '/data/angular/cosine' in paper_file
            assert '/data/angular/cosine' not in snap1_file
            assert '/data/angular/cosine' in snap2_file
            assert '/data/angular/tangent' in paper_file
            assert '/data/angular/tangent' not in snap1_file
            assert '/data/angular/tangent' not in snap2_file
            for root in [snap1_file, snap2_file]:
                # NOTE(review): the equivalent check on the reference stored
                # in the dataset '/data/angular/time' was already disabled
                # in the original test; the reason is not visible here.
                #time_ref = root['/data/angular/time'][()]
                #assert root[time_ref].name == '/data/time'
                time_ref = root['/data/angular'].attrs['time']
                assert root[time_ref].name == '/data/time'
367 | 
def test_modified_scripts():
    """Re-create a codelet under the same name and check the paper's items.

    After the second version of "script" has run, the assertions expect
    only '/code/script' and '/data/foo' (with the new value) to remain.
    """
    original_source = """
from activepapers.contents import data
data.create_dataset('foo', data=42)
group = data.create_group('group1')
group.mark_as_data_item()
group['value'] = 1
group = data.create_group('group2')
group['value'] = 2
"""
    replacement_source = """
from activepapers.contents import data
data.create_dataset('foo', data=1)
"""
    with tempdir.TempDir() as workdir:
        ap = ActivePaper(os.path.join(workdir, "paper.ap"), 'w')

        codelet = ap.create_calclet("script", original_source)
        codelet.run()
        names = [item.name for item in ap.iter_items()]
        names.sort()
        assert names == ['/code/script', '/data/foo',
                         '/data/group1', '/data/group2/value']
        assert (ap.data['foo'][...] == 42)
        assert (ap.data['group1/value'][...] == 1)
        assert (ap.data['group2/value'][...] == 2)

        # Same codelet name, different source.
        codelet = ap.create_calclet("script", replacement_source)
        codelet.run()
        names = [item.name for item in ap.iter_items()]
        names.sort()
        assert names == ['/code/script', '/data/foo']
        assert (ap.data['foo'][...] == 1)
        ap.close()
399 | 
def test_dummy_datasets():
    """Check `replace_by_dummy` on a codelet-generated dataset.

    The dummy must keep the generating-codelet attribute and carry the
    dummy marker. Calling `replace_by_dummy` on '/data/time', which was
    created directly rather than by a codelet, must raise AssertionError.
    """
    with tempdir.TempDir() as workdir:
        ap = ActivePaper(os.path.join(workdir, "paper.ap"), 'w')
        ap.data.create_dataset("frequency", data = 0.2)
        ap.data.create_dataset("time", data=0.1*np.arange(100))
        generator = ap.create_calclet("calc_angular",
"""
from activepapers.contents import data, snapshot
import numpy as np

frequency = data['frequency'][...]
time = data['time'][...]
angular = data.create_group('angular')
angular.attrs['time'] = data['time'].ref
angular.create_dataset("time", data=data['time'].ref)
angular.create_dataset("sine", data=np.sin(2.*np.pi*frequency*time))
""")
        generator.run()

        ap.replace_by_dummy('/data/angular/sine')
        placeholder = ap.data_group['angular/sine']
        origin = placeholder.attrs.get('ACTIVE_PAPER_GENERATING_CODELET')
        assert origin ==  '/code/calc_angular'
        assert placeholder.attrs.get('ACTIVE_PAPER_DUMMY_DATASET', False)

        try:
            ap.replace_by_dummy('/data/time')
        except AssertionError:
            rejected = True
        else:
            rejected = False
        assert rejected
        ap.close()
431 | 


--------------------------------------------------------------------------------
/tests/test_library.py:
--------------------------------------------------------------------------------
 1 | # Test file downloads
 2 | 
 3 | import os
 4 | import tempdir
 5 | 
 6 | from activepapers.storage import ActivePaper
 7 | from activepapers import library
 8 | from activepapers.utility import ascii
 9 | 
if not ("NO_NETWORK_ACCESS" in os.environ):
    def test_figshare_download():
        """Resolve a figshare DOI through the library and inspect the paper.

        Only defined when NO_NETWORK_ACCESS is absent from the environment.
        """
        with tempdir.TempDir() as library_dir:
            library.library = [library_dir]
            found = library.find_in_library("doi:10.6084/m9.figshare.692144")
            expected = os.path.join(library_dir,
                                    "10.6084/m9.figshare.692144.ap")
            assert found == expected
            ap = ActivePaper(found)
            init_node = ap.code_group['python-packages/immutable/__init__']
            datatype = init_node.attrs['ACTIVE_PAPER_DATATYPE']
            assert ascii(datatype) == 'module'
            ap.close()
19 | 
if not ("NO_NETWORK_ACCESS" in os.environ):
    def test_zenodo_download():
        """Resolve a Zenodo DOI through the library and inspect the paper.

        Only defined when NO_NETWORK_ACCESS is absent from the environment.
        """
        with tempdir.TempDir() as library_dir:
            library.library = [library_dir]
            found = library.find_in_library("doi:10.5281/zenodo.7648")
            expected = os.path.join(library_dir, "10.5281/zenodo.7648.ap")
            assert found == expected
            ap = ActivePaper(found)
            init_node = ap.code_group['python-packages/mosaic/__init__']
            datatype = init_node.attrs['ACTIVE_PAPER_DATATYPE']
            assert ascii(datatype) == 'module'
            ap.close()
29 | 


--------------------------------------------------------------------------------
/tests/test_python_modules.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import tempdir
  4 | from nose.tools import raises
  5 | from activepapers.storage import ActivePaper
  6 | from activepapers.utility import isstring
  7 | 
  8 | def make_paper(filename):
  9 |     paper = ActivePaper(filename, "w")
 10 |     paper.import_module('foo')
 11 |     paper.import_module('foo.bar')
 12 |     script = paper.create_calclet("test",
 13 | """
 14 | from activepapers.contents import data
 15 | import foo
 16 | from foo.bar import frobnicate
 17 | data['result'] = frobnicate(2)
 18 | assert frobnicate(foo.__version__) == '42'
 19 | """)
 20 |     script.run()
 21 |     paper.close()
 22 | 
 23 | def assert_is_python_module(node):
 24 |     assert node.attrs.get('ACTIVE_PAPER_DATATYPE', None) == 'module'
 25 |     assert node.attrs.get('ACTIVE_PAPER_LANGUAGE', None) == 'python'
 26 |     
 27 | def check_paper(filename):
 28 |     paper = ActivePaper(filename, "r")
 29 |     items = sorted([item.name for item in paper.iter_items()])
 30 |     assert items == ["/code/python-packages/foo/__init__",
 31 |                      "/code/python-packages/foo/bar",
 32 |                      "/code/test",
 33 |                      "/data/result"]
 34 |     deps = [sorted(item.name for item in level)
 35 |             for level in paper.dependency_hierarchy()]
 36 |     assert deps == [['/code/python-packages/foo/__init__',
 37 |                      '/code/python-packages/foo/bar',
 38 |                      '/code/test'],
 39 |                     ['/data/result']]
 40 |     for path in ['foo/__init__', 'foo/bar']:
 41 |         node = paper.code_group['python-packages'][path]
 42 |         assert_is_python_module(node)
 43 |     paper.close()
 44 | 
 45 | def test_simple_paper():
 46 |     with tempdir.TempDir() as t:
 47 |         filename1 = os.path.join(t, "paper1.ap")
 48 |         filename2 = os.path.join(t, "paper2.ap")
 49 |         make_paper(filename1)
 50 |         check_paper(filename1)
 51 |         with ActivePaper(filename1, "r") as paper:
 52 |             paper.rebuild(filename2)
 53 |         check_paper(filename2)
 54 | 
 55 | def make_paper_with_module(filename, value):
 56 |     paper = ActivePaper(filename, "w")
 57 |     paper.add_module("some_values",
 58 | """
 59 | a_value = %d
 60 | """ % value)
 61 |     script = paper.create_calclet("test",
 62 | """
 63 | from activepapers.contents import data
 64 | from some_values import a_value
 65 | data['a_value'] = a_value
 66 | """)
 67 |     script.run()
 68 |     paper.close()
 69 | 
 70 | def check_paper_with_module(filename, value):
 71 |     paper = ActivePaper(filename, "r")
 72 |     assert paper.data['a_value'][...] == value
 73 |     paper.close()
 74 | 
 75 | def test_module_paper():
 76 |     with tempdir.TempDir() as t:
 77 |         filename1 = os.path.join(t, "paper1.ap")
 78 |         filename2 = os.path.join(t, "paper2.ap")
 79 |         make_paper_with_module(filename1, 42)
 80 |         check_paper_with_module(filename1, 42)
 81 |         make_paper_with_module(filename2, 0)
 82 |         check_paper_with_module(filename2, 0)
 83 | 
 84 | @raises(ValueError)
 85 | def test_import_math():
 86 |     # math is an extension module, so this should fail
 87 |     with tempdir.TempDir() as t:
 88 |         filename = os.path.join(t, "paper.ap")
 89 |         paper = ActivePaper(filename, "w")
 90 |         paper.import_module('math')
 91 |         paper.close()
 92 | 
 93 | @raises(ImportError)
 94 | def test_import_ctypes():
 95 |     # ctypes is not in the "allowed module" list, so this should fail
 96 |     with tempdir.TempDir() as t:
 97 |         filename = os.path.join(t, "paper.ap")
 98 |         paper = ActivePaper(filename, "w")
 99 |         script = paper.create_calclet("test",
100 | """
101 | import ctypes
102 | """)
103 |         script.run()
104 |         paper.close()
105 | 


--------------------------------------------------------------------------------
/tests/test_references.py:
--------------------------------------------------------------------------------
  1 | # Test the use of references
  2 | 
  3 | import os
  4 | 
  5 | import numpy as np
  6 | import h5py
  7 | import tempdir
  8 | 
  9 | from activepapers.storage import ActivePaper
 10 | from activepapers.utility import ascii
 11 | from activepapers import library
 12 | 
 13 | def make_simple_paper(filename):
 14 | 
 15 |     paper = ActivePaper(filename, "w")
 16 | 
 17 |     paper.data.create_dataset("frequency", data=0.2)
 18 |     paper.data.create_dataset("time", data=0.1*np.arange(100))
 19 | 
 20 |     calc_sine = paper.create_calclet("calc_sine",
 21 | """
 22 | from activepapers.contents import data
 23 | import numpy as np
 24 | 
 25 | frequency = data['frequency'][...]
 26 | time = data['time'][...]
 27 | data.create_dataset("sine", data=np.sin(2.*np.pi*frequency*time))
 28 | """)
 29 |     calc_sine.run()
 30 | 
 31 |     paper.close()
 32 | 
 33 | 
 34 | def make_library_paper(filename):
 35 | 
 36 |     paper = ActivePaper(filename, "w")
 37 | 
 38 |     paper.add_module("my_math",
 39 | """
 40 | import numpy as np
 41 | 
 42 | def my_func(x):
 43 |     return np.sin(x)
 44 | """)
 45 | 
 46 |     paper.close()
 47 | 
 48 | 
 49 | def make_simple_paper_with_data_refs(filename, paper_ref):
 50 | 
 51 |     paper = ActivePaper(filename, "w")
 52 | 
 53 |     paper.create_data_ref("frequency", paper_ref)
 54 |     paper.create_data_ref("time_from_ref", paper_ref, "time")
 55 | 
 56 |     calc_sine = paper.create_calclet("calc_sine",
 57 | """
 58 | from activepapers.contents import data
 59 | import numpy as np
 60 | 
 61 | frequency = data['frequency'][...]
 62 | time = data['time_from_ref'][...]
 63 | data.create_dataset("sine", data=np.sin(2.*np.pi*frequency*time))
 64 | """)
 65 |     calc_sine.run()
 66 | 
 67 |     paper.close()
 68 | 
 69 | 
 70 | def make_simple_paper_with_data_and_code_refs(filename, paper_ref):
 71 | 
 72 |     paper = ActivePaper(filename, "w")
 73 | 
 74 |     paper.create_data_ref("frequency", paper_ref)
 75 |     paper.create_data_ref("time", paper_ref)
 76 | 
 77 |     paper.create_code_ref("calc_sine", paper_ref)
 78 |     paper.run_codelet('calc_sine')
 79 | 
 80 |     paper.close()
 81 | 
 82 | 
 83 | def make_simple_paper_with_library_refs(filename, paper_ref):
 84 | 
 85 |     paper = ActivePaper(filename, "w")
 86 | 
 87 |     paper.data.create_dataset("frequency", data = 0.2)
 88 |     paper.data.create_dataset("time", data=0.1*np.arange(100))
 89 | 
 90 |     paper.create_module_ref("my_math", paper_ref)
 91 | 
 92 |     calc_sine = paper.create_calclet("calc_sine",
 93 |     """
 94 | from activepapers.contents import data
 95 | import numpy as np
 96 | from my_math import my_func
 97 | 
 98 | frequency = data['frequency'][...]
 99 | time = data['time'][...]
100 | data.create_dataset("sine", data=my_func(2.*np.pi*frequency*time))
101 |     """)
102 |     calc_sine.run()
103 | 
104 |     paper.close()
105 | 
106 | 
def make_simple_paper_with_copies(filename, paper_ref):
    """Create a paper by copying two datasets and the "calc_sine" codelet
    from `paper_ref`, then run the copied codelet.
    """
    ap = ActivePaper(filename, "w")

    for source_path in ("/data/frequency", "/data/time"):
        ap.create_copy(source_path, paper_ref)

    ap.create_copy("/code/calc_sine", paper_ref)
    ap.run_codelet('calc_sine')

    ap.close()
118 | 
119 | 
def assert_almost_equal(x, y, tolerance):
    """Assert that `x` and `y` differ element-wise by less than `tolerance`.

    Both arguments are converted with `np.array` before subtraction.
    """
    deviation = np.fabs(np.array(x) - np.array(y))
    within_tolerance = deviation < tolerance
    assert within_tolerance.all()
122 | 
123 | 
def check_paper_with_refs(filename, with_name_postfix, refs, additional_items):
    """Check the items, reference markers and 'sine' values of a paper.

    filename         -- path of the paper, opened read-only
    with_name_postfix -- if true, the time dataset is expected under
                         '/data/time_from_ref' instead of '/data/time'
    refs             -- item names whose ACTIVE_PAPER_DATATYPE attribute
                         must be 'reference'
    additional_items -- list of item names expected in addition to the
                         codelet and the three datasets
    """
    if with_name_postfix:
        time_ds_name = '/data/time_from_ref'
    else:
        time_ds_name = '/data/time'
    expected_items = ['/code/calc_sine', '/data/frequency',
                      '/data/sine', time_ds_name] + additional_items
    expected_items = sorted(expected_items)

    ap = ActivePaper(filename, "r")
    found_items = [item.name for item in ap.iter_items()]
    found_items.sort()
    assert found_items == expected_items
    for item_name in refs:
        datatype = ap.data_group[item_name].attrs['ACTIVE_PAPER_DATATYPE']
        assert datatype == 'reference'
    # frequency 0.2 and time step 0.1 give a phase step of 0.04*pi.
    assert_almost_equal(ap.data["sine"][...],
                        np.sin(0.04*np.pi*np.arange(100)),
                        1.e-10)
    ap.close()
137 | 
def test_simple_paper_with_data_refs():
    """A paper whose two input datasets are references into "local:simple1"."""
    with tempdir.TempDir() as library_dir:
        library.library = [library_dir]
        os.mkdir(os.path.join(library_dir, "local"))
        source_path = os.path.join(library_dir, "local/simple1.ap")
        derived_path = os.path.join(library_dir, "simple2.ap")
        make_simple_paper(source_path)
        make_simple_paper_with_data_refs(derived_path, "local:simple1")
        expected_refs = ['/data/frequency', '/data/time_from_ref']
        check_paper_with_refs(derived_path, True, expected_refs, [])
149 | 
def test_simple_paper_with_data_and_code_refs():
    """A paper whose datasets and codelet are references into "local:simple1"."""
    with tempdir.TempDir() as library_dir:
        library.library = [library_dir]
        os.mkdir(os.path.join(library_dir, "local"))
        source_path = os.path.join(library_dir, "local/simple1.ap")
        derived_path = os.path.join(library_dir, "simple2.ap")
        make_simple_paper(source_path)
        make_simple_paper_with_data_and_code_refs(derived_path,
                                                  "local:simple1")
        expected_refs = ['/data/frequency', '/data/time',
                         '/code/calc_sine']
        check_paper_with_refs(derived_path, False, expected_refs, [])
162 | 
def test_simple_paper_with_library_refs():
    """A paper whose codelet uses a module referenced from "local:library"."""
    with tempdir.TempDir() as library_dir:
        library.library = [library_dir]
        os.mkdir(os.path.join(library_dir, "local"))
        library_path = os.path.join(library_dir, "local/library.ap")
        paper_path = os.path.join(library_dir, "simple.ap")
        make_library_paper(library_path)
        make_simple_paper_with_library_refs(paper_path, "local:library")
        # The module reference is both an extra item and a reference.
        check_paper_with_refs(paper_path, False,
                              ['/code/python-packages/my_math'],
                              ['/code/python-packages/my_math'])
174 | 
175 | 
def test_copy():
    """Check a paper built from copies of items in "local:simple1".

    Besides the usual content checks, every copied item must carry an
    ACTIVE_PAPER_COPIED_FROM attribute naming the source paper and path.
    """
    with tempdir.TempDir() as t:
        library.library = [t]
        os.mkdir(os.path.join(t, "local"))
        filename1 = os.path.join(t, "local/simple1.ap")
        filename2 = os.path.join(t, "simple2.ap")
        make_simple_paper(filename1)
        make_simple_paper_with_copies(filename2, "local:simple1")
        check_paper_with_refs(filename2, False, [], [])
        paper = ActivePaper(filename2, 'r')
        try:
            for path in ['/code/calc_sine', '/data/frequency', '/data/time']:
                item = paper.file[path]
                source = item.attrs.get('ACTIVE_PAPER_COPIED_FROM')
                assert source is not None
                paper_ref, ref_path = source
                # Up to h5py 2.2 the two entries are unwrapped with .flat[0]
                # before they are compared.
                if h5py.version.version_tuple[:2] <= (2, 2):
                    paper_ref = paper_ref.flat[0]
                    ref_path = ref_path.flat[0]
                assert ascii(paper_ref) == "local:simple1"
                assert ascii(ref_path) == path
        finally:
            # Close the paper even when an assertion fails, so the file is
            # not left open when the temporary directory is removed.
            paper.close()
196 | 
197 | 


--------------------------------------------------------------------------------