├── TAR_EXCLUDELIST
├── testing
    ├── __init__.py
    ├── rserve-test.conf
    ├── test-script.R
    ├── binCodes.py
    ├── testtools.py
    ├── test_taggedContainers.py
    ├── conftest.py
    └── test_rparser.py
├── requirements.txt
├── pyRserve
    ├── version.txt
    ├── binary_closure.txt
    ├── __init__.py
    ├── rexceptions.py
    ├── misc.py
    ├── taggedContainers.py
    ├── rtypes.py
    ├── rserializer.py
    ├── rconn.py
    └── rparser.py
├── doc
    ├── intro.rst
    ├── index.rst
    ├── license.rst
    ├── Makefile
    ├── make.bat
    ├── installation.rst
    ├── changelog.rst
    ├── conf.py
    └── manual.rst
├── requirements_dev.txt
├── .gitignore
├── CREDITS
├── .pre-commit-config.yaml
├── setup.cfg
├── TODO
├── .travis.yml
├── .github
    └── workflows
    │   └── build-and-test.yml
├── INSTALL
├── Makefile
├── LICENSE
├── dockerfiles
    └── R-4.2.2-Rserve-1.8.12-alpine
    │   └── Dockerfile
├── setup.py
└── README.rst


/TAR_EXCLUDELIST:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/testing/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | 


--------------------------------------------------------------------------------
/pyRserve/version.txt:
--------------------------------------------------------------------------------
1 | 1.0.3
2 | 


--------------------------------------------------------------------------------
/doc/intro.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../README.rst
2 | 


--------------------------------------------------------------------------------
/testing/rserve-test.conf:
--------------------------------------------------------------------------------
1 | oob enable
2 | eval library(Rserve)
3 | 


--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | flake8
3 | coverage
4 | sphinx
5 | pre-commit
6 | wheel
7 | twine
8 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | MANIFEST
 3 | dist
 4 | doc/doctrees
 5 | doc/html
 6 | doc/pyRserve.html.zip
 7 | *~
 8 | __pycache__
 9 | .idea
10 | 


--------------------------------------------------------------------------------
/CREDITS:
--------------------------------------------------------------------------------
1 | Credits:
2 | --------
3 | 
4 | Simon Urbanek, the author of Rserve (http://www.rforge.net/Rserve/),
5 | has been the main source of support to get the binary data conversion
6 | between Rserve and Python working properly.
7 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | -   repo: https://github.com/pre-commit/pre-commit-hooks
3 |     rev: v2.3.0
4 |     hooks:
5 |     -   id: end-of-file-fixer
6 |     -   id: trailing-whitespace
7 |     -   id: flake8
8 |         args: [--max-line-length=88]
9 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [bdist_wheel]
 2 | universal=1
 3 | 
 4 | [flake8]
 5 | max_line_length = 80
 6 | per-file-ignores =
 7 |     pyRserve/__init__.py: F401
 8 |     # Much improved readability if we allow for extra space character in
 9 |     # various places for code formatting. So disable some errors for flake8:
10 |     pyRserve/rtypes.py: E114,E116,E221,E222
11 | 


--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
 1 | 
 2 | Important TODOs:
 3 | ================
 4 | - Support for endianness when loading arrays directly with 'fromstring()' or
 5 |   writing them with 'tostring()'.
 6 | 
 7 |   Affected code:
 8 |     - rserializer.s_xt_array_double()
 9 |     - rparser.xt_array_numeric()
10 | 
11 | 
12 | - The rserializer fails on objects like TaggedList([("n","Fred"), 2.0, ("c_ages", 5.5)])
13 |   where the 2nd item has no tag name.
14 | 


--------------------------------------------------------------------------------
/pyRserve/binary_closure.txt:
--------------------------------------------------------------------------------
 1 | \x01\x00\x01\x00\x4c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00
 2 | 
 3 | \x0a\x48\x00\x00
 4 | 
 5 | Closure: \x12\x44\x00\x00
 6 |     ListTag: \x15\x10\x00\x00
 7 |        Symname:   \x13\x04\x00\x00\x00\x00\x00\x00
 8 |        Symname:   \x13\x04\x00\x00\x65\x6e\x76\x00
 9 |     LangNoTag: \x16\x2c\x00\x00
10 |        \x13\x0c\x00\x00\x2e\x49\x6e\x74\x65\x72\x6e\x61\x6c\x00\x00\x00\x16\x18\x00\x00\x13\x0c\x00\x00\x70\x61\x72\x65\x6e\x74\x2e\x65\x6e\x76\x00\x00\x13\x04\x00\x00\x65\x6e\x76\x00
11 | 


--------------------------------------------------------------------------------
/testing/test-script.R:
--------------------------------------------------------------------------------
 1 | # Test file for Ralph with plot returned as raw file
 2 | #
 3 | # Author: yanabr
 4 | ###############################################################################
 5 | 
 6 | rm(list=ls())
 7 | graphics.off()
 8 | 
 9 | pid <- Sys.getpid()
10 | 
11 | ## some dummy data
12 | x <- sort(rnorm(100))
13 | y <- 2*x+rnorm(100,0,0.5)
14 | 
15 | ## model
16 | model <- lm(y~x)
17 | 
18 | filename <- paste('plot_',pid,'.png',sep="")
19 | png(width=480, height=480, file=filename)
20 | plot(x,y)
21 | abline(coef(model),col=2,lty=2)
22 | dev.off()
23 | 
24 | im <- readBin(filename,"raw", 999999)
25 | 
26 | result_vector <- list(x,y,coef(model),im)
27 | 


--------------------------------------------------------------------------------
/pyRserve/__init__.py:
--------------------------------------------------------------------------------
 1 | """pyRserve package"""
 2 | import os
 3 | import sys
 4 | import warnings
 5 | 
 6 | from .rconn import connect
 7 | from .taggedContainers import TaggedList, TaggedArray, AttrArray
 8 | 
 9 | # Show each DeprecationWarning only once:
10 | warnings.filterwarnings('once', category=DeprecationWarning)
11 | 
12 | if sys.version_info.major == 2:
13 |     warnings.warn(
14 |         'Python 2 is deprecated, it will no longer be supported in pyRserve 1.1',
15 |         DeprecationWarning
16 |     )
17 | del warnings
18 | 
19 | __version__ = open(os.path.join(os.path.dirname(__file__),
20 |                                 'version.txt')).readline().strip()
21 | 


--------------------------------------------------------------------------------
/pyRserve/rexceptions.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Exception classes for pyRserve
 3 | """
 4 | 
 5 | 
 6 | class PyRserveError(Exception):
 7 |     pass
 8 | 
 9 | 
10 | class REvalError(PyRserveError):
11 |     """Indicates an error raised by R itself (not by Rserve)"""
12 |     pass
13 | 
14 | 
15 | class RConnectionRefused(PyRserveError):
16 |     pass
17 | 
18 | 
19 | class RResponseError(PyRserveError):
20 |     pass
21 | 
22 | 
23 | class RSerializationError(PyRserveError):
24 |     pass
25 | 
26 | 
27 | class PyRserveClosed(PyRserveError):
28 |     pass
29 | 
30 | 
31 | class EndOfDataError(PyRserveError):
32 |     pass
33 | 
34 | 
35 | class RParserError(PyRserveError):
36 |     pass
37 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |     - "2.7"
 4 |     - "3.6"
 5 |     - "3.7"
 6 |     - "3.8"
 7 |     - "3.9"
 8 | before_install:
 9 |     - "sudo apt-get update -qq"
10 |     - "sudo apt-get install -y gfortran"
11 |     - "curl -O https://cran.r-project.org/src/base/R-4/R-4.1.0.tar.gz"
12 |     - "tar -xzf R-4.1.0.tar.gz"
13 |     - "(cd R-4.1.0; ./configure --with-x=no --prefix=/usr --enable-R-shlib && make && sudo make install)"
14 |     - "curl -O http://www.rforge.net/Rserve/snapshot/Rserve_1.8-8.tar.gz"
15 |     - "sudo PKG_CPPFLAGS=-DNODAEMON  R CMD INSTALL Rserve_1.8-8.tar.gz"
16 | install: "pip install -r requirements.txt"
17 | script:
18 |     - "py.test testing"
19 | 


--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
 1 | .. pyRserve documentation master file
 2 | 
 3 | Welcome to pyRserve's documentation!
 4 | ====================================
 5 | 
 6 | pyRserve is a library for connecting Python to an R process running under Rserve.
 7 | Through such a connection, variables in R can be read and set from Python,
 8 | and R functions can be called remotely.
 9 | 
10 | This documentation applies to pyRserve release V |release|
11 | 
12 | Contents:
13 | 
14 | .. toctree::
15 |    :maxdepth: 3
16 | 
17 |    intro
18 |    installation
19 |    manual
20 |    changelog
21 |    license
22 | 
23 | Indices and tables
24 | ==================
25 | 
26 | * :ref:`genindex`
27 | * :ref:`modindex`
28 | * :ref:`search`
29 | 


--------------------------------------------------------------------------------
/.github/workflows/build-and-test.yml:
--------------------------------------------------------------------------------
 1 | name: build-and-test
 2 | on: [push]
 3 | jobs:
 4 |   build-and-test:
 5 |     runs-on: ubuntu-latest
 6 |     strategy:
 7 |       matrix:
 8 |         python-version: ["3.9", "3.10", "3.11"]
 9 | 
10 |     services:
11 |       service-rserve:
12 |         image: ghcr.io/ralhei/r4.2.2-rserve1.8.12:latest
13 |         ports:
14 |           - 6311:6311
15 |         options: --tty
16 | 
17 |     steps:
18 |       - uses: actions/checkout@v3
19 |       - name: Set up Python ${{ matrix.python-version }}
20 |         uses: actions/setup-python@v4
21 |         with:
22 |           python-version: ${{ matrix.python-version }}
23 |           cache: 'pip' # caching pip dependencies
24 |       - name: Install dependencies
25 |         run: pip install -r requirements.txt -r requirements_dev.txt
26 |       - name: Run pytest
27 |         run: pytest testing
28 | 


--------------------------------------------------------------------------------
/testing/binCodes.py:
--------------------------------------------------------------------------------
 1 | # This file is just for development purposes
 2 | # It demonstrates how binary commands are composed for various purposes
 3 | # flake8: noqa
 4 | 
 5 | # A bunch of binary commands:
 6 | 
 7 | # Make an evaluation call to Rserve, giving a simple string with a number:
 8 | #    CMD_EVAL     MSG_SIZE       2nd part of header   DT_STRING+len   data
 9 | c1 = '\3\0\0\0' + '\x08\0\0\0' + '\0\0\0\0\0\0\0\0' + '\4\4\0\0'    + '1\0\0\0'
10 | # -> evaluates to: numpy.array([1.0])
11 | 
12 | 
13 | # Make a CMD_setSEXP call to Rserve, providing a variable name and a simple expression (array):
14 | #     CMD_setSEXP   MSG_SIZE        2nd part of header   DT_STRING+len   str-data
15 | c2 = '\x20\0\0\0' + '\x18\0\0\0' + '\0\0\0\0\0\0\0\0' + '\4\4\0\0'    + 'abc\0' + \
16 |      '\0a\x0c\x00\x00\x20\x08\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00'  # <-  array expression
17 | 
18 | # define a function in R
19 | # myfunc <- function(y1, y2) { tst <- y1 + y2; tst }
20 | 


--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
 1 | 
 2 | Install R:
 3 | ==========
 4 | Install some system packages first, e.g. on RHEL:
 5 |     yum install xz-devel pcre pcre-devel libcurl-devel *gfortran* zlib* bzip2-* png-devel jpeg-devel
 6 | 
 7 | Compilation of R:
 8 | 1. curl -LO https://cran.r-project.org/src/base/R-4/R-4.2.2.tar.gz
 9 | 2. tar -xf R-4.2.2.tar.gz
10 | 3. $ cd R-4.2.2
11 |    $ ./configure --enable-R-shlib --with-x=no
12 |    $ make
13 |    $ make install
14 | 
15 | 
16 | Install Rserve:
17 | ===============
18 | 1. curl -LO http://www.rforge.net/Rserve/snapshot/Rserve_1.8-12.tar.gz
19 | 2. R CMD INSTALL Rserve_1.8-12.tar.gz
20 | 
21 | This also compiles and installs the debug version of Rserve.
22 | 
23 | To start Rserve, type:
24 | 
25 |     $ R CMD Rserve
26 | 
27 | To start Rserve in debug mode type:
28 | 
29 |     $ R CMD Rserve.dbg
30 | 
31 | Rserve is now ready to accept connections from a client.
32 | 
33 | 
34 | Install pyRserve
35 | ================
36 | 
37 | $ pip install pyRserve
38 | 


--------------------------------------------------------------------------------
/testing/testtools.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Some helper functions for unit testing
 3 | """
 4 | from numpy import ndarray, float32, float64, complex64, complex128
 5 | 
 6 | 
 7 | def compareArrays(arr1, arr2):
 8 |     """Compare two (possibly nested) arrays"""
 9 |     def _compareArrays(xarr1, xarr2):
10 |         assert xarr1.shape == xarr2.shape
11 |         for idx in range(len(xarr1)):
12 |             if isinstance(xarr1[idx], ndarray):
13 |                 _compareArrays(xarr1[idx], xarr2[idx])
14 |             else:
15 |                 if type(xarr1[idx]) in [float, float32, float64, complex,
16 |                                         complex64, complex128]:
17 |                     # make a comparison which works for floats and complex
18 |                     # numbers
19 |                     assert abs(xarr1[idx] - xarr2[idx]) < 0.000001
20 |                 else:
21 |                     assert xarr1[idx] == xarr2[idx]
22 |     try:
23 |         _compareArrays(arr1, arr2)
24 |     except TypeError:
25 |         return False
26 |     return True
27 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | DATE    = $(shell date +"%F")
 2 | 
 3 | all:
 4 | 
 5 | docs:
 6 | 	(cd doc; make html)
 7 | 	(cd doc/html; zip -r ../pyRserve.html.zip *.html objects.inv searchindex.js _static/* )
 8 | 	echo
 9 | 	echo "Sphinx documentation has been created in doc/html/index.html"
10 | 
11 | clean:
12 | 	find . -name '*.pyc' -exec rm '{}' \;
13 | 	find . -name '*~'    -exec rm '{}' \;
14 | 	find . -name '*.bak' -exec rm '{}' \;
15 | 	find . -name '*.log' -exec rm '{}' \;
16 | 	find . -name '.coverage' -exec rm '{}' \;
17 | 	rm -rf build dist *.egg-info MANIFEST.in
18 | 
19 | upload-prep: docs
20 | 	rm -f dist/*
21 | 	python setup.py sdist bdist_wheel
22 | 	twine check dist/*
23 | 
24 | upload: upload-prep
25 | 	twine upload dist/*
26 | 
27 | upload-testpypi: upload-prep
28 | 	twine upload -r testpypi dist/*
29 | 
30 | backup: clean _backup
31 | 
32 | _backup:
33 | 	DIR=`pwd`; bDIR=`basename $$DIR`; cd ..; \
34 | 	tar -czf $${bDIR}_$(DATE).tgz -X $$bDIR/TAR_EXCLUDELIST $$bDIR ; \
35 | 	echo "Created backup ../$${bDIR}_$(DATE).tgz"
36 | 
37 | test:
38 | 	pytest testing
39 | 
40 | coverage:
41 | 	rm -f pyRserve/binaryRExpressions.py*
42 | 	coverage run --source pyRserve -m pytest testing && coverage report --show-missing
43 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | This is the MIT license  (see: http://www.opensource.org/licenses/mit-license.php)
 2 | 
 3 | Copyright (c) 2009 Ralph Heinkel (rh [at] ralph-heinkel.com)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/doc/license.rst:
--------------------------------------------------------------------------------
 1 | This is the MIT license
 2 | =======================
 3 | (see: http://www.opensource.org/licenses/mit-license.php)
 4 | 
 5 | Copyright (c) 2009, 2010, 2011 Ralph Heinkel (rh [at] ralph-heinkel.com)
 6 | 
 7 | Permission is hereby granted, free of charge, to any person obtaining a copy
 8 | of this software and associated documentation files (the "Software"), to deal
 9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be included in all
15 | copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 | 


--------------------------------------------------------------------------------
/dockerfiles/R-4.2.2-Rserve-1.8.12-alpine/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM alpine:latest
 2 | 
 3 | ENV LC_ALL en_US.UTF-8
 4 | ENV LANG en_US.UTF-8
 5 | 
 6 | ENV R_VERSION 4.2.2
 7 | ENV R_SOURCE /usr/src
 8 | 
 9 | ENV BUILD_DEPS \
10 |     wget \
11 |     perl \
12 |     tar
13 | 
14 | ENV PERSISTENT_DEPS \
15 |     gcc \
16 |     g++ \
17 |     gfortran \
18 |     icu-dev \
19 |     libjpeg-turbo \
20 |     libpng-dev \
21 |     make \
22 |     openblas-dev \
23 |     pcre2-dev  \
24 |     readline-dev \
25 |     xz-dev \
26 |     zlib-dev \
27 |     bzip2-dev \
28 |     curl-dev
29 | 
30 | RUN apk upgrade --update && \
31 |     apk add --no-cache --virtual .build-deps $BUILD_DEPS && \
32 |     apk add --no-cache --virtual .persistent-deps $PERSISTENT_DEPS
33 | RUN mkdir -p $R_SOURCE && cd $R_SOURCE && \
34 |     wget https://cran.r-project.org/src/base/R-4/R-${R_VERSION}.tar.gz && \
35 |     tar -xf R-${R_VERSION}.tar.gz && \
36 |     cd R-${R_VERSION} && \
37 |     ./configure --prefix=/usr/local --without-x --enable-R-shlib && \
38 |     make && make install && \
39 |     wget https://www.rforge.net/Rserve/snapshot/Rserve_1.8-12.tar.gz && \
40 |     PKG_CPPFLAGS=-DNODAEMON  R CMD INSTALL Rserve_1.8-12.tar.gz && \
41 |     apk del .build-deps && \
42 |     cd / && \
43 |     rm -rf $R_SOURCE
44 | RUN echo -e "remote enable\noob enable\neval library(Rserve)" > /etc/Rserv.conf
45 | 
46 | CMD ["R", "CMD", "Rserve"]
47 | 


--------------------------------------------------------------------------------
/testing/test_taggedContainers.py:
--------------------------------------------------------------------------------
 1 | """
 2 | unittests for classes from taggedContainers
 3 | """
 4 | from pyRserve.taggedContainers import TaggedList
 5 | 
 6 | 
 7 | def test_TaggedList_init_emtpy():
 8 |     t = TaggedList()
 9 |     assert t.astuples() == []
10 |     assert len(t) == 0
11 | 
12 | 
13 | def test_TaggedList_init_one_value():
14 |     t = TaggedList([11])
15 |     assert t.astuples() == [(None, 11)]
16 |     assert len(t) == 1
17 |     assert t[0] == 11
18 | 
19 | 
20 | def test_TaggedList_init_one_value_with_key():
21 |     t = TaggedList([('v1', 11)])
22 |     assert t.astuples() == [('v1', 11)]
23 |     assert len(t) == 1
24 |     assert t[0] == 11
25 |     assert t['v1'] == 11
26 | 
27 | 
28 | def test_TaggedList_init_two_values_second_with_key():
29 |     t = TaggedList([11, ('v2', 22)])
30 |     assert t.astuples() == [(None, 11), ('v2', 22)]
31 |     assert len(t) == 2
32 |     assert t[0] == 11
33 |     assert t[1] == 22
34 |     assert t['v2'] == 22
35 | 
36 | 
37 | def test_TaggedList_append():
38 |     t = TaggedList([11, ('v2', 22)])
39 |     t.append(33)
40 |     assert len(t) == 3
41 |     assert t.values == [11, 22, 33]
42 | 
43 | 
44 | def test_TaggedList_append_with_key():
45 |     t = TaggedList([11, ('v2', 22)])
46 |     t.append(v3=33)
47 |     assert len(t) == 3
48 |     assert t.values == [11, 22, 33]
49 |     assert t['v3'] == 33
50 | 
51 | 
52 | def test_TaggedList_insert():
53 |     t = TaggedList([11, ('v2', 22)])
54 |     t.insert(0, 1)
55 |     assert len(t) == 3
56 |     assert t.values == [1, 11, 22]
57 | 
58 | 
59 | def test_TaggedList_insert_with_key():
60 |     t = TaggedList([11, ('v2', 22)])
61 |     t.insert(0, x=1)
62 |     assert len(t) == 3
63 |     assert t.values == [1, 11, 22]
64 |     assert t[0] == t['x'] == 1
65 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from setuptools import setup
 3 | 
 4 | PACKAGE_NAME = "pyRserve"
 5 | 
 6 | __version__ = open(os.path.join('pyRserve', 'version.txt')).readline().strip()
 7 | requirements = open('requirements.txt').read().splitlines()
 8 | requirements_testing = open('requirements_dev.txt').read().splitlines()
 9 | 
10 | # Get long_description from README.rst:
11 | here = os.path.dirname(os.path.abspath(__file__))
12 | with open('README.rst') as fp:
13 |     long_description = fp.read()
14 | 
15 | setup(
16 |     name=PACKAGE_NAME,
17 |     version=__version__,
18 |     description='A Python client to remotely access the R statistic package '
19 |                 'via network',
20 |     long_description=long_description,
21 |     long_description_content_type='text/x-rst',
22 |     author='Ralph Heinkel',
23 |     author_email='rh@ralph-heinkel.com',
24 |     url='https://github.com/ralhei/pyRserve',
25 |     project_urls={
26 |         'Documentation': 'https://pyrserve.readthedocs.io/',
27 |         'Changelog': 'https://pyrserve.readthedocs.io/en/latest/changelog.html',
28 |         'PyPI': 'https://pypi.org/project/pyRserve/',
29 |         'Tracker': 'https://github.com/ralhei/pyRserve/issues',
30 |     },
31 |     keywords='R Rserve',
32 |     packages=['pyRserve'],
33 |     include_package_data=True,
34 |     package_data={
35 |         'pyRserve': ['version.txt'],
36 |     },
37 |     data_files=[('.', ['requirements.txt', 'requirements_dev.txt'])],
38 |     python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4',
39 |     install_requires=requirements,
40 |     extras_require={
41 |         'testing': requirements_testing
42 |     },
43 |     license='MIT license',
44 |     platforms=['unix', 'linux', 'cygwin', 'win32'],
45 |     classifiers=[
46 |         'Development Status :: 5 - Production/Stable',
47 |         'Environment :: Console',
48 |         'License :: OSI Approved :: MIT License',
49 |         'Operating System :: POSIX',
50 |         'Operating System :: Microsoft :: Windows',
51 |         'Programming Language :: Python',
52 |         'Programming Language :: Python :: 2',
53 |         'Programming Language :: Python :: 3',
54 |         'Intended Audience :: Developers',
55 |         'Topic :: Software Development :: Libraries',
56 |         'Topic :: System :: Networking',
57 |         'Topic :: Scientific/Engineering :: Information Analysis',
58 |         'Topic :: Scientific/Engineering :: Mathematics',
59 |     ],
60 | )
61 | 


--------------------------------------------------------------------------------
/pyRserve/misc.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | # global variable to indicate whether this is Python3 or not:
 4 | PY3 = sys.version_info[0] == 3
 5 | 
 6 | 
 7 | class FunctionMapper(object):
 8 |     """
 9 |     This class is used in Lexer, Parser, and Serializer to map IDs
10 |     to functions"""
11 |     def __init__(self, adict):
12 |         self.adict = adict
13 | 
14 |     def __call__(self, *args):
15 |         def wrap(func):
16 |             for a in args:
17 |                 self.adict[a] = func
18 |             return func
19 |         return wrap
20 | 
21 | 
22 | def hexString(aString):
23 |     """
24 |     Convert a binary string into its hexadecimal representation,
25 |     like '\x00\x01...'
26 |     """
27 |     if PY3:
28 |         # in Py3 iterating over a byte-sequence directly provides the
29 |         # numeric values of the bytes  ...
30 |         return ''.join([r'\x%02x' % c for c in aString])
31 |     else:
32 |         # ... while in Py2 we need to use ord() to convert chars to
33 |         # their numeric values:
34 |         return ''.join([r'\x%02x' % ord(c) for c in aString])
35 | 
36 | 
37 | def byteEncode(aString, encoding='utf-8'):
38 |     # check for __name__ not to get faked by Python2.x!
39 |     if PY3 and type(aString).__name__ != 'bytes':
40 |         return bytes(aString, encoding=encoding)
41 |     else:
42 |         if type(aString).__name__.startswith('unicode'):
43 |             return aString.encode('utf-8')
44 |         else:
45 |             return aString
46 | 
47 | 
48 | def stringEncode(byteData, encoding='utf-8'):
49 |     # check for __name__ not to get faked by Python2.x!
50 |     if PY3 and type(byteData).__name__ == 'bytes':
51 |         if byteData == b'\xff':
52 |             return None
53 |         # got a real bytes object, must be python3 !
54 |         return byteData.decode(encoding=encoding)
55 |     else:
56 |         # in py2.x there is no real byte-data, it is a string already
57 |         return byteData
58 | 
59 | 
60 | def padLen4(aString):
61 |     """
62 |     Calculate how many additional bytes a given string needs to have a length
63 |     of a multiple of 4. A zero-length array is considered a multiple of 4.
64 |     """
65 |     mod = divmod(len(aString), 4)[1]
66 |     return 4-mod if mod else 0
67 | 
68 | 
69 | def string2bytesPad4(aString):
70 |     """
71 |     Return a given string converted into bytes, padded with zeros at the end
72 |     to make its length be a multiple of 4.
73 |     A zero-length string is considered a multiple of 4.
74 |     """
75 |     byteString = byteEncode(aString) + b'\0'
76 |     return byteString + padLen4(byteString) * b'\0'
77 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | PAPER         =
 8 | BUILDDIR      = .
 9 | 
10 | # Internal variables.
11 | PAPEROPT_a4     = -D latex_paper_size=a4
12 | PAPEROPT_letter = -D latex_paper_size=letter
13 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
14 | 
15 | .PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
16 | 
17 | help:
18 | 	@echo "Please use \`make <target>' where <target> is one of"
19 | 	@echo "  html      to make standalone HTML files"
20 | 	@echo "  dirhtml   to make HTML files named index.html in directories"
21 | 	@echo "  pickle    to make pickle files"
22 | 	@echo "  json      to make JSON files"
23 | 	@echo "  htmlhelp  to make HTML files and a HTML help project"
24 | 	@echo "  qthelp    to make HTML files and a qthelp project"
25 | 	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
26 | 	@echo "  changes   to make an overview of all changed/added/deprecated items"
27 | 	@echo "  linkcheck to check all external links for integrity"
28 | 	@echo "  doctest   to run all doctests embedded in the documentation (if enabled)"
29 | 
30 | clean:
31 | 	-rm -rf $(BUILDDIR)/*
32 | 
33 | html-zip: html
34 | 	(cd html; zip ../html.zip *.html _static/* )
35 | 
36 | html:
37 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
38 | 	@echo
39 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
40 | 
41 | dirhtml:
42 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
43 | 	@echo
44 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
45 | 
46 | pickle:
47 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
48 | 	@echo
49 | 	@echo "Build finished; now you can process the pickle files."
50 | 
51 | json:
52 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
53 | 	@echo
54 | 	@echo "Build finished; now you can process the JSON files."
55 | 
56 | htmlhelp:
57 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
58 | 	@echo
59 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
60 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
61 | 
62 | qthelp:
63 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
64 | 	@echo
65 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
66 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
67 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pyRserve.qhcp"
68 | 	@echo "To view the help file:"
69 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pyRserve.qhc"
70 | 
71 | latex:
72 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
73 | 	@echo
74 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
75 | 	@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
76 | 	      "run these through (pdf)latex."
77 | 
78 | changes:
79 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
80 | 	@echo
81 | 	@echo "The overview file is in $(BUILDDIR)/changes."
82 | 
83 | linkcheck:
84 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
85 | 	@echo
86 | 	@echo "Link check complete; look for any errors in the above output " \
87 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
88 | 
89 | doctest:
90 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
91 | 	@echo "Testing of doctests in the sources finished, look at the " \
92 | 	      "results in $(BUILDDIR)/doctest/output.txt."
93 | 


--------------------------------------------------------------------------------
/doc/make.bat:
--------------------------------------------------------------------------------
  1 | @ECHO OFF
  2 | 
  3 | REM Command file for Sphinx documentation
  4 | 
  5 | set SPHINXBUILD=sphinx-build
  6 | set BUILDDIR=_build
  7 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
  8 | if NOT "%PAPER%" == "" (
  9 | 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 10 | )
 11 | 
 12 | if "%1" == "" goto help
 13 | 
 14 | if "%1" == "help" (
 15 | 	:help
 16 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
 17 | 	echo.  html      to make standalone HTML files
 18 | 	echo.  dirhtml   to make HTML files named index.html in directories
 19 | 	echo.  pickle    to make pickle files
 20 | 	echo.  json      to make JSON files
 21 | 	echo.  htmlhelp  to make HTML files and a HTML help project
 22 | 	echo.  qthelp    to make HTML files and a qthelp project
 23 | 	echo.  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter
 24 | 	echo.  changes   to make an overview over all changed/added/deprecated items
 25 | 	echo.  linkcheck to check all external links for integrity
 26 | 	echo.  doctest   to run all doctests embedded in the documentation if enabled
 27 | 	goto end
 28 | )
 29 | 
 30 | if "%1" == "clean" (
 31 | 	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
 32 | 	del /q /s %BUILDDIR%\*
 33 | 	goto end
 34 | )
 35 | 
 36 | if "%1" == "html" (
 37 | 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
 38 | 	echo.
 39 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
 40 | 	goto end
 41 | )
 42 | 
 43 | if "%1" == "dirhtml" (
 44 | 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
 45 | 	echo.
 46 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
 47 | 	goto end
 48 | )
 49 | 
 50 | if "%1" == "pickle" (
 51 | 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
 52 | 	echo.
 53 | 	echo.Build finished; now you can process the pickle files.
 54 | 	goto end
 55 | )
 56 | 
 57 | if "%1" == "json" (
 58 | 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
 59 | 	echo.
 60 | 	echo.Build finished; now you can process the JSON files.
 61 | 	goto end
 62 | )
 63 | 
 64 | if "%1" == "htmlhelp" (
 65 | 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
 66 | 	echo.
 67 | 	echo.Build finished; now you can run HTML Help Workshop with the ^
 68 | .hhp project file in %BUILDDIR%/htmlhelp.
 69 | 	goto end
 70 | )
 71 | 
 72 | if "%1" == "qthelp" (
 73 | 	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
 74 | 	echo.
 75 | 	echo.Build finished; now you can run "qcollectiongenerator" with the ^
 76 | .qhcp project file in %BUILDDIR%/qthelp, like this:
 77 | 	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pyRserve.qhcp
 78 | 	echo.To view the help file:
 79 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pyRserve.ghc
 80 | 	goto end
 81 | )
 82 | 
 83 | if "%1" == "latex" (
 84 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
 85 | 	echo.
 86 | 	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
 87 | 	goto end
 88 | )
 89 | 
 90 | if "%1" == "changes" (
 91 | 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
 92 | 	echo.
 93 | 	echo.The overview file is in %BUILDDIR%/changes.
 94 | 	goto end
 95 | )
 96 | 
 97 | if "%1" == "linkcheck" (
 98 | 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
 99 | 	echo.
100 | 	echo.Link check complete; look for any errors in the above output ^
101 | or in %BUILDDIR%/linkcheck/output.txt.
102 | 	goto end
103 | )
104 | 
105 | if "%1" == "doctest" (
106 | 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
107 | 	echo.
108 | 	echo.Testing of doctests in the sources finished, look at the ^
109 | results in %BUILDDIR%/doctest/output.txt.
110 | 	goto end
111 | )
112 | 
113 | :end
114 | 


--------------------------------------------------------------------------------
/doc/installation.rst:
--------------------------------------------------------------------------------
 1 | Installation
 2 | ============
 3 | 
 4 | Before pyRserve can be used, R and Rserve have to be installed properly.
 5 | Installation instructions for both packages are available on their corresponding
 6 | websites at `<http://www.r-project.org/>`_ and `<http://www.rforge.net/Rserve/>`_
 7 | 
 8 | 
 9 | Installing R from sources
10 | -------------------------
11 | 
12 | For R being able to run Rserve properly it has to be installed with the
13 | ``--enable-R-shlib`` option.
14 | 
15 | The following commands show how to do this for the sources. Make sure you have a
16 | fortran compiler installed, otherwise installation will not be possible.
17 | 
18 | .. NOTE::
19 |     You need a couple of Linux packages and libraries to be installed, like a fortran
20 |     compiler and readline/bzip2/... development libraries. On OpenSuse these can be installed
21 |     with ``zypper install -y gcc-fortran readline-devel libbz2-devel xz-devel pcre2-devel libcurl-devel``
22 |     Other Linux distributions provide packages with similar names.
23 | 
24 | Installing R then looks like::
25 | 
26 |   R_VER=4.3.1   # possibly find the latest version, or use the version you require
27 |   curl -LO https://cran.r-project.org/src/base/R-4/R-${R_VER}.tar.gz
28 |   tar -xzf R-${R_VER}.tar.gz
29 |   cd R-${R_VER}
30 |   ./configure --enable-R-shlib -with-x=no
31 |   make
32 |   make install
33 | 
34 | For Windows it might be just enough to install a prebuilt R package. The same
35 | might be true for some Linux distributions, just make sure to install a
36 | version which also contains all headers necessary for compiling Rserve in the
37 | next step.
38 | 
39 | Installing Rserve
40 | ------------------
41 | 
42 | Download the tar file and install it by running from your command line::
43 | 
44 |   curl -LO http://www.rforge.net/Rserve/snapshot/Rserve_1.8-12.tar.gz
45 |   R CMD INSTALL Rserve_1.8-12.tar.gz
46 | 
47 | Older versions of Rserve might also work, the earliest functional version however
48 | seems to be 0.6.6.
49 | 
50 | .. NOTE::
51 |    Rserve usually daemonizes itself after starting from the command
52 |    line. If you want to prevent this from happening (e.g. because you would
53 |    like to control Rserve by a process management tool like ``supervisord``
54 |    or want to control Rserve running the unittests with ``pytest --run-rserve``)
55 |    then Rserve has to be installed with the special ``-DNODAEMON`` compiler flag::
56 | 
57 |      PKG_CPPFLAGS=-DNODAEMON  R CMD INSTALL Rserve_1.8-12.tar.gz
58 | 
59 | 
60 | Installing pyRserve
61 | -------------------
62 | 
63 | From your unix/windows command line run::
64 | 
65 |   pip install pyRserve
66 | 
67 | If you want to develop or test locally, then also install extra packages for testing::
68 | 
69 |     pip install pyRserve[testing]
70 | 
71 | Currently supported Python versions are 3.6 to 3.11. It might still run on Python 2.7
72 | but this is not supported anymore and will be deprecated in future versions.
73 | 
74 | In the next section you'll find instructions how to use everything together.
75 | 
76 | 
77 | Running unittests
78 | -----------------
79 | After installation is completed - and for those who want to contribute to pyRserve's development -
80 | unittests can be run straight from the command line. Remember to have pyRserve installed with
81 | the testing dependencies, as described in the previous section.
82 | 
83 | In the current setup pytest is able to automatically fire up an Rserve-process which needs to be available
84 | for the unittests to run against. This is achieved by calling::
85 | 
86 |     $ pytest testing --run-rserve
87 |     =========================== test session starts ===========================
88 |     platform linux -- Python 3.11.3, pytest-7.4.0, pluggy-1.2.0
89 |     rootdir: /home/user/pyRserve
90 |     collected 50 items
91 | 
92 |     testing/test_rparser.py ..........................................                                                [ 84%]
93 |     testing/test_taggedContainers.py ........                                                                         [100%]
94 |     =========================== 50 passed in 4.19s ============================
95 | 
96 | In case you have Rserve already running on localhost, it is sufficient to call ``pytest testing``.
97 | 


--------------------------------------------------------------------------------
/testing/conftest.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Configurations and fixtures for testing pyRserve with pytest.
  3 | """
  4 | import os
  5 | import time
  6 | import shutil
  7 | import socket
  8 | import subprocess
  9 | 
 10 | import pytest
 11 | 
 12 | import pyRserve.rexceptions
 13 | 
 14 | HERE_PATH = os.path.dirname(os.path.realpath(__file__))
 15 | 
 16 | # Use different port from default to avoid clashes with regular Rserve
 17 | # running on same machine:
 18 | EXTRA_RPORT = 6355
 19 | 
 20 | 
 21 | def start_Rserve(port):
 22 |     """Start an Rserve process for unittesting"""
 23 |     # First check that 'R' is in PATH:
 24 |     if not shutil.which('R'):
 25 |         pytest.exit("Cannot start R interpreter, R executable not in PATH", returncode=1)
 26 | 
 27 |     rProc = subprocess.Popen(
 28 |         ['R', 'CMD', 'Rserve', '--no-save', '--RS-conf',
 29 |          os.path.join(HERE_PATH, 'rserve-test.conf'),
 30 |          '--RS-port', str(port)],
 31 |         stdout=open('/dev/null'), stderr=subprocess.PIPE)
 32 |     # wait a moment until Rserve starts listening on EXTRA_RPORT
 33 |     time.sleep(0.6)
 34 |     if rProc.poll():
 35 |         # process has already terminated, so provide its stderr to the user:
 36 |         raise RuntimeError('Rserve has terminated prematurely with the '
 37 |                            'following message:  %s' % rProc.stderr.read())
 38 | 
 39 |     # store original socket timeout and set timeout to new value during startup
 40 |     # of Rserve:
 41 |     defaultTimeout = socket.getdefaulttimeout()
 42 |     socket.setdefaulttimeout(1)
 43 | 
 44 |     rserv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 45 |     cnt = 0
 46 |     # give it a maximum of 10 tries with some sleep in between to wait for
 47 |     # Rserve to come into action!
 48 |     while cnt < 10:
 49 |         try:
 50 |             # open a socket connection to Rserve
 51 |             rserv.connect(('', port))
 52 |         except socket.error:
 53 |             time.sleep(0.3)
 54 |             cnt += 1
 55 |         else:
 56 |             # got a connection! Jump out of the loop
 57 |             break
 58 |     else:
 59 |         # after trying 10 times we still got no connection to Rserv - something
 60 |         # must be wrong.
 61 |         raise RuntimeError('Could not connect to Rserv over the network')
 62 | 
 63 |     # set back original default timeout value:
 64 |     socket.setdefaulttimeout(defaultTimeout)
 65 | 
 66 |     # make a simple test that Rserve really answers correctly by looking at the
 67 |     # first few bytes:
 68 |     hdr = rserv.recv(1024)
 69 |     rserv.close()
 70 |     if not hdr.startswith(b'Rsrv01'):
 71 |         rProc.terminate()
 72 |         raise RuntimeError(
 73 |             'received wrong header information from socket (was: "%s")'
 74 |             % str(hdr[:10])
 75 |         )
 76 |     return rProc
 77 | 
 78 | 
 79 | def pytest_addoption(parser):
 80 |     """Let the developer control whether or not to start extra Rserve process."""
 81 |     parser.addoption(
 82 |         "--run-rserve", action="store_true", default=False,
 83 |         help="Run separate Rserve process for unit testing on port %d" % EXTRA_RPORT
 84 |     )
 85 | 
 86 | 
 87 | @pytest.fixture(scope="session")
 88 | def run_rserve(request):
 89 |     """Fixture providing given command line option."""
 90 |     return request.config.getoption("--run-rserve")
 91 | 
 92 | 
 93 | @pytest.fixture(scope="module")
 94 | def conn(run_rserve):
 95 |     """Fixture providing a connection to a newly started Rserve process."""
 96 |     if run_rserve:
 97 |         # Fire up separate Rserve process:
 98 |         port = EXTRA_RPORT
 99 |         r_proc = start_Rserve(port)
100 |     else:
101 |         port = pyRserve.rconn.RSERVEPORT
102 |         r_proc = None
103 | 
104 |     try:
105 |         conn = pyRserve.connect(port=port)
106 |     except pyRserve.rexceptions.RConnectionRefused:
107 |         try:
108 |             r_proc and r_proc.terminate()
109 |         except subprocess.SubprocessError:
110 |             pass
111 |         pytest.exit('Error: Cannot reach running Rserve process.\nEither start'
112 |                     'one manually or run pytest with option --run-rserve',
113 |                     returncode=1)
114 |         raise
115 | 
116 |     # Create an 'ident' function in R which just returns its argument.
117 |     # Needed for testing below.
118 |     conn.r('ident <- function(v) { v }')
119 | 
120 |     yield conn
121 | 
122 |     conn.close()
123 |     r_proc and r_proc.terminate()
124 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | Overview
  2 | =========
  3 | 
  4 | What pyRserve does
  5 | ------------------
  6 | 
  7 | pyRserve is a library for connecting Python to `R  <http://www.r-project.org/>`_
  8 | (an excellent statistics package). Running `Rserve <http://www.rforge.net/Rserve/>`_
  9 | in R attaches the R-interpreter to a network socket, waiting for pyRserve to connect to it.
 10 | Through such a connection, variables can be read and set in R from Python,
 11 | and also R-functions can be called remotely.
 12 | 
 13 | In contrast to `rpy or rpy2 <https://rpy2.github.io/>`_ the R process does not have to
 14 | run on the same machine, it can run on a remote machine and all variable access and
 15 | function calls will be delegated there through the network.
 16 | 
 17 | Furthermore - and this makes everything feel very pythonic - all data structures will
 18 | automatically be converted from native R to native Python and numpy types and back.
 19 | 
 20 | 
 21 | Supported platforms
 22 | ----------------------------
 23 | 
 24 | This package has been mainly developed under Linux, and hence should run on all standard unix
 25 | platforms, as well as on MacOS. pyRserve has also been successfully used on Windows machines.
 26 | Unittests have been used on the Linux and MacOS side, however they might just work fine for Windows.
 27 | 
 28 | It has been tested to work with Python 2.7.x, 3.6 to 3.9.
 29 | 
 30 | The latest development has been tested with some previous and current versions of R and Rserve.
 31 | 
 32 | License
 33 | -------
 34 | pyRserve has been written by Ralph Heinkel `(ralph-heinkel.com) <https://ralph-heinkel.com/>`_ and is
 35 | released under `MIT license <https://github.com/ralhei/pyRserve/blob/master/LICENSE>`_.
 36 | 
 37 | 
 38 | Quick installation
 39 | -------------------
 40 | From your Unix/macOS/Windows command line run::
 41 | 
 42 |     pip install pyRserve
 43 | 
 44 | For a fully functional setup also R and Rserve have to be installed. See section
 45 | `installation <https://pyrserve.readthedocs.io/en/latest/installation.html>`_ in the pyRserve
 46 | documentation for instructions.
 47 | 
 48 | 
 49 | Quick usage
 50 | ------------
 51 | Open a **first shell** and start up the R server, by calling the module `Rserve` that provides
 52 | the actual network connectivity for R::
 53 | 
 54 |     $ R CMD Rserve
 55 | 
 56 | R (Rserve) will now listen on port 6311 (on localhost). Of course Rserve can be configured to
 57 | listen on an exposed port and hence will be accessible from remote hosts as well.
 58 | 
 59 | Open a **second shell**, start Python, import pyRserve, and initialize the connection to Rserve::
 60 | 
 61 |     $ python
 62 |     >>> import pyRserve
 63 |     >>> conn = pyRserve.connect()
 64 | 
 65 | The default connection will be done on ``localhost:6311``. Other hosts can be reached by
 66 | calling ``pyRserve.connect(host=..., port=...)`` as well.
 67 | 
 68 | 
 69 | The ``conn`` object provides a namespace called ``conn.r`` that directly maps all variables
 70 | and other global symbols (like functions etc) and hence makes them accessible from Python.
 71 | 
 72 | Now create a vector in R, access the vector from Python (will be converted into a numpy array), and
 73 | call the ``sum()``-function in R::
 74 | 
 75 |     >>> conn.r("vec <- c(1, 2, 4)")
 76 |     >>> conn.r.vec                 # access vector 'vec' as an attribute of 'conn.r'
 77 |     array([1., 2., 4.])
 78 |     >>> conn.r.sum(conn.r.vec)     # 'sum' is running in the R-interpreter, returning the result to Python
 79 |     7.0
 80 | 
 81 | The other way around also works::
 82 | 
 83 |     >>> conn.r.somenumber = 444         # set a variable called 'somenumber' in the R interpreter...
 84 |     >>> conn.r("somenumber * 2")        # ... and double the number
 85 |     888.0
 86 | 
 87 | 
 88 | Source code repository
 89 | ----------------------
 90 | pyRserve is now hosted on GitHub at `<https://github.com/ralhei/pyRserve>`_.
 91 | 
 92 | 
 93 | Documentation
 94 | ----------------
 95 | Documentation can be found at `<https://pyrserve.readthedocs.io>`_.
 96 | 
 97 | 
 98 | Support
 99 | --------
100 | For discussion of pyRserve and getting help please use the Google newsgroup
101 | available at `<http://groups.google.com/group/pyrserve>`_.
102 | 
103 | Issues with the code (like bugs, etc.) should be reported on GitHub at
104 | `<https://github.com/ralhei/pyRserve/issues>`_.
105 | 
106 | 
107 | Missing features
108 | -----------------
109 | * Authentication is implemented in Rserve but not yet in pyRserve
110 | * TLS encryption is not implemented yet in pyRserve. However using ssh tunnels
111 |   can solve security issues in the meantime (see documentation).
112 | 


--------------------------------------------------------------------------------
/doc/changelog.rst:
--------------------------------------------------------------------------------
  1 | Changelog
  2 | =========
  3 | * V 1.0.1 (2023-01-10)
  4 |     * Replace deprecated numpy.bool8 with numpy.bool_
  5 |     * Upgraded installation instructions in INSTALL file (more up-to-date R and Rserve)
  6 |     * Added Dockerfile for installing R and Rserve into container (used for github actions)
  7 |     * Enabled github actions for testing
  8 | 
  9 | * V 1.0.0 (2022-10-13)
 10 |     * Added docu for secure connection to Rserve via SSH tunnel
 11 |     * Updated meta data for pyRserve package
 12 |     * Added deprecation warning for Python 2
 13 |     * Corrected links to documentation
 14 | 
 15 | * V 1.0.0b3 (2021-06-25)
 16 |     Brought usage of pytest into the year 2021.
 17 | 
 18 |     * use fixtures for setting up an Rserve connection
 19 |     * put fixtures into conftest.py
 20 |     * added command line option for controlling rserve startup
 21 |     * properly named rserve-test.conf file.
 22 | 
 23 | * V 1.0.0b2 (2021-06-22)
 24 |     * Added missing version.txt file to wheel
 25 | 
 26 | * V 1.0.0b1 (2021-06-22)
 27 |     * Updated and cleanup documentation
 28 |     * Updated for more recent versions of R and Rserve
 29 |     * Added pre-commit hooks
 30 |     * Separated packages for dev/testing from production ones
 31 |     * Enhanced handling of NA values (thanks to Max Taggart)
 32 |     * Fixed numpy deprecation warnings (thanks to chaddcw)
 33 | 
 34 | * V 0.9.2 (2019-12-19)
 35 |     * Replaced deprecated numpy.fromstring with numpy.frombuffer
 36 |     * Flake8/pep8 cleanup
 37 |     * Refactored exception hierarchy
 38 | * V 0.9.1 (2017-05-19)
 39 |     * Removed a bug on some Python3 versions
 40 |     * Added proper support for S4 objects (`thanks to flying-sheep <https://github.com/flying-sheep>`_)
 41 |     * Added support for Python3 unittests on travis (`thanks to flying-sheep <https://github.com/flying-sheep>`_)
 42 | 
 43 | * V 0.9.0 (2016-04-11)
 44 |     * Full support for data objects larger than 2**24 bytes
 45 |     * Maximum size of message sent to Rserv can now be 2**64 bytes
 46 | 
 47 | * V 0.8.4 (2015-09-06)
 48 |     * fixed missing requirements.txt in MANIFEST.in
 49 |     * fixed bug in installer (setup.py)
 50 | 
 51 | * V 0.8.3 (2015-09-04)
 52 |     * Fixed exception catching for Python 3.4 (thanks to eeue56)
 53 |     * Some pep8 cleanups
 54 |     * explicit initialization of a number of instance variables in some classes
 55 |     * cleanup of import statements in test modules
 56 |     * Allow for message sizes greater than 4GB coming from R server
 57 | 
 58 | * V 0.8.2 (2015-07-11)
 59 |     * Added support for S4 objects (generated when e.g. creating a db object in R)
 60 | 
 61 | * V 0.8.1 (2014-07-17)
 62 |     * Fixed errors in the documentation, updated outdated parts
 63 |     * For unittesting run Rserve on different port from the default 6311 to
 64 |       avoid clashes with regular Rserve running on the same server
 65 |     * Fixed bug when passing a R-function as argument to a function call (e.g. to ``sapply``),
 66 |       added unittest for this
 67 | 
 68 | * V 0.8.0 (2014-06-26)
 69 |     * Added support for remote shutdown of Rserve (thanks to Uwe Schmitt)
 70 |     * Added support for Out-Of-Bounds (OOB) messages (thanks to Philipp alias flying-sheep)
 71 | 
 72 | * V 0.7.3 (2013-08-01)
 73 |     * Added missing MANIFEST.in to produce a complete tgz package (now includes docs etc)
 74 |     * Fixed bug on x64 machines when handling integers larger than 2**31
 75 | 
 76 | * V 0.7.2 (2013-07-19)
 77 |     * Tested with Python 3.3.x, R 3.0.1 and Rserve 1.7.0
 78 |     * Updated documentation accordingly
 79 |     * Code cleanup for pep8 (mostly)
 80 |     * Marked code as production stable
 81 | 
 82 | * V 0.7.1 (2013-06-23)
 83 |     * Added link to new GitHub repository
 84 |     * fixed URL to documentation
 85 | 
 86 | * V 0.7.0 (2013-02-25)
 87 |     * Fixed problem when receiving very large result sets from R (added support for XT_LARGE header flag)
 88 |     * Correctly translate multi-dimensional R arrays into numpy arrays (preserve axes the right way)
 89 |       Removed 'arrayOrder' keyword argument as a consequence.
 90 |       THIS IS AN API CHANGE - PLEASE CHECK AND ADAPT YOUR CODE, ESPECIALLY IF YOU USE MULTI-DIM ARRAYS!!
 91 |     * Support for conn.voidEval and conn.eval and new 'defaultVoid'-kw argument in the connect() function
 92 |     * Fixed bug in receiving multi-dimensional boolean (logical) arrays from R
 93 |     * Added support for multi-dimensional string arrays
 94 |     * added support for XT_VECTOR_EXPR type generated e.g. via "expression()" in R (will return a list
 95 |       with the expression content as list content)
 96 |     * windows users can now connect to localhost by pyRserve.connect() (omitting 'localhost' parameter)
 97 | 
 98 | * V 0.6.0 (2012-06-25)
 99 |     * support for Python3.x
100 |     * Python versions <= 2.5 no more supported (due to Py3 support)
101 |     * support for unicode strings in Python 2.x
102 |     * full support for complex numbers, partial support for 64bit integers and arrays
103 |     * support for Fortran-style ordering of numpy arrays
104 |     * elements of single-item arrays are now translated to native python data types
105 |     * much improved documentation
106 |     * better unit test coverage
107 |     * usage of the deprecated conn(<eval-string>) is no more possible
108 |     * pyRserve.rconnect() now also removed
109 | 
110 | * V 0.5.2 (2011-12-02)
111 |     * Fixed problem with 32bit integers being mistakenly rendered into 64bit integers on 64bit machines
112 | 
113 | * V 0.5.1 (2011-11-22)
114 |     * Fixed improper DeprecationWarning when evaluating R statements via conn.r(...)
115 | 
116 | * V 0.5 (2011-10-03)
117 |     * Renamed pyRserve.rconnect() to pyRserve.connect(). The former still works but shows a DeprecationWarning
118 |     * String evaluation should now only be executed on the namespace directly, not on the connection object anymore.
119 |       The latter still works but shows a DeprecationWarning.
120 |     * New kw argument `atomicArray=True` added to pyRserve.connect() for preventing single valued arrays from being
121 |       converted into atomic python data types.
122 | 
123 | * V 0.4 (2011-09-20)
124 |     * Added support for nested function calls. E.g. conn.r.t.test( ....) now works.
125 |     * Proper support for boolean variables and vectors
126 | 
127 | * V 0.3 (2010-06-08)
128 |     * Added conversion of more complex R structures into Python
129 |     * Updated documentation (installation, manual)
130 | 
131 | * V 0.2 (2010-03-19) Fixed rendering of TaggedArrays
132 | 
133 | * V 0.1 (2010-01-10) Initial version
134 | 


--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # pyRserve documentation build configuration file, created by
  4 | # sphinx-quickstart on Fri Oct 23 13:26:42 2009.
  5 | #
  6 | # This file is execfile()d with the current directory set to its containing dir.
  7 | #
  8 | # Note that not all possible configuration values are present in this
  9 | # autogenerated file.
 10 | #
 11 | # All configuration values have a default; values that are commented out
 12 | # serve to show the default.
 13 | 
 14 | import sys
 15 | sys.path.insert(0, '..')
 16 | from pyRserve import __version__  # noqa
 17 | 
 18 | # If extensions (or modules to document with autodoc) are in another directory,
 19 | # add these directories to sys.path here. If the directory is relative to the
 20 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 21 | # sys.path.append(os.path.abspath('.'))
 22 | 
 23 | # -- General configuration -----------------------------------------------------
 24 | 
 25 | # Add any Sphinx extension module names here, as strings. They can be extensions
 26 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 27 | extensions = ['sphinx.ext.todo']
 28 | 
 29 | # Add any paths that contain templates here, relative to this directory.
 30 | templates_path = ['_templates']
 31 | 
 32 | # The suffix of source filenames.
 33 | source_suffix = '.rst'
 34 | 
 35 | # The encoding of source files.
 36 | # source_encoding = 'utf-8'
 37 | 
 38 | # The master toctree document.
 39 | master_doc = 'index'
 40 | 
 41 | # General information about the project.
 42 | project = u'pyRserve'
 43 | copyright = u'2009-2021 Ralph Heinkel'
 44 | 
 45 | # The version info for the project you're documenting, acts as replacement for
 46 | # |version| and |release|, also used in various other places throughout the
 47 | # built documents.
 48 | #
 49 | # The short X.Y version.
 50 | version = __version__
 51 | # The full version, including alpha/beta/rc tags.
 52 | release = __version__
 53 | 
 54 | # The language for content autogenerated by Sphinx. Refer to documentation
 55 | # for a list of supported languages.
 56 | # language = None
 57 | 
 58 | # There are two options for replacing |today|: either, you set today to some
 59 | # non-false value, then it is used:
 60 | # today = ''
 61 | # Else, today_fmt is used as the format for a strftime call.
 62 | # today_fmt = '%B %d, %Y'
 63 | 
 64 | # List of documents that shouldn't be included in the build.
 65 | # unused_docs = []
 66 | 
 67 | # List of directories, relative to source directory, that shouldn't be searched
 68 | # for source files.
 69 | exclude_trees = ['_build']
 70 | 
 71 | # The reST default role (used for this markup: `text`) to use for all documents.
 72 | # default_role = None
 73 | 
 74 | # If true, '()' will be appended to :func: etc. cross-reference text.
 75 | # add_function_parentheses = True
 76 | 
 77 | # If true, the current module name will be prepended to all description
 78 | # unit titles (such as .. function::).
 79 | # add_module_names = True
 80 | 
 81 | # If true, sectionauthor and moduleauthor directives will be shown in the
 82 | # output. They are ignored by default.
 83 | # show_authors = False
 84 | 
 85 | # The name of the Pygments (syntax highlighting) style to use.
 86 | pygments_style = 'sphinx'
 87 | 
 88 | # A list of ignored prefixes for module index sorting.
 89 | # modindex_common_prefix = []
 90 | 
 91 | 
 92 | # -- Options for HTML output ---------------------------------------------------
 93 | 
 94 | # The theme to use for HTML and HTML Help pages.  Major themes that come with
 95 | # Sphinx are currently 'default' and 'sphinxdoc'.
 96 | html_theme = 'default'
 97 | 
 98 | # Theme options are theme-specific and customize the look and feel of a theme
 99 | # further.  For a list of options available for each theme, see the
100 | # documentation.
101 | # html_theme_options = {}
102 | 
103 | # Add any paths that contain custom themes here, relative to this directory.
104 | # html_theme_path = []
105 | 
106 | # The name for this set of Sphinx documents.  If None, it defaults to
107 | # "<project> v<release> documentation".
108 | # html_title = None
109 | 
110 | # A shorter title for the navigation bar.  Default is the same as html_title.
111 | # html_short_title = None
112 | 
113 | # The name of an image file (relative to this directory) to place at the top
114 | # of the sidebar.
115 | # html_logo = None
116 | 
117 | # The name of an image file (within the static path) to use as favicon of the
118 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
119 | # pixels large.
120 | # html_favicon = None
121 | 
122 | # Add any paths that contain custom static files (such as style sheets) here,
123 | # relative to this directory. They are copied after the builtin static files,
124 | # so a file named "default.css" will overwrite the builtin "default.css".
125 | # html_static_path = ['html/_static']
126 | 
127 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
128 | # using the given strftime format.
129 | # html_last_updated_fmt = '%b %d, %Y'
130 | 
131 | # If true, SmartyPants will be used to convert quotes and dashes to
132 | # typographically correct entities.
133 | # html_use_smartypants = True
134 | 
135 | # Custom sidebar templates, maps document names to template names.
136 | # html_sidebars = {}
137 | 
138 | # Additional templates that should be rendered to pages, maps page names to
139 | # template names.
140 | # html_additional_pages = {}
141 | 
142 | # If false, no module index is generated.
143 | # html_use_modindex = True
144 | 
145 | # If false, no index is generated.
146 | # html_use_index = True
147 | 
148 | # If true, the index is split into individual pages for each letter.
149 | # html_split_index = False
150 | 
151 | # If true, links to the reST sources are added to the pages.
152 | # html_show_sourcelink = True
153 | 
154 | # If true, an OpenSearch description file will be output, and all pages will
155 | # contain a <link> tag referring to it.  The value of this option must be the
156 | # base URL from which the finished HTML is served.
157 | # html_use_opensearch = ''
158 | 
159 | # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
160 | # html_file_suffix = ''
161 | 
162 | # Output file base name for HTML help builder.
163 | htmlhelp_basename = 'pyRservedoc'
164 | 
165 | 
166 | # -- Options for LaTeX output --------------------------------------------------
167 | 
168 | # The paper size ('letter' or 'a4').
169 | # latex_paper_size = 'letter'
170 | 
171 | # The font size ('10pt', '11pt' or '12pt').
172 | # latex_font_size = '10pt'
173 | 
174 | # Grouping the document tree into LaTeX files. List of tuples
175 | # (source start file, target name, title, author, documentclass [howto/manual]).
# One tuple per generated LaTeX file.
latex_documents = [
    (
        'index',                    # source start file
        'pyRserve.tex',             # target name
        u'pyRserve Documentation',  # title
        u'Ralph Heinkel',           # author
        'manual',                   # documentclass [howto/manual]
    ),
]
180 | 
181 | # The name of an image file (relative to this directory) to place at the top of
182 | # the title page.
183 | # latex_logo = None
184 | 
185 | # For "manual" documents, if this is true, then toplevel headings are parts,
186 | # not chapters.
187 | # latex_use_parts = False
188 | 
189 | # Additional stuff for the LaTeX preamble.
190 | # latex_preamble = ''
191 | 
192 | # Documents to append as an appendix to all manuals.
193 | # latex_appendices = []
194 | 
195 | # If false, no module index is generated.
196 | # latex_use_modindex = True
197 | 


--------------------------------------------------------------------------------
/pyRserve/taggedContainers.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Some specialized list and array classes to store results obtained from R. These
  3 | classes provide means to access object items not only by index but also - sort
  4 | of like a dictionary - by key. However, keys need not be unique and can even be
  5 | None. If a key occurs more than once, only the first item with it is found.
  6 | 
  7 | Available classes:
  8 | - TaggedList
  9 | - TaggedArray
 10 | """
 11 | import numpy
 12 | 
 13 | 
 14 | class TaggedList(object):
 15 |     # This code is mainly based on UserList.UserList and modified for tags
 16 |     """
 17 |     A tagged list is useful for additionally addressing individual items by
 18 |     name instead of only by index. In contrast to dictionaries multiple items
 19 |     can have the same name or key. However only the first one will be found.
 20 | 
 21 |     In many cases a TaggedList behaves like a normal list, however for lazyness
 22 |     reasons of the programmer not all methods are implemented yet.
 23 | 
 24 |     Example:
 25 |     l = TaggedList( [('v1', 1), ('v2', 2), 3, ('v2', 4)] )
 26 |     l[0]     # returns 1
 27 |     l['v1']  # returns 1
 28 |     l['v2']  # returns 2  (not 4 !)
 29 |     l[3]     # returns 4
 30 | 
 31 |     Data can be appended or inserted in the following way:
 32 |     l.insert(0, x=3)
 33 |     l['x']   # returns 3
 34 |     l[0]     # also returns 3
 35 | 
 36 |     l.append(y=3)
 37 |     l[-1]    # returns 3
 38 |     """
 39 |     def __init__(self, initlist=()):
 40 |         """
 41 |         Items in initlist can either be
 42 |         - tuples of (key,values)
 43 |         - or plain values
 44 |         Keys can be None or empty strings in item tuples.
 45 |         """
 46 |         self.values = []
 47 |         self.keys = []
 48 |         for idx, item in enumerate(initlist):
 49 |             try:
 50 |                 key, value = item
 51 |                 key = None if key == '' else key
 52 |             except (TypeError, ValueError):
 53 |                 value = item
 54 |                 key = None
 55 | 
 56 |             self.values.append(value)
 57 |             self.keys.append(key)
 58 | 
 59 |     def astuples(self):
 60 |         """
 61 |         Convert a TaggedList into a representation suitable to be provided
 62 |         to __init__()
 63 |         """
 64 |         return list(zip(self.keys, self.values))
 65 | 
 66 |     def __repr__(self):
 67 |         data = ["%s=%s" % (key, repr(value)) if key else "'%s'" % value
 68 |                 for key, value in self.astuples()]
 69 |         return '<TaggedList(%s)>' % ', '.join(data)
 70 | 
 71 |     #    def __lt__(self, other): return self.values <  self.__cast(other)
 72 |     #    def __le__(self, other): return self.values <= self.__cast(other)
 73 |     #    def __eq__(self, other): return self.values == self.__cast(other)
 74 |     #    def __ne__(self, other): return self.values != self.__cast(other)
 75 |     #    def __gt__(self, other): return self.values >  self.__cast(other)
 76 |     #    def __ge__(self, other): return self.values >= self.__cast(other)
 77 |     #    def __cast(self, other):
 78 |     #        if isinstance(other, UserList): return other.data
 79 |     #        else: return other
 80 |     #    def __cmp__(self, other):
 81 |     #        return cmp(self.values, self.__cast(other))
 82 |     __hash__ = None  # Mutable sequence, so not hashable
 83 | 
 84 |     def __eq__(self, other):
 85 |         if not isinstance(other, self.__class__):
 86 |             return False
 87 |         return self.__dict__ == other.__dict__
 88 | 
 89 |     def __ne__(self, other):
 90 |         return not self.__eq__(other)
 91 | 
 92 |     def __contains__(self, item):
 93 |         return item in self.values
 94 | 
 95 |     def __len__(self):
 96 |         return len(self.values)
 97 | 
 98 |     def __getitem__(self, i):
 99 |         if type(i) == str:
100 |             i = self.keys.index(i)
101 |         return self.values[i]
102 | 
103 |     def __setitem__(self, i, item):
104 |         if type(i) == str:
105 |             i = self.keys.index[i]
106 |         self.values[i] = item
107 | 
108 |     def __delitem__(self, i):
109 |         if type(i) == str:
110 |             i = self.keys.index[i]
111 |             del self.keys[i]
112 |         del self.values[i]
113 | 
114 |     def __getslice__(self, i, j):
115 |         i = max(i, 0)
116 |         j = max(j, 0)
117 |         return self.__class__(self.astuples()[i:j])
118 | 
119 |     #    def __setslice__(self, i, j, other):
120 |     #        i = max(i, 0); j = max(j, 0)
121 |     #        if isinstance(other, UserList):
122 |     #            self.values[i:j] = other.data
123 |     #        elif isinstance(other, type(self.values)):
124 |     #            self.values[i:j] = other
125 |     #        else:
126 |     #            self.values[i:j] = list(other)
127 | 
128 |     def __delslice__(self, i, j):
129 |         raise NotImplementedError()
130 |         # i = max(i, 0); j = max(j, 0)
131 |         # del self.values[i:j]
132 |         # del self.keys[i:j]
133 | 
134 |     def __add__(self, other):
135 |         raise NotImplementedError()
136 |         #        if isinstance(other, UserList):
137 |         #            return self.__class__(self.values + other.data)
138 |         #        elif isinstance(other, type(self.values)):
139 |         #            return self.__class__(self.values + other)
140 |         #        else:
141 |         #            return self.__class__(self.values + list(other))
142 | 
143 |     def __radd__(self, other):
144 |         raise NotImplementedError()
145 |         #        if isinstance(other, UserList):
146 |         #            return self.__class__(other.data + self.values)
147 |         #        elif isinstance(other, type(self.values)):
148 |         #            return self.__class__(other + self.values)
149 |         #            return self.__class__(list(other) + self.values)
150 | 
151 |     def __iadd__(self, other):
152 |         raise NotImplementedError()
153 |         #        if isinstance(other, UserList):
154 |         #            self.values += other.data
155 |         #        elif isinstance(other, type(self.values)):
156 |         #            self.values += other
157 |         #        else:
158 |         #            self.values += list(other)
159 |         #        return self
160 | 
161 |     def __mul__(self, n):
162 |         raise NotImplementedError()
163 |         # return self.__class__(self.values*n)
164 |     __rmul__ = __mul__
165 | 
166 |     def __imul__(self, n):
167 |         raise NotImplementedError()
168 |         # self.values *= n
169 |         # return self
170 | 
171 |     def append(self, *value, **key_and_value):
172 |         """
173 |         Append an item to the list, either given as plain value or as a
174 |         keyword-arg pair.
175 |         Example:
176 |             taggedlist.append(4)
177 |         or
178 |             taggedlist.append(k=4)
179 |         """
180 |         if len(value) == 1 and not key_and_value:
181 |             key = None
182 |             value = value[0]
183 |         elif len(key_and_value) == 1 and not value:
184 |             [(key, value)] = key_and_value.items()
185 |         else:
186 |             raise ValueError("Only either one single value or one single pair "
187 |                              "of key/value is allowed")
188 |         self.values.append(value)
189 |         self.keys.append(key)
190 | 
191 |     def insert(self, i, *value, **key_and_value):
192 |         """
193 |         Insert an item in the list at position i, either given as plain value
194 |         or as a keyword-arg pair.
195 |         Example:
196 |             taggedlist.insert(4, 'abc)
197 |         or
198 |             taggedlist.append(4, k='abc')
199 |         """
200 |         if len(value) == 1 and not key_and_value:
201 |             key = None
202 |             value = value[0]
203 |         elif len(key_and_value) == 1 and not value:
204 |             [(key, value)] = key_and_value.items()
205 |         else:
206 |             raise ValueError("Only either one single value or one single pair "
207 |                              "of key/value is allowed")
208 |         self.values.insert(i, value)
209 |         self.keys.insert(i, key)
210 | 
211 |     def pop(self, i=-1):
212 |         """
213 |         Remove an item from the list. By default the last item will be removed.
214 |         If an item at a specific position should be removed, pass an additional
215 |         index arguemnt.
216 |         """
217 |         return self.values.pop(i)
218 | 
219 |     def remove(self, item):
220 |         raise NotImplementedError()
221 |         # self.values.remove(item)
222 | 
223 |     def count(self, item):
224 |         return self.values.count(item)
225 | 
226 |     def index(self, item, *args):
227 |         return self.values.index(item, *args)
228 | 
229 |     def reverse(self):
230 |         self.values.reverse()
231 |         self.keys.reverse()
232 | 
233 |     def sort(self, *args, **kwds):
234 |         raise NotImplementedError()
235 |         # self.values.sort(*args, **kwds)
236 | 
237 |     def extend(self, other):
238 |         raise NotImplementedError()
239 |         #  if isinstance(other, UserList):
240 |         #      self.values.extend(other.data)
241 |         #  else:
242 |         #      self.values.extend(other)
243 | 
244 | 
class AttrArray(numpy.ndarray):
    """
    numpy.ndarray subclass carrying one extra attribute named "attr".
    Serves as base class for TaggedArray.
    """
    # class-level default, used by instances which never got their own attr
    attr = None

    def __repr__(self):
        """Return the ndarray repr with ', attr=<repr of attr>' appended."""
        plain = super(AttrArray, self).__repr__()
        if not hasattr(self, 'attr'):
            return plain
        # cut off the closing ')' and re-add it behind the attr part
        return plain[:-1] + ', attr=' + repr(self.attr) + ')'

    @classmethod
    def new(cls, data, attr):
        """
        Factory method building an AttrArray from an ndarray or from anything
        numpy.array() accepts (e.g. a Python list).
        An ndarray is not copied, the result is a view onto it.
        Usage:
            AttrArray.new(array([1, 2, 3, 4]), {'attr1': val1, 'attr2': val2})
        """
        source = data if isinstance(data, numpy.ndarray) else numpy.array(data)
        result = source.view(cls)
        result.attr = attr
        return result
276 | 
277 | 
def asAttrArray(data, attr):
    """Function-style shortcut for AttrArray.new(data, attr)."""
    wrapped = AttrArray.new(data, attr)
    return wrapped
280 | 
281 | 
class TaggedArray(AttrArray):
    """
    A tagged array is useful for additionally addressing individual items by
    name instead of only by index. In contrast to dictionaries multiple items
    can have the same name or key. However only the first one will be found.

    In many cases a TaggedArray behaves like a normal array and is the
    equivalent for TaggedList.
    This class is basically only useful to translate results created by R into
    something useful in Python.

    Instances of TaggedArray should only be created using the factory function
    'asTaggedArray(values, tags)', where 'values' and 'tags' can be plain
    python lists or numpy-arrays.

    Example:
    l = asTaggedArray(array([1, 2, 3, 4]), ['v1', 'v2', 'v3', 'v4'])
    l[0]     # returns 1
    l['v1']  # returns 1
    l['v2']  # returns 2
    l[3]     # returns 4

    It is recommended not to do lots of manipulations that modify the
    structure of the array. This could lead to mismatches btw. tags and
    values (those are only very loosely coupled internally). However any type
    of mathematics like multiplying the array should be possible without
    problems.
    """
    # Class-level default for arrays which never got tags assigned. It is
    # shared between all such instances, so it must never be modified in place.
    attr = []

    def __repr__(self):
        # super(AttrArray, ...) skips AttrArray.__repr__ on purpose: the plain
        # ndarray repr is needed here so that the tags can be appended as
        # 'key=' instead of 'attr='.
        r = super(AttrArray, self).__repr__()
        if hasattr(self, 'attr'):
            return r[:-1] + ', key=' + repr(self.attr) + ')'
        return r

    def __getitem__(self, idx_or_name):
        """
        Return the item(s) selected by a regular numpy index or - if numpy
        rejects the index - the item whose tag equals 'idx_or_name'.

        Raises KeyError if the argument is neither a usable index nor a
        known tag.
        """
        try:
            return numpy.ndarray.__getitem__(self, idx_or_name)
        except (IndexError, KeyError, TypeError, ValueError):
            # not usable as a numpy index -> try it as a tag below
            pass
        try:
            # list() makes the lookup work for tags given as numpy array too
            position = list(self.attr).index(idx_or_name)
        except ValueError:
            raise KeyError('No key "%s" available for array' % idx_or_name)
        return numpy.ndarray.__getitem__(self, position)

    def keys(self):
        """Return a copy of the tags as a list."""
        return list(self.attr)

    @classmethod
    def new(cls, data, tags):
        """
        Factory method to create TaggedArray objects from ndarrays or Python
        lists.
        Check the docs in TaggedArray for more information.
        Raises ValueError if the number of tags differs from len(data).
        Usage:
        l = TaggedArray.new(array([1, 2, 3, 4]), ['v1', 'v2', 'v3', 'v4'])
        l[0]     # returns 1
        l['v1']  # returns 1
        l['v2']  # returns 2
        l[3]     # returns 4
        """
        if len(tags) != len(data):
            raise ValueError('Number of keys must match size of array')
        if not isinstance(data, numpy.ndarray):
            # assume it is a Python list or any other valid data type
            # for arrays
            arr = numpy.array(data)
        else:
            arr = data

        taggedArr = arr.view(cls)
        taggedArr.attr = tags
        return taggedArr
357 | 
358 | 
def asTaggedArray(data, tags):
    """Function-style shortcut for TaggedArray.new(data, tags)."""
    tagged = TaggedArray.new(data, tags)
    return tagged
361 | 


--------------------------------------------------------------------------------
/pyRserve/rtypes.py:
--------------------------------------------------------------------------------
  1 | """
  2 | types module for pyRserve
  3 | """
  4 | import numpy
  5 | from pyRserve.misc import PY3
  6 | 
# some general constants:
SOCKET_BLOCK_SIZE = 4096
MAX_INT32 = 2**31 - 1
# NOTE(review): this is -(2**31 - 1), i.e. one above the two's-complement
# minimum - presumably because R uses -2**31 for NA_integer_; confirm.
MIN_INT32 = -MAX_INT32

# Rserve constants and mappings ###############################################

# Main Rserve header size [bytes]
RHEADER_SIZE = 16

# Header sizes (in SEXPR) without and with XT_LARGE or DT_LARGE flag [bytes]
SMALL_DATA_HEADER_SIZE = 4
LARGE_DATA_HEADER_SIZE = 8


CMD_RESP = 0x10000            # all responses have this flag set

RESP_OK  = CMD_RESP | 0x0001  # command succeeded; returned parameters depend
                              # on the command issued
RESP_ERR = CMD_RESP | 0x0002  # command failed, check status code


CMD_OOB         = 0x20000     # out-of-band data - i.e. unsolicited messages

OOB_SEND        = CMD_OOB | 0x1000  # OOB send - unsolicited SEXP sent from the
                                    # R instance to the client. 12 LSB are
                                    # reserved for application-specific code
OOB_MSG         = CMD_OOB | 0x2000  # OOB message - unsolicited message sent
                                    # from the R instance to the client
                                    # requiring a response. 12 LSB are reserved
                                    # for application-specific code
OOB_STREAM_READ = CMD_OOB | 0x4000  # OOB stream read request - server requests
                                    # streaming data from the client (typically
                                    # streaming input for computation)
 41 | 
 42 | ###############################################################################
 43 | # Error codes
 44 | 
ERR_auth_failed      = 0x41  # auth.failed or auth.requested but no
                             #   login came. in case of authentication
                             #   failure due to name/pwd mismatch,
                             #   server may send CMD_accessDenied instead

ERR_conn_broken      = 0x42  # connection closed or broken packet killed it
ERR_inv_cmd          = 0x43  # unsupported/invalid command
ERR_inv_par          = 0x44  # some parameters are invalid
ERR_Rerror           = 0x45  # R-error occurred, usually followed by connection
                             #   shutdown
ERR_IOerror          = 0x46  # I/O error
ERR_notOpen          = 0x47  # attempt to perform fileRead/Write on closed file
ERR_accessDenied     = 0x48  # this answer is also valid on
                             #   CMD_login; otherwise it's sent
                             #   if the server doesn't allow the user
                             #   to issue the specified command.
                             #   (e.g. some server admins may block
                             #   file I/O operations for some users)
ERR_unsupportedCmd   = 0x49  # unsupported command
ERR_unknownCmd       = 0x4a  # unknown command - the difference
                             #   between unsupported and unknown is that
                             #   unsupported commands are known to the
                             #   server but for some reasons (e.g.
                             #   platform dependent) it's not supported.
                             #   unknown commands are simply not recognized
                             #   by the server at all.

# The following ERR_.. exist since 1.23/0.1-6
ERR_data_overflow    = 0x4b  # incoming packet is too big.
                             #   currently there is a limit as of the
                             #   size of an incoming packet.
ERR_object_too_big   = 0x4c  # the requested object is too big
                             #   to be transported in that way.
                             #   If received after CMD_eval then
                             #   the evaluation itself was successful.
                             #   optional parameter is the size of the object

# since 1.29/0.1-9
ERR_out_of_mem       = 0x4d  # out of memory. the connection is usually
                             #  closed after this error was sent
# since 0.6-0
ERR_ctrl_closed      = 0x4e  # control pipe to the master process is closed
                             #  or broken

# since 0.4-0
ERR_session_busy     = 0x50  # session is still busy
ERR_detach_failed    = 0x51  # unable to detach session (cannot determine
                             #  peer IP or problems creating a listening socket
                             #  for resume)
 93 | 
 94 | # pack all error codes with their names into a dictionary:
 95 | ERRORS = dict([(errCode, err_name) for (err_name, errCode) in locals().items()
 96 |                if err_name.startswith('ERR_')])
 97 | 
 98 | 
 99 | ###############################################################################
100 | # Available commands
101 | 
# Each comment below reads "<parameters sent> : <data returned>"
CMD_login        = 0x001    # "name\npwd" : -
CMD_voidEval     = 0x002    # string : -
CMD_eval         = 0x003    # string : encoded SEXP
CMD_shutdown     = 0x004    # [admin-pwd] : -

# file I/O routines.
# NOTE(review): this comment used to continue with "server may answe" and
#   then break off - the rest of the sentence is missing.
CMD_openFile     = 0x010    # fn : -
CMD_createFile   = 0x011    # fn : -
CMD_closeFile    = 0x012    # - : -
CMD_readFile     = 0x013    # [int size] : data... ; if size not present,
                            #      server is free to choose any value - usually
                            #      it uses the size of its static buffer
CMD_writeFile    = 0x014    # data : -
CMD_removeFile   = 0x015    # fn : -

# object manipulation
CMD_setSEXP      = 0x020    # string(name), REXP : -
CMD_assignSEXP   = 0x021    # string(name), REXP : - ; same as setSEXP
                            #    except that the name is parsed

# session management (since 0.4-0)
CMD_detachSession    = 0x030  # : session key
CMD_detachedVoidEval = 0x031  # string : session key; doesn't
                              #   (NOTE(review): comment breaks off here)
CMD_attachSession    = 0x032  # session key : -

# control commands (since 0.6-0) - passed on to the master process
# Note: currently all control commands are asynchronous, i.e. RESP_OK
#   indicates that the command was enqueued in the master pipe, but there
#  is no guarantee that it will be processed. Moreover non-forked
#   connections (e.g. the default debug setup) don't process any
#   control commands until the current client connection is closed so
#   the connection issuing the control command will never see its result.

CMD_ctrl            = 0x40  # -- not a command - just a constant --
CMD_ctrlEval        = 0x42  # string : -
CMD_ctrlSource      = 0x45  # string : -
CMD_ctrlShutdown    = 0x44  # - : -

# 'internal' commands (since 0.1-9)
CMD_setBufferSize   = 0x081   # [int sendBufSize]
                              #     this command allows clients to request
                              #     bigger buffer sizes if large data is to be
                              #     transported from Rserve to the client.
                              #     (incoming buffer is resized automatically)

CMD_setEncoding     = 0x082   # string (one of "native","latin1","utf8")

# special commands - the payload of packages with this mask does not contain
# defined parameters

CMD_SPECIAL_MASK    = 0xf0

CMD_serEval         = 0xf5     # serialized eval - the packets are raw
                               #   serialized data without data header
CMD_serAssign       = 0xf6     # serialized assign - serialized list with
                               #   [[1]]=name, [[2]]=value
CMD_serEEval        = 0xf7     # serialized expression eval - like serEval with
                               #   one additional evaluation round
160 | 
161 | 
162 | ###############################################################################
163 | # Data types for the transport protocol (QAP1) do NOT confuse with any
164 | # XT_.. values.
165 | 
DT_INT           = 0x01  # int
DT_CHAR          = 0x02  # char
DT_DOUBLE        = 0x03  # double
DT_STRING        = 0x04  # 0 terminated string
DT_BYTESTREAM    = 0x05  # stream of bytes (unlike DT_STRING may contain 0)
DT_SEXP          = 0x0A  # encoded SEXP

DT_ARRAY         = 0x0B  # array of objects (i.e. first 4 bytes specify how
                         #   many subsequent objects are part of the array;
                         #   0 is legitimate)
DT_LARGE         = 0x40  # new in 0102: if this flag is set then the length of
                         #   the object is coded as 56-bit integer enlarging
                         #   the header by 4 bytes

###############################################################################
# XpressionTypes

#   REXP - R expressions are packed in the same way as command parameters
#   transport format of the encoded Xpressions:
#   [0] int type/len (1 byte type, 3 bytes len - same as SET_PAR)
#   [4] REXP attr (if bit 8 in type is set)
#   [4/8] data ..

# The letter following '#' (P, s or -) is explained in the legend below
# XT_UNKNOWN.
XT_NULL          =  0x00  # P  data: [0]
XT_INT           =  0x01  # -  data: [4]int
XT_DOUBLE        =  0x02  # -  data: [8]double
XT_STR           =  0x03  # P  data: [n]char null-term. strg.
XT_LANG          =  0x04  # -  data: same as XT_LIST
XT_SYM           =  0x05  # -  data: [n]char symbol name
XT_BOOL          =  0x06  # -  data: [1]byte boolean (1=TRUE, 0=FALSE, 2=NA)

XT_S4            =  0x07  # P  data: [0]

XT_BYTE          =  0x08  # extension for pyRserve
XT_INT3          =  0x09  # extension for pyRserve, a 3-byte integer as used
                          #   in REXP
XT_INT7          =  0x0A  # extension for pyRserve, a 7-byte integer as used
                          #   in REXP

XT_VECTOR        =  0x10  # 16dec: P  data: [?]REXP,REXP,..
XT_LIST          =  0x11  # 17dec: -  X head, X vals, X tag (since 0.1-5)
XT_CLOS          =  0x12  # 18dec: P  X formals, X body  (closure; since 0.1-5)
XT_SYMNAME       =  0x13  # 19dec: s  same as XT_STR (since 0.5)
XT_LIST_NOTAG    =  0x14  # 20dec: s  same as XT_VECTOR (since 0.5)
XT_LIST_TAG      =  0x15  # 21dec: P  X tag, X val, Y tag, Y val,  (since 0.5)
XT_LANG_NOTAG    =  0x16  # 22dec: s  same as XT_LIST_NOTAG (since 0.5)
XT_LANG_TAG      =  0x17  # 23dec: s  same as XT_LIST_TAG (since 0.5)
XT_VECTOR_EXP    =  0x1a  # 26dec: s  same as XT_VECTOR (since 0.5)
XT_VECTOR_STR    =  0x1b  # 27dec: -  same as XT_VECTOR (since 0.5 but unused,
                          #           use XT_ARRAY_STR instead)

XT_ARRAY_INT     =  0x20  # 32dec: P  data: [n*4]int,int,..
XT_ARRAY_DOUBLE  =  0x21  # 33dec: P  data: [n*8]double,double,..
XT_ARRAY_STR     =  0x22  # 34dec: P  data: string,string,.. (
                          #           string=byte,byte,...,0) padded with '\01'
XT_ARRAY_BOOL_UA =  0x23  # 35dec: -  data: [n]byte,byte,..
                          #           (unaligned! NOT supported anymore)
XT_ARRAY_BOOL    =  0x24  # 36dec: P  data: int(n),byte,byte,...
XT_RAW           =  0x25  # 37dec: P  data: int(n),byte,byte,...
XT_ARRAY_CPLX    =  0x26  # 38dec: P  data: [n*16]double,double,...
                          #           (Re,Im,Re,Im,...)

XT_UNKNOWN       =  0x30  # 48dec: P  data: [4]int - SEXP
#                                     type (as from TYPEOF(x))
#                      |
#                      +--- interesting flags for client implementations:
#                           P = primary type
#                           s = secondary type - its decoding is identical to
#                               a primary type and thus the client doesn't need
#                                to decode it separately.
#                           - = deprecated/removed. if a client doesn't need to
#                               support old Rserve versions, those can be
#                               safely skipped.
#  Total primary: 4 trivial types (NULL, STR, S4, UNKNOWN) + 6 array types +
#                 3 recursive types


XT_LARGE         =  0x40  # 64dec: new in 0102: if this flag is set then the
                          #   length of the object is coded as 56-bit integer
                          #   enlarging the header by 4 bytes
XT_HAS_ATTR      =  0x80  # 128dec: flag; if set, the following REXP is the
                          #   attribute. The use of attributes and vectors
                          #   results in recursive storage of REXPs
249 | 
# Reverse lookup tables translating the numeric codes of all XT_* and DT_*
# constants defined so far at module level into their names:

XTs = {
    code: name
    for name, code in locals().items()
    if name.startswith('XT_')
}
DTs = {
    code: name
    for name, code in locals().items()
    if name.startswith('DT_')
}
257 | 
258 | 
# Byte values of a logical as listed for XT_BOOL (1=TRUE, 0=FALSE, 2=NA)
BOOL_FALSE = 0
BOOL_TRUE = 1
BOOL_NA = 2

# Type codes regarded as valid R types (the consumer of this list is not
# part of this module).
VALID_R_TYPES = [
    DT_SEXP,
    XT_BOOL,
    XT_INT,
    XT_DOUBLE,
    XT_STR,
    XT_SYMNAME,
    XT_VECTOR,
    XT_LIST_TAG,
    XT_LANG_TAG,
    XT_LIST_NOTAG,
    XT_LANG_NOTAG,
    XT_CLOS,
    XT_ARRAY_BOOL,
    XT_ARRAY_INT,
    XT_ARRAY_DOUBLE,
    XT_ARRAY_CPLX,
    XT_ARRAY_STR,
    XT_VECTOR_EXP,
    XT_NULL,
    XT_UNKNOWN,
    XT_RAW,
    XT_S4,
]
269 | 
# All types regarded as "string" by pyRserve.
# numpy.bytes_ is used instead of its alias numpy.string_: both names refer
# to the same type in NumPy 1.x, but the alias was removed in NumPy 2.0.
STRING_TYPES = [str, numpy.bytes_, numpy.str_]
if not PY3:
    STRING_TYPES.append(unicode)  # noqa: F821      'unicode' unknown in Python3
273 | 
274 | ###############################################################################
275 | # Mapping btw. numpy and R data types, in both directions
276 | 
# map r-types and some python types to typecodes used in the 'struct' module
# (the key 'complex' was listed twice with the same value; the redundant
# entry has been dropped, the resulting dictionary is unchanged)
structMap = {
    XT_BOOL:          'b',
    bool:             'b',
    XT_BYTE:          'B',
    XT_INT:           'i',
    int:              'i',
    numpy.int32:      'i',
    XT_INT3:          'i',
    XT_INT7:          'q',     # 64 bit integer
    XT_DOUBLE:        'd',     # double (float64)
    float:            'd',
    numpy.double:     'd',
    complex:          'd',
    numpy.complex128: 'd',
}
294 | 
# mapping to determine overall type of message:
# plain Python type -> DT_* transport data type code
DT_Map = {
    str:    DT_STRING,
    int:    DT_INT,
    float:  DT_DOUBLE,
}
301 | 
302 | 
# Two-way mapping between R array type codes and numpy/Python element types.
# The aliases numpy.string_, numpy.unicode_ and numpy.compat.long are avoided
# because they are not available in NumPy 2.0 anymore.
numpyMap = {
    XT_ARRAY_BOOL:     numpy.bool_,
    XT_ARRAY_INT:      numpy.int32,
    XT_ARRAY_DOUBLE:   numpy.double,     # double float64
    XT_ARRAY_CPLX:     complex,
    XT_ARRAY_STR:      numpy.bytes_,     # same type as former numpy.string_
}

# also add the inverse mapping to it:
for k, v in list(numpyMap.items()):
    numpyMap[v] = k

# Python 2 has a separate 'long' type, in Python 3 'int' covers it. This is
# what numpy.compat.long used to provide.
try:
    _py_long = long  # noqa: F821      'long' unknown in Python3
except NameError:
    _py_long = int

# some manual additions for numpy variants:
numpyMap[numpy.complex128]  = XT_ARRAY_CPLX
numpyMap[numpy.int32]       = XT_ARRAY_INT
numpyMap[numpy.int64]       = XT_ARRAY_INT
numpyMap[_py_long]          = XT_ARRAY_INT
numpyMap[numpy.str_]        = XT_ARRAY_STR
# numpy.unicode_ only exists in NumPy 1.x, fall back to numpy.str_ otherwise
numpyMap[getattr(numpy, 'unicode_', numpy.str_)] = XT_ARRAY_STR
322 | 
323 | 
atom2ArrMap = {
    # map atomic python objects to their array counterparts in R
    int:               XT_ARRAY_INT,
    numpy.int32:       XT_ARRAY_INT,
    float:             XT_ARRAY_DOUBLE,
    numpy.double:      XT_ARRAY_DOUBLE,
    complex:           XT_ARRAY_CPLX,
    numpy.complex128:  XT_ARRAY_CPLX,
    str:               XT_ARRAY_STR,
    numpy.str_:        XT_ARRAY_STR,
    # numpy.bytes_ is the type formerly also reachable as numpy.string_
    # (that alias was removed in NumPy 2.0)
    numpy.bytes_:      XT_ARRAY_STR,
    bool:              XT_ARRAY_BOOL,
}
# numpy.unicode_ only exists in NumPy 1.x, fall back to numpy.str_ otherwise
atom2ArrMap[getattr(numpy, 'unicode_', numpy.str_)] = XT_ARRAY_STR
338 | 


--------------------------------------------------------------------------------
/pyRserve/rserializer.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Serializer class to convert Python objects into a binary data stream for
  3 | sending them to Rserve.
  4 | """
  5 | import os
  6 | import io
  7 | import struct
  8 | import socket
  9 | 
 10 | import numpy
 11 | 
 12 | from . import rtypes
 13 | from .misc import PY3, FunctionMapper, byteEncode, padLen4, string2bytesPad4
 14 | from .taggedContainers import TaggedList, TaggedArray
 15 | 
# turn on DEBUG to see extra information about what the serializer is
# doing with your data (checked by the print statements in this module)
DEBUG = 0

# type of None, used as a key when registering the serializer for None
NoneType = type(None)

if PY3:
    # make test work with Python 3 where 'long'-type does not exist:
    long = int
 25 | 
 26 | 
 27 | class RSerializer(object):
 28 |     """
 29 |     Class to to serialize Python objects into a binary data stream for sending
 30 |     them to Rserve.
 31 | 
 32 |     Depending on 'commandType' given to __init__ the resulting binary string
 33 |     can be used to send a command, to assign a variable in Rserve, or to
 34 |     reply to a request received from Rserve.
 35 |     """
 36 |     serializeMap = {}
 37 |     fmap = FunctionMapper(serializeMap)
 38 | 
 39 |     def __init__(self, commandType, fp=None):
 40 |         if isinstance(fp, socket.socket):
 41 |             # kwargs = {'mode': 'b'} if PY3 else {}
 42 |             self._fp = fp
 43 |             self._buffer = io.BytesIO()
 44 |         elif not fp:
 45 |             self._buffer = fp or io.BytesIO()
 46 |             self._fp = None
 47 |         else:
 48 |             # expect fp to be a seekable file(-like) object:
 49 |             self._buffer = self._fp = fp
 50 |         self._dataSize = 0
 51 |         self._writeHeader(commandType)
 52 | 
 53 |     def _getRetVal(self):
 54 |         if self._fp is self._buffer:
 55 |             # file(-like) object - data has been written, nothing to return
 56 |             return None
 57 |         elif not self._fp:
 58 |             # data has only been written into buffer, so return its value:
 59 |             return self._buffer.getvalue()
 60 |         else:
 61 |             # i.e. socket: write result of _fp into socket-fp
 62 |             self._fp.send(self._buffer.getvalue())
 63 |             return None
 64 | 
 65 |     def _writeHeader(self, commandType):
 66 |         """Write main header of message for Rserve"""
 67 |         # Set length to zero initially, will be fixed in _finalizerHeader()
 68 |         # when msg size is determined:
 69 |         msg_length_lower = msg_length_higher = 0
 70 |         data_offset = 0
 71 |         header = struct.pack('<IIII', commandType, msg_length_lower,
 72 |                              data_offset, msg_length_higher)
 73 |         if DEBUG:
 74 |             print('Writing header: %d bytes: %s' % (len(header), repr(header)))
 75 |         self._buffer.write(header)
 76 | 
 77 |     def finalize(self):
 78 |         """Finalize the message package before actually sending/wriring it out.
 79 |         -> Set the length of the entire data package in the general message hdr
 80 |            as number of bytes of the entire message minus the general hdr
 81 |         """
 82 |         # Jump to end of buffer to determine its length:
 83 |         self._buffer.seek(0, os.SEEK_END)
 84 |         messageSize = self._buffer.tell() - rtypes.RHEADER_SIZE
 85 |         if DEBUG:
 86 |             print('writing size of header: %2d' % messageSize)
 87 |         # Goto position 4 of the general Rserve package header and write the
 88 |         # size of the overall rserve message there. For message size > 2**32
 89 |         # the size is split into two parts, the lower 32 bits are written at
 90 |         # position 4, the higher part is written at position 12 (see QAP1 docs)
 91 |         bin_messageSize = struct.pack('<Q', messageSize)
 92 |         bin_messageSize_lo = bin_messageSize[:4]
 93 |         bin_messageSize_hi = bin_messageSize[4:]
 94 | 
 95 |         self._buffer.seek(4)
 96 |         self._buffer.write(bin_messageSize_lo)
 97 |         self._buffer.write(b'\x00\x00\x00\x00')  # data offset, zero by default
 98 |         self._buffer.write(bin_messageSize_hi)
 99 |         return self._getRetVal()
100 | 
101 |     def _writeDataHeader(self, rTypeCode, length):
102 |         """Write a header for either DataTypes (DT_*) or ExpressionTypes (XT_*)
103 | 
104 |         According to the documentation of Rserve:
105 |         -----------------------------------------
106 |         If the length of the data block is smaller than 2**24 - 16 (fffff0)
107 |         then the header has a length of 4 bytes and looks like:
108 |             [1]   rTypeCode
109 |             [2-4] length of data block (3 bytes/24 bits)
110 |         If the length of the data is larger, then the rTypeCode of the header
111 |         has to be OR'ed with XT_LARGE (or DT_LARGE, which is the same). Then
112 |         the length of the datablock can be encoded in 7 bytes:
113 |             [1]   rTypeCode
114 |             [2-4] length of data block (lower three bytes)
115 |             [5-8] length of data block (upper four bytes)
116 | 
117 |         However; pyRserve is not capable of dynamic header sizes, so we will
118 |                  use the large header setup for all data packages no matter of
119 |                  their size. Simon Urbanek confirmed that this does not cause
120 |                  any problems with Rserve.
121 |         """
122 |         rTypeCode |= rtypes.XT_LARGE
123 |         hdr = struct.pack('<BQ', rTypeCode, length)
124 |         # cut-off leftover zeros at the right end of the header string
125 |         # before writing it to the buffer:
126 |         self._buffer.write(hdr[:rtypes.LARGE_DATA_HEADER_SIZE])
127 |         return rtypes.LARGE_DATA_HEADER_SIZE
128 | 
129 |     def serialize(self, o, dtTypeCode=rtypes.DT_SEXP):
130 |         # Here the data typecode (DT_* ) of the entire message is written,
131 |         # with its length. Then the actual data itself is written out.
132 |         if dtTypeCode == rtypes.DT_STRING:
133 |             paddedString = string2bytesPad4(o)
134 |             length = len(paddedString)
135 |             hdrSize = self._writeDataHeader(dtTypeCode, length)
136 |             self._buffer.write(paddedString)
137 |         elif dtTypeCode == rtypes.DT_INT:
138 |             length = 4   # an integer is encoded as 4 bytes
139 |             hdrSize = self._writeDataHeader(dtTypeCode, length)
140 |             self._buffer.write(struct.pack('<i', o))
141 |         elif dtTypeCode == rtypes.DT_SEXP:
142 |             startPos = self._buffer.tell()
143 |             self._buffer.write(b'\0\0\0\0\0\0\0\0')
144 |             length = self.serializeExpr(o)
145 |             self._buffer.seek(startPos)
146 |             hdrSize = self._writeDataHeader(dtTypeCode, length)
147 |         else:
148 |             raise NotImplementedError('no support for DT-type %x' % dtTypeCode)
149 |         # Jump back to end of buffer to be prepared for writing more data
150 |         self._buffer.seek(0, os.SEEK_END)
151 |         # Adjust datasize counter
152 |         self._dataSize += length + hdrSize
153 | 
154 |     def serializeExpr(self, o):
155 |         if isinstance(o, numpy.ndarray):
156 |             rTypeCode = rtypes.numpyMap[o.dtype.type]
157 |         else:
158 |             rTypeCode = type(o)
159 |         try:
160 |             s_func = self.serializeMap[rTypeCode]
161 |         except KeyError:
162 |             raise NotImplementedError('Serialization of "%s" not implemented' %
163 |                                       rTypeCode)
164 |         startPos = self._buffer.tell()
165 |         if DEBUG:
166 |             print('Serializing expr %r with rTypeCode=%s using function %s' %
167 |                   (o, rTypeCode, s_func))
168 |         s_func(self, o)
169 |         # determine and return the length of actual R expression data:
170 |         return self._buffer.tell() - startPos
171 | 
172 |     @fmap(NoneType, rtypes.XT_NULL)
173 |     def s_null(self, _):
174 |         """Send Python's None to R, resulting in NULL there"""
175 |         # For NULL only the header needs to be written, there is no data body.
176 |         self._writeDataHeader(rtypes.XT_NULL, 4)
177 | 
178 |     @fmap(rtypes.XT_STR, rtypes.XT_SYMNAME)
179 |     def s_string_or_symbol(self, o, rTypeCode=rtypes.XT_STR):
180 |         """
181 |         Possible rTypeCodes for a given string are:
182 |         - XT_STR
183 |         - XT_SYMNAME
184 |         """
185 |         # The string packet contains trailing padding zeros to make it always
186 |         # a multiple of 4 in length:
187 |         paddedString = string2bytesPad4(o)
188 |         length = len(paddedString)
189 |         self._writeDataHeader(rTypeCode, length)
190 |         if DEBUG:
191 |             print('Writing string: %2d bytes: %s' %
192 |                   (length, repr(paddedString)))
193 |         self._buffer.write(paddedString)
194 | 
195 |     # ############### Arrays #########################################
196 | 
197 |     def __s_write_xt_array_tag_data(self, o):
198 |         """
199 |         Write tag data of an array, like dimension for a multi-dim array,
200 |         or other information found. Return appropriate rTypeCode.
201 |         """
202 |         xt_tag_list = []
203 |         if o.ndim > 1:
204 |             xt_tag_list.append((b'dim', numpy.array(o.shape, numpy.int32)))
205 |         if isinstance(o, TaggedArray):
206 |             xt_tag_list.append((b'names', numpy.array(o.attr)))
207 | 
208 |         attrFlag = rtypes.XT_HAS_ATTR if xt_tag_list else 0
209 |         rTypeCode = rtypes.numpyMap[o.dtype.type] | attrFlag
210 |         # write length of zero for now, will be corrected later:
211 |         self._writeDataHeader(rTypeCode, 0)
212 |         if attrFlag:
213 |             self.s_xt_tag_list(xt_tag_list)
214 |         return rTypeCode
215 | 
216 |     def __s_update_xt_array_header(self, headerPos, rTypeCode):
217 |         """
218 |         Update length information of xt array header which has been
219 |         previously temporarily set to 0 in __s_write_xt_array_tag_data()
220 |         @arg headerPos: file position where header information should be
221 |                         written.
222 |         @arg rTypeCode
223 |         """
224 |         # subtract length of data header (8 bytes), does not count to payload!
225 |         length = self._buffer.tell() - headerPos - rtypes.LARGE_DATA_HEADER_SIZE
226 |         self._buffer.seek(headerPos)
227 |         self._writeDataHeader(rTypeCode, length)
228 |         self._buffer.seek(0, os.SEEK_END)
229 | 
230 |     @fmap(*rtypes.STRING_TYPES)
231 |     def s_xt_array_single_str(self, o):
232 |         """Serialize single string object"""
233 |         arr = numpy.array([o])
234 |         self.s_xt_array_str(arr)
235 | 
236 |     @fmap(rtypes.XT_ARRAY_STR)
237 |     def s_xt_array_str(self, o):
238 |         """Serialize array of strings"""
239 |         startPos = self._buffer.tell()
240 |         rTypeCode = self.__s_write_xt_array_tag_data(o)
241 | 
242 |         # reshape into 1d array:
243 |         o1d = o.reshape(o.size, order='F')
244 |         # Byte-encode them:
245 |         bo = [byteEncode(d) for d in o1d]
246 |         # add empty string to that the following join with \0 adds an
247 |         # additional zero at the end of the last string!
248 |         bo.append(b'')
249 |         # Concatenate them as null-terminated strings:
250 |         nullTerminatedStrings = b'\0'.join(bo)
251 | 
252 |         padLength = padLen4(nullTerminatedStrings)
253 |         self._buffer.write(nullTerminatedStrings)
254 |         self._buffer.write(b'\1\1\1\1'[:padLength])
255 | 
256 |         # Update the array header:
257 |         self.__s_update_xt_array_header(startPos, rTypeCode)
258 | 
259 |     @fmap(bool, numpy.bool_)
260 |     def s_atom_to_xt_array_boolean(self, o):
261 |         """
262 |         Render single boolean items into their corresponding array
263 |         counterpart in R.
264 |         Always convert a boolean atomic value into a specialized boolean
265 |         R vector.
266 |         """
267 |         arr = numpy.array([o])
268 |         self.s_xt_array_boolean(arr)
269 | 
270 |     @fmap(rtypes.XT_ARRAY_BOOL)
271 |     def s_xt_array_boolean(self, o):
272 |         """
273 |         - o: numpy array or subclass (e.g. TaggedArray) with boolean values
274 |         Note: If o is multi-dimensional a tagged array is created. Also if o
275 |               is of type TaggedArray.
276 |         """
277 |         startPos = self._buffer.tell()
278 |         rTypeCode = self.__s_write_xt_array_tag_data(o)
279 | 
280 |         # A boolean vector starts with its number of boolean values in the
281 |         # vector (as int32):
282 |         structCode = '<'+rtypes.structMap[int]
283 |         self._buffer.write(struct.pack(structCode, o.size))
284 |         # Then write the boolean values themselves. Note that R expects binary
285 |         # array data in Fortran order, so prepare this accordingly:
286 |         data = o.tobytes(order='F')
287 |         self._buffer.write(data)
288 |         # Finally pad the binary data to be of a multiple of four in length:
289 |         self._buffer.write(padLen4(data) * b'\xff')
290 | 
291 |         # Update the array header:
292 |         self.__s_update_xt_array_header(startPos, rTypeCode)
293 | 
294 |     @fmap(int, numpy.int32, long, numpy.int64, numpy.compat.long, float, complex,
295 |           numpy.float64, numpy.complex64, numpy.complex128)
296 |     def s_atom_to_xt_array_numeric(self, o):
297 |         """
298 |         Render single numeric items into their corresponding array counterpart
299 |         in R
300 |         """
301 |         if isinstance(o, (int, long, numpy.int64, numpy.compat.long)):
302 |             if rtypes.MIN_INT32 <= o <= rtypes.MAX_INT32:
303 |                 # even though this type of data is 'long' it still fits into a
304 |                 # normal integer. Good!
305 |                 o = int(o)
306 |             else:
307 |                 raise ValueError('Cannot serialize long integers larger than '
308 |                                  'MAX_INT32 (**31-1)')
309 | 
310 |         rTypeCode = rtypes.atom2ArrMap[type(o)]
311 |         structCode = '<'+rtypes.structMap[type(o)]
312 |         length = struct.calcsize(structCode)
313 |         if type(o) is complex:
314 |             self._writeDataHeader(rTypeCode, length*2)
315 |             self._buffer.write(struct.pack(structCode, o.real))
316 |             self._buffer.write(struct.pack(structCode, o.imag))
317 |         else:
318 |             self._writeDataHeader(rTypeCode, length)
319 |             self._buffer.write(struct.pack(structCode, o))
320 | 
321 |     @fmap(rtypes.XT_ARRAY_CPLX, rtypes.XT_ARRAY_DOUBLE, rtypes.XT_ARRAY_INT)
322 |     def s_xt_array_numeric(self, o):
323 |         """
324 |         @param o: numpy array or subclass (e.g. TaggedArray)
325 |         @note: If o is multi-dimensional a tagged array is created. Also if o
326 |                is of type TaggedArray.
327 |         """
328 |         if o.dtype in (numpy.int64, numpy.compat.long):
329 |             # Note: use int instead of compat.long once Py2 is abandoned.
330 |             if rtypes.MIN_INT32 <= o.min() and o.max() <= rtypes.MAX_INT32:
331 |                 # even though this type of array is 'long' its values still
332 |                 # fit into a normal int32 array. Good!
333 |                 o = o.astype(numpy.int32)
334 |             else:
335 |                 raise ValueError('Cannot serialize long integer arrays with '
336 |                                  'values outside MAX_INT32 (2**31-1) range')
337 | 
338 |         startPos = self._buffer.tell()
339 |         rTypeCode = self.__s_write_xt_array_tag_data(o)
340 | 
341 |         # TODO: make this also work on big endian machines (data must be
342 |         #       written in little-endian!!)
343 | 
344 |         # Note: R expects binary array data in Fortran order, so prepare this
345 |         # accordingly:
346 |         self._buffer.write(o.tobytes(order='F'))
347 | 
348 |         # Update the array header:
349 |         self.__s_update_xt_array_header(startPos, rTypeCode)
350 | 
351 |     # ############## Vectors and Tag lists ####################################
352 | 
353 |     @fmap(list, TaggedList)
354 |     def s_xt_vector(self, o):
355 |         """Render all objects of given python list into generic r vector"""
356 |         startPos = self._buffer.tell()
357 |         # remember start position for calculating length in bytes of entire
358 |         # list content
359 |         attrFlag = rtypes.XT_HAS_ATTR if o.__class__ == TaggedList else 0
360 |         self._writeDataHeader(rtypes.XT_VECTOR | attrFlag, 0)
361 |         if attrFlag:
362 |             self.s_xt_tag_list([(b'names', numpy.array(o.keys))])
363 |         for v in o:
364 |             self.serializeExpr(v)
365 |         length = self._buffer.tell() - startPos
366 |         self._buffer.seek(startPos)
367 |         # now write header again with correct length information
368 |         # subtract length of list data header:
369 |         self._writeDataHeader(rtypes.XT_VECTOR | attrFlag,
370 |                               length - rtypes.LARGE_DATA_HEADER_SIZE)
371 |         self._buffer.seek(0, os.SEEK_END)
372 | 
373 |     def s_xt_tag_list(self, o):
374 |         startPos = self._buffer.tell()
375 |         self._writeDataHeader(rtypes.XT_LIST_TAG, 0)
376 |         for tag, data in o:
377 |             self.serializeExpr(data)
378 |             self.s_string_or_symbol(tag, rTypeCode=rtypes.XT_SYMNAME)
379 |         length = self._buffer.tell() - startPos
380 |         self._buffer.seek(startPos)
381 |         # now write header again with correct length information
382 |         # subtract length of list data header:
383 |         self._writeDataHeader(rtypes.XT_LIST_TAG,
384 |                               length - rtypes.LARGE_DATA_HEADER_SIZE)
385 |         self._buffer.seek(0, os.SEEK_END)
386 | 
387 |     # ##########################################################
388 |     # ### class methods for calling specific Rserv functions ###
389 | 
390 |     @classmethod
391 |     def rEval(cls, aString, fp=None, void=False):
392 |         """
393 |         Create binary code for evaluating a string expression remotely in
394 |         Rserve
395 |         """
396 |         cmd = rtypes.CMD_voidEval if void else rtypes.CMD_eval
397 |         s = cls(cmd, fp=fp)
398 |         s.serialize(aString, dtTypeCode=rtypes.DT_STRING)
399 |         return s.finalize()
400 | 
401 |     @classmethod
402 |     def rAssign(cls, varname, o, fp=None):
403 |         """
404 |         Create binary code for assigning an expression to a variable remotely
405 |         in Rserve
406 |         """
407 |         s = cls(rtypes.CMD_setSEXP, fp=fp)
408 |         s.serialize(varname, dtTypeCode=rtypes.DT_STRING)
409 |         s.serialize(o, dtTypeCode=rtypes.DT_SEXP)
410 |         return s.finalize()
411 | 
412 |     @classmethod
413 |     def rShutdown(cls, fp=None):
414 |         s = cls(rtypes.CMD_shutdown, fp=fp)
415 |         return s.finalize()
416 | 
417 |     @classmethod
418 |     def rSerializeResponse(cls, Rexp, fp=None):
419 |         # mainly used for unittesting
420 |         s = cls(rtypes.RESP_OK, fp=fp)
421 |         s.serialize(Rexp, dtTypeCode=rtypes.DT_SEXP)
422 |         return s.finalize()
423 | 
424 | 
# Some shortcuts: module-level aliases of the RSerializer class methods
rEval = RSerializer.rEval
rAssign = RSerializer.rAssign
rSerializeResponse = RSerializer.rSerializeResponse
rShutdown = RSerializer.rShutdown
430 | 


--------------------------------------------------------------------------------
/pyRserve/rconn.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Module providing functionality to connect to a running Rserve instance
  4 | """
  5 | import socket
  6 | import time
  7 | import pydoc
  8 | 
  9 | from . import rtypes
 10 | from .rexceptions import RConnectionRefused, REvalError, PyRserveClosed
 11 | from .rserializer import rEval, rAssign, rSerializeResponse, rShutdown
 12 | from .rparser import rparse, OOBMessage
 13 | from .misc import hexString
 14 | 
 15 | RSERVEPORT = 6311
 16 | DEBUG = False
 17 | 
 18 | 
 19 | def _defaultOOBCallback(data, code=0):  # noqa
 20 |     return None
 21 | 
 22 | 
 23 | class OOBCallback(object):
 24 |     """Sets up conn with a new callback when entering the `with` block and
 25 |     restores the old one when exiting
 26 |     """
 27 |     def __init__(self, conn, callback):
 28 |         self.conn = conn
 29 |         self.callback = callback
 30 | 
 31 |     def __enter__(self):
 32 |         self.old_callback = self.conn.oobCallback
 33 |         self.conn.oobCallback = self.callback
 34 |         return self.conn
 35 | 
 36 |     def __exit__(self, exc_type, exc_value, traceback):
 37 |         self.conn.oobCallback = self.old_callback
 38 | 
 39 | 
 40 | def connect(host='', port=RSERVEPORT, unix_socket=None, atomicArray=False, defaultVoid=False,
 41 |             oobCallback=_defaultOOBCallback):
 42 |     """Open a connection to an Rserve instance
 43 |     Params:
 44 |     - host: provide hostname where Rserve runs, or leave as empty string to
 45 |             connect to localhost
 46 |     - port: Rserve port number, defaults to 6311
 47 |     - unix_socket: Unix Socket path (use in place of (host,port))
 48 |     - atomicArray:
 49 |             If True: when a result from an Rserve call is an array with
 50 |             a single element that single element
 51 |             is returned. Otherwise the array is returned unmodified.
 52 |             Default: True
 53 |     - arrayOrder:
 54 |             The order in which data in multi-dimensional arrays is returned.
 55 |             Provide 'C' for c-order, F for fortran. Default: 'C'
 56 |     - defaultVoid:
 57 |             If True then calls to conn.r('..') don't return a result by default
 58 |     - oobCallback:
 59 |             Callback to be executed when self.oobSend/oobMessage is called from
 60 |             R. The callback receives the submitted data and a user code as
 61 |             parameters. If self.oobMessage was used, the result value of the
 62 |             callback is sent back to R.
 63 |             Default: lambda data, code=0: None (oobMessage will return NULL)
 64 |     """
 65 |     if host in (None, ''):
 66 |         # On Win32 it seems that passing an empty string as 'localhost' does
 67 |         # not work. So just to be sure provide the full local hostname if None
 68 |         # or '' were passed.
 69 |         host = 'localhost'
 70 |     assert port is not None, 'port number must be given'
 71 |     return RConnector(host, port, unix_socket, atomicArray, defaultVoid, oobCallback)
 72 | 
 73 | 
 74 | def checkIfClosed(func):
 75 |     def decoCheckIfClosed(self, *args, **kw):
 76 |         if self.isClosed:
 77 |             raise PyRserveClosed('Connection to Rserve already closed')
 78 |         try:
 79 |             return func(self, *args, **kw)
 80 |         except socket.error as msg:
 81 |             if msg.strerror in ['Connection reset by peer', 'Broken pipe']:
 82 |                 # seems like the connection to Rserve has died, so mark
 83 |                 # the connection as closed
 84 |                 self.close()
 85 |                 raise PyRserveClosed('Connection to Rserve already closed')
 86 |             else:
 87 |                 raise
 88 |     return decoCheckIfClosed
 89 | 
 90 | 
 91 | class RConnector(object):
 92 |     """Provide a network connector to an Rserve process"""
 93 |     def __init__(self, host, port, unix_socket, atomicArray, defaultVoid,
 94 |                  oobCallback=_defaultOOBCallback):
 95 |         self.sock = None
 96 |         self.__closed = True
 97 |         self.host = host
 98 |         self.port = port
 99 |         self.unix_socket = unix_socket
100 |         self.atomicArray = atomicArray
101 |         self.defaultVoid = defaultVoid
102 |         self.oobCallback = oobCallback
103 |         self.r = RNameSpace(self)
104 |         self.ref = RNameSpaceReference(self)
105 |         self.connect()
106 | 
107 |     def __repr__(self):
108 |         txt = 'Closed handle' if self.isClosed else 'Handle'
109 |         if self.unix_socket:
110 |             return '<%s to Rserve on %s>' % \
111 |                    (txt, self.unix_socket)
112 |         else:
113 |             return '<%s to Rserve on %s:%s>' % \
114 |                    (txt, self.host or 'localhost', self.port)
115 | 
116 |     @property
    def isClosed(self):
        """True if the connection is marked as closed (read-only)"""
        return self.__closed
119 | 
120 |     def connect(self):
121 |         if self.unix_socket:
122 |             self.sock = socket.socket(socket.AF_UNIX)
123 |             try:
124 |                 self.sock.connect(self.unix_socket)
125 |             except socket.error:
126 |                 raise RConnectionRefused('Connection denied, server not reachable '
127 |                                          'or not accepting connections')            
128 |         else:
129 |             self.sock = socket.socket()
130 |             try:
131 |                 self.sock.connect((self.host, self.port))
132 |             except socket.error:
133 |                 raise RConnectionRefused('Connection denied, server not reachable '
134 |                                          'or not accepting connections')
135 |         time.sleep(0.2)
136 |         hdr = self.sock.recv(1024)
137 |         self.__closed = False
138 |         if DEBUG:
139 |             print('received hdr %s from rserve' % hdr)
140 |         # make sure we are really connected with rserv
141 |         assert hdr.startswith(b'Rsrv01'), \
142 |             'Protocol error with Rserv, obtained invalid header string'
143 |         # TODO: possibly also do version checking here to make sure we
144 |         #       understand the protocol...
145 | 
146 |     @checkIfClosed
    def close(self):
        """
        Close network connection to rserve and mark the connector as closed.

        Because of the checkIfClosed decorator, calling close() on an already
        closed connector raises PyRserveClosed.
        """
        self.sock.close()
        # only reached if closing the socket did not raise
        self.__closed = True
151 | 
152 |     @checkIfClosed
153 |     def shutdown(self):
154 |         rShutdown(fp=self.sock)
155 |         self.close()
156 | 
157 |     def _reval(self, aString, void):
158 |         rEval(aString, fp=self.sock, void=void)
159 | 
160 |     def _rrespond(self, aObj):
161 |         rSerializeResponse(aObj, fp=self.sock)
162 | 
163 |     @checkIfClosed
164 |     def eval(self, aString, atomicArray=None, void=False):
165 |         """
166 |         Evaluate a string expression through Rserve and return the result
167 |         transformed into python objects
168 |         """
169 |         if not type(aString in rtypes.STRING_TYPES):
170 |             raise TypeError('Only string evaluation is allowed')
171 |         self._reval(aString, void)
172 |         if DEBUG:
173 |             # Read entire data into memory en bloque, it's easier to debug
174 |             src = self._receive()
175 |             print('Raw response: %s' % hexString(src))
176 |         else:
177 |             src = self.sock
178 | 
179 |         if atomicArray is None:
180 |             # if not specified, use the global default:
181 |             atomicArray = self.atomicArray
182 | 
183 |         try:
184 |             message = rparse(src, atomicArray=atomicArray)
185 |             # Before the result is returned, 0-∞ OOB messages may be sent
186 |             while isinstance(message, OOBMessage):
187 |                 if DEBUG:
188 |                     print('OOB Message received:', message)
189 |                 ret = self.oobCallback(message.data, message.userCode)
190 |                 if message.type == rtypes.OOB_MSG:
191 |                     self._rrespond(ret)
192 | 
193 |                 if isinstance(src, (str, bytes)):
194 |                     # This is no stream, so we have to cut off data
195 |                     src = src[len(message):]
196 | 
197 |                 message = rparse(src, atomicArray=atomicArray)
198 |             return message
199 |         except REvalError:
200 |             # R has reported an evaluation error, so let's obtain a descriptive
201 |             # explanation about why the error has occurred. R allows to
202 |             # retrieve the error message of the last exception via a built-in
203 |             # function called 'geterrmessage()'.
204 |             errorMsg = self.eval('geterrmessage()').strip()
205 |             raise REvalError(errorMsg)
206 | 
207 |     @checkIfClosed
208 |     def voidEval(self, aString):
209 |         """
210 |         Evaluate a string expression through Rserve without returning
211 |         any result data
212 |         """
213 |         self.eval(aString, void=True)
214 | 
215 |     @checkIfClosed
216 |     def _receive(self):
217 |         """Receive the result from a previous call to rserve."""
218 |         raw = self.sock.recv(rtypes.SOCKET_BLOCK_SIZE)
219 |         d = [raw]
220 |         while len(raw) == rtypes.SOCKET_BLOCK_SIZE:
221 |             raw = self.sock.recv(rtypes.SOCKET_BLOCK_SIZE)
222 |             d.append(raw)
223 |         return ''.join(d)
224 | 
225 | #    @checkIfClosed
226 | #    def _raw(self, *args, **kw):
227 | #        self.send(*args)
228 | #        return self.receive()
229 | 
230 |     @checkIfClosed
231 |     def setRexp(self, name, o):
232 |         """
233 |         Convert a python object into an RExp and bind it to a variable
234 |         called "name" in the R namespace
235 |         """
236 |         rAssign(name, o, self.sock)
237 |         # Rserv sends an emtpy confirmation message, or error message in case
238 |         # of an error. rparse() will raise an Exception in the latter case.
239 |         rparse(self.sock, atomicArray=self.atomicArray)
240 | 
241 |     @checkIfClosed
242 |     def getRexp(self, name):
243 |         """Retrieve a Rexp stored in a variable called 'name'"""
244 |         return self.eval(name)
245 | 
246 |     @checkIfClosed
247 |     def callFunc(self, name, *args, **kw):
248 |         """
249 |         @brief  make a call to a function "name" through Rserve
250 |         @detail positional and keyword arguments are first stored as local
251 |                 variables in the R namespace and then delivered to the
252 |                 function.
253 |         @result Whatever the result of the called function is.
254 |         """
255 |         if name == 'rm':
256 |             # SPECIAL HANDLING FOR "rm()":
257 |             # Calling "rm" with real values instead of reference to values
258 |             # works, however it doesn't produce the desired effect (it only
259 |             # removes temporaily created variables). To avoid confusion for
260 |             # the users a check is applied here to make sure that "args" only
261 |             # contains variable or function references (proxies) and NOT
262 |             # values!
263 |             assert [x for x in args if not isinstance(x, RBaseProxy)] == (),\
264 |                 'Only references to variables or functions allowed for "rm()"'
265 | 
266 |         argNames = []
267 |         for idx, arg in enumerate(args):
268 |             if isinstance(arg, RBaseProxy):
269 |                 argName = arg.__name__
270 |             else:
271 |                 # a real python value is passed. Set a value of an artificial
272 |                 # variable on the R side, memorize its name for making the
273 |                 # actual call to the function below
274 |                 argName = 'arg_%d_' % idx
275 |                 self.setRexp(argName, arg)
276 |             argNames.append(argName)
277 |         for key, value in kw.items():
278 |             if isinstance(value, RBaseProxy):
279 |                 argName = value.__name__
280 |             else:
281 |                 argName = 'kwarg_%s_' % key
282 |                 self.setRexp(argName, value)
283 |             argNames.append('%s=%s' % (key, argName))
284 |         return self.eval(name+'(%s)' % ', '.join(argNames))
285 | 
286 |     @checkIfClosed
287 |     def assign(self, aDict):
288 |         """Assign all items of the dictionary to the default R namespace"""
289 |         for k, v in aDict.items():
290 |             self.setRexp(k, v)
291 | 
292 |     @checkIfClosed
293 |     def isFunction(self, name):
294 |         """Check whether given name references an existing function in R"""
295 |         return self.eval('is.function(%s)' % name)
296 | 
297 | 
class RNameSpace(object):
    """
    Access point to the default namespace of an Rserve connection.

    Attribute assignment stores a value in R, attribute lookup fetches a
    value (or returns a proxy), and calling the instance evaluates a string.
    """
    def __init__(self, rconn):
        # write into the instance dict directly: a normal assignment would be
        # redirected to R by __setattr__ below
        vars(self)['_rconn'] = rconn

    def __setattr__(self, name, o):
        """Store 'o' in R under the variable name 'name'"""
        self._rconn.setRexp(name, o)

    def __getattr__(self, name):
        """
        Look up 'name' in R.

        Functions are returned as RFuncProxy. For anything else a leading
        underscore in 'name' selects a RVarProxy (the underscore is not part
        of the R name); without it the value itself is fetched.
        """
        wantsProxy = name.startswith('_')
        realname = name[1:] if wantsProxy else name
        try:
            refersToFunction = self._rconn.isFunction(realname)
        except Exception:
            # an error is only raised if neither such a function or variable
            # exists at all!
            raise NameError('no such variable or function "%s" '
                            'defined in Rserve' % realname)
        if refersToFunction:
            return RFuncProxy(realname, self._rconn)
        if wantsProxy:
            return RVarProxy(realname, self._rconn)
        return self._rconn.getRexp(name)

    def __call__(self, aString, atomicArray=None, void=None):
        """
        Evaluate 'aString' through the connection.

        If 'void' is None the connection's 'defaultVoid' setting is used.
        """
        effectiveVoid = self._rconn.defaultVoid if void is None else void
        return self._rconn.eval(aString, atomicArray=atomicArray,
                                void=effectiveVoid)
334 | 
335 | 
class RNameSpaceReference(object):
    """
    Hands out references (proxies) to R objects instead of their values.
    """
    def __init__(self, rconn):
        vars(self)['_rconn'] = rconn

    def __getattr__(self, name):
        """
        Return a proxy for the R object called 'name': a RFuncProxy if it is
        a function, a RVarProxy otherwise.
        """
        try:
            refersToFunction = self._rconn.isFunction(name)
        except Exception:
            # an error is only raised if neither such a function or variable
            # exists at all!
            raise NameError('no such variable or function "%s" '
                            'defined in Rserve' % name)
        proxyClass = RFuncProxy if refersToFunction else RVarProxy
        return proxyClass(name, self._rconn)
356 | 
357 | 
class RBaseProxy(object):
    """
    Common base of the proxies which reference a variable or a function in R.
    Meant to be subclassed, not to be instantiated directly.
    """
    def __init__(self, name, rconn):
        # connection used to reach the referenced R object
        self._rconn = rconn
        # name of the referenced object on the R side
        self.__name__ = name
366 | 
367 | 
class RVarProxy(RBaseProxy):
    """Reference to a variable living in R"""
    def __repr__(self):
        text = '<RVarProxy to variable "%s">' % self.__name__
        return text

    def value(self):
        """Fetch the current value of the referenced variable from R"""
        rexp = self._rconn.getRexp(self.__name__)
        return rexp
375 | 
376 | 
class RFuncProxy(RBaseProxy):
    """
    Proxy for function calls to Rserve.

    Calling the proxy calls the R function of the same name. Attribute access
    builds a proxy for a dotted R name (e.g. 't.test'), and '__doc__' /
    'help()' deliver the help text of the function from R.
    """
    def __repr__(self):
        return '<RFuncProxy to function "%s">' % self.__name__

    def __call__(self, *args, **kw):
        """Call the referenced R function with the given arguments"""
        return self._rconn.callFunc(self.__name__, *args, **kw)

    # command to send to R in order to get the help for a function in text
    # format:
    R_HELP = "capture.output(tools:::Rd2txt(utils:::.getHelpFile(help(%s))))"

    @property
    def __doc__(self):
        """
        Return the help text of the function as one string, or None if the
        help request fails with a REvalError.

        There are different ways to get the help message from R:
        # Get the package db file:
        pkgRdDB = tools:::fetchRdDB(file.path(find.package('base'),
                                              'help', 'base'))
        # show all available topics in the help package:
        names(pkgRdDB)
        # convert the 'lapply' help message to text (from the base package):
        tools::Rd2txt(pkgRdDB[['lapply']])
        # capture this output into a variable:
        a <- capture.output(tools::Rd2txt(pkgRdDB[['lapply']]))
        Disadvantage: One needs to know the package beforehand.

        Better:
        Everything in one line and better (doesn't need to know the pkg):
        a <- capture.output(tools:::Rd2txt(utils:::.getHelpFile(help(sapply))))
        """
        try:
            d = self._rconn.eval(self.R_HELP % self.__name__)
        except REvalError:
            # probably no help available, unfortunately there is no specific
            # code for this...
            return None
        # Join the list of strings:
        helpstring = '\n'.join(d)
        # remove some obscure characters:
        # helpstring = helpstring.replace('_\x08', '')
        return helpstring

    def help(self):
        """Directly page the help message to the terminal (e.g. via less)"""
        helpstring = self.__doc__
        if helpstring is None:
            # __doc__ returns None when R could not deliver a help text; the
            # pager needs a string, so show a short notice instead
            helpstring = 'No help available for "%s"' % self.__name__
        pydoc.pager(helpstring)

    def __getattr__(self, name):
        """
        Allow for nested name space calls, e.g. 't.test'

        Raises NameError if R cannot resolve the dotted name, and
        AttributeError for the proxy's own attributes if they are unset.
        """
        if name in ('__name__', '_rconn'):
            # __getattr__ is only consulted when the regular lookup failed,
            # i.e. the attribute is really missing (e.g. on an instance
            # created without __init__ by copy or pickle). Reading it again
            # from here would recurse endlessly, so report it as missing.
            raise AttributeError(name)

        concatName = "%s.%s" % (self.__name__, name)
        try:
            self._rconn.isFunction(concatName)
        except Exception:
            # an error is only raised if neither such a function or variable
            # exists at all!
            raise NameError('no such variable or function "%s" '
                            'defined in R' % concatName)
        return RFuncProxy(concatName, self._rconn)
440 | 
441 | 
def _test_main():
    """
    Manual smoke test: connect to Rserve, run the R test script, store one
    of its results as PNG file and define a few variables/functions in R.

    Also sets up a readline history which is kept in '~/.pyhistory'.
    """
    import os
    import readline
    import atexit
    # Setup history and readline facility for remote q:
    # (expanduser also works if the HOME environment variable is not set)
    histfile = os.path.join(os.path.expanduser('~'), '.pyhistory')
    try:
        readline.read_history_file(histfile)
    except IOError:
        pass
    atexit.register(readline.write_history_file, histfile)

    conn = connect()
    print('"conn" is your handle to rserve. Type e.g. "conn(\'1\')" '
          'for string evaluation.')
    # r('x<-1:20; y<-x*2; lm(y~x)')
    with open('../testData/test-script.R') as scriptFile:
        sc = scriptFile.read()
    v = conn.r(sc)
    # NOTE(review): v[3] is presumably the raw PNG data (bytes) produced by
    # the script, hence the binary mode - confirm against test-script.R
    with open('r-test-png.png', 'wb') as pngFile:
        pngFile.write(v[3])
    conn.r.v = 'abc'
    conn.r('func0 <- function() { 3 }')
    conn.r('func1 <- function(a1) { a1 }')
    conn.r('func2 <- function(a1, a2) { list(a1, a2) }')
    conn.r('funcKW <- function(a1=1, a2=4) { list(a1, a2) }')
    conn.r('squared<-function(t) t^2')
468 | 
469 | if __name__ == '__main__':
470 |     _test_main()
471 | 


--------------------------------------------------------------------------------
/testing/test_rparser.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Unittesting module for rparser
  4 | """
  5 | import datetime
  6 | ###
  7 | import numpy
  8 | import pytest
  9 | ###
 10 | from pyRserve import rtypes, rserializer, rparser
 11 | from pyRserve.rconn import RVarProxy, OOBCallback
 12 | from pyRserve.misc import PY3
 13 | from pyRserve.rexceptions import REvalError
 14 | from pyRserve.taggedContainers import TaggedList, TaggedArray
 15 | ###
 16 | from .testtools import compareArrays
 17 | 
 18 | 
 19 | # ### Test string evaluations in R
 20 | 
 21 | def test_eval_strings(conn):
 22 |     """
 23 |     Test plain string, byte-strings, unicodes (depending on Python version)
 24 |     """
 25 |     assert conn.r("''") == ''
 26 |     assert conn.r("'abc'") == 'abc'
 27 | 
 28 |     # make sure also byte-strings are handled successfully.
 29 |     # Makes no difference in PY2, but in PY3 it does:
 30 |     assert conn.r(b"'abc'") == 'abc'
 31 | 
 32 |     # test via call to ident function with single argument:
 33 |     assert conn.r.ident('abc') == 'abc'
 34 | 
 35 |     try:
 36 |         # make sure also unicode strings are handled successfully in Python2.x
 37 |         # Since u'abc' would raise a SyntaxError when this module is loaded
 38 |         # in Py3 < 3.3 we have to create the unicode string via eval at
 39 |         # runtime:
 40 |         unicode_str = eval("""u'"abc"'""")
 41 |     except SyntaxError:
 42 |         # outdated PY3 version, so just skip the rest
 43 |         return
 44 | 
 45 |     assert conn.r(unicode_str) == 'abc'
 46 | 
 47 |     # test via call to ident function with single argument:
 48 |     assert conn.r.ident(eval("u'abc'")) == 'abc'
 49 | 
 50 | 
 51 | def test_eval_string_arrays(conn):
 52 |     """Test for string arrays"""
 53 |     assert compareArrays(conn.r("'abc'", atomicArray=True),
 54 |                          numpy.array(['abc']))
 55 |     assert compareArrays(conn.r("c('abc', 'def')"),
 56 |                          numpy.array(['abc', 'def']))
 57 |     assert compareArrays(conn.r("c('abc', NA, 'def')"),
 58 |                          numpy.array(['abc', None, 'def']))
 59 | 
 60 |     # test via call to ident function with single argument:
 61 |     assert compareArrays(conn.r.ident(numpy.array(['abc', 'def'])),
 62 |                          numpy.array(['abc', 'def']))
 63 | 
 64 | 
 65 | def test_eval_unicode_arrays(conn):
 66 |     """
 67 |     Test for unicode arrays. The ident function should return the
 68 |     same array, just not as unicode
 69 |     """
 70 |     try:
 71 |         u1 = eval("u'abc'")
 72 |         u2 = eval("u'def'")
 73 |     except SyntaxError:
 74 |         # Python 3 below 3.3 does not accept the u'' operator,
 75 |         # just skip this test!
 76 |         return
 77 | 
 78 |     # test via call to ident function with single argument:
 79 |     assert conn.r.ident(numpy.array([u1])) == 'abc'
 80 |     assert compareArrays(conn.r.ident(numpy.array([u1, u2])),
 81 |                          numpy.array(['abc', 'def']))
 82 | 
 83 | 
 84 | # ### Test integers
 85 | 
 86 | def test_eval_integers(conn):
 87 |     """
 88 |     Test different types and sizes of integers.
 89 |     Note that R converts all integers into floats
 90 |     """
 91 |     res = conn.r("0")
 92 |     assert res == 0.0
 93 |     assert type(res) is float
 94 | 
 95 |     assert conn.r("1") == 1.0
 96 | 
 97 |     # ### Create real integers in R:
 98 |     res = conn.r('as.integer(c(1))')
 99 |     assert res == 1
100 |     assert type(res) == int
101 | 
102 |     # test via call to ident function with single argument:
103 |     assert conn.r.ident(5) == 5
104 | 
105 | 
def test_eval_long(conn):
    """
    Test long integers. Going beyond MAX_INT32 works with eval() because
    in R all integers are converted to floats right away. However sending a
    long as functional parameter should raise a ValueError if its value is
    outside the normal integer range (i.e. beyond MAX_INT32).
    """
    assert conn.r("%d" % rtypes.MAX_INT32) == rtypes.MAX_INT32
    # Next test for long integers, handled as floats in R via eval():
    assert conn.r("%d" % (rtypes.MAX_INT32*2)) == rtypes.MAX_INT32*2

    # The syntax like 234L only exists in Python2! So use long in Py2.
    # In Python3 everything is of type <int>.
    # Send a long value which is still below rtypes.MAX_INT32.
    # It is automatically converted to a normal int in the rserializer and
    # hence should work fine:
    toLong = int if PY3 else long  # noqa    No 'long' function in PY3
    # compare the value itself, a pure truth test would accept any non-zero
    # result:
    assert conn.r.ident(toLong(123)) == 123

    # Here comes the problem - there is no native 64bit integer on the R side,
    # so this should raise a ValueError
    pytest.raises(ValueError, conn.r.ident, rtypes.MAX_INT32*2)
128 | 
129 | 
def test_eval_integer_arrays(conn):
    """
    Evaluate vectors of whole numbers. Unless as.integer() is used in R the
    result is a numpy float array.
    """
    single = conn.r("266", atomicArray=True)
    assert compareArrays(single, numpy.array([266]))
    pair = conn.r("c(55, -35)")
    assert compareArrays(pair, numpy.array([55.0, -35.0]))

    floats = conn.r("c(55, -35)")
    assert isinstance(floats, numpy.ndarray)
    assert floats.dtype == float

    # ### Create real integer arrays in R:
    ints = conn.r('as.integer(c(1, 5))')
    assert compareArrays(ints, numpy.array([1, 5]))
    assert ints.dtype in (int, numpy.int32)

    # round trip through the ident function with a single argument:
    echoed = conn.r.ident(numpy.array([1, 5]))
    assert compareArrays(echoed, numpy.array([1, 5]))
149 | 
150 | 
def test_eval_long_arrays(conn):
    """
    Sending an int64 array containing a value outside the 32bit integer
    range must raise a ValueError.

    The complementary check (an int64 array whose values all fit into 32bit
    is accepted) is currently disabled, see the commented lines below.
    """
    toLong = int if PY3 else long    # noqa   No 'long' function in PY3
    # arr64 = numpy.array([rtypes.MIN_INT32, toLong(5)], dtype=numpy.int64)
    # assert compareArrays(conn.r.ident(arr64), arr64)

    # an int64 array with a value beyond the 32bit range:
    tooBig = toLong(-rtypes.MAX_INT32 * 2)
    arr64big = numpy.array([tooBig, toLong(5)], dtype=numpy.int64)
    pytest.raises(ValueError, conn.r.ident, arr64big)
166 | 
167 | 
168 | # ### Test floats
169 | 
def test_eval_floats(conn):
    """Evaluate float literals of different sizes and signs"""
    zero = conn.r("0.0")
    assert zero == 0.0
    assert type(zero) is float

    for rCode, expected in (("1.0", 1.0),
                            ("c(1.0)", 1.0),
                            ("-746586.56", -746586.56)):
        assert conn.r(rCode) == expected

    # round trip through the ident function with a single argument:
    assert conn.r.ident(5.5) == 5.5
182 | 
183 | 
def test_eval_float_arrays(conn):
    """Float vectors from R are compared against numpy float arrays"""
    single = conn.r("266.5", atomicArray=True)
    assert compareArrays(single, numpy.array([266.5]))
    pair = conn.r("c(55.2, -35.7)")
    assert compareArrays(pair, numpy.array([55.2, -35.7]))

    floats = conn.r("c(55.5, -35.5)")
    assert isinstance(floats, numpy.ndarray)
    assert floats.dtype == float

    # round trip through the ident function with a single argument:
    echoed = conn.r.ident(numpy.array([1.7, 5.6]))
    assert compareArrays(echoed, numpy.array([1.7, 5.6]))
196 | 
197 | 
198 | # ### Test complex numbers
199 | 
def test_eval_complex(conn):
    """Complex numbers created in R arrive as Python complex values"""
    zero = conn.r("complex(real = 0, imaginary = 0)")
    assert zero == (0+0j)
    assert type(zero) is complex

    value = conn.r("complex(real = 5.5, imaginary = -3.3)")
    assert value == 5.5-3.3j

    # round trip through the ident function with a single argument:
    assert conn.r.ident(5.5-3.3j) == 5.5-3.3j
210 | 
211 | 
def test_eval_complex_arrays(conn):
    """Complex vectors are compared against numpy complex arrays"""
    single = conn.r("complex(real = 5.5, imaginary = 6.6)", atomicArray=True)
    assert compareArrays(single, numpy.array([(5.5+6.6j)]))
    assert isinstance(single, numpy.ndarray)
    assert single.dtype == complex

    # round trip through the ident function with a single argument:
    sent = numpy.array([(5.5+6.6j), (-3.0-6j)])
    assert compareArrays(conn.r.ident(sent), sent)
222 | 
223 | 
224 | # ### Test boolean values
225 | 
def test_eval_bool(conn):
    """TRUE and FALSE in R are the bool singletons in Python"""
    truth = conn.r('TRUE')
    assert truth is True
    assert type(truth) is bool
    assert conn.r('FALSE') is False

    # round trip through the ident function with a single argument:
    assert conn.r.ident(True) is True
235 | 
236 | 
def test_eval_bool_arrays(conn):
    """Logical vectors are compared against numpy bool arrays"""
    single = conn.r('TRUE', atomicArray=True)
    assert compareArrays(single, numpy.array([True]))
    assert single.dtype == bool

    pair = conn.r('c(TRUE, FALSE)')
    assert compareArrays(pair, numpy.array([True, False]))
    # NA inside the vector is expected as None:
    withNA = conn.r('c(TRUE, NA, FALSE)')
    assert compareArrays(withNA, numpy.array([True, None, False]))

    # round trip through the ident function with a single argument:
    echoed = conn.r.ident(numpy.array([True, False, False]))
    assert compareArrays(echoed, numpy.array([True, False, False]))
248 | 
249 | 
def test_empty_boolean_array(conn):
    """A zero-length logical vector must arrive as empty bool array"""
    conn.r('empty_bool_arr = as.logical(c())')
    expected = numpy.array([], dtype=bool)
    assert compareArrays(conn.r.empty_bool_arr, expected)
254 | 
255 | 
256 | # ### Test null value
257 | 
def test_null_value(conn):
    """R's NULL corresponds to None in Python, in both directions"""
    fromR = conn.r('NULL')
    assert fromR is None
    echoed = conn.r.ident(None)
    assert echoed is None
262 | 
263 | 
264 | # ### Test large data objects
265 | 
def test_large_objects(conn):
    """Test that data objects larger than 2**24 bytes are supported.
    Send an array back and forth between Python and R before comparing.
    """
    # make an integer (int32) array a little bit larger than 2**24 bytes;
    # floor division keeps the element count an integer in Python 3 as well
    arr = numpy.arange(2**24 // 4 + 100, dtype=numpy.int32)
    conn.r.largearr = arr
    # the comparison result has to be asserted, otherwise a mismatch would
    # go unnoticed
    assert compareArrays(arr, conn.r.largearr)
274 | 
275 | 
276 | # ### Test list function
277 | 
def test_lists(conn):
    """Untagged R lists translate directly into Python lists"""
    cases = (
        ('list()', []),
        # with strings
        ('list("otto")', ['otto']),
        ('list("otto", "amma")', ['otto', 'amma']),
        # with numbers, same type and mixed
        ('list(1)', [1]),
        ('list(1, 5)', [1, 5]),
        ('list(1, complex(real = 5.5, imaginary = -3.3))', [1, 5.5-3.3j]),
    )
    for rCode, expected in cases:
        assert conn.r(rCode) == expected

    # make a Python-style call to the list-function:
    assert conn.r.list(1, 2, 5) == [1, 2, 5]

    # round trip through the ident function with a single argument:
    assert conn.r.ident([1, 2, 5]) == [1, 2, 5]
295 | 
296 | 
def test_tagged_lists(conn):
    """
    Tests 'tagged' lists, i.e. lists whose items can be addressed by name
    and not only by index.
    Those R lists are translated into 'TaggedList'-objects in Python.
    """
    oneItem = conn.r('list(husband="otto")')
    assert oneItem == TaggedList([("husband", "otto")])

    # a list with several tags and values of different types:
    expected = TaggedList([("n", "Fred"), ("v", 2.0),
                           ("c_ages", numpy.array([1.0, 2.0]))])
    received = conn.r('list(n="Fred", v=2, c_ages=c(1, 2))')
    # do string comparison because of complex nested data!
    assert repr(received) == repr(expected)

    # round trip through the ident function with a single argument:
    # do string comparison because of complex nested data!
    assert repr(conn.r.ident(expected)) == repr(expected)

    # NOTE: A mixed list where the 2nd item has no tag fails in the
    # rserializer because of the missing tag:  <<<<--------- TODO!!
    # conn.r.ident(TaggedList([("n","Fred"), 2.0, ("c_ages", 5.5)])
320 | 
321 | 
def test_vector_expression(conn):
    """
    Tests for typecode 0x1a XT_VECTOR_EXP - the content of an R expression
    is expected as python list
    """
    # an expression without content:
    emptyExpr = conn.r('expression()')
    assert emptyExpr == []

    # an expression with content:
    filledExpr = conn.r('expression("1+1")')
    assert filledExpr == ['1+1']
334 | 
335 | 
336 | # ### Test more numpy arrays
337 | # ### Many have been test above, but generally only 1-d arrays. Let's look at
338 | # ### arrays with higher dimensions
339 | 
def test_2d_arrays_created_in_python(conn):
    """
    Transfer 2d arrays of several types and memory orders to R and check
    that shape, rows and columns survive.
    """
    bools = [True, False, True, True]
    strings = ['abc', 'def', 'ghi', 'jkl']
    candidates = [
        # next is same as: numpy.array([[0,1,2], [3,4,5]])
        numpy.arange(6).reshape((2, 3), order='C'),
        # next is same as: numpy.array([[0,2,4], [1,3,5]])
        numpy.arange(6).reshape((2, 3), order='F'),
        # next is same as: numpy.array([[True, False], [True, True]])
        numpy.array(bools).reshape((2, 2), order='C'),
        # next is same as: numpy.array([[True, True], [False, True]])
        numpy.array(bools).reshape((2, 2), order='F'),
        numpy.array(strings).reshape((2, 2), order='C'),
        numpy.array(strings).reshape((2, 2), order='F'),
    ]

    for candidate in candidates:
        echoed = conn.r.ident(candidate)
        assert echoed.shape == candidate.shape
        assert compareArrays(echoed, candidate)

        # assign array within R namespace and check some cols and rows:
        conn.r.arr = candidate
        # 2nd row in Python is index 1, in R it is index 2:
        assert compareArrays(candidate[1], conn.r('arr[2,]'))
        # same for the 2nd column:
        assert compareArrays(candidate[:, 1], conn.r('arr[,2]'))
371 | 
372 | 
def test_2d_numeric_array_created_in_R(conn):
    """
    Create a 2x3 array in R, fetch it, and check that rows and columns
    are the same on both sides.
    Note: Arrays in R are always in Fortran order, i.e. first index moves
    fastest.

    The array in R looks like:
         [,1] [,2] [,3]
    [1,]    1    3    5
    [2,]    2    4    6
    """
    matrix = conn.r('arr = array(1:6, dim=c(2, 3))')
    assert compareArrays(conn.r.arr, matrix)

    # 2nd (last) row: length and content
    lastRow = matrix[1]
    assert len(lastRow) == len(conn.r('arr[2,]')) == 3
    assert compareArrays(lastRow, conn.r('arr[2,]'))

    # 2nd (middle) column: length and content
    middleCol = matrix[:, 1]
    assert len(middleCol) == len(conn.r('arr[,2]')) == 2
    assert compareArrays(middleCol, conn.r('arr[,2]'))
395 | 
396 | 
def test_tagged_array(conn):
    """A named numeric vector is compared to a TaggedArray (values, tags)"""
    received = conn.r('c(a=1.,b=2.,c=3.)')
    expected = TaggedArray.new(numpy.array([1., 2., 3.]), ['a', 'b', 'c'])
    assert compareArrays(received, expected)
    # compare the tags of both arrays
    assert received.keys() == expected.keys()
402 | 
403 | 
def test_very_large_result_array(conn):
    """Check that a SEXP with XT_LARGE set in header is properly parsed """
    expectedSize = 9999999
    bigArray = conn.r('c(1:9999999)')
    assert bigArray.size == expectedSize
408 | 
409 | 
def test_eval_void(conn):
    """
    Void evaluation (conn.voidEval() or void=True) returns None, while a
    normal evaluation returns the result.
    """
    # normal evaluation delivers the assigned value:
    assert conn.r('a=1') == 1.0
    assert conn.eval('a=1') == 1.0

    # all void variants deliver nothing:
    assert conn.voidEval('a=1') is None
    assert conn.eval('a=1', void=True) is None
    assert conn.r('a=1', void=True) is None
420 | 
421 | 
422 | # ### Test evaluation of some R functions
423 | 
def test_eval_sequence(conn):
    """Call R's seq() via string evaluation and via function proxy"""
    # string evaluation of the R expression:
    viaString = conn.r('seq(1, 5)')
    assert compareArrays(viaString, numpy.array(range(1, 6)))
    assert viaString.dtype == numpy.int32

    # Python-style call to the R function:
    viaProxy = conn.r.seq(1, 5)
    assert compareArrays(viaProxy, numpy.array(range(1, 6)))
432 | 
433 | 
def test_eval_polyroot(conn):
    """Call R's polyroot() via string evaluation and via function proxy"""
    # string evaluation of the R expression:
    viaString = conn.r('polyroot(c(-39.141,151.469,401.045))')
    expected = numpy.array([0.1762039 + 1.26217745e-29j,
                            -0.5538897 - 1.26217745e-29j])
    assert compareArrays(viaString, expected)

    # Python-style call, the coefficients are built by R's c() function:
    coefficients = conn.r.c(-39.141, 151.469, 401.045)
    assert compareArrays(conn.r.polyroot(coefficients), expected)
444 | 
445 | 
def test_eval_very_convoluted_function_result(conn):
    """
    The result of a linear model fit is a highly nested data structure.
    Only its tags and the coefficients are checked here.
    """
    model = conn.r('x<-1:20; y<-x*2; lm(y~x)')
    assert model.__class__ == TaggedList
    # check which tags the TaggedList has:
    expectedTags = ['coefficients', 'residuals', 'effects', 'rank',
                    'fitted.values', 'assign', 'qr', 'df.residual',
                    'xlevels', 'call', 'terms', 'model']
    assert model.keys == expectedTags
    expectedCoeffs = TaggedArray.new(numpy.array([-0.,  2.]),
                                     ['(Intercept)', 'x'])
    assert compareArrays(model['coefficients'], expectedCoeffs)
    # ... many more tags could be tested here ...
461 | 
462 | 
def test_s4(conn):
    """
    An S4 instance is expected as rparser.S4 object which offers dict-like
    access to its slots and lists its classes.
    """
    res = conn.r('''
        track <- setClass("track",
                          slots = c(x="numeric", y="NULL"))
        track(x = 1:10, y = NULL)
    ''')
    assert isinstance(res, rparser.S4)
    assert res.classes == ['track']

    # slots: names first, then their values
    slotNames = set(res.keys())
    assert slotNames == {'x', 'y'}
    assert compareArrays(res['x'], numpy.arange(1, 11))
    assert res['y'] is None

    assert "<S4 classes=['track'] {" in repr(res)
478 | 
479 | 
def test_s4_empty(conn):
    """
    Rare but possible: an S4 object whose class and attributes have been
    removed. It must still arrive as rparser.S4, with no classes.
    """
    res = conn.r('''
        empty <- setClass("empty", slots = c(dummy = 'NULL'))
        e <- empty()
        class(e) <- NULL
        attributes(e) <- NULL
        e
    ''')
    assert isinstance(res, rparser.S4)
    noClasses = []
    assert res.classes == noClasses
493 | 
494 | 
def test_sapply_with_func_proxy_argument(conn):
    """
    Call sapply and hand over a proxy of an R function (abs) as argument
    """
    absProxy = conn.r.abs
    assert conn.r.sapply(-5, absProxy) == 5
501 | 
502 | 
503 | # ### Some more tests
504 | 
def test_rAssign_method(conn):
    """
    Compare the byte string built by rserializer.rAssign() with a reference
    and assign a value through the connection.
    """
    expected = (
        b'\x20\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
        b'\x00\x44\x04\x00\x00\x00\x00\x00\x00\x76\x00\x00\x00\x4a\x0c'
        b'\x00\x00\x00\x00\x00\x00\x60\x04\x00\x00\x00\x00\x00\x00\x01'
        b'\x00\x00\x00'
    )
    assert rserializer.rAssign('v', 1) == expected

    # now assign a value via the connector and read it back:
    conn.r.aaa = 'a123'
    assert conn.r.aaa == 'a123'
516 | 
517 | 
def test_rEval_method():
    """Compare the byte string built by rserializer.rEval() to a reference"""
    expected = (
        b'\x03\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
        b'\x44\x04\x00\x00\x00\x00\x00\x00\x61\x3d\x31\x00'
    )
    assert rserializer.rEval('a=1') == expected
523 | 
524 | 
def test_serialize_DT_INT():
    """Serialize the integer 55 as DT_INT and compare to a reference"""
    expected = (
        b'\x03\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
        b'\x41\x04\x00\x00\x00\x00\x00\x007\x00\x00\x00'
    )
    serializer = rserializer.RSerializer(rtypes.CMD_eval)
    serializer.serialize(55, dtTypeCode=rtypes.DT_INT)
    produced = serializer.finalize()
    assert expected == produced
532 | 
533 | 
def test_serialize_unsupported_object_raises_exception(conn):
    """Sending a date object to R must raise a NotImplementedError"""
    # datetime objects are not yet supported, so an exception can be expected
    unsupported = datetime.date.today()
    pytest.raises(NotImplementedError, conn.r.ident, unsupported)
537 | 
538 | 
def test_eval_illegal_variable_lookup(conn):
    """
    Looking up an unknown variable should result in a proper exception and
    must leave the connector usable.

    NOTE(review): if no exception is raised at all the test still passes;
    confirm whether that is intended (e.g. because 'x' may have been defined
    by another test) before switching to pytest.raises.
    """
    try:
        conn.r('x')
    except REvalError as err:
        errorText = str(err)
        assert errorText == "Error: object 'x' not found"
    # check that the connection still works:
    assert conn.r('1') == 1
550 | 
551 | 
def test_eval_illegal_R_statement(conn):
    """
    Evaluating a malformed R statement may raise a REvalError, but must
    leave the connector usable.

    NOTE(review): the test also passes if no exception is raised; confirm
    whether that is intended.
    """
    malformed = 'x-%r\0/455'
    try:
        conn.r(malformed)
    except REvalError:
        pass
    # check that the connection still works:
    assert conn.r('1') == 1
563 | 
564 | 
565 | #######################
566 | # some more tests
567 | 
def test_rvarproxy(conn):
    """conn.ref delivers a RVarProxy whose value() fetches the variable"""
    conn.r.a = [1, 2, 3]
    assert conn.ref.a.__class__ == RVarProxy
    fetched = conn.ref.a.value()
    assert fetched == [1, 2, 3]
573 | 
574 | 
def test_oob_send(conn):
    """Send an OOB message from R while no callback is registered"""
    outcome = conn.r('self.oobSend("foo")')
    assert outcome is True
578 | 
579 | 
def test_oob_message(conn):
    """Tests OOB Message. Should not lock up, and without callbacks,
    NULL should be sent back to R. (None -> NULL)

    NOTE(review): in R a comparison with NULL via '==' yields an empty
    logical vector, so the stopifnot() presumably passes for any reply;
    confirm whether is.null() was intended.
    """
    outcome = conn.r('stopifnot(self.oobMessage("foo") == NULL)')
    assert outcome is None
585 | 
586 | 
def test_oob_callback(conn):
    """
    Register one OOB callback and check that it receives the data and the
    user code of an oobSend and an oobMessage call.
    """
    received = []

    def remember(data, code=0):
        # store as (code, data) for the comparison below
        received.append((code, data))

    with OOBCallback(conn, remember):
        conn.r('self.oobSend(1)')
        conn.r('self.oobMessage(2, code=10L)')

        assert received == [(0, 1), (10, 2)]
599 | 
600 | 
def test_oob_callback_result(conn):
    """The return value (1) of a registered callback is sent back to R"""
    def alwaysOne(data, code=0):
        return 1

    with OOBCallback(conn, alwaysOne):
        outcome = conn.r('stopifnot(self.oobMessage(NULL) == 1L)')
        assert outcome is None
605 | 
606 | 
def test_help_message(conn):
    """The __doc__ of a function proxy delivers the help text from R"""
    rawHelp = conn.r.sapply.__doc__
    assert rawHelp is not None
    # remove the extra underscore formatting characters from the help message:
    cleaned = rawHelp.replace('_\x08', '')
    assert cleaned.startswith('Apply a Function over a List or Vector')
614 | 


--------------------------------------------------------------------------------
/pyRserve/rparser.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Parser module for pyRserve
  4 | """
  5 | import io
  6 | import struct
  7 | import socket
  8 | 
  9 | import numpy
 10 | 
 11 | from .rtypes import (
 12 |     CMD_OOB, CMD_RESP, DT_SEXP, DTs, ERRORS, RESP_ERR, RESP_OK,
 13 |     SOCKET_BLOCK_SIZE, VALID_R_TYPES, XT_ARRAY_BOOL, XT_ARRAY_CPLX,
 14 |     XT_ARRAY_DOUBLE, XT_ARRAY_INT, XT_ARRAY_STR, XT_BOOL, XT_CLOS, XT_DOUBLE,
 15 |     XT_HAS_ATTR, XT_INT, XT_INT3, XT_INT7, XT_LANG_NOTAG, XT_LANG_TAG, XT_LARGE,
 16 |     XT_LIST_NOTAG, XT_LIST_TAG, XT_NULL, XT_RAW, XT_S4, XT_STR, XT_SYMNAME,
 17 |     XT_UNKNOWN, XT_VECTOR, XT_VECTOR_EXP, XTs, structMap, numpyMap
 18 | )
 19 | from .misc import FunctionMapper, byteEncode, stringEncode, PY3
 20 | from .rexceptions import \
 21 |     RResponseError, REvalError, EndOfDataError, RParserError
 22 | from .taggedContainers import TaggedList, asTaggedArray, asAttrArray
 23 | 
 24 | DEBUG = 0
 25 | 
 26 | 
 27 | class OOBMessage(object):
 28 |     """OOB Message
 29 | 
 30 |     - type: OOB_SEND or OOB_MSG or OOB_STREAM_READ
 31 |     - userCode: user-defined code passed to self.oobSend/oobMessage
 32 |     - data: user-sent data or None
 33 |     - messageSize: number of bytes in user-sent data
 34 |     """
 35 |     def __init__(self, type, userCode, data=None, messageSize=0):
 36 |         self.type = type
 37 |         self.userCode = userCode
 38 |         self.data = data
 39 |         self.messageSize = messageSize
 40 | 
 41 |     def __len__(self):
 42 |         return self.messageSize + 16  # header
 43 | 
 44 | 
 45 | class Command(object):
 46 |     """Wrapper around the command bitfield calculating and storing its properties
 47 |     Magic extracted from RSProtocol.h
 48 |     """
 49 |     def __init__(self, code):
 50 |         self.code = code
 51 |         # Rserve 1.7 or’s the command with CMD_RESP even if it’s a OOB instead
 52 |         fixedOOBCode = code & ~CMD_RESP
 53 | 
 54 |         self.isOOB = bool(code & CMD_OOB)
 55 |         self.oobType = fixedOOBCode & 0x0ffff000
 56 |         self.oobUserCode = fixedOOBCode & 0xfff
 57 | 
 58 |         self.errCode = (code >> 24) & 127
 59 |         self.responseCode = code & 0xfffff  # lowest 20 bit
 60 | 
 61 | 
 62 | class Lexeme(list):
 63 |     """Basic Lexeme class for parsing binary data coming from Rserve"""
 64 |     def __init__(self, rTypeCode, length, hasAttr, lexpos):
 65 |         list.__init__(self, [rTypeCode, length, hasAttr, lexpos])
 66 |         self.rTypeCode = rTypeCode
 67 |         self.length = length
 68 |         self.hasAttr = hasAttr
 69 |         self.lexpos = lexpos
 70 |         self.attrLexeme = None
 71 |         self.data = None
 72 | 
 73 |     def setAttr(self, attrLexeme):
 74 |         self.attrLexeme = attrLexeme
 75 | 
 76 |     @property
 77 |     def attr(self):
 78 |         return self.attrLexeme.data if self.attrLexeme else None
 79 | 
 80 |     @property
 81 |     def attrLength(self):
 82 |         return self.attrLexeme.length
 83 | 
 84 |     @property
 85 |     def attrTypeCode(self):
 86 |         return self.attrLexeme.rTypeCode
 87 | 
 88 |     @property
 89 |     def dataLength(self):
 90 |         """Return length (in bytes) of actual REXPR data body"""
 91 |         if self.hasAttr:
 92 |             if not self.attrLexeme:
 93 |                 raise RuntimeError('Attribute lexeme not yet set')
 94 |             # also subtract size of REXP header=4
 95 |             return self.length - self.attrLength - 4
 96 |         else:
 97 |             return self.length
 98 | 
 99 |     def __str__(self):
100 |         return 'Typecode: %s   Length: %s  hasAttr: %s,  Lexpos: %d' % \
101 |                (hex(self.rTypeCode), self.length, self.hasAttr, self.lexpos)
102 | 
103 | 
104 | class Lexer(object):
105 |     """Rserve message lexer
106 |     Can either read a OOBMessage or a R Object
107 |     """
108 |     lexerMap = {}
109 |     fmap = FunctionMapper(lexerMap)
110 | 
111 |     def __init__(self, src):
112 |         """
113 |         @param src: Either a string, a file object, a socket -
114 |                     all providing valid binary r data
115 |         """
116 |         if type(src) == str:
117 |             # this only works for objects implementing the buffer protocol,
118 |             # e.g. strings, arrays, ...
119 |             # convert string to byte object
120 |             self.fp = io.BytesIO(byteEncode(src))
121 |         elif type(src) == bytes:
122 |             self.fp = io.BytesIO(src)
123 |         else:
124 |             self.fp = src
125 |         if isinstance(self.fp, socket.socket):
126 |             self._read = self.fp.recv
127 |         else:
128 |             self._read = self.fp.read
129 |         # The following attributes will be set thru 'readHeader()':
130 |         self.lexpos = None
131 |         self.messageSize = None
132 |         self.errCode = None
133 |         self.responseCode = None
134 |         self.responseOK = None
135 |         self.isOOB = False
136 |         self.oobType = None
137 |         self.oobUserCode = None
138 | 
139 |     def readHeader(self):
140 |         """
141 |         Called initially when reading fresh data from an input source
142 |         (file or socket). Reads header which contains data like response/error
143 |         code and size of data entire package.
144 | 
145 |         QAP1 header structure parts (16 bytes total):
146 | 
147 |             [ 0-3 ] (int) command
148 |             [ 4-7 ] (int) length of the message (bits 0-31)
149 |             [ 8-11] (int) offset of the data part
150 |             [12-15] (int) length of the message (bits 32-63)
151 |         """
152 |         self.lexpos = 0
153 | 
154 |         command = Command(struct.unpack('<I', self.read(4))[0])
155 |         # Obtain lower 32bit part of message length:
156 |         messageSize1 = self.__unpack(XT_INT)
157 |         dataOffset = self.__unpack(XT_INT)
158 |         assert dataOffset == 0, 'dataOffset > 0 is not implemented'
159 |         # Obtain upper 32bit part of message length:
160 |         messageSize2 = self.__unpack(XT_INT) << 32  # shift 32bits to the left
161 |         self.messageSize = messageSize2 + messageSize1
162 | 
163 |         self.isOOB = command.isOOB
164 |         if self.isOOB:
165 |             # FIXME: Rserve has a bug(?) that sets CMD_RESP on
166 |             #        OOB commands so we clear it for now
167 |             self.oobType = command.oobType
168 |             self.oobUserCode = command.oobUserCode
169 | 
170 |             if DEBUG:
171 |                 print('oob type: %x, oob user code: %x, message size: %d' %
172 |                       (self.oobType, self.oobUserCode, self.messageSize))
173 |         else:
174 |             self.errCode = command.errCode
175 | 
176 |             self.responseCode = command.responseCode
177 |             if self.responseCode == RESP_OK:
178 |                 self.responseOK = True
179 |             elif self.responseCode == RESP_ERR:
180 |                 self.responseOK = False
181 |             else:
182 |                 self.clearSocketData()
183 |                 raise ValueError('Received illegal response code (%x)' %
184 |                                  self.responseCode)
185 | 
186 |             if DEBUG:
187 |                 print('response ok? %s (responseCode=%x), error-code: %x, '
188 |                       'message size: %d' %
189 |                       (self.responseOK, self.responseCode,
190 |                        self.errCode, self.messageSize))
191 | 
192 |         return self.messageSize
193 | 
194 |     def clearSocketData(self):
195 |         """
196 |         If for any reason the parsing process returns an error, make sure that
197 |         all data from a socket is removed to avoid data pollution with further
198 |         parsing attempts.
199 |         """
200 |         if not isinstance(self.fp, socket.socket):
201 |             # not a socket. Nothing to do here.
202 |             return
203 |         # Switch socket into non-blocking mode and read from it until it
204 |         # is empty (and hence socket.error is raised):
205 |         self.fp.setblocking(False)
206 |         try:
207 |             while True:
208 |                 self.fp.recv(SOCKET_BLOCK_SIZE)
209 |         except socket.error:
210 |             # socket has no more data, it can be considered as cleared
211 |             pass
212 |         finally:
213 |             # Now set it back to blocking mode (no matter what exception):
214 |             self.fp.setblocking(True)
215 | 
216 |     def read(self, length):
217 |         """
218 |         Read number of bytes from input data source (file or socket).
219 |         If end of data is reached it raises EndOfDataError().
220 | 
221 |         Sockets might not return all requested data at once, so use an io
222 |         buffer to collect all data needed in a loop.
223 |         """
224 |         bytesToRead = length
225 |         buf = io.BytesIO(b'')
226 |         while bytesToRead > 0:
227 |             fragment = self._read(bytesToRead)
228 |             lenFrag = len(fragment)
229 |             if lenFrag == 0:
230 |                 raise EndOfDataError()
231 |             buf.write(fragment)
232 |             bytesToRead -= lenFrag
233 | 
234 |         self.lexpos += length
235 |         data = buf.getvalue()
236 |         return data
237 | 
238 |     def __unpack(self, tCode, num=None):
239 |         """
240 |         Read 'num' (atomic) data items from the input source and convert them
241 |         into a list of python objects. Byteswapping for numeric data will
242 |         be done.
243 |         """
244 |         structCode = structMap[tCode] if type(tCode) == int else tCode
245 |         # All data from Rserve is stored in little-endian format!
246 |         fmt = byteEncode('<' + str(num) + structCode if (num is not None)
247 |                          else '<' + structCode)  # convert into bytes!
248 |         if tCode == XT_INT3:
249 |             length = 3
250 |             rawData = self.read(length) + b'\x00'
251 |         elif tCode == XT_INT7:
252 |             length = 7
253 |             rawData = self.read(length) + b'\x00'
254 |         else:
255 |             length = struct.calcsize(fmt or 1)
256 |             rawData = self.read(length)
257 |         d = struct.unpack(fmt, rawData)
258 |         return d[0] if num is None else list(d)
259 | 
260 |     def nextExprHdr(self):
261 |         """
262 |         From the input file/socket determine the type of the next data item,
263 |         and its length.
264 |         This method can be applied to read the
265 |         - entire data header (containing one of the DT_* codes)
266 |         - an REXPR header
267 |         """
268 |         startLexpos = self.lexpos
269 |         _rTypeCode = self.__unpack('B')  # unsigned byte!
270 |         # extract pure rTypeCode without XT_HAS_ATTR or XT_LARGE flags:
271 |         rTypeCode = _rTypeCode & 0x3F
272 |         # extract XT_HAS_ATTR flag (if it exists)"
273 |         hasAttr = (_rTypeCode & XT_HAS_ATTR) != 0
274 |         # extract XT_LARGE flag (if it exists):
275 |         isXtLarge = (_rTypeCode & XT_LARGE) != 0
276 |         if isXtLarge:
277 |             # header is larger, use all 7 bytes for length information
278 |             # (new in Rserve 0.3)
279 |             length = self.__unpack(XT_INT7)
280 |         else:
281 |             # small header, use 3 bytes for length information
282 |             length = self.__unpack(XT_INT3)
283 |         if rTypeCode not in VALID_R_TYPES:
284 |             raise RParserError(
285 |                 "Unknown SEXP type %s found at lexpos %d, length %d" %
286 |                 (hex(rTypeCode), startLexpos, length))
287 |         return Lexeme(rTypeCode, length, hasAttr, startLexpos)
288 | 
289 |     def nextExprData(self, lexeme):
290 |         """
291 |         Read next data item from binary r data and transform it into a
292 |         python object.
293 |         """
294 |         return self.lexerMap[lexeme.rTypeCode](self, lexeme)
295 | 
296 |     ###########################################################################
297 | 
298 |     @fmap(XT_INT, XT_DOUBLE)
299 |     def xt_atom(self, lexeme):
300 |         raw = self.read(lexeme.dataLength)
301 |         return struct.unpack(
302 |             byteEncode('<%s' % structMap[lexeme.rTypeCode]), raw)[0]
303 | 
304 |     @fmap(XT_BOOL)
305 |     def xt_bool(self, lexeme):
306 |         raw = self.read(lexeme.dataLength)
307 |         # a boolean is stored in a 4 bytes word, but only the first byte
308 |         # is significant:
309 |         if PY3:
310 |             # python3 directly converts a single byte item into a number!
311 |             b = raw[0]
312 |         else:
313 |             b = struct.unpack(byteEncode('<%s' % structMap[XT_BOOL]),
314 |                               raw[0])[0]
315 |             # b can be 2, meaning NA. Otherwise transform 0/1 into False/True
316 |         return None if b == 2 else b == 1
317 | 
318 |     @fmap(XT_ARRAY_INT, XT_ARRAY_DOUBLE, XT_ARRAY_CPLX)
319 |     def xt_array_numeric(self, lexeme):
320 |         raw = self.read(lexeme.dataLength)
321 |         # TODO: swapping...
322 |         data = numpy.frombuffer(raw, dtype=numpyMap[lexeme.rTypeCode])
323 |         return data
324 | 
325 |     @fmap(XT_ARRAY_BOOL)
326 |     def xt_array_bool(self, lexeme):
327 |         """A boolean array consists of a 4-byte word (i.e. integer)
328 |         determining the number of boolean values in the following dataLength-4
329 |         bytes.
330 |         E.g. a bool array of one TRUE item looks like:
331 |         01 00 00 00   01 ff ff ff
332 | 
333 |         The first 01 value tells that there is one bool value in the array.
334 |         The other 01 is the TRUE value, the other 3 'ff's are padding bytes.
335 |         Those will be used if the vector has 2,3 or 4 boolean values.
336 |         For a fifth value another 4 bytes are appended.
337 |         """
338 |         numBools = self.__unpack(XT_INT, 1)[0]
339 |         # read the actual boolean values, including padding bytes:
340 |         raw = self.read(lexeme.dataLength - 4)
341 |         # Check if the array contains any NA values (encoded as \x02).
342 |         # If so we need to convert the 2's to None's and use a numpy
343 |         # array of type Object otherwise numpy will cast the None's into False's.
344 |         # This is handled for us for numeric types since numpy can use it's own
345 |         # nan type, but here we need to help it out.
346 |         if 2 in raw:
347 |             data = numpy.frombuffer(raw[:numBools], dtype=numpy.int8).astype(object)
348 |             data[data == 2] = None
349 |         else:
350 |             data = numpy.frombuffer(
351 |                 raw[:numBools],
352 |                 dtype=numpyMap[lexeme.rTypeCode]
353 |             )
354 |         return data
355 | 
356 |     @fmap(XT_ARRAY_STR)
357 |     def xt_array_str(self, lexeme):
358 |         """
359 |         An array of one or more null-terminated strings.
360 |         The XT_ARRAY_STR can contain trailing chars \x01 which need to be
361 |         chopped off. Since strings are encoded as bytes (in Py3) they need
362 |         to be converted into real strings.
363 |         """
364 |         if lexeme.dataLength == 0:
365 |             return ''
366 |         raw = self.read(lexeme.dataLength)
367 |         bytesStrList = raw.split(b'\0')[:-1]
368 |         strList = [stringEncode(byteString) for byteString in bytesStrList]
369 |         return numpy.array(strList)
370 | 
371 |     @fmap(XT_STR)
372 |     def xt_str(self, lexeme):
373 |         """
374 |         A null-terminated string.
375 |         It's length can be larger than the actual string since it is always a
376 |         multiple of 4.
377 |         The rest is filled with trailing \0s which need to be chopped off.
378 |         """
379 |         raw = self.read(lexeme.dataLength)
380 |         byteStr = raw.split(b'\0', 1)[0]
381 |         return stringEncode(byteStr)
382 | 
383 |     @fmap(XT_SYMNAME)
384 |     def xt_symname(self, lexeme):
385 |         """
386 |         Just like a string, but in S4 classes, a special value for NULL exists
387 |         """
388 |         string = self.xt_str(lexeme)
389 |         return None if string == '\x01NULL\x01' else string
390 | 
391 |     @fmap(XT_NULL)
392 |     def xt_null(self, lexeme):
393 |         return None
394 | 
395 |     @fmap(XT_UNKNOWN)
396 |     def xt_unknown(self, lexeme):
397 |         return self.__unpack(XT_INT)
398 | 
399 |     @fmap(XT_RAW)
400 |     def xt_raw(self, lexeme):
401 |         self.__unpack(XT_INT)
402 |         return self.read(lexeme.dataLength - 4)
403 | 
404 | 
405 | class RParser(object):
406 |     #
407 |     parserMap = {}
408 |     fmap = FunctionMapper(parserMap)
409 | 
410 |     def __init__(self, src, atomicArray):
411 |         """
412 |         src:         Either a string, a bytes object, a file object or a
413 |                      socket providing binary r data (handed over to Lexer)
414 |         atomicArray: if False parsing arrays with only one element will just
415 |                      return this element
416 |         """
417 |         self.lexer = Lexer(src)
418 |         self.atomicArray = atomicArray
419 |         self.indentLevel = None
420 | 
421 |     def __getitem__(self, key):
422 |         return self.parserMap[key]
423 | 
424 |     def __getattr__(self, attr):
425 |         if attr in ['messageSize']:
426 |             return getattr(self.lexer, attr)
427 |         else:
428 |             raise AttributeError(attr)
429 | 
430 |     @property
431 |     def __ind(self):
432 |         # return string with number of spaces appropriate for current
433 |         # indentation level
434 |         return self.indentLevel * 4 * ' '
435 | 
436 |     def _debugLog(self, lexeme, isRexpr=True):
437 |         if DEBUG:
438 |             lx = lexeme
439 |             typeCodeDict = XTs if isRexpr else DTs
440 |             print('%s %s (%s), hasAttr=%s, lexpos=%d, length=%s' %
441 |                   (self.__ind, typeCodeDict[lx.rTypeCode], hex(lx.rTypeCode),
442 |                    lx.hasAttr, lx.lexpos, lx.length))
443 | 
444 |     def parse(self):
445 |         """
446 |         Parse data stream and return result converted into
447 |         python data structure
448 |         """
449 |         self.indentLevel = 1
450 |         self.lexer.readHeader()
451 | 
452 |         message = None
453 |         if self.lexer.messageSize > 0:
454 |             try:
455 |                 message = self._parse()
456 |             except Exception:
457 |                 # If any error is raised during lexing and parsing, make sure
458 |                 # that the entire data is read from the input source if it is
459 |                 # a socket, otherwise following attempts to
460 |                 # parse again from a socket will return polluted data:
461 |                 self.lexer.clearSocketData()
462 |                 raise
463 |         elif not self.lexer.responseOK:
464 |             try:
465 |                 rserve_err_msg = ERRORS[self.lexer.errCode]
466 |             except KeyError:
467 |                 raise REvalError("R evaluation error (code=%d)" %
468 |                                  self.lexer.errCode)
469 |             else:
470 |                 raise RResponseError('Response error %s (error code=%d)' %
471 |                                      (rserve_err_msg, self.lexer.errCode))
472 | 
473 |         if self.lexer.isOOB:
474 |             return OOBMessage(self.lexer.oobType, self.lexer.oobUserCode,
475 |                               message, self.lexer.messageSize)
476 |         else:
477 |             return message
478 | 
479 |     def _parse(self):
480 |         dataLexeme = self.lexer.nextExprHdr()
481 |         self._debugLog(dataLexeme, isRexpr=False)
482 |         if dataLexeme.rTypeCode == DT_SEXP:
483 |             lexeme = self._parseExpr()
484 |             return self._postprocessData(lexeme.data)
485 |         else:
486 |             raise NotImplementedError()
487 | 
488 |     def _parseExpr(self):
489 |         self.indentLevel += 1
490 |         lexeme = self.lexer.nextExprHdr()
491 |         self._debugLog(lexeme)
492 |         if lexeme.hasAttr:
493 |             self.indentLevel += 1
494 |             if DEBUG:
495 |                 print('%s Attribute:' % self.__ind)
496 |             lexeme.setAttr(self._parseExpr())
497 |             self.indentLevel -= 1
498 |         lexeme.data = self.parserMap.get(lexeme.rTypeCode,
499 |                                          self[None])(self, lexeme)
500 |         self.indentLevel -= 1
501 |         return lexeme
502 | 
503 |     def _nextExprData(self, lexeme):
504 |         lexpos = self.lexer.lexpos
505 |         data = self.lexer.nextExprData(lexeme)
506 |         if DEBUG:
507 |             print('%s    data-lexpos: %d, data-length: %d bytes' %
508 |                   (self.__ind, lexpos, lexeme.dataLength))
509 |             print('%s    data: %s' % (self.__ind, repr(data)))
510 |             try:
511 |                 dataLen = len(data)
512 |                 print('%s    length: %d' % (self.__ind, dataLen))
513 |             except TypeError:
514 |                 pass
515 |         return data
516 | 
517 |     def _postprocessData(self, data):
518 |         """
519 |         Postprocess parsing results depending on configuration parameters
520 |         Currently only arrays are effected.
521 |         """
522 |         if data.__class__ == numpy.ndarray:
523 |             # this does not apply for arrays with attributes
524 |             # (__class__ would be TaggedArray)!
525 |             if len(data) == 1 and not self.atomicArray:
526 |                 # if data is a plain numpy array, and has only one element,
527 |                 # just extract and return this.
528 |                 # For convenience reasons type-convert it into a native
529 |                 # Python data type:
530 |                 data = data[0]
531 |                 if isinstance(data, (float, numpy.float64)):
532 |                     # convert into native python float:
533 |                     data = float(data)
534 |                 elif isinstance(data, (int, numpy.int32, numpy.int64)):
535 |                     # convert into native int or long, depending on value:
536 |                     data = int(data)
537 |                 elif isinstance(data, (complex, numpy.complex64,
538 |                                        numpy.complex128)):
539 |                     # convert into native python complex number:
540 |                     data = complex(data)
541 |                 elif isinstance(data, (numpy.string_, str)):
542 |                     # convert into native python string:
543 |                     data = str(data)
544 |                 elif isinstance(data, (bool, numpy.bool_)):
545 |                     # convert into native python string
546 |                     data = bool(data)
547 |         return data
548 | 
549 |     @fmap(None)
550 |     def xt_(self, lexeme):
551 |         # apply this for atomic data
552 |         return self._nextExprData(lexeme)
553 | 
554 |     @fmap(XT_ARRAY_BOOL, XT_ARRAY_INT, XT_ARRAY_DOUBLE, XT_ARRAY_STR)
555 |     def xt_array(self, lexeme):
556 |         # converts data into a numpy array already:
557 |         data = self._nextExprData(lexeme)
558 |         if lexeme.hasAttr and lexeme.attrTypeCode == XT_LIST_TAG:
559 |             for tag, value in lexeme.attr:
560 |                 if tag == 'dim':
561 |                     # the array has a defined shape, and R stores and
562 |                     # sends arrays in Fortran mode:
563 |                     data = data.reshape(value, order='F')
564 |                 elif tag == 'names':
565 |                     # convert numpy-vector 'value' into list to make
566 |                     # TaggedArray work properly:
567 |                     data = asTaggedArray(data, list(value))
568 |                 else:
569 |                     # there are additional tags in the attribute, just collect
570 |                     # them in a dictionary attached to the array.
571 |                     try:
572 |                         data.attr[tag] = value
573 |                     except AttributeError:
574 |                         data = asAttrArray(data, {tag: value})
575 |         return data
576 | 
577 |     @fmap(XT_VECTOR, XT_VECTOR_EXP, XT_LANG_NOTAG, XT_LIST_NOTAG)
578 |     def xt_vector(self, lexeme):
579 |         """
580 |         A vector is e.g. return when sending "list('abc','def')" to R. It can
581 |         contain mixed types of data items.
582 |         The binary representation of an XT_VECTOR is weird: a vector contains
583 |         unknown number of items, with possibly variable length. Only the number
584 |         of bytes of the data of a vector is known in advance.
585 |         The end of this REXP can only be detected by keeping track of how
586 |         many bytes have been consumed (lexeme.length!) until the end of the
587 |         REXP has been reached.
588 | 
589 |         A vector expression (type 0x1a) is according to Rserve docs the same
590 |         as XT_VECTOR. For now just a list with the expression content is
591 |         returned in this case.
592 |         """
593 |         finalLexpos = self.lexer.lexpos + lexeme.dataLength
594 |         if DEBUG:
595 |             print('%s     Vector-lexpos: %d, length %d, finished at: %d' %
596 |                   (self.__ind, self.lexer.lexpos,
597 |                    lexeme.dataLength, finalLexpos))
598 |         data = []
599 |         while self.lexer.lexpos < finalLexpos:
600 |             # convert single item arrays into atoms (via stripArray)
601 |             data.append(self._postprocessData(self._parseExpr().data))
602 | 
603 |         if lexeme.hasAttr and lexeme.attrTypeCode == XT_LIST_TAG:
604 |             # The vector is actually a tagged list, i.e. a list which allows
605 |             # to access its items by name (like in a dictionary). However items
606 |             # are ordered, and there is not necessarily a name available for
607 |             # every item.
608 |             for tag, value in lexeme.attr:
609 |                 if tag == 'names':
610 |                     # the vector has named items
611 |                     data = TaggedList(zip(value, data))
612 |                 else:
613 |                     if DEBUG:
614 |                         print('Warning: applying LIST_TAG "%s" on xt_vector '
615 |                               'not yet implemented' % tag)
616 |         return data
617 | 
618 |     @fmap(XT_LIST_TAG, XT_LANG_TAG)
619 |     def xt_list_tag(self, lexeme):
620 |         # a xt_list_tag usually occurs as an attribute of a vector or list
621 |         # (like for a tagged list)
622 |         finalLexpos = self.lexer.lexpos + lexeme.dataLength
623 |         r = []
624 |         while self.lexer.lexpos < finalLexpos:
625 |             value, tag = self._parseExpr().data, self._parseExpr().data
626 |             # reverse order of tag and value when adding it to result list
627 |             r.append((tag, value))
628 |         return r
629 | 
630 |     @fmap(XT_CLOS)
631 |     def xt_closure(self, lexeme):
632 |         # read entire data provided for closure (a R code object) even though
633 |         # we don't know what to do with it on the Python side ;-)
634 |         aList1 = self._parseExpr().data
635 |         aList2 = self._parseExpr().data
636 |         # Some closures seem to provide their sourcecode in an attrLexeme,
637 |         # but some don't.
638 |         # return Closure(lexeme.attrLexeme.data[0][1])
639 |         # So for now let's just return the entire parse tree in a
640 |         # Closure instance.
641 |         return Closure(lexeme, aList1, aList2)
642 | 
643 |     @fmap(XT_S4)
644 |     def xt_s4(self, lexeme):
645 |         """A S4 object only contains attributes, no other payload"""
646 |         if lexeme.hasAttr and lexeme.attrTypeCode == XT_LIST_TAG:
647 |             return S4(lexeme.attr)
648 |         else:
649 |             return S4([])
650 | 
651 | 
652 | ##############################################################################
653 | 
654 | 
655 | def rparse(src, atomicArray=False):
656 |     rparser = RParser(src, atomicArray)
657 |     return rparser.parse()
658 | 
659 | ##############################################################################
660 | 
661 | 
662 | class Closure(object):
663 |     """
664 |     Very simple container to return "something" for a closure.
665 |     Not really usable in Python though.
666 |     """
667 |     def __init__(self, lexeme, aList1, aList2):
668 |         self.lexeme = lexeme
669 |         self.aList1 = aList1
670 |         self.aList2 = aList2
671 | 
672 |     def __repr__(self):
673 |         return '<Closure instance %d>' % id(self)
674 | 
675 | 
676 | class S4(dict):
677 |     """Very simple representation of a S4 instance"""
678 |     def __init__(self, source=(), **entries):
679 |         super(S4, self).__init__(source, **entries)
680 | 
681 |         if 'class' in self:
682 |             self.classes = self['class']
683 |             del self['class']
684 |         else:
685 |             self.classes = []
686 | 
687 |     def __repr__(self):
688 |         attrs = super(S4, self).__repr__()
689 |         return "<S4 classes={} {}>".format(self.classes, attrs)
690 | 


--------------------------------------------------------------------------------
/doc/manual.rst:
--------------------------------------------------------------------------------
  1 | pyRserve manual
  2 | ===============
  3 | 
  4 | This manual is written in sort of a `walk-through`-style. All examples can be tried out on the Python
  5 | command line as you read through it.
  6 | 
  7 | Setting up a connection to Rserve
  8 | ---------------------------------
  9 | 
 10 | Running both Rserve and pyRserve locally on one host
 11 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 12 | This is the most simple solution, and we will begin with it before explaining remote connections.
 13 | 
 14 | First of all startup Rserve if it is not yet running::
 15 | 
 16 |   $ R CMD Rserve
 17 | 
 18 | By default Rserve is listening on port 6311 (its default) on localhost (or 127.0.0.1) only,
 19 | for security reasons. This means that no connection from any other machine is possible to it.
 20 | For now, and for simplicity, we stick with running everything (Rserve and pyRserve) on the same host.
 21 | 
 22 | R puts itself into daemon mode, meaning that your shell comes back, and you have no way to
 23 | shutdown R via ``ctrl-C`` (you need to call ``kill`` with its process id). However ``Rserve`` can be started in
 24 | debug mode during development. In this mode it'll print messages to stdout helping you to see whether your
 25 | connection works etc. To do so `Rserve` needs to be started like::
 26 | 
 27 |   $ R CMD Rserve.dbg
 28 | 
 29 | Now we can try to connect to it.
 30 | From the python interpreter import the pyRserve package and by omitting any arguments to the ``connect()`` function
 31 | setup the connection to your locally running ``Rserve``::
 32 | 
 33 |   $ python
 34 |   >>> import pyRserve
 35 |   >>> conn = pyRserve.connect()
 36 | 
 37 | The resulting connection handle can tell you where it is connected to::
 38 | 
 39 |   >>> conn
 40 |   <Handle to Rserve on localhost:6311>
 41 | 
 42 | The connection will be closed automatically when conn is deleted, or by explicitly calling the ``close()``-method::
 43 | 
 44 |   >>> conn.close()
 45 |   >>> conn
 46 |   <Closed handle to Rserve on localhost:6311>
 47 | 
 48 | Running operations on a closed pyRserve connector results in an exception. However a connection can be reopened by
 49 | calling the ``connect()`` method. It reuses the previously given values (or defaults) for ``host`` and ``port``::
 50 | 
 51 |   >>> conn.connect()
 52 |   <Handle to Rserve on localhost:6311>
 53 | 
 54 | To check the status of the connection use::
 55 | 
 56 |   >>> conn.isClosed
 57 |   False
 58 | 
 59 | Setting up a remote connection to Rserve
 60 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 61 | 
 62 | Variant 1: Make Rserve listen to a public port
 63 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 64 | 
 65 | To allow Rserve to accept connections from remote hosts on a public port a special flag needs to be set in its
 66 | configuration file (which might be missing initially). Once the ``remote enable``-flag is set there,
 67 | Rserve needs to be restarted in order to honor it.
 68 | 
 69 | .. WARNING::
 70 |     Opening Rserve on a port which is publicly (or maybe within an organization like a company)
 71 |     accessible allows anyone who has access to this machine to connect to the Rserve server process.
 72 | 
 73 | .. WARNING::
 74 |     Traffic between Rserve and pyRserve is not encrypted - so anyone with access to the network
 75 |     would in principle be able to sniff your communication, or even manipulate it.
 76 | 
 77 | By default Rserve tries to load the configuration file from ``/etc/Rserv.conf``. So if you have
 78 | root privileges on your host you can enable remote connections with the following command::
 79 | 
 80 |     $ echo "remote enable" | sudo tee /etc/Rserv.conf
 81 | 
 82 | Then restart Rserve.
 83 | 
 84 | In case you don't have sudo privileges the config file can be created anywhere else, e.g.::
 85 | 
 86 |     $ echo "remote enable" > ~/.config/Rserv.conf
 87 | 
 88 | Then restart Rserve like ``$ R CMD Rserve --RS-conf ~/.config/Rserv.conf``.
 89 | 
 90 | Variant 2: Connect to Rserve through an SSH tunnel
 91 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 92 | This option is definitely more secure than variant 1. First of all communication is encrypted.
 93 | Secondly you can easily control who is allowed to access Rserve from outside the host
 94 | Rserve is running on.
 95 | 
 96 | The approach could be:
 97 | 
 98 | 1. Create a generic account on the host running Rserve, called e.g. ``rserveuser``.
 99 |    For this example let the host be called ``rservehost``.
100 | 2. In the ``rserveuser``'s home directory, inside the ``~/.ssh`` directory, add the public
101 |    ssh key of allowed users to the ``~/.ssh/authorized_keys`` file. This can be done
102 |    in a very special way that only enables access to Rserve, without any other
103 |    privilege like opening a remote shell etc.
104 | 
105 |    To achieve this, a line in the ``~/.ssh/authorized_keys`` must look like::
106 | 
107 |     command="echo 'Rserve only account.'",restrict,port-forwarding,permitopen="localhost:6311" ssh-ed25519 AAAAC3..pxfm user1@someuserhost
108 | 
109 | 3. Start rserve in normal mode, without the ``remote enable`` flag, so it only listens on localhost.
110 | 4. ``user1`` (owning the public ssh key added in 2.) then opens an SSH tunnel to ``rservehost``::
111 | 
112 |     $ ssh -N -L 6311:localhost:6311 rserveuser@rservehost
113 | 
114 |    This command forwards traffic from port 6311 on ``user1``'s client machine to ``localhost:6311`` on
115 |    ``rservehost``.
116 | 
117 | 5. ``user1`` on his/her client machine opens Python and establishes an Rserve connection with::
118 | 
119 |     >>> import pyRserve
120 |     >>> conn = pyRserve.connect()
121 | 
122 |    The connection to ``localhost:6311`` on the client machine will be forwarded to Rserve listening
123 |    on ``localhost:6311`` on ``rservehost``.
124 | 
125 | Variant 3: Connect to Rserve through a Unix socket
126 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
127 | This option might be more flexible for concurrent/dynamic connections than variants 1 and 2 and
128 | is slightly more secure than variant 1 (and less than variant 2), as the Unix socket can only be
129 | accessed from within the server.
130 | 
131 | To enable Unix sockets in Rserve a flag needs to be enabled::
132 | 
133 |     R CMD Rserve --RS-socket /tmp/rserve.sock
134 | 
135 | That socket can now be used from pyRserve::
136 | 
137 |     >>> import pyRserve
138 |     >>> conn = pyRserve.connect(unix_socket='/tmp/rserve.sock')
139 | 
140 | .. WARNING::
141 |     Just as in Variant 1, communication between pyRserve and Rserve is not encrypted.
142 |     The only additional security is that this socket cannot be accessed from the network,
143 |     but a user with access to the system can still sniff/manipulate your connection.
144 | 
145 | 
146 | Shutting down Rserve remotely
147 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
148 | 
149 | If you need to shutdown Rserve from your client connection the following command can be called:
150 | 
151 |   >>> conn.shutdown()
152 | 
153 | 
154 | String evaluation in R
155 | -------------------------------
156 | 
157 | Having established a connection to Rserve you can run the first commands on it. A valid R command can be executed
158 | by making a call to the R name space via the connection's `eval()` method, providing a string as argument which
159 | contains valid R syntax::
160 | 
161 |   >>> conn.eval('3 + 5')
162 |   8.0
163 | 
164 | In this example the string ``"3 + 5"`` will be sent to the remote side and evaluated by the R interpreter.
165 | The result is then
166 | delivered back into a native Python object, a floating point number in this case. As an R expert you are
167 | probably aware of the fact that R uses vectors for all numbers internally by default. But why did we receive
168 | a single floating point number? The reason is that pyRserve looks at arrays coming from Rserve and converts
169 | arrays with only one single item into an atomic value. This behaviour is for convenience reasons only.
170 | 
171 | There are two ways to override this behaviour so that the result is a real (numpy) array:
172 | 
173 | * Apply `atomicArray=True` to the `eval()`-method:
174 | 
175 |     >>> conn.eval('3 + 5', atomicArray=True)
176 |     array([ 8.])
177 | 
178 |     This behaviour is then valid for one single call.
179 | 
180 | * Apply `atomicArray=True` to the `connect()`-function to make it the default for all calls to `eval()`:
181 | 
182 |     ``conn = pyRserve.connect(atomicArray=True)``
183 | 
184 |     Then calling `eval()` would return a `numpy` array in every case:
185 | 
186 |     >>> conn.eval('3 + 5')
187 |     array([ 8.])
188 | 
189 | ``conn.atomicArray`` will tell you how the connection handles results. This attribute contains the value of the
190 | ``atomicArray`` kw-argument given to connect. It can also be changed directly for a running connection.
191 | 
192 |   >>> conn.atomicArray
193 |   True
194 |   >>> conn.atomicArray = False  # change value
195 | 
196 | More expression evaluation
197 | ------------------------------
198 | 
199 | Of course also more complex data types can be sent from R to Python, e.g. lists or real arrays. Here are some examples::
200 | 
201 |   >>> conn.eval("list(1, 'otto')")
202 |   [1, 'otto']
203 |   >>> conn.eval('c(1, 5, 7)')
204 |   array([ 1., 5., 7.])
205 | 
206 | As demonstrated here R-lists are converted into plain Python lists whereas R-vectors are converted into numpy
207 | arrays on the Python side.
208 | 
209 | To set a variable inside the R namespace do::
210 | 
211 |   >>> conn.eval('aVar <- "abc"')
212 |   'abc'
213 | 
214 | and to request its value just do::
215 | 
216 |   >>> conn.eval('aVar')
217 |   'abc'
218 | 
219 | 
220 | Expression evaluation without expecting a result
221 | ----------------------------------------------------
222 | 
223 | In the example above setting a variable in R did not only set the variable but also returned it back to Python::
224 | 
225 |   >>> conn.eval('aVar <- "abc"')
226 |   'abc'
227 | 
228 | This is usually not something one would expect or need, and especially in the case of very large data this can cause
229 | unnecessary network traffic. The solution to this is to either call `eval()` with another option `void=True`, or to
230 | use `conn.voidEval()` directly. The following two calls are identical and do not return the string `'abc'`:
231 | 
232 |   >>> conn.eval('aVar <- "abc"', void=True)
233 |   >>> conn.voidEval('aVar <- "abc"')
234 | 
235 | 
236 | Defining functions and calling them through expression evaluation
237 | --------------------------------------------------------------------
238 | 
239 | It is also possible to create functions inside the R interpreter through the connector's namespace, or even to
240 | execute entire scripts. Basically you can do everything which is possible inside a normal R console::
241 | 
242 |   # create a function and execute it:
243 |   >>> conn.voidEval('doubleit <- function(x) { x*2 }')
244 |   >>> conn.eval('doubleit(2)')
245 |   4.0
246 | 
247 |   # store a mini script definition in a Python string ...
248 |   >>> my_r_script = '''
249 |   squareit <- function(x)
250 |     { x**2 }
251 |   squareit(4)
252 |   '''
253 |   # .... and execute it in R:
254 |   >>> conn.eval(my_r_script)
255 |   16.0
256 | 
257 | 
258 | 
259 | The R namespace - setting and accessing variables in a more Pythonic way
260 | ------------------------------------------------------------------------------
261 | 
262 | Previous sections explained how to set a variable inside R by evaluating a statement in string format::
263 | 
264 |   >>> conn.voidEval('aVar <- "abc"')
265 | 
266 | This is not very elegant and has limited ways to provide values already stored in Python variables. A much nicer
267 | way to do this is by setting the variable name in R as an attribute to a special variable `conn.r` which points
268 | to the namespace in R directly. The following statement does the same thing as the one above, just "more Pythonic"::
269 | 
270 |   >>> conn.r.aVar = "abc"
271 | 
272 | So of course it is then possible to compute values or copy them from Python variables into R::
273 | 
274 |   >>> conn.r.aVar = some_python_number * 1000.505
275 | 
276 | To retrieve a variable from R just use it as expected::
277 | 
278 |   >>> print('A value from R:', conn.r.aVar)
279 | 
280 | In its current implementation pyRserve allows setting and accessing the following base types:
281 | 
282 | * None (NULL)
283 | * boolean
284 | * integers (32-bit only)
285 | * floating point numbers (64 bit only), i.e. doubles
286 | * complex numbers
287 | * strings
288 | 
289 | Furthermore the following containers are supported:
290 | 
291 | * lists
292 | * numpy arrays
293 | * TaggedList
294 | * AttrArray
295 | * TaggedArray
296 | 
297 | Lists can be nested arbitrarily, containing other lists, numbers, or arrays. ``TaggedList``, ``AttrArray``, and
298 | ``TaggedArray`` are
299 | special containers to handle very R-specific result types. They will be explained further down in the manual.
300 | 
301 | The following example shows how to assign a python list with mixed data types to an R variable called ``aList``,
302 | and then to retrieve it again::
303 | 
304 |   >>> conn.r.aList = [1, 'abcde', numpy.array([1, 2, 3], dtype=int)]
305 |   >>> conn.r.aList
306 |   [1, 'abcde', array([1, 2, 3])]
307 | 
308 | Numpy arrays can also contain dimension information which is translated into R matrices when assigned to the R namespace::
309 | 
310 |   >>> arr = numpy.array(range(12))
311 |   >>> arr.shape = (3, 4)
312 |   >>> conn.r.aMatrix = arr
313 |   >>> conn.r('dim(aMatrix)')  # give me the dimension of aMatrix on the R-side
314 |   array([3, 4])
315 | 
316 | The result of the shape information is - in contrast to what one gets from numpy arrays - an array itself.
317 | There is nothing special about this, this is just the way R internally deals with that information.
318 | 
319 | 
320 | Expression evaluation through the R namespace
321 | ------------------------------------------------
322 | 
323 | Instead of using `conn.eval('1+1')` expressions can also be evaluated by making a function call on the R namespace
324 | directly. The following calls are producing the same result:
325 | 
326 |   >>> conn.r('1+1')
327 |   >>> conn.eval('1+1')
328 | 
329 | `conn.r('...')` also accepts the `void`-option in case you want to suppress that a result is returned. Again the
330 | following three calls are producing the same result:
331 | 
332 |   >>> conn.r('1+1', void=True)
333 |   >>> conn.eval('1+1', void=True)
334 |   >>> conn.voidEval('1+1')
335 | 
336 | 
337 | Calling functions in R
338 | ------------------------
339 | 
340 | Functions defined in R can be called as if they were Python methods, declared in the namespace of R.
341 | 
342 | Before the examples below are usable we need to define a couple of very simple functions within the R namespace:
343 | ``func0()`` accepts no parameters and returns a fixed string, ``func1()`` takes exactly one parameter and
344 | ``funcKW()`` takes keyword arguments with default values::
345 | 
346 |   conn.voidEval('func0 <- function() { "hello world" }')
347 |   conn.voidEval('func1 <- function(v) { v*2 }')
348 |   conn.voidEval('funcKW <- function(a1=1.0, a2=4.0) { list(a1, a2) }')
349 | 
350 | Now calling R functions is as trivial as calling plain Python functions::
351 | 
352 |   >>> conn.r.func0()
353 |   "hello world"
354 |   >>> conn.r.func1(5)
355 |   10
356 |   >>> conn.r.funcKW(a2=6.0)
357 |   [1.0, 6.0]
358 | 
359 | Of course you can also call functions built-in to R::
360 | 
361 |   >>> conn.r.length([1,2,3])
362 |   3
363 | 
364 | 
365 | Getting help with functions
366 | ------------------------------
367 | 
368 | If R is properly installed including its help messages those can be retrieved directly.
369 | Also here no surprise - just do it the Python way through the ``__doc__`` attribute::
370 | 
371 |   >>> print(conn.r.sapply.__doc__)
372 |   lapply                 package:base                 R Documentation
373 | 
374 |   Apply a Function over a List or Vector
375 | 
376 |   Description:
377 | 
378 |   'lapply' returns a list of the same length as 'X', each element of
379 |   which is the result of applying 'FUN' to the corresponding element
380 |   of 'X'.
381 |   [...]
382 | 
383 | Of course this only works for functions which provide documentation. For all others ``__doc__`` just returns ``None``.
384 | 
385 | 
386 | 
387 | Applying an R function as argument to another function
388 | ---------------------------------------------------------
389 | 
390 | A typical application in R is to apply a vector to a function, especially via ``sapply`` and its brothers (or sisters,
391 | depending on how one sees them).
392 | 
393 | Fortunately this is as easy as you would expect::
394 | 
395 |   >>> conn.voidEval('double <- function(x) { x*2 }')
396 |   >>> conn.r.sapply(numpy.array([1, 2, 3]), conn.r.double)
397 |   array([ 2.,  4.,  6.])
398 | 
399 | Here a Python array and a function defined in R are provided as arguments to the R function ``sapply``.
400 | 
401 | Of course the following attempt to provide a Python function as an argument into R makes no sense::
402 | 
403 |   >>> def double(v): return v*2
404 |   ...
405 |   >>> conn.r.sapply(numpy.array([1, 2, 3]), double)
406 |   Traceback (most recent call last):
407 |     File "<stdin>", line 1, in <module>
408 |   NameError: name 'double' is not defined
409 | 
410 | This will result in a NameError because the connector tries to reference the function 'double' inside the
411 | R namespace. It should be obvious that it is not possible to transfer function implementations from Python to R.
412 | 
413 | 
414 | Applying a variable already defined in R to a function
415 | -----------------------------------------------------------
416 | 
417 | To understand why this is an interesting feature one has to understand how Python and pyRserve work. The following
418 | code is pretty inefficient::
419 | 
420 |   >>> conn.r.arr = numpy.array([1, 2, 3])
421 |   >>> conn.r.sapply(conn.r.arr, conn.r.double)
422 | 
423 | To see why it is inefficient it is reproduced here more explicitly, but doing exactly the same thing::
424 | 
425 |   >>> conn.r.arr = numpy.array([1, 2, 3])
426 |   >>> arr = conn.r.arr
427 |   >>> conn.r.sapply(arr, conn.r.double)
428 | 
429 | Now it is clear that the value of ``conn.r.arr`` is first set inside R, then retrieved back to Python
430 | (in the second line) and then again sent back to the ``sapply`` function. This is pretty inefficient,
431 | it would be much better just to set the array in R and then to refer to ``conn.r.arr`` instead of sending
432 | it back and forth. Here the "reference" namespace called ``ref`` comes into play::
433 | 
434 |    >>> conn.ref.arr
435 |    <RVarProxy to variable "arr">
436 | 
437 | Through ``conn.ref`` it is possible to only reference a variable (or a function) in the R namespace without actually
438 | bringing it over to Python. Such a reference can then be passed as an argument to every function called
439 | from ``conn.r``. So the proper way to make the call above is::
440 | 
441 |   >>> conn.r.arr = numpy.array([1, 2, 3])
442 |   >>> conn.r.sapply(conn.ref.arr, conn.r.double)
443 | 
444 | However it is still possible to retrieve the actual content of a variable proxy through its ``value()`` method::
445 | 
446 |   >>> conn.ref.arr.value()
447 |   array([1., 2., 3.])
448 | 
449 | So using ``conn.ref`` instead of ``conn.r`` primarily returns a reference to the remote variable in the R namespace,
450 | instead of its value. Actually we have done that before with the function ``conn.r.double``. This doesn't return
451 | the R function to Python - something which would be pretty useless. Instead only a proxy to the R function is returned::
452 | 
453 |   >>> conn.r.double
454 |   <RFuncProxy to function "double">
455 | 
456 | Actually functions are always returned as proxy references, both in the ``conn.r`` and the ``conn.ref`` namespace,
457 | so ``conn.r.<function>`` is the same as ``conn.ref.<function>``.
458 | 
459 | Using references to R variables is indeed absolutely necessary for variable content which is not transferable into
460 | Python, like special types of R classes, complex data frames etc.
461 | 
462 | 
463 | Handling complex result objects from R functions
464 | ---------------------------------------------------
465 | 
466 | Some functions in R (especially those doing statistical calculations) return quite complex result objects.
467 | 
468 | The T-test is such an example. In the R shell you would see something like this (please ignore the silly values
469 | applied to the t test)::
470 | 
471 |    > t.test(c(1,2,3,1),c(1,6,7,8))
472 | 
473 |         Welch Two Sample t-test
474 | 
475 |    data:  c(1, 2, 3, 1) and c(1, 6, 7, 8)
476 |    t = -2.3054, df = 3.564, p-value = 0.09053
477 |    alternative hypothesis: true difference in means is not equal to 0
478 |    95 percent confidence interval:
479 |     -8.4926941  0.9926941
480 |    sample estimates:
481 |    mean of x mean of y
482 |         1.75      5.50
483 | 
484 | This is what you would get to see directly in your R shell.
485 | 
486 | Now, how would this convoluted result be transferred into Python objects? For this to be possible
487 | pyRserve has defined three special classes that allow for a mapping from R to Python objects. These classes
488 | are explained in the following sections. Afterwards - with that knowledge - we have a final look at the result
489 | of the t-test again.
490 | 
491 | 
492 | TaggedLists
493 | ~~~~~~~~~~~~~~~~
494 | 
495 | The first special type of container is called "TaggedList". It reflects a list-type object in R where
496 | items can be accessed in two ways as shown here (this is now pure R code)::
497 | 
498 |   > t <- list(husband="otto", wife="erna", "5th avenue")
499 |   > t[1]
500 |   $husband
501 |   [1] "otto"
502 | 
503 |   > t['husband']
504 |   $husband
505 |   [1] "otto"
506 | 
507 | So items in the list can be either accessed via their index position, or through their "tag". Please note that the
508 | third list item ("5th avenue") is not tagged, so it can only be accessed via its index number, i.e. ``t[3]``
509 | (indexing in R starts at 1 and not at zero as in Python!).
510 | 
511 | There is no direct match to any standard Python construct for a ``TaggedList``. Python dictionaries do not preserve
512 | their elements' order and also don't allow for missing keys (which is why an ``OrderedDict`` also doesn't help).
513 | NamedTuples on the other side would do the job but don't allow items to be appended or deleted since they are
514 | immutable.
515 | 
516 | The solution was to provide a special class in Python which is called ``TaggedList``. When accessing the
517 | list ``t`` from the example above you'll obtain an instance of a TaggedList in Python::
518 | 
519 |   >>> t = conn.eval('list(husband="otto", wife="erna", "5th avenue")')
520 |   >>> t
521 |   TaggedList(husband='otto', wife='erna', '5th avenue')
522 | 
523 | This ``TaggedList`` instance can be accessed in the same way as its R counterpart, except for the fact that indexing is
524 | starting at zero in the usual Pythonic way::
525 | 
526 |   >>> t[0]
527 |   'otto'
528 |   >>> t['husband']
529 |   'otto'
530 |   >>> t[2]
531 |   '5th avenue'
532 | 
533 | To retrieve its data suitable for instantiating another ``TaggedList`` on the Python side get its data as a list of
534 | tuples. This also demonstrates how a ``TaggedList`` can be created directly in Python::
535 | 
536 |   >>> from pyRserve import TaggedList
537 |   >>> t.astuples
538 |   [('husband', 'otto'), ('wife', 'erna'), (None, '5th avenue')]
539 |   >>> new_tagged_list = TaggedList(t.astuples)
540 | 
541 | .. NOTE::
542 |    ``TaggedList`` does not provide the full list API that one would expect, some methods are just not entirely
543 |    implemented yet. However it is useful enough to retrieve all information obtained out of an R result object.
544 | 
545 | 
546 | AttrArrays
547 | ~~~~~~~~~~~~~~~~~
548 | 
549 | An ``AttrArray`` is simply a normal numpy array, with an additional dictionary attribute called ``attr``.
550 | This dictionary is used to store meta data associated to an array retrieved from R.
551 | 
552 | Let's create such an ``AttrArray`` in R, and transfer it to the Python side::
553 | 
554 |    >>> conn.voidEval("t <- c(-8.49, 0.99)")
555 |    >>> conn.voidEval("attributes(t) <- list(conf.level=0.95)")
556 |    >>> conn.r.t
557 |    AttrArray([-8.49, 0.99], attr={'conf.level': array([ 0.95])})
558 | 
559 | To create such an array from Python in R is also possible via::
560 | 
561 |    >>> from pyRserve import AttrArray
562 |    >>> conn.r.t = AttrArray.new([-8.49, 0.99], {'conf.level': numpy.array([ 0.95])})
563 | 
564 | Instead of a list argument the ``new`` function also accepts a numpy array::
565 | 
566 |    >>> conn.r.t = AttrArray.new(numpy.array([-8.49, 0.99]), {'conf.level': numpy.array([ 0.95])})
567 | 
568 | 
569 | TaggedArrays
570 | ~~~~~~~~~~~~~~~~
571 | 
572 | The third special data type provided by pyRserve is the so called ``TaggedArray``. It provides basically the same
573 | features as ``TaggedList`` above, however the underlying data type is a numpy-Array instead of a Python list.
574 | In fact, a TaggedArray is a direct subclass of ``numpy.ndarray``, enhanced with some new features
575 | like accessing array cells by name as in ``TaggedList``.
576 | 
577 | For the moment ``TaggedArrays`` only make real sense if they are 1-dimensional, so please do not change
578 | their shape. The results would not really be predictable.
579 | 
580 | To create a ``TaggedArray`` on the R side and transfer it to Python type:
581 | 
582 |   >>> res = conn.eval('c(a=1.,b=2.,3.)')
583 |   >>> res
584 |   TaggedArray([ 1.,  2.,  3.], key=['a', 'b', ''])
585 |   >>> res[1]
586 |   2.0
587 |   >>> res['b']
588 |   2.0
589 | 
590 | The third element in the array did not obtain a name on the R side, so it is represented by an empty string in
591 | the ``TaggedArray`` object.
592 | 
593 | Although ``TaggedArray`` instances are normal numpy arrays they lose their tags when further processed in Python, but still
594 | present themselves (via ``__repr__``) as ``TaggedArray``. This is a current flaw in their implementation.
595 | 
596 | To create a ``TaggedArray`` directly in Python there is a constructor function ``new()`` which takes a normal
597 | 1-d numpy array as the first argument and a list of tags as the second. Both arguments must match in their size::
598 | 
599 |   >>> from pyRserve import TaggedArray
600 |   >>> arr = TaggedArray.new(numpy.array([1, 2, 3]), ['a', 'b', ''])
601 |   >>> arr
602 |   TaggedArray([1, 2, 3], key=['a', 'b', ''])
603 | 
604 | 
605 | Back to the t-test example
606 | --------------------------------
607 | 
608 | After ``TaggedList``, ``AttrArray`` and ``TaggedArray`` have been introduced we can now go back to the t-test mentioned
609 | before. Let's make the same call to the test function, this time just from the Python side, and then
610 | look at the result. Again there are two ways to call it, one via string evaluation by the R interpreter,
611 | one by directly providing native Python parameters.
612 | So::
613 | 
614 |    >>> res = conn.eval('t.test(c(1,2,3,1),c(1,6,7,8))')
615 | 
616 | and::
617 | 
618 |    >>> res = conn.r.t.test(numpy.array([1,2,3,1]), numpy.array([1,6,7,8]))
619 | 
620 | do exactly the same thing.
621 | Looking at the result we get::
622 | 
623 |    >>> res
624 |    <TaggedList(statistic=TaggedArray([-2.30541984]),
625 |     parameter=TaggedArray([ 3.56389482], tags=['df']),
626 |     p.value=0.090532640733331213,
627 |     conf.int=AttrArray([-8.49269413,  0.99269413], attr={'conf.level': array([ 0.95])}),
628 |     estimate=TaggedArray([ 1.75,  5.5 ], tags=['mean of x', 'mean of y']),
629 |     null.value=TaggedArray([ 0.], tags=['difference in means']),
630 |     alternative='two.sided',
631 |     method='Welch Two Sample t-test',
632 |     data.name='c(1, 2, 3, 1) and c(1, 6, 7, 8)')>
633 | 
634 | The result is an instance of a ``TaggedList``, containing different types of list items.
635 | 
636 | So to access e.g. the confidence interval one would type in Python::
637 | 
638 |    >>> res['conf.int']
639 |    AttrArray([-8.49269413,  0.99269413], attr={'conf.level': array([ 0.95])})
640 | 
641 | This returns an AttrArray where the confidence level is stored in an attribute called ``conf.level``
642 | in the ``attr``-dictionary::
643 | 
644 |    >>> res['conf.int'].attr['conf.level']
645 |    array([ 0.95])
646 | 
647 | In the ``res``-result data structure above there are also objects of a container called TaggedArray::
648 | 
649 |    >>> res['estimate']
650 |    TaggedArray([ 1.75,  5.5 ], tags=['mean of x', 'mean of y'])
651 |    >>> res['estimate'][1]
652 |    5.5
653 |    >>> res['estimate']['mean of y']
654 |    5.5
655 | 
656 | Out Of Band messages (OOB)
657 | ----------------------------
658 | 
659 | Starting with version 1.7, Rserve allows OOB messages to be sent from R to Rserve clients, i.e. it
660 | allows for nested communication during an ``eval`` call.
661 | 
662 | This capability requires Rserve to be started with a configuration enabling it, and Rserve itself to be loaded as a
663 | library into the server. Both are easily accomplished in a config file (e.g. ``oob.conf``) like this::
664 | 
665 |    oob enable
666 |    eval library(Rserve)
667 | 
668 | Then start Rserve using this config file::
669 | 
670 |    R CMD Rserve --RS-conf oob.conf
671 | 
672 | OOB messaging works by calling ``self.oobSend`` or ``self.oobMessage`` in R, e.g.::
673 | 
674 |    >>> conn.eval('self.oobSend(1)')
675 |    True
676 | 
677 | This does nothing but indicate that it works. For real usefulness, one needs to register a callback
678 | that gets called with the sent data and user code as parameters::
679 | 
680 |    >>> def printoobmsg(data, code): print('<<<', data, code)
681 |    ...
682 |    >>> conn.oobCallback = printoobmsg
683 |    >>> conn.eval('self.oobSend("foo")')  # user code is 0 per default
684 |    <<< foo 0
685 |    True
686 | 
687 | The other function, ``self.oobMessage`` executes the callback and gives its return value to R::
688 | 
689 |    >>> conn.oobCallback = lambda data, code: data**code
690 |    >>> conn.voidEval('dc <- self.oobMessage(2, 3)')
691 |    >>> conn.r.dc
692 |    8
693 | 
694 | The user code might be useful to create a callback convention used for switching callbacks based
695 | on agreed-upon codes::
696 | 
697 |    >>> C_PRINT = conn.r.C_PRINT = 0
698 |    >>> C_ECHO  = conn.r.C_ECHO  = 1
699 |    >>> C_STORE = conn.r.C_STORE = 2
700 |    >>> store = []
701 |    >>> functions = {
702 |    ...     C_PRINT: lambda data: print('<<<', data),
703 |    ...     C_ECHO:  lambda data: data,
704 |    ...     C_STORE: store.append,
705 |    ... }
706 |    >>> def dispatch(data, code):
707 |    ...     return functions[code](data)
708 |    >>> conn.oobCallback = dispatch
709 |    >>>
710 |    >>> conn.eval('self.oobMessage("foo", C_PRINT)')
711 |    <<< foo
712 |    >>> conn.eval('self.oobMessage("foo", C_ECHO)')
713 |    'foo'
714 |    >>> conn.eval('self.oobMessage("foo", C_STORE)')
715 |    >>> store
716 |    ['foo']
717 |    >>> conn.eval('self.oobMessage("foo", 3)')
718 |    Traceback (most recent call last):
719 |      File "<stdin>", line 1, in <module>
720 |    KeyError: 3
721 | 
722 | 
723 | An example showing how nesting of OOB messages works
724 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
725 | 
726 | The previous examples were showing the bare application of OOB messages,
727 | but the real power of it comes when one understands how messages are
728 | getting nested within an ``eval`` call.
729 | 
730 | For that first create an R function which returns progress information
731 | during a "complicated" calculation:
732 | 
733 |    >>> r_func = """
734 |    ... big_job <- function(x)
735 |    ... {
736 |    ...     a <- x*2
737 |    ...     self.oobSend('25% done')
738 |    ...     b <- a * a
739 |    ...     self.oobSend('50% done')
740 |    ...     c <- a + b
741 |    ...     self.oobSend('75% done')
742 |    ...     d <- c**2
743 |    ...     self.oobSend('100% done')
744 |    ...     -1 * d
745 |    ... }"""
746 |    >>> conn.eval(r_func)
747 | 
748 | Then create a progress report function, register it as a callback and
749 | then call the actual R function:
750 | 
751 |    >>> def progress(msg, code): print(msg)
752 |    ...
753 |    >>> conn.oobCallback = progress
754 |    >>> res = conn.r.big_job(5)
755 |    25% done
756 |    50% done
757 |    75% done
758 |    100% done
759 |    >>> res
760 |    -12100.0
761 | 


--------------------------------------------------------------------------------