├── TAR_EXCLUDELIST ├── testing ├── __init__.py ├── rserve-test.conf ├── test-script.R ├── binCodes.py ├── testtools.py ├── test_taggedContainers.py ├── conftest.py └── test_rparser.py ├── requirements.txt ├── pyRserve ├── version.txt ├── binary_closure.txt ├── __init__.py ├── rexceptions.py ├── misc.py ├── taggedContainers.py ├── rtypes.py ├── rserializer.py ├── rconn.py └── rparser.py ├── doc ├── intro.rst ├── index.rst ├── license.rst ├── Makefile ├── make.bat ├── installation.rst ├── changelog.rst ├── conf.py └── manual.rst ├── requirements_dev.txt ├── .gitignore ├── CREDITS ├── .pre-commit-config.yaml ├── setup.cfg ├── TODO ├── .travis.yml ├── .github └── workflows │ └── build-and-test.yml ├── INSTALL ├── Makefile ├── LICENSE ├── dockerfiles └── R-4.2.2-Rserve-1.8.12-alpine │ └── Dockerfile ├── setup.py └── README.rst /TAR_EXCLUDELIST: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /testing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | -------------------------------------------------------------------------------- /pyRserve/version.txt: -------------------------------------------------------------------------------- 1 | 1.0.3 2 | -------------------------------------------------------------------------------- /doc/intro.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../README.rst 2 | -------------------------------------------------------------------------------- /testing/rserve-test.conf: -------------------------------------------------------------------------------- 1 | oob enable 2 | eval library(Rserve) 3 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | flake8 3 | coverage 4 | sphinx 5 | pre-commit 6 | wheel 7 | twine 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | MANIFEST 3 | dist 4 | doc/doctrees 5 | doc/html 6 | doc/pyRserve.html.zip 7 | *~ 8 | __pycache__ 9 | .idea 10 | -------------------------------------------------------------------------------- /CREDITS: -------------------------------------------------------------------------------- 1 | Credits: 2 | -------- 3 | 4 | Simon Urbanek, the author of Rserve (http://www.rforge.net/Rserve/), 5 | has been the main source of support to get the binary data conversion 6 | between Rserve and Python working properly. 
7 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | - id: trailing-whitespace 7 | - id: flake8 8 | args: [--max-line-length=88] 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [flake8] 5 | max_line_length = 80 6 | per-file-ignores = 7 | pyRserve/__init__.py: F401 8 | # Much improved readability if we allow for extra space character in 9 | # various places for code formatting. So disable some errors for flake8: 10 | pyRserve/rtypes.py: E114,E116,E221,E222 11 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | 2 | Important TODOs: 3 | ================ 4 | - Support for endianness when loading arrays directly with 'fromstring()' or 5 | writing them with 'tostring()'. 6 | 7 | Affected code: 8 | - rserializer.s_xt_array_double() 9 | - rparser.xt_array_numeric() 10 | 11 | 12 | - The rserializer fails on objects like TaggedList([("n","Fred"), 2.0, ("c_ages", 5.5)]) 13 | where the 2nd item has no tag name. 
14 | -------------------------------------------------------------------------------- /pyRserve/binary_closure.txt: -------------------------------------------------------------------------------- 1 | \x01\x00\x01\x00\x4c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 2 | 3 | \x0a\x48\x00\x00 4 | 5 | Closure: \x12\x44\x00\x00 6 | ListTag: \x15\x10\x00\x00 7 | Symname: \x13\x04\x00\x00\x00\x00\x00\x00 8 | Symname: \x13\x04\x00\x00\x65\x6e\x76\x00 9 | LangNoTag: \x16\x2c\x00\x00 10 | \x13\x0c\x00\x00\x2e\x49\x6e\x74\x65\x72\x6e\x61\x6c\x00\x00\x00\x16\x18\x00\x00\x13\x0c\x00\x00\x70\x61\x72\x65\x6e\x74\x2e\x65\x6e\x76\x00\x00\x13\x04\x00\x00\x65\x6e\x76\x00 11 | -------------------------------------------------------------------------------- /testing/test-script.R: -------------------------------------------------------------------------------- 1 | # Test file for Ralph with plot returned as raw file 2 | # 3 | # Author: yanabr 4 | ############################################################################### 5 | 6 | rm(list=ls()) 7 | graphics.off() 8 | 9 | pid <- Sys.getpid() 10 | 11 | ## some dummy data 12 | x <- sort(rnorm(100)) 13 | y <- 2*x+rnorm(100,0,0.5) 14 | 15 | ## model 16 | model <- lm(y~x) 17 | 18 | filename <- paste('plot_',pid,'.png',sep="") 19 | png(width=480, height=480, file=filename) 20 | plot(x,y) 21 | abline(coef(model),col=2,lty=2) 22 | dev.off() 23 | 24 | im <- readBin(filename,"raw", 999999) 25 | 26 | result_vector <- list(x,y,coef(model),im) 27 | -------------------------------------------------------------------------------- /pyRserve/__init__.py: -------------------------------------------------------------------------------- 1 | """pyRserve package""" 2 | import os 3 | import sys 4 | import warnings 5 | 6 | from .rconn import connect 7 | from .taggedContainers import TaggedList, TaggedArray, AttrArray 8 | 9 | # Show all deprecated warning only once: 10 | warnings.filterwarnings('once', category=DeprecationWarning) 11 | 12 | if 
sys.version_info.major == 2: 13 | warnings.warn( 14 | 'Python 2 is deprecated, it will no longer be supported in pyRserve 1.1', 15 | DeprecationWarning 16 | ) 17 | del warnings 18 | 19 | __version__ = open(os.path.join(os.path.dirname(__file__), 20 | 'version.txt')).readline().strip() 21 | -------------------------------------------------------------------------------- /pyRserve/rexceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Exception classes for pyRserve 3 | """ 4 | 5 | 6 | class PyRserveError(Exception): 7 | pass 8 | 9 | 10 | class REvalError(PyRserveError): 11 | """Indicates an error raised by R itself (not by Rserve)""" 12 | pass 13 | 14 | 15 | class RConnectionRefused(PyRserveError): 16 | pass 17 | 18 | 19 | class RResponseError(PyRserveError): 20 | pass 21 | 22 | 23 | class RSerializationError(PyRserveError): 24 | pass 25 | 26 | 27 | class PyRserveClosed(PyRserveError): 28 | pass 29 | 30 | 31 | class EndOfDataError(PyRserveError): 32 | pass 33 | 34 | 35 | class RParserError(PyRserveError): 36 | pass 37 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.6" 5 | - "3.7" 6 | - "3.8" 7 | - "3.9" 8 | before_install: 9 | - "sudo apt-get update -qq" 10 | - "sudo apt-get install -y gfortran" 11 | - "curl -O https://cran.r-project.org/src/base/R-4/R-4.1.0.tar.gz" 12 | - "tar -xzf R-4.1.0.tar.gz" 13 | - "(cd R-4.1.0; ./configure --with-x=no --prefix=/usr --enable-R-shlib && make && sudo make install)" 14 | - "curl -O http://www.rforge.net/Rserve/snapshot/Rserve_1.8-8.tar.gz" 15 | - "sudo PKG_CPPFLAGS=-DNODAEMON R CMD INSTALL Rserve_1.8-8.tar.gz" 16 | install: "pip install -r requirements.txt" 17 | script: 18 | - "py.test testing" 19 | -------------------------------------------------------------------------------- /doc/index.rst: 
-------------------------------------------------------------------------------- 1 | .. pyRserve documentation master file 2 | 3 | Welcome to pyRserve's documentation! 4 | ==================================== 5 | 6 | pyRserve is a library for connecting Python to an R process running under Rserve. 7 | Through such a connection, variables can be read and set in R from Python, 8 | and R functions can also be called remotely. 9 | 10 | This documentation applies to pyRserve release V |release| 11 | 12 | Contents: 13 | 14 | .. toctree:: 15 | :maxdepth: 3 16 | 17 | intro 18 | installation 19 | manual 20 | changelog 21 | license 22 | 23 | Indices and tables 24 | ================== 25 | 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | -------------------------------------------------------------------------------- /.github/workflows/build-and-test.yml: -------------------------------------------------------------------------------- 1 | name: build-and-test 2 | on: [push] 3 | jobs: 4 | build-and-test: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | matrix: 8 | python-version: ["3.9", "3.10", "3.11"] 9 | 10 | services: 11 | service-rserve: 12 | image: ghcr.io/ralhei/r4.2.2-rserve1.8.12:latest 13 | ports: 14 | - 6311:6311 15 | options: --tty 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | cache: 'pip' # caching pip dependencies 24 | - name: Install dependencies 25 | run: pip install -r requirements.txt -r requirements_dev.txt 26 | - name: Run pytest 27 | run: pytest testing 28 | -------------------------------------------------------------------------------- /testing/binCodes.py: -------------------------------------------------------------------------------- 1 | # This file is just for development purposes 2 | # It demonstrates how binary commands are composed for various purposes 3 | # flake8: noqa 4 
| 5 | # A bunch of binary commands: 6 | 7 | # Make an evaluation call to Rserve, giving a simple string with a number: 8 | # CMD_EVAL MSG_SIZE 2nd part of header DT_STRING+len data 9 | c1 = '\3\0\0\0' + '\x08\0\0\0' + '\0\0\0\0\0\0\0\0' + '\4\4\0\0' + '1\0\0\0' 10 | # -> evaluates to: numpy.array([1.0]) 11 | 12 | 13 | # Make a CMD_setSEXP call to Rserve, providing a variable name and a simple expression (array): 14 | # CMD_setSEXP MSG_SIZE 2nd part of header DT_STRING+len str-data 15 | c2 = '\x20\0\0\0' + '\x18\0\0\0' + '\0\0\0\0\0\0\0\0' + '\4\4\0\0' + 'abc\0' + \ 16 | '\0a\x0c\x00\x00\x20\x08\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00' # <- array expression 17 | 18 | # define a function in R 19 | # myfunc <- function(y1, y2) { tst <- y1 + y2; tst } 20 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | 2 | Install R: 3 | ========== 4 | Install some system packages first, e.g. on RHEL: 5 | yum install xz-devel pcre pcre-devel libcurl-devel *gfortran* zlib* bzip2-* png-devel jpeg-devel 6 | 7 | Compilation of R: 8 | 1. curl -LO https://cran.r-project.org/src/base/R-4/R-4.2.2.tar.gz 9 | 2. tar -xf R-4.2.2.tar.gz 10 | 3. $ cd R-4.2.2 11 | $ ./configure --enable-R-shlib --with-x=no 12 | $ make 13 | $ make install 14 | 15 | 16 | Install Rserve: 17 | =============== 18 | 1. curl -LO http://www.rforge.net/Rserve/snapshot/Rserve_1.8-12.tar.gz 19 | 2. R CMD INSTALL Rserve_1.8-12.tar.gz 20 | 21 | This way the debug version will also be compiled and installed. 22 | 23 | To start Rserve type: 24 | 25 | $ R CMD Rserve 26 | 27 | To start Rserve in debug mode type: 28 | 29 | $ R CMD Rserve.dbg 30 | 31 | Now it is ready to be connected from a client. 
32 | 33 | 34 | Install pyRserve 35 | ================ 36 | 37 | $ pip install pyRserve 38 | -------------------------------------------------------------------------------- /testing/testtools.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some helper functions for unit testing 3 | """ 4 | from numpy import ndarray, float32, float64, complex64, complex128 5 | 6 | 7 | def compareArrays(arr1, arr2): 8 | """Compare two (possibly nested) arrays""" 9 | def _compareArrays(xarr1, xarr2): 10 | assert xarr1.shape == xarr2.shape 11 | for idx in range(len(xarr1)): 12 | if isinstance(xarr1[idx], ndarray): 13 | _compareArrays(xarr1[idx], xarr2[idx]) 14 | else: 15 | if type(xarr1[idx]) in [float, float32, float64, complex, 16 | complex64, complex128]: 17 | # make a comparison which works for floats and complex 18 | # numbers 19 | assert abs(xarr1[idx] - xarr2[idx]) < 0.000001 20 | else: 21 | assert xarr1[idx] == xarr2[idx] 22 | try: 23 | _compareArrays(arr1, arr2) 24 | except TypeError: 25 | return False 26 | return True 27 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DATE = $(shell date +"%F") 2 | 3 | all: 4 | 5 | docs: 6 | (cd doc; make html) 7 | (cd doc/html; zip -r ../pyRserve.html.zip *.html objects.inv searchindex.js _static/* ) 8 | echo 9 | echo "Sphinx documentation has been created in doc/html/index.html" 10 | 11 | clean: 12 | find . -name '*.pyc' -exec rm '{}' \; 13 | find . -name '*~' -exec rm '{}' \; 14 | find . -name '*.bak' -exec rm '{}' \; 15 | find . -name '*.log' -exec rm '{}' \; 16 | find . 
-name '.coverage' -exec rm '{}' \; 17 | rm -rf build dist *.egg-info MANIFEST.in 18 | 19 | upload-prep: docs 20 | rm -f dist/* 21 | python setup.py sdist bdist_wheel 22 | twine check dist/* 23 | 24 | upload: upload-prep 25 | twine upload dist/* 26 | 27 | upload-testpypi: upload-prep 28 | twine upload -r testpypi dist/* 29 | 30 | backup: clean _backup 31 | 32 | _backup: 33 | DIR=`pwd`; bDIR=`basename $$DIR`; cd ..; \ 34 | tar -czf $${bDIR}_$(DATE).tgz -X $$bDIR/TAR_EXCLUDELIST $$bDIR ; \ 35 | echo "Created backup ../$${bDIR}_$(DATE).tgz" 36 | 37 | test: 38 | pytest testing 39 | 40 | coverage: 41 | rm -f pyRserve/binaryRExpressions.py* 42 | coverage run --source pyRserve -m pytest testing && coverage report --show-missing 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is the MIT license (see: http://www.opensource.org/licenses/mit-license.php) 2 | 3 | Copyright (c) 2009 Ralph Heinkel (rh [at] ralph-heinkel.com) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /doc/license.rst: -------------------------------------------------------------------------------- 1 | This is the MIT license 2 | ======================= 3 | (see: http://www.opensource.org/licenses/mit-license.php) 4 | 5 | Copyright (c) 2009, 2010, 2011 Ralph Heinkel (rh [at] ralph-heinkel.com) 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | -------------------------------------------------------------------------------- /dockerfiles/R-4.2.2-Rserve-1.8.12-alpine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:latest 2 | 3 | ENV LC_ALL en_US.UTF-8 4 | ENV LANG en_US.UTF-8 5 | 6 | ENV R_VERSION 4.2.2 7 | ENV R_SOURCE /usr/src 8 | 9 | ENV BUILD_DEPS \ 10 | wget \ 11 | perl \ 12 | tar 13 | 14 | ENV PERSISTENT_DEPS \ 15 | gcc \ 16 | g++ \ 17 | gfortran \ 18 | icu-dev \ 19 | libjpeg-turbo \ 20 | libpng-dev \ 21 | make \ 22 | openblas-dev \ 23 | pcre2-dev \ 24 | readline-dev \ 25 | xz-dev \ 26 | zlib-dev \ 27 | bzip2-dev \ 28 | curl-dev 29 | 30 | RUN apk upgrade --update && \ 31 | apk add --no-cache --virtual .build-deps $BUILD_DEPS && \ 32 | apk add --no-cache --virtual .persistent-deps $PERSISTENT_DEPS 33 | RUN mkdir -p $R_SOURCE && cd $R_SOURCE && \ 34 | wget https://cran.r-project.org/src/base/R-4/R-${R_VERSION}.tar.gz && \ 35 | tar -xf R-${R_VERSION}.tar.gz && \ 36 | cd R-${R_VERSION} && \ 37 | ./configure --prefix=/usr/local --without-x --enable-R-shlib && \ 38 | make && make install && \ 39 | wget https://www.rforge.net/Rserve/snapshot/Rserve_1.8-12.tar.gz && \ 40 | PKG_CPPFLAGS=-DNODAEMON R CMD INSTALL Rserve_1.8-12.tar.gz && \ 41 | apk del .build-deps && \ 42 | cd / && \ 43 | rm -rf $R_SOURCE 44 | RUN echo -e "remote enable\noob enable\neval library(Rserve)" > /etc/Rserv.conf 45 | 46 | CMD ["R", "CMD", "Rserve"] 47 | -------------------------------------------------------------------------------- /testing/test_taggedContainers.py: -------------------------------------------------------------------------------- 1 | """ 2 | unittests for classes from taggedContainers 3 | """ 4 | from pyRserve.taggedContainers import TaggedList 5 | 6 | 7 | def test_TaggedList_init_emtpy(): 8 | t = TaggedList() 9 | assert t.astuples() == [] 10 | assert len(t) == 0 11 | 12 | 13 | def test_TaggedList_init_one_value(): 14 | t = TaggedList([11]) 15 | assert 
t.astuples() == [(None, 11)] 16 | assert len(t) == 1 17 | assert t[0] == 11 18 | 19 | 20 | def test_TaggedList_init_one_value_with_key(): 21 | t = TaggedList([('v1', 11)]) 22 | assert t.astuples() == [('v1', 11)] 23 | assert len(t) == 1 24 | assert t[0] == 11 25 | assert t['v1'] == 11 26 | 27 | 28 | def test_TaggedList_init_two_values_second_with_key(): 29 | t = TaggedList([11, ('v2', 22)]) 30 | assert t.astuples() == [(None, 11), ('v2', 22)] 31 | assert len(t) == 2 32 | assert t[0] == 11 33 | assert t[1] == 22 34 | assert t['v2'] == 22 35 | 36 | 37 | def test_TaggedList_append(): 38 | t = TaggedList([11, ('v2', 22)]) 39 | t.append(33) 40 | assert len(t) == 3 41 | assert t.values == [11, 22, 33] 42 | 43 | 44 | def test_TaggedList_append_with_key(): 45 | t = TaggedList([11, ('v2', 22)]) 46 | t.append(v3=33) 47 | assert len(t) == 3 48 | assert t.values == [11, 22, 33] 49 | assert t['v3'] == 33 50 | 51 | 52 | def test_TaggedList_insert(): 53 | t = TaggedList([11, ('v2', 22)]) 54 | t.insert(0, 1) 55 | assert len(t) == 3 56 | assert t.values == [1, 11, 22] 57 | 58 | 59 | def test_TaggedList_insert_with_key(): 60 | t = TaggedList([11, ('v2', 22)]) 61 | t.insert(0, x=1) 62 | assert len(t) == 3 63 | assert t.values == [1, 11, 22] 64 | assert t[0] == t['x'] == 1 65 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | PACKAGE_NAME = "pyRserve" 5 | 6 | __version__ = open(os.path.join('pyRserve', 'version.txt')).readline().strip() 7 | requirements = open('requirements.txt').read().splitlines() 8 | requirements_testing = open('requirements_dev.txt').read().splitlines() 9 | 10 | # Get long_description from intro.txt: 11 | here = os.path.dirname(os.path.abspath(__file__)) 12 | with open('README.rst') as fp: 13 | long_description = fp.read() 14 | 15 | setup( 16 | name=PACKAGE_NAME, 17 | version=__version__, 
18 | description='A Python client to remotely access the R statistic package ' 19 | 'via network', 20 | long_description=long_description, 21 | long_description_content_type='text/x-rst', 22 | author='Ralph Heinkel', 23 | author_email='rh@ralph-heinkel.com', 24 | url='https://github.com/ralhei/pyRserve', 25 | project_urls={ 26 | 'Documentation': 'https://pyrserve.readthedocs.io/', 27 | 'Changelog': 'https://pyrserve.readthedocs.io/en/latest/changelog.html', 28 | 'PyPI': 'https://pypi.org/project/pyRserve/', 29 | 'Tracker': 'https://github.com/ralhei/pyRserve/issues', 30 | }, 31 | keywords='R Rserve', 32 | packages=['pyRserve'], 33 | include_package_data=True, 34 | package_data={ 35 | 'pyRserve': ['version.txt'], 36 | }, 37 | data_files=[('.', ['requirements.txt', 'requirements_dev.txt'])], 38 | python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4', 39 | install_requires=requirements, 40 | extras_require={ 41 | 'testing': requirements_testing 42 | }, 43 | license='MIT license', 44 | platforms=['unix', 'linux', 'cygwin', 'win32'], 45 | classifiers=[ 46 | 'Development Status :: 5 - Production/Stable', 47 | 'Environment :: Console', 48 | 'License :: OSI Approved :: MIT License', 49 | 'Operating System :: POSIX', 50 | 'Operating System :: Microsoft :: Windows', 51 | 'Programming Language :: Python', 52 | 'Programming Language :: Python :: 2', 53 | 'Programming Language :: Python :: 3', 54 | 'Intended Audience :: Developers', 55 | 'Topic :: Software Development :: Libraries', 56 | 'Topic :: System :: Networking', 57 | 'Topic :: Scientific/Engineering :: Information Analysis', 58 | 'Topic :: Scientific/Engineering :: Mathematics', 59 | ], 60 | ) 61 | -------------------------------------------------------------------------------- /pyRserve/misc.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # global variable to indicate whether this is Python3 or not: 4 | PY3 = sys.version_info[0] == 
3 5 | 6 | 7 | class FunctionMapper(object): 8 | """ 9 | This class is used in Lexer, Parser, and Serializer to map IDs 10 | to functions""" 11 | def __init__(self, adict): 12 | self.adict = adict 13 | 14 | def __call__(self, *args): 15 | def wrap(func): 16 | for a in args: 17 | self.adict[a] = func 18 | return func 19 | return wrap 20 | 21 | 22 | def hexString(aString): 23 | """ 24 | convert a binary string in its hexadecimal representation, 25 | like '\x00\x01...' 26 | """ 27 | if PY3: 28 | # in Py3 iterating over a byte-sequence directly provides the 29 | # numeric values of the bytes ... 30 | return ''.join([r'\x%02x' % c for c in aString]) 31 | else: 32 | # ... while in Py2 we need to use ord() to convert chars to 33 | # their numeric values: 34 | return ''.join([r'\x%02x' % ord(c) for c in aString]) 35 | 36 | 37 | def byteEncode(aString, encoding='utf-8'): 38 | # check for __name__ not to get faked by Python2.x! 39 | if PY3 and type(aString).__name__ != 'bytes': 40 | return bytes(aString, encoding=encoding) 41 | else: 42 | if type(aString).__name__.startswith('unicode'): 43 | return aString.encode('utf-8') 44 | else: 45 | return aString 46 | 47 | 48 | def stringEncode(byteData, encoding='utf-8'): 49 | # check for __name__ not to get faked by Python2.x! 50 | if PY3 and type(byteData).__name__ == 'bytes': 51 | if byteData == b'\xff': 52 | return None 53 | # got a real bytes object, must be python3 ! 54 | return byteData.decode(encoding=encoding) 55 | else: 56 | # in py2.x there is no real byte-data, it is a string already 57 | return byteData 58 | 59 | 60 | def padLen4(aString): 61 | """ 62 | Calculate how many additional bytes a given string needs to have a length 63 | of a multiple of 4. A zero-length array is considered a multiple of 4. 
64 | """ 65 | mod = divmod(len(aString), 4)[1] 66 | return 4-mod if mod else 0 67 | 68 | 69 | def string2bytesPad4(aString): 70 | """ 71 | Return a given string converted into bytes, padded with zeros at the end 72 | to make its length be a multiple of 4. 73 | A zero-length string is considered a multiple of 4. 74 | """ 75 | byteString = byteEncode(aString) + b'\0' 76 | return byteString + padLen4(byteString) * b'\0' 77 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = . 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | 15 | .PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest 16 | 17 | help: 18 | @echo "Please use \`make <target>' where <target> is one of" 19 | @echo " html to make standalone HTML files" 20 | @echo " dirhtml to make HTML files named index.html in directories" 21 | @echo " pickle to make pickle files" 22 | @echo " json to make JSON files" 23 | @echo " htmlhelp to make HTML files and a HTML help project" 24 | @echo " qthelp to make HTML files and a qthelp project" 25 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 26 | @echo " changes to make an overview of all changed/added/deprecated items" 27 | @echo " linkcheck to check all external links for integrity" 28 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 29 | 30 | clean: 31 | -rm -rf $(BUILDDIR)/* 32 | 33 | html-zip: html 34 | (cd html; zip ../html.zip *.html _static/* ) 35 | 36 | html: 37 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 38 | 
@echo 39 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 40 | 41 | dirhtml: 42 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 43 | @echo 44 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 45 | 46 | pickle: 47 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 48 | @echo 49 | @echo "Build finished; now you can process the pickle files." 50 | 51 | json: 52 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 53 | @echo 54 | @echo "Build finished; now you can process the JSON files." 55 | 56 | htmlhelp: 57 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 58 | @echo 59 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 60 | ".hhp project file in $(BUILDDIR)/htmlhelp." 61 | 62 | qthelp: 63 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 64 | @echo 65 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 66 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 67 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pyRserve.qhcp" 68 | @echo "To view the help file:" 69 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pyRserve.qhc" 70 | 71 | latex: 72 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 73 | @echo 74 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 75 | @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ 76 | "run these through (pdf)latex." 77 | 78 | changes: 79 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 80 | @echo 81 | @echo "The overview file is in $(BUILDDIR)/changes." 82 | 83 | linkcheck: 84 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 85 | @echo 86 | @echo "Link check complete; look for any errors in the above output " \ 87 | "or in $(BUILDDIR)/linkcheck/output.txt." 
88 | 89 | doctest: 90 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 91 | @echo "Testing of doctests in the sources finished, look at the " \ 92 | "results in $(BUILDDIR)/doctest/output.txt." 93 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | set SPHINXBUILD=sphinx-build 6 | set BUILDDIR=_build 7 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 8 | if NOT "%PAPER%" == "" ( 9 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 10 | ) 11 | 12 | if "%1" == "" goto help 13 | 14 | if "%1" == "help" ( 15 | :help 16 | echo.Please use `make ^` where ^ is one of 17 | echo. html to make standalone HTML files 18 | echo. dirhtml to make HTML files named index.html in directories 19 | echo. pickle to make pickle files 20 | echo. json to make JSON files 21 | echo. htmlhelp to make HTML files and a HTML help project 22 | echo. qthelp to make HTML files and a qthelp project 23 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 24 | echo. changes to make an overview over all changed/added/deprecated items 25 | echo. linkcheck to check all external links for integrity 26 | echo. doctest to run all doctests embedded in the documentation if enabled 27 | goto end 28 | ) 29 | 30 | if "%1" == "clean" ( 31 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 32 | del /q /s %BUILDDIR%\* 33 | goto end 34 | ) 35 | 36 | if "%1" == "html" ( 37 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 38 | echo. 39 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 40 | goto end 41 | ) 42 | 43 | if "%1" == "dirhtml" ( 44 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 45 | echo. 46 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 
47 | goto end 48 | ) 49 | 50 | if "%1" == "pickle" ( 51 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 52 | echo. 53 | echo.Build finished; now you can process the pickle files. 54 | goto end 55 | ) 56 | 57 | if "%1" == "json" ( 58 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 59 | echo. 60 | echo.Build finished; now you can process the JSON files. 61 | goto end 62 | ) 63 | 64 | if "%1" == "htmlhelp" ( 65 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 66 | echo. 67 | echo.Build finished; now you can run HTML Help Workshop with the ^ 68 | .hhp project file in %BUILDDIR%/htmlhelp. 69 | goto end 70 | ) 71 | 72 | if "%1" == "qthelp" ( 73 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 74 | echo. 75 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 76 | .qhcp project file in %BUILDDIR%/qthelp, like this: 77 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pyRserve.qhcp 78 | echo.To view the help file: 79 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pyRserve.ghc 80 | goto end 81 | ) 82 | 83 | if "%1" == "latex" ( 84 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 85 | echo. 86 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 87 | goto end 88 | ) 89 | 90 | if "%1" == "changes" ( 91 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 92 | echo. 93 | echo.The overview file is in %BUILDDIR%/changes. 94 | goto end 95 | ) 96 | 97 | if "%1" == "linkcheck" ( 98 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 99 | echo. 100 | echo.Link check complete; look for any errors in the above output ^ 101 | or in %BUILDDIR%/linkcheck/output.txt. 102 | goto end 103 | ) 104 | 105 | if "%1" == "doctest" ( 106 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 107 | echo. 108 | echo.Testing of doctests in the sources finished, look at the ^ 109 | results in %BUILDDIR%/doctest/output.txt. 
110 | goto end 111 | ) 112 | 113 | :end 114 | -------------------------------------------------------------------------------- /doc/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Before pyRserve can be used, R and Rserve have to be installed properly. 5 | Installation instructions for both packages are available on their corresponding 6 | websites at `<https://www.r-project.org/>`_ and `<http://www.rforge.net/Rserve/>`_ 7 | 8 | 9 | Installing R from sources 10 | ------------------------- 11 | 12 | For R being able to run Rserve properly it has to be installed with the 13 | ``--enable-R-shlib`` option. 14 | 15 | The following commands show how to do this for the sources. Make sure you have a 16 | Fortran compiler installed, otherwise installation will not be possible. 17 | 18 | .. NOTE:: 19 | You need a couple of LINUX packages and libraries to be installed, like a Fortran 20 | compiler and readline/bzip2/... development libraries. On OpenSuse these can be installed 21 | with ``zypper install -y gcc-fortran readline-devel libbz2-devel xz-devel pcre2-devel libcurl-devel`` 22 | Other Linux distributions provide packages with similar names. 23 | 24 | Installing R then looks like:: 25 | 26 | R_VER=4.3.1 # possibly find the latest version, or use the version you require 27 | curl -LO https://cran.r-project.org/src/base/R-4/R-${R_VER}.tar.gz 28 | tar -xzf R-${R_VER}.tar.gz 29 | cd R-${R_VER} 30 | ./configure --enable-R-shlib --with-x=no 31 | make 32 | make install 33 | 34 | For Windows it might be just enough to install a prebuilt R package. The same 35 | might be true for some Linux distributions, just make sure to install a 36 | version which also contains all headers necessary for compiling Rserve in the 37 | next step.
38 | 39 | Installing Rserve 40 | ------------------ 41 | 42 | To download and install Rserve, run the following from your command line:: 43 | 44 | curl -LO http://www.rforge.net/Rserve/snapshot/Rserve_1.8-12.tar.gz 45 | R CMD INSTALL Rserve_1.8-12.tar.gz 46 | 47 | Older versions of Rserve might also work, the earliest functional version however 48 | seems to be 0.6.6. 49 | 50 | .. NOTE:: 51 | Rserve usually daemonizes itself after starting from the command 52 | line. If you want to prevent this from happening (e.g. because you would 53 | like to control Rserve by a process management tool like ``supervisord`` 54 | or want to control Rserve when running the unittests with ``pytest --run-rserve``) 55 | then Rserve has to be installed with the special ``-DNODAEMON`` compiler flag:: 56 | 57 | PKG_CPPFLAGS=-DNODAEMON R CMD INSTALL Rserve_1.8-12.tar.gz 58 | 59 | 60 | Installing pyRserve 61 | ------------------- 62 | 63 | From your unix/windows command line run:: 64 | 65 | pip install pyRserve 66 | 67 | If you want to develop or test locally, then also install extra packages for testing:: 68 | 69 | pip install pyRserve[testing] 70 | 71 | Currently supported Python versions are 3.6 to 3.11. It might still run on Python 2.7 72 | but this is not supported anymore and will be deprecated in future versions. 73 | 74 | In the next section you'll find instructions on how to use everything together. 75 | 76 | 77 | Running unittests 78 | ----------------- 79 | After installation is completed - and for those who want to contribute to pyRserve's development - 80 | unittests can be run straight from the command line. Remember to have pyRserve installed with 81 | the testing dependencies, as described in the previous section. 82 | 83 | In the current setup pytest is able to automatically fire up an Rserve-process which needs to be available 84 | for the unittests to run against.
"""
Configurations and fixtures for testing pyRserve with pytest.
"""
import os
import time
import shutil
import socket
import subprocess

import pytest

import pyRserve.rexceptions

HERE_PATH = os.path.dirname(os.path.realpath(__file__))

# Use different port from default to avoid clashes with regular Rserve
# running on same machine:
EXTRA_RPORT = 6355


def _stop_process(proc):
    """
    Terminate a subprocess started by start_Rserve() on a best-effort basis.

    :param proc: ``subprocess.Popen`` object, or None (in which case nothing
        is done).
    """
    if proc is None:
        return
    try:
        proc.terminate()
        # Reap the child so that no zombie process is left behind:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        proc.kill()
    except (OSError, subprocess.SubprocessError):
        # Deliberately ignored: this is cleanup code, and the process may
        # already be gone.
        pass
    finally:
        if proc.stderr is not None:
            proc.stderr.close()


def start_Rserve(port):
    """
    Start an Rserve process for unittesting and check that it is reachable.

    :param port: TCP port the new Rserve process should listen on.
    :return: the ``subprocess.Popen`` object of the ``R CMD Rserve`` call.
    :raises RuntimeError: if Rserve exits with an error right after startup,
        cannot be reached over the network, or answers with an unexpected
        protocol header.
    """
    # First check that 'R' is in PATH:
    if not shutil.which('R'):
        pytest.exit("Cannot start R interpreter, R executable not in PATH", returncode=1)

    rProc = subprocess.Popen(
        ['R', 'CMD', 'Rserve', '--no-save', '--RS-conf',
         os.path.join(HERE_PATH, 'rserve-test.conf'),
         '--RS-port', str(port)],
        stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
    # wait a moment until Rserve starts listening on the requested port
    time.sleep(0.6)
    # Only a non-zero exit code counts as a failure here. An exit code of 0
    # is accepted on purpose, since Rserve usually daemonizes itself and the
    # start command may then return successfully while the server keeps running.
    if rProc.poll():
        # process has already terminated, so provide its stderr to the user:
        raise RuntimeError('Rserve has terminated prematurely with the '
                           'following message: %s' % rProc.stderr.read())

    # Give it a maximum of 10 tries with some sleep in between to wait for
    # Rserve to come into action. A fresh socket with its own timeout is used
    # for every attempt: the state of a socket after a failed connect() is not
    # reliable, and the process-wide default timeout stays untouched.
    rserv = None
    for _ in range(10):
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(1)
        try:
            # open a socket connection to Rserve
            sock.connect(('', port))
        except socket.error:
            sock.close()
            time.sleep(0.3)
        else:
            # got a connection! Jump out of the loop
            rserv = sock
            break

    if rserv is None:
        # after trying 10 times we still got no connection to Rserv - something
        # must be wrong. Do not leave the started process behind.
        _stop_process(rProc)
        raise RuntimeError('Could not connect to Rserv over the network')

    # make a simple test that Rserve really answers correctly by looking at the
    # first few bytes:
    try:
        hdr = rserv.recv(1024)
    except socket.error:
        _stop_process(rProc)
        raise
    finally:
        rserv.close()

    if not hdr.startswith(b'Rsrv01'):
        _stop_process(rProc)
        raise RuntimeError(
            'received wrong header information from socket (was: "%s")'
            % str(hdr[:10])
        )
    return rProc


def pytest_addoption(parser):
    """Let the developer control whether or not to start extra Rserve process."""
    parser.addoption(
        "--run-rserve", action="store_true", default=False,
        help="Run separate Rserve process for unit testing on port %d" % EXTRA_RPORT
    )


@pytest.fixture(scope="session")
def run_rserve(request):
    """Fixture providing the value of the ``--run-rserve`` command line option."""
    return request.config.getoption("--run-rserve")


@pytest.fixture(scope="module")
def conn(run_rserve):
    """
    Fixture providing a connection to Rserve.

    If ``--run-rserve`` was given, a separate Rserve process is started on
    EXTRA_RPORT and terminated again on teardown. Otherwise the connection is
    made to the default Rserve port. The pytest session is aborted if Rserve
    refuses the connection.
    """
    if run_rserve:
        # Fire up separate Rserve process:
        port = EXTRA_RPORT
        r_proc = start_Rserve(port)
    else:
        port = pyRserve.rconn.RSERVEPORT
        r_proc = None

    try:
        connection = pyRserve.connect(port=port)
    except pyRserve.rexceptions.RConnectionRefused:
        _stop_process(r_proc)
        # pytest.exit() raises, so nothing after this call is executed.
        pytest.exit('Error: Cannot reach running Rserve process.\nEither start '
                    'one manually or run pytest with option --run-rserve',
                    returncode=1)

    try:
        # Create an 'ident' function in R which just returns its argument.
        # Needed by the tests using this fixture.
        connection.r('ident <- function(v) { v }')

        yield connection
    finally:
        # Always clean up, even if the setup statement above has failed:
        try:
            connection.close()
        finally:
            _stop_process(r_proc)
26 | Unittests have been used on the Linux and MacOS side, however they might just work fine for Windows. 27 | 28 | It has been tested to work with Python 2.7.x, 3.6 to 3.9. 29 | 30 | The latest development has been tested with some previous and current versions of R and Rserve. 31 | 32 | License 33 | ------- 34 | pyRserve has been written by Ralph Heinkel `(ralph-heinkel.com) `_ and is 35 | released under `MIT license `_. 36 | 37 | 38 | Quick installation 39 | ------------------- 40 | From your unix/macOS,windows command line run:: 41 | 42 | pip install pyRserve 43 | 44 | For a fully functional setup also R and Rserve have to be installed. See section 45 | `installation `_ in the pyRserve 46 | documentation for instructions. 47 | 48 | 49 | Quick usage 50 | ------------ 51 | Open a **first shell** and start up the R server, by calling the module `Rserve` that provides 52 | the actual network connectivity for R:: 53 | 54 | $ R CMD Rserve 55 | 56 | R (Rserve) will now listen on port 6311 (on localhost). Of course Rserve can be configured to 57 | listen on an exposed port and hence will be accessible from remote hosts as well. 58 | 59 | Open a **second shell**, start Python, import pyRserve, and initialize the connection to Rserve:: 60 | 61 | $ python 62 | >>> import pyRserve 63 | >>> conn = pyRserve.connect() 64 | 65 | The default connection will be done on ``localhost:6311``. Other hosts can be reached by 66 | calling ``pyRserve.connect(host=..., port=...)`` as well. 67 | 68 | 69 | The ``conn`` object provides a namespace called ``conn.r`` that directly maps all variables 70 | and other global symbols (like functions etc) and hence makes them accessible from Python. 
71 | 72 | Now create a vector in R, access the vector from Python (will be converted into a numpy array), and 73 | call the ``sum()``-function in R:: 74 | 75 | >>> conn.r("vec <- c(1, 2, 4)") 76 | >>> conn.r.vec # access vector 'vec' as an attribute of 'conn.r' 77 | array([1., 2., 4.]) 78 | >>> conn.r.sum(conn.r.vec) # 'sum' in running in the R-interpreter, returning the result to Python 79 | 7.0 80 | 81 | The other way around also works:: 82 | 83 | >>> conn.r.somenumber = 444 # set a variable called 'somenumber' in the R interpreter... 84 | >>> conn.r("somenumber * 2") # ... and double the number 85 | 888.0 86 | 87 | 88 | Source code repository 89 | ---------------------- 90 | pyRserve is now hosted on GitHub at ``_. 91 | 92 | 93 | Documentation 94 | ---------------- 95 | Documentation can be found at ``_. 96 | 97 | 98 | Support 99 | -------- 100 | For discussion of pyRserve and getting help please use the Google newsgroup 101 | available at ``_. 102 | 103 | Issues with the code (like bugs, etc.) should be reported on GitHub at 104 | ``_. 105 | 106 | 107 | Missing features 108 | ----------------- 109 | * Authentication is implemented in Rserve but not yet in pyRserve 110 | * TLS encryption is not implemented yet in pyRserve. However using ssh tunnels 111 | can solve security issues in the meantime (see documentation). 
112 | -------------------------------------------------------------------------------- /doc/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | * V.1.0.1 (2023-01-10) 4 | * Replace deprecated numpy.bool8 with numpy.bool_ 5 | * Upgraded installation instructions in INSTALL file (more up-to-date R and Rserve) 6 | * Added Dockerfile for installing R and Rserve into container (used for github actions) 7 | * Enabled github actions for testing 8 | 9 | * V 1.0.0 (2022-10-13) 10 | * Added docu for secure connection to Rserve via SSH tunnel 11 | * Updated meta data for pyRserve package 12 | * Added deprecation warning for Python 2 13 | * Corrected links to documentation 14 | 15 | * V 1.0.0b3 (2021-06-25) 16 | Brought usage of pytest into the year 2021. 17 | 18 | * use fixtures for setting up an Rserve connection 19 | * put fixtures into conftest.py 20 | * added command line option for controlling rserve startup 21 | * properly named rserve-test.conf file. 
22 | 23 | * V 1.0.0b2 (2021-06-22) 24 | * Added missing version.txt file to wheel 25 | 26 | * V 1.0.0b1 (2021-06-22) 27 | * Updated and cleaned up documentation 28 | * Updated for more recent versions of R and Rserve 29 | * Added pre-commit hooks 30 | * Separated packages for dev/testing from production ones 31 | * Enhanced handling of NA values (thanks to Max Taggart) 32 | * Fixed numpy deprecation warnings (thanks to chaddcw) 33 | 34 | * V 0.9.2 (2019-12-19) 35 | * Replaced deprecated numpy.fromstring with numpy.frombuffer 36 | * Flake8/pep8 cleanup 37 | * Refactored exception hierarchy 38 | * V 0.9.1 (2017-05-19) 39 | * Removed a bug on some Python3 versions 40 | * Added proper support for S4 objects (thanks to flying-sheep) 41 | * Added support for Python3 unittests on travis (thanks to flying-sheep) 42 | 43 | * V 0.9.0 (2016-04-11) 44 | * Full support for data objects larger than 2**24 bytes 45 | * Maximum size of message sent to Rserv can now be 2**64 bytes 46 | 47 | * V 0.8.4 (2015-09-06) 48 | * fixed missing requirements.txt in MANIFEST.in 49 | * fixed bug in installer (setup.py) 50 | 51 | * V 0.8.3 (2015-09-04) 52 | * Fixed exception catching for Python 3.4 (thanks to eeue56) 53 | * Some pep8 cleanups 54 | * explicit initialization of a number of instance variables in some classes 55 | * cleanup of import statements in test modules 56 | * Allow for message sizes greater than 4GB coming from R server 57 | 58 | * V 0.8.2 (2015-07-11) 59 | * Added support for S4 objects (generated when e.g. creating a db object in R) 60 | 61 | * V 0.8.1 (2014-07-17) 62 | * Fixed errors in the documentation, updated outdated parts 63 | * For unittesting run Rserve on different port from the default 6311 to 64 | avoid clashes with regular Rserve running on the same server 65 | * Fixed bug when passing an R-function as argument to a function call (e.g.
to ``sapply``), 66 | added unittest for this 67 | 68 | * V 0.8.0 (2014-06-26) 69 | * Added support for remote shutdown of Rserve (thanks to Uwe Schmitt) 70 | * Added support for Out-Of-Bounds (OOB) messages (thanks to Philipp alias flying-sheep) 71 | 72 | * V 0.7.3 (2013-08-01) 73 | * Added missing MANIFEST.in to produce a complete tgz package (now includes docs etc) 74 | * Fixed bug on x64 machines when handling integers larger than 2**31 75 | 76 | * V 0.7.2 (2013-07-19) 77 | * Tested with Python 3.3.x, R 3.0.1 and Rserve 1.7.0 78 | * Updated documentation accordingly 79 | * Code cleanup for pep8 (mostly) 80 | * Marked code as production stable 81 | 82 | * V 0.7.1 (2013-06-23) 83 | * Added link to new GitHub repository 84 | * fixed URL to documentation 85 | 86 | * V 0.7.0 (2013-02-25) 87 | * Fixed problem when receiving very large result sets from R (added support for XT_LARGE header flag) 88 | * Correctly translate multi-dimensional R arrays into numpy arrays (preserve axes the right way) 89 | Removed 'arrayOrder' keyword argument as a consequence. 90 | THIS IS AN API CHANGE - PLEASE CHECK AND ADAPT YOUR CODE, ESPECIALLY IF YOU USE MULTI-DIM ARRAYS!! 91 | * Support for conn.voidEval and conn.eval and new 'defaultVoid'-kw argument in the connect() function 92 | * Fixed bug in receiving multi-dimensional boolean (logical) arrays from R 93 | * Added support for multi-dimensional string arrays 94 | * added support for XT_VECTOR_EXPR type generated e.g. 
via "expression()" in R (will return a list 95 | with the expression content as list content) 96 | * windows users can now connect to localhost by pyRserve.connect() (omitting 'localhost' parameter) 97 | 98 | * V 0.6.0 (2012-06-25) 99 | * support for Python3.x 100 | * Python versions <= 2.5 no more supported (due to Py3 support) 101 | * support for unicode strings in Python 2.x 102 | * full support complex numbers, partial support for 64bit integers and arrays 103 | * suport for Fortran-style ordering of numpy arrays 104 | * elements of single-item arrays are now translated to native python data types 105 | * much improved documentation 106 | * better unit test coverage 107 | * usage of the deprecated conn() is no more possible 108 | * pyRserve.rconnect() now also removed 109 | 110 | * V 0.5.2 (2011-12-02) 111 | * Fixed problem with 32bit integers being mistakenly rendered into 64bit integers on 64bit machines 112 | 113 | * V 0.5.1 (2011-11-22) 114 | * Fixed improper DeprecationWarning when evaluating R statements via conn.r(...) 115 | 116 | * V 0.5 (2011-10-03) 117 | * Renamed pyRserve.rconnect() to pyRserve.connect(). The former still works but shows a DeprecationWarning 118 | * String evaluation should now only be executed on the namespace directly, not on the connection object anymore. 119 | The latter still works but shows a DeprecationWarning. 120 | * New kw argument `atomicArray=True` added to pyRserve.connect() for preventing single valued arrays from being 121 | converted into atomic python data types. 122 | 123 | * V 0.4 (2011-09-20) 124 | * Added support for nested function calls. E.g. conn.r.t.test( ....) now works. 
125 | * Proper support for boolean variables and vectors 126 | 127 | * V 0.3 (2010-06-08) 128 | * Added conversion of more complex R structures into Python 129 | * Updated documentation (installation, manual) 130 | 131 | * V 0.2 (2010-03-19) Fixed rendering of TaggedArrays 132 | 133 | * V 0.1 (2010-01-10) Initial version 134 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # pyRserve documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Oct 23 13:26:42 2009. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys 15 | sys.path.insert(0, '..') 16 | from pyRserve import __version__ # noqa 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | # sys.path.append(os.path.abspath('.')) 22 | 23 | # -- General configuration ----------------------------------------------------- 24 | 25 | # Add any Sphinx extension module names here, as strings. They can be extensions 26 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 27 | extensions = ['sphinx.ext.todo'] 28 | 29 | # Add any paths that contain templates here, relative to this directory. 30 | templates_path = ['_templates'] 31 | 32 | # The suffix of source filenames. 33 | source_suffix = '.rst' 34 | 35 | # The encoding of source files. 36 | # source_encoding = 'utf-8' 37 | 38 | # The master toctree document. 
39 | master_doc = 'index' 40 | 41 | # General information about the project. 42 | project = u'pyRserve' 43 | copyright = u'2009-2021 Ralph Heinkel' 44 | 45 | # The version info for the project you're documenting, acts as replacement for 46 | # |version| and |release|, also used in various other places throughout the 47 | # built documents. 48 | # 49 | # The short X.Y version. 50 | version = __version__ 51 | # The full version, including alpha/beta/rc tags. 52 | release = __version__ 53 | 54 | # The language for content autogenerated by Sphinx. Refer to documentation 55 | # for a list of supported languages. 56 | # language = None 57 | 58 | # There are two options for replacing |today|: either, you set today to some 59 | # non-false value, then it is used: 60 | # today = '' 61 | # Else, today_fmt is used as the format for a strftime call. 62 | # today_fmt = '%B %d, %Y' 63 | 64 | # List of documents that shouldn't be included in the build. 65 | # unused_docs = [] 66 | 67 | # List of directories, relative to source directory, that shouldn't be searched 68 | # for source files. 69 | exclude_trees = ['_build'] 70 | 71 | # The reST default role (used for this markup: `text`) to use for all documents. 72 | # default_role = None 73 | 74 | # If true, '()' will be appended to :func: etc. cross-reference text. 75 | # add_function_parentheses = True 76 | 77 | # If true, the current module name will be prepended to all description 78 | # unit titles (such as .. function::). 79 | # add_module_names = True 80 | 81 | # If true, sectionauthor and moduleauthor directives will be shown in the 82 | # output. They are ignored by default. 83 | # show_authors = False 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | pygments_style = 'sphinx' 87 | 88 | # A list of ignored prefixes for module index sorting.
89 | # modindex_common_prefix = [] 90 | 91 | 92 | # -- Options for HTML output --------------------------------------------------- 93 | 94 | # The theme to use for HTML and HTML Help pages. Major themes that come with 95 | # Sphinx are currently 'default' and 'sphinxdoc'. 96 | html_theme = 'default' 97 | 98 | # Theme options are theme-specific and customize the look and feel of a theme 99 | # further. For a list of options available for each theme, see the 100 | # documentation. 101 | # html_theme_options = {} 102 | 103 | # Add any paths that contain custom themes here, relative to this directory. 104 | # html_theme_path = [] 105 | 106 | # The name for this set of Sphinx documents. If None, it defaults to 107 | # " v documentation". 108 | # html_title = None 109 | 110 | # A shorter title for the navigation bar. Default is the same as html_title. 111 | # html_short_title = None 112 | 113 | # The name of an image file (relative to this directory) to place at the top 114 | # of the sidebar. 115 | # html_logo = None 116 | 117 | # The name of an image file (within the static path) to use as favicon of the 118 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 119 | # pixels large. 120 | # html_favicon = None 121 | 122 | # Add any paths that contain custom static files (such as style sheets) here, 123 | # relative to this directory. They are copied after the builtin static files, 124 | # so a file named "default.css" will overwrite the builtin "default.css". 125 | # html_static_path = ['html/_static'] 126 | 127 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 128 | # using the given strftime format. 129 | # html_last_updated_fmt = '%b %d, %Y' 130 | 131 | # If true, SmartyPants will be used to convert quotes and dashes to 132 | # typographically correct entities. 133 | # html_use_smartypants = True 134 | 135 | # Custom sidebar templates, maps document names to template names. 
136 | # html_sidebars = {} 137 | 138 | # Additional templates that should be rendered to pages, maps page names to 139 | # template names. 140 | # html_additional_pages = {} 141 | 142 | # If false, no module index is generated. 143 | # html_use_modindex = True 144 | 145 | # If false, no index is generated. 146 | # html_use_index = True 147 | 148 | # If true, the index is split into individual pages for each letter. 149 | # html_split_index = False 150 | 151 | # If true, links to the reST sources are added to the pages. 152 | # html_show_sourcelink = True 153 | 154 | # If true, an OpenSearch description file will be output, and all pages will 155 | # contain a tag referring to it. The value of this option must be the 156 | # base URL from which the finished HTML is served. 157 | # html_use_opensearch = '' 158 | 159 | # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). 160 | # html_file_suffix = '' 161 | 162 | # Output file base name for HTML help builder. 163 | htmlhelp_basename = 'pyRservedoc' 164 | 165 | 166 | # -- Options for LaTeX output -------------------------------------------------- 167 | 168 | # The paper size ('letter' or 'a4'). 169 | # latex_paper_size = 'letter' 170 | 171 | # The font size ('10pt', '11pt' or '12pt'). 172 | # latex_font_size = '10pt' 173 | 174 | # Grouping the document tree into LaTeX files. List of tuples 175 | # (source start file, target name, title, author, documentclass [howto/manual]). 176 | latex_documents = [ 177 | ('index', 'pyRserve.tex', u'pyRserve Documentation', 178 | u'Ralph Heinkel', 'manual'), 179 | ] 180 | 181 | # The name of an image file (relative to this directory) to place at the top of 182 | # the title page. 183 | # latex_logo = None 184 | 185 | # For "manual" documents, if this is true, then toplevel headings are parts, 186 | # not chapters. 187 | # latex_use_parts = False 188 | 189 | # Additional stuff for the LaTeX preamble. 
"""
Some specialized list and array classes to store results obtained from R. These
classes provide means not to only access object items by index but also - sort
of like a dictionary - by key. However keys need not be unique and can even be
None. In those cases only the first item with that key is found.

Available classes:
- TaggedList
- TaggedArray
"""
import numpy


class TaggedList(object):
    # This code is mainly based on UserList.UserList and modified for tags
    """
    A tagged list is useful for additionally addressing individual items by
    name instead of only by index. In contrast to dictionaries multiple items
    can have the same name or key. However only the first one will be found.

    Internally two parallel lists are kept: ``self.values`` holds the items and
    ``self.keys`` holds the tag (or None) of the item at the same position.

    In many cases a TaggedList behaves like a normal list, however not all
    list methods are implemented yet (those raise NotImplementedError).

    Example:
        l = TaggedList( [('v1', 1), ('v2', 2), 3, ('v2', 4)] )
        l[0]     # returns 1
        l['v1']  # returns 1
        l['v2']  # returns 2 (not 4 !)
        l[3]     # returns 4

    Data can be appended or inserted in the following way:
        l.insert(0, x=3)
        l['x']   # returns 3
        l[0]     # also returns 3

        l.append(y=3)
        l[-1]    # returns 3
    """
    def __init__(self, initlist=()):
        """
        Items in initlist can either be
        - tuples of (key, value)
        - or plain values
        Keys can be None or empty strings in item tuples; an empty string is
        stored as None.

        Note: every item that unpacks into exactly two parts is taken as a
        (key, value) pair - this includes 2-item lists and 2-character
        strings.
        """
        self.values = []
        self.keys = []
        for item in initlist:
            try:
                key, value = item
                key = None if key == '' else key
            except (TypeError, ValueError):
                # not a pair -> plain, untagged value
                value = item
                key = None

            self.values.append(value)
            self.keys.append(key)

    def astuples(self):
        """
        Convert a TaggedList into a representation suitable to be provided
        to __init__()
        """
        return list(zip(self.keys, self.values))

    def __repr__(self):
        # (value,) keeps the formatting working for untagged tuple values
        data = ["%s=%s" % (key, repr(value)) if key else "'%s'" % (value,)
                for key, value in self.astuples()]
        return '<TaggedList(%s)>' % ', '.join(data)

    __hash__ = None  # Mutable sequence, so not hashable

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        return not self.__eq__(other)

    def __contains__(self, item):
        return item in self.values

    def __len__(self):
        return len(self.values)

    def _position(self, i):
        """
        Translate a string tag into the position of its first occurrence.
        Anything that is not a string is returned unchanged. An unknown tag
        raises ValueError (from list.index).
        """
        if isinstance(i, str):
            return self.keys.index(i)
        return i

    def __getitem__(self, i):
        return self.values[self._position(i)]

    def __setitem__(self, i, item):
        self.values[self._position(i)] = item

    def __delitem__(self, i):
        i = self._position(i)
        # remove tag and value together to keep both lists aligned
        del self.keys[i]
        del self.values[i]

    def __getslice__(self, i, j):
        # Only called by Python 2; Python 3 routes slices to __getitem__
        i = max(i, 0)
        j = max(j, 0)
        return self.__class__(self.astuples()[i:j])

    def __delslice__(self, i, j):
        raise NotImplementedError()

    def __add__(self, other):
        raise NotImplementedError()

    def __radd__(self, other):
        raise NotImplementedError()

    def __iadd__(self, other):
        raise NotImplementedError()

    def __mul__(self, n):
        raise NotImplementedError()
    __rmul__ = __mul__

    def __imul__(self, n):
        raise NotImplementedError()

    @staticmethod
    def _split_key_and_value(value, key_and_value):
        """
        Return (key, value) from the *args / **kwargs of append() or insert().
        Exactly one positional value or exactly one keyword pair is accepted,
        everything else raises ValueError.
        """
        if len(value) == 1 and not key_and_value:
            return None, value[0]
        if len(key_and_value) == 1 and not value:
            [(key, item)] = key_and_value.items()
            return key, item
        raise ValueError("Only either one single value or one single pair "
                         "of key/value is allowed")

    def append(self, *value, **key_and_value):
        """
        Append an item to the list, either given as plain value or as a
        keyword-arg pair.
        Example:
            taggedlist.append(4)
        or
            taggedlist.append(k=4)
        """
        key, item = self._split_key_and_value(value, key_and_value)
        self.values.append(item)
        self.keys.append(key)

    def insert(self, i, *value, **key_and_value):
        """
        Insert an item in the list at position i, either given as plain value
        or as a keyword-arg pair.
        Example:
            taggedlist.insert(4, 'abc')
        or
            taggedlist.insert(4, k='abc')
        """
        key, item = self._split_key_and_value(value, key_and_value)
        self.values.insert(i, item)
        self.keys.insert(i, key)

    def pop(self, i=-1):
        """
        Remove an item from the list and return it. By default the last item
        will be removed. If an item at a specific position should be removed,
        pass an additional index argument (or the tag of the item).
        """
        i = self._position(i)
        item = self.values.pop(i)
        # drop the matching tag as well, otherwise keys and values get out
        # of sync
        self.keys.pop(i)
        return item

    def remove(self, item):
        raise NotImplementedError()

    def count(self, item):
        return self.values.count(item)

    def index(self, item, *args):
        return self.values.index(item, *args)

    def reverse(self):
        self.values.reverse()
        self.keys.reverse()

    def sort(self, *args, **kwds):
        raise NotImplementedError()

    def extend(self, other):
        raise NotImplementedError()
# Types that are treated as tag names (not positions) when reporting a failed
# lookup in TaggedArray.__getitem__; type(u'') is 'unicode' on Python 2.
_TAG_TYPES = (str, bytes, type(u''))


class AttrArray(numpy.ndarray):
    """
    numpy.ndarray with additional "attr"-container.
    Used as base class for TaggedArray.
    """
    # Class-level default, used by instances that were not created via new()
    attr = None

    def __repr__(self):
        r = super(AttrArray, self).__repr__()
        if hasattr(self, 'attr'):
            # splice the attr into the closing parenthesis of numpy's repr
            return r[:-1] + ', attr=' + repr(self.attr) + ')'
        return r

    @classmethod
    def new(cls, data, attr):
        """
        Factory method to create AttrArray objects from ndarrays or Python
        lists.
        Usage:
            AttrArray.new(array([1, 2, 3, 4]), {'attr1': val1, 'attr2': val2})

        @arg data: numpy array (viewed without copying) or any other object
                   accepted by numpy.array() (copied into a new array)
        @arg attr: arbitrary object stored as 'attr' on the returned array
        """
        arr = data if isinstance(data, numpy.ndarray) else numpy.array(data)
        attrArr = arr.view(cls)
        attrArr.attr = attr
        return attrArr


def asAttrArray(data, attr):
    """Shortcut for AttrArray.new(data, attr)"""
    return AttrArray.new(data, attr)


class TaggedArray(AttrArray):
    """
    A tagged array is useful for additionally addressing individual items by
    name instead of only by index. In contrast to dictionaries multiple items
    can have the same name or key. However only the first one will be found.

    In many cases a TaggedArray behaves like a normal array and is the
    equivalent for TaggedList.
    This class is basically only useful to translate results created by R into
    something useful in Python.

    Instances of TaggedArray should only be created using the factory function
    'asTaggedArray([values], [tags])', where 'values' and 'tags' can be plain
    python lists or numpy-arrays.

    Example:
        l = asTaggedArray(array([1, 2, 3, 4]), ['v1', 'v2', 'v3', 'v2'])
        l[0]     # returns 1
        l['v1']  # returns 1
        l['v2']  # returns 2 (not 4 !)
        l[3]     # returns 4

    It is recommended not to do lots of manipulations that modify the
    structure of the array. This could lead to mismatches btw. tags and
    values (those are only very loosely coupled internally). However any type
    of mathematics like multiplying the array should be possible without
    problems.
    """
    # Class-level default, used by instances that were not created via new()
    attr = []

    def __repr__(self):
        # super(AttrArray, ...) deliberately skips AttrArray.__repr__ so that
        # the tags are rendered once, labelled 'key' instead of 'attr'
        r = super(AttrArray, self).__repr__()
        if hasattr(self, 'attr'):
            return r[:-1] + ', key=' + repr(self.attr) + ')'
        return r

    def __getitem__(self, idx_or_name):
        """
        Return the item(s) addressed by normal numpy indexing or, if numpy
        rejects the argument, the item carrying the tag 'idx_or_name'.

        Raises KeyError if a string is neither a valid numpy index nor a known
        tag. For any other argument numpy's original exception (e.g.
        IndexError for an out-of-range position) is raised.
        """
        try:
            return numpy.ndarray.__getitem__(self, idx_or_name)
        except Exception as exc:
            # keep numpy's complaint, it is the right error to report if the
            # argument turns out not to be a tag either
            array_error = exc
        try:
            position = self.attr.index(idx_or_name)
        except ValueError:
            if isinstance(idx_or_name, _TAG_TYPES):
                raise KeyError('No key "%s" available for array'
                               % idx_or_name)
            raise array_error
        return numpy.ndarray.__getitem__(self, position)

    def keys(self):
        """Return a copy of the tags of this array"""
        return self.attr[:]

    @classmethod
    def new(cls, data, tags):
        """
        Factory method to create TaggedArray objects from ndarrays or Python
        lists.
        Check the docs in TaggedArray for more information.
        Usage:
            l = TaggedArray.new(array([1, 2, 3, 4]), ['v1', 'v2', 'v3', 'v2'])
            l[0]     # returns 1
            l['v1']  # returns 1
            l['v2']  # returns 2 (not 4 !)
            l[3]     # returns 4

        Raises ValueError if len(tags) differs from len(data).
        """
        if len(tags) != len(data):
            raise ValueError('Number of keys must match size of array')
        arr = data if isinstance(data, numpy.ndarray) else numpy.array(data)

        taggedArr = arr.view(cls)
        # Always store the tags as a plain list: __getitem__ relies on
        # list.index(), which numpy arrays do not provide
        if isinstance(tags, numpy.ndarray):
            taggedArr.attr = tags.tolist()
        else:
            taggedArr.attr = list(tags)
        return taggedArr


def asTaggedArray(data, tags):
    """Shortcut for TaggedArray.new(data, tags)"""
    return TaggedArray.new(data, tags)
"""
types module for pyRserve

Protocol constants of Rserve (QAP1) and the lookup tables that map between
R expression types, Python types, numpy types and 'struct' type codes.
"""
import numpy
from pyRserve.misc import PY3

# numpy.string_ and numpy.unicode_ were removed in NumPy 2.0 and numpy.compat
# (a Python 2 shim) is no longer reliably available there. Resolve equivalents
# once so that the tables below work with old and new NumPy versions alike:
_NP_BYTES = numpy.bytes_                               # was numpy.string_
_NP_UNICODE = getattr(numpy, 'unicode_', numpy.str_)   # was numpy.unicode_
_LONG = int if PY3 else long  # noqa: F821 'long' unknown in Python3


def _names_by_code(prefix):
    """
    Return a dictionary {value: name} of all module-level constants defined
    so far whose name starts with 'prefix'.
    """
    return dict((code, name) for (name, code) in list(globals().items())
                if name.startswith(prefix))


# some general constants:
SOCKET_BLOCK_SIZE = 4096
MAX_INT32 = 2**31 - 1
MIN_INT32 = -MAX_INT32

# Rserve constants and mappings ###############################################

# Main Rserve header size [bytes]
RHEADER_SIZE = 16

# Header sizes (in SEXPR) without and with XT_LARGE or DT_LARGE flag [bytes]
SMALL_DATA_HEADER_SIZE = 4
LARGE_DATA_HEADER_SIZE = 8


CMD_RESP = 0x10000  # all responses have this flag set

RESP_OK = CMD_RESP | 0x0001   # command succeeded; returned parameters depend
#                               on the command issued
RESP_ERR = CMD_RESP | 0x0002  # command failed, check stats code


CMD_OOB = 0x20000  # out-of-band data - i.e. unsolicited messages

OOB_SEND = CMD_OOB | 0x1000         # OOB send - unsolicited SEXP sent from the
#                                     R instance to the client. 12 LSB are
#                                     reserved for application-specific code
OOB_MSG = CMD_OOB | 0x2000          # OOB message - unsolicited message sent
#                                     from the R instance to the client
#                                     requiring a response. 12 LSB are reserved
#                                     for application-specific code
OOB_STREAM_READ = CMD_OOB | 0x4000  # OOB stream read request - server requests
#                                     streaming data from the client (typically
#                                     streaming input for computation)

###############################################################################
# Error codes

ERR_auth_failed = 0x41     # auth.failed or auth.requested but no
#                            login came. In case of authentication
#                            failure due to name/pwd mismatch,
#                            server may send CMD_accessDenied instead

ERR_conn_broken = 0x42     # connection closed or broken packet killed it
ERR_inv_cmd = 0x43         # unsupported/invalid command
ERR_inv_par = 0x44         # some parameters are invalid
ERR_Rerror = 0x45          # R-error occurred, usually followed by connection
#                            shutdown
ERR_IOerror = 0x46         # I/O error
ERR_notOpen = 0x47         # attempt to perform fileRead/Write on closed file
ERR_accessDenied = 0x48    # this answer is also valid on
#                            CMD_login; otherwise it's sent
#                            if the server doesn't allow the user
#                            to issue the specified command.
#                            (e.g. some server admins may block
#                            file I/O operations for some users)
ERR_unsupportedCmd = 0x49  # unsupported command
ERR_unknownCmd = 0x4a      # unknown command - the difference
#                            between unsupported and unknown is that
#                            unsupported commands are known to the
#                            server but for some reasons (e.g.
#                            platform dependent) it's not supported.
#                            unknown commands are simply not recognized
#                            by the server at all.

# The following ERR_.. exist since 1.23/0.1-6
ERR_data_overflow = 0x4b   # incoming packet is too big.
#                            currently there is a limit as of the
#                            size of an incoming packet.
ERR_object_too_big = 0x4c  # the requested object is too big
#                            to be transported in that way.
#                            If received after CMD_eval then
#                            the evaluation itself was successful.
#                            optional parameter is the size of the object

# since 1.29/0.1-9
ERR_out_of_mem = 0x4d      # out of memory. the connection is usually
#                            closed after this error was sent
# since 0.6-0
ERR_ctrl_closed = 0x4e     # control pipe to the master process is closed
#                            or broken

# since 0.4-0
ERR_session_busy = 0x50    # session is still busy
ERR_detach_failed = 0x51   # unable to detach session (cannot determine
#                            peer IP or problems creating a listening socket
#                            for resume)

# pack all error codes with their names into a dictionary:
ERRORS = _names_by_code('ERR_')


###############################################################################
# Available commands
# (comments read: "parameters sent to Rserve : data returned by Rserve")

CMD_login = 0x001       # "name\npwd" : -
CMD_voidEval = 0x002    # string : -
CMD_eval = 0x003        # string : encoded SEXP
CMD_shutdown = 0x004    # [admin-pwd] : -

# file I/O routines
CMD_openFile = 0x010    # fn : -
CMD_createFile = 0x011  # fn : -
CMD_closeFile = 0x012   # - : -
CMD_readFile = 0x013    # [int size] : data... ; if size not present,
#                         server is free to choose any value - usually
#                         it uses the size of its static buffer
CMD_writeFile = 0x014   # data : -
CMD_removeFile = 0x015  # fn : -

# object manipulation
CMD_setSEXP = 0x020     # string(name), REXP : -
CMD_assignSEXP = 0x021  # string(name), REXP : - ; same as setSEXP
#                         except that the name is parsed

# session management (since 0.4-0)
CMD_detachSession = 0x030     # : session key
CMD_detachedVoidEval = 0x031  # string : session key
#                               (remainder of the original comment is missing)
CMD_attachSession = 0x032     # session key : -

# control commands (since 0.6-0) - passed on to the master process
# Note: currently all control commands are asynchronous, i.e. RESP_OK
#       indicates that the command was enqueued in the master pipe, but there
#       is no guarantee that it will be processed. Moreover non-forked
#       connections (e.g. the default debug setup) don't process any
#       control commands until the current client connection is closed so
#       the connection issuing the control command will never see its result.

CMD_ctrl = 0x40          # -- not a command - just a constant --
CMD_ctrlEval = 0x42      # string : -
CMD_ctrlSource = 0x45    # string : -
CMD_ctrlShutdown = 0x44  # - : -

# 'internal' commands (since 0.1-9)
CMD_setBufferSize = 0x081  # [int sendBufSize]
#                            this command allows clients to request
#                            bigger buffer sizes if large data is to be
#                            transported from Rserve to the client.
#                            (incoming buffer is resized automatically)

CMD_setEncoding = 0x082    # string (one of "native","latin1","utf8")

# special commands - the payload of packages with this mask does not contain
# defined parameters

CMD_SPECIAL_MASK = 0xf0

CMD_serEval = 0xf5    # serialized eval - the packets are raw
#                       serialized data without data header
CMD_serAssign = 0xf6  # serialized assign - serialized list with
#                       [[1]]=name, [[2]]=value
CMD_serEEval = 0xf7   # serialized expression eval - like serEval with
#                       one additional evaluation round


###############################################################################
# Data types for the transport protocol (QAP1) do NOT confuse with any
# XT_.. values.

DT_INT = 0x01         # int
DT_CHAR = 0x02        # char
DT_DOUBLE = 0x03      # double
DT_STRING = 0x04      # 0 terminated string
DT_BYTESTREAM = 0x05  # stream of bytes (unlike DT_STRING may contain 0)
DT_SEXP = 0x0A        # encoded SEXP

DT_ARRAY = 0x0B       # array of objects (i.e. first 4 bytes specify how
#                       many subsequent objects are part of the array;
#                       0 is legitimate)
DT_LARGE = 0x40       # new in 0102: if this flag is set then the length of
#                       the object is coded as 56-bit integer enlarging
#                       the header by 4 bytes

###############################################################################
# XpressionTypes

# REXP - R expressions are packed in the same way as command parameters
# transport format of the encoded Xpressions:
# [0] int type/len (1 byte type, 3 bytes len - same as SET_PAR)
# [4] REXP attr (if bit 8 in type is set)
# [4/8] data ..

XT_NULL = 0x00    # P  data: [0]
XT_INT = 0x01     # -  data: [4]int
XT_DOUBLE = 0x02  # -  data: [8]double
XT_STR = 0x03     # P  data: [n]char null-term. strg.
XT_LANG = 0x04    # -  data: same as XT_LIST
XT_SYM = 0x05     # -  data: [n]char symbol name
XT_BOOL = 0x06    # -  data: [1]byte boolean (1=TRUE, 0=FALSE, 2=NA)

XT_S4 = 0x07      # P  data: [0]

XT_BYTE = 0x08    # extension for pyRserve
XT_INT3 = 0x09    # extension for pyRserve, a 3-byte integer as used
#                   in REXP
XT_INT7 = 0x0A    # extension for pyRserve, a 7-byte integer as used
#                   in REXP

XT_VECTOR = 0x10      # 16dec: P  data: [?]REXP,REXP,..
XT_LIST = 0x11        # 17dec: -  X head, X vals, X tag (since 0.1-5)
XT_CLOS = 0x12        # 18dec: P  X formals, X body (closure; since 0.1-5)
XT_SYMNAME = 0x13     # 19dec: s  same as XT_STR (since 0.5)
XT_LIST_NOTAG = 0x14  # 20dec: s  same as XT_VECTOR (since 0.5)
XT_LIST_TAG = 0x15    # 21dec: P  X tag, X val, Y tag, Y val, (since 0.5)
XT_LANG_NOTAG = 0x16  # 22dec: s  same as XT_LIST_NOTAG (since 0.5)
XT_LANG_TAG = 0x17    # 23dec: s  same as XT_LIST_TAG (since 0.5)
XT_VECTOR_EXP = 0x1a  # 26dec: s  same as XT_VECTOR (since 0.5)
XT_VECTOR_STR = 0x1b  # 27dec: -  same as XT_VECTOR (since 0.5 but unused,
#                                 use XT_ARRAY_STR instead)

XT_ARRAY_INT = 0x20      # 32dec: P  data: [n*4]int,int,..
XT_ARRAY_DOUBLE = 0x21   # 33dec: P  data: [n*8]double,double,..
XT_ARRAY_STR = 0x22      # 34dec: P  data: string,string,.. (
#                                    string=byte,byte,...,0) padded with '\01'
XT_ARRAY_BOOL_UA = 0x23  # 35dec: -  data: [n]byte,byte,..
#                                    (unaligned! NOT supported anymore)
XT_ARRAY_BOOL = 0x24     # 36dec: P  data: int(n),byte,byte,...
XT_RAW = 0x25            # 37dec: P  data: int(n),byte,byte,...
XT_ARRAY_CPLX = 0x26     # 38dec: P  data: [n*16]double,double,...
#                                    (Re,Im,Re,Im,...)

XT_UNKNOWN = 0x30        # 48dec: P  data: [4]int - SEXP
#                                    type (as from TYPEOF(x))
#                                 |
#                                 +--- interesting flags for client
#                                      implementations:
#        P = primary type
#        s = secondary type - its decoding is identical to
#            a primary type and thus the client doesn't need
#            to decode it separately.
#        - = deprecated/removed. if a client doesn't need to
#            support old Rserve versions, those can be
#            safely skipped.
# Total primary: 4 trivial types (NULL, STR, S4, UNKNOWN) + 6 array types +
#                3 recursive types


XT_LARGE = 0x40     # 64dec: new in 0102: if this flag is set then the
#                     length of the object is coded as 56-bit integer
#                     enlarging the header by 4 bytes
XT_HAS_ATTR = 0x80  # 128dec: flag; if set, the following REXP is the
#                     attribute. The use of attributes and vectors
#                     results in recursive storage of REXPs

# Build up a dictionary that translates all codes for XT_* and DT_* constants
# into their names:

XTs = _names_by_code('XT_')
DTs = _names_by_code('DT_')


BOOL_TRUE = 1
BOOL_FALSE = 0
BOOL_NA = 2

VALID_R_TYPES = [
    DT_SEXP, XT_BOOL, XT_INT, XT_DOUBLE, XT_STR, XT_SYMNAME, XT_VECTOR,
    XT_LIST_TAG, XT_LANG_TAG, XT_LIST_NOTAG, XT_LANG_NOTAG, XT_CLOS,
    XT_ARRAY_BOOL, XT_ARRAY_INT, XT_ARRAY_DOUBLE, XT_ARRAY_CPLX, XT_ARRAY_STR,
    XT_VECTOR_EXP, XT_NULL, XT_UNKNOWN, XT_RAW, XT_S4
]

STRING_TYPES = [str, _NP_BYTES, numpy.str_]
if not PY3:
    STRING_TYPES.append(unicode)  # noqa: F821 'unicode' unknown in Python3

###############################################################################
# Mapping btw. numpy and R data types, in both directions

# map r-types and some python types to typecodes used in the 'struct' module
structMap = {
    XT_BOOL: 'b',
    bool: 'b',
    XT_BYTE: 'B',
    XT_INT: 'i',
    int: 'i',
    numpy.int32: 'i',
    XT_INT3: 'i',
    XT_INT7: 'q',       # 64 bit integer
    XT_DOUBLE: 'd',     # double (float64)
    float: 'd',
    numpy.double: 'd',
    complex: 'd',       # struct code of each of the two parts (real, imag)
    numpy.complex128: 'd',
}

# mapping to determine overall type of message.
DT_Map = {
    str: DT_STRING,
    int: DT_INT,
    float: DT_DOUBLE,
}


numpyMap = {
    XT_ARRAY_BOOL: numpy.bool_,
    XT_ARRAY_INT: numpy.int32,
    XT_ARRAY_DOUBLE: numpy.double,  # double float64
    XT_ARRAY_CPLX: complex,
    XT_ARRAY_STR: _NP_BYTES,
}

# also add the inverse mapping to it:
for k, v in list(numpyMap.items()):
    numpyMap[v] = k

# some manual additions for numpy variants:
numpyMap[numpy.complex128] = XT_ARRAY_CPLX
numpyMap[numpy.int32] = XT_ARRAY_INT
numpyMap[numpy.int64] = XT_ARRAY_INT
numpyMap[_LONG] = XT_ARRAY_INT
numpyMap[numpy.str_] = XT_ARRAY_STR
numpyMap[_NP_UNICODE] = XT_ARRAY_STR


atom2ArrMap = {
    # map atomic python objects to their array counterparts in R
    int: XT_ARRAY_INT,
    numpy.int32: XT_ARRAY_INT,
    float: XT_ARRAY_DOUBLE,
    numpy.double: XT_ARRAY_DOUBLE,
    complex: XT_ARRAY_CPLX,
    numpy.complex128: XT_ARRAY_CPLX,
    str: XT_ARRAY_STR,
    numpy.str_: XT_ARRAY_STR,
    _NP_BYTES: XT_ARRAY_STR,
    bool: XT_ARRAY_BOOL,
}
# on Python 3 this is the same type as numpy.str_, on Python 2 it is not:
atom2ArrMap[_NP_UNICODE] = XT_ARRAY_STR
Python objects into a binary data stream for 3 | sending them to Rserve. 4 | """ 5 | import os 6 | import io 7 | import struct 8 | import socket 9 | 10 | import numpy 11 | 12 | from . import rtypes 13 | from .misc import PY3, FunctionMapper, byteEncode, padLen4, string2bytesPad4 14 | from .taggedContainers import TaggedList, TaggedArray 15 | 16 | # turn on DEBUG to see extra information about what the serializer is 17 | # doing with your data 18 | DEBUG = 0 19 | 20 | NoneType = type(None) 21 | 22 | if PY3: 23 | # make test work with Python 3 where 'long'-type does not exist: 24 | long = int 25 | 26 | 27 | class RSerializer(object): 28 | """ 29 | Class to to serialize Python objects into a binary data stream for sending 30 | them to Rserve. 31 | 32 | Depending on 'commandType' given to __init__ the resulting binary string 33 | can be used to send a command, to assign a variable in Rserve, or to 34 | reply to a request received from Rserve. 35 | """ 36 | serializeMap = {} 37 | fmap = FunctionMapper(serializeMap) 38 | 39 | def __init__(self, commandType, fp=None): 40 | if isinstance(fp, socket.socket): 41 | # kwargs = {'mode': 'b'} if PY3 else {} 42 | self._fp = fp 43 | self._buffer = io.BytesIO() 44 | elif not fp: 45 | self._buffer = fp or io.BytesIO() 46 | self._fp = None 47 | else: 48 | # expect fp to be a seekable file(-like) object: 49 | self._buffer = self._fp = fp 50 | self._dataSize = 0 51 | self._writeHeader(commandType) 52 | 53 | def _getRetVal(self): 54 | if self._fp is self._buffer: 55 | # file(-like) object - data has been written, nothing to return 56 | return None 57 | elif not self._fp: 58 | # data has only been written into buffer, so return its value: 59 | return self._buffer.getvalue() 60 | else: 61 | # i.e. 
socket: write result of _fp into socket-fp 62 | self._fp.send(self._buffer.getvalue()) 63 | return None 64 | 65 | def _writeHeader(self, commandType): 66 | """Write main header of message for Rserve""" 67 | # Set length to zero initially, will be fixed in _finalizerHeader() 68 | # when msg size is determined: 69 | msg_length_lower = msg_length_higher = 0 70 | data_offset = 0 71 | header = struct.pack(' Set the length of the entire data package in the general message hdr 80 | as number of bytes of the entire message minus the general hdr 81 | """ 82 | # Jump to end of buffer to determine its length: 83 | self._buffer.seek(0, os.SEEK_END) 84 | messageSize = self._buffer.tell() - rtypes.RHEADER_SIZE 85 | if DEBUG: 86 | print('writing size of header: %2d' % messageSize) 87 | # Goto position 4 of the general Rserve package header and write the 88 | # size of the overall rserve message there. For message size > 2**32 89 | # the size is split into two parts, the lower 32 bits are written at 90 | # position 4, the higher part is written at position 12 (see QAP1 docs) 91 | bin_messageSize = struct.pack(' 1: 204 | xt_tag_list.append((b'dim', numpy.array(o.shape, numpy.int32))) 205 | if isinstance(o, TaggedArray): 206 | xt_tag_list.append((b'names', numpy.array(o.attr))) 207 | 208 | attrFlag = rtypes.XT_HAS_ATTR if xt_tag_list else 0 209 | rTypeCode = rtypes.numpyMap[o.dtype.type] | attrFlag 210 | # write length of zero for now, will be corrected later: 211 | self._writeDataHeader(rTypeCode, 0) 212 | if attrFlag: 213 | self.s_xt_tag_list(xt_tag_list) 214 | return rTypeCode 215 | 216 | def __s_update_xt_array_header(self, headerPos, rTypeCode): 217 | """ 218 | Update length information of xt array header which has been 219 | previously temporarily set to 0 in __s_write_xt_array_tag_data() 220 | @arg headerPos: file position where header information should be 221 | written. 
222 | @arg rTypeCode 223 | """ 224 | # subtract length of data header (8 bytes), does not count to payload! 225 | length = self._buffer.tell() - headerPos - rtypes.LARGE_DATA_HEADER_SIZE 226 | self._buffer.seek(headerPos) 227 | self._writeDataHeader(rTypeCode, length) 228 | self._buffer.seek(0, os.SEEK_END) 229 | 230 | @fmap(*rtypes.STRING_TYPES) 231 | def s_xt_array_single_str(self, o): 232 | """Serialize single string object""" 233 | arr = numpy.array([o]) 234 | self.s_xt_array_str(arr) 235 | 236 | @fmap(rtypes.XT_ARRAY_STR) 237 | def s_xt_array_str(self, o): 238 | """Serialize array of strings""" 239 | startPos = self._buffer.tell() 240 | rTypeCode = self.__s_write_xt_array_tag_data(o) 241 | 242 | # reshape into 1d array: 243 | o1d = o.reshape(o.size, order='F') 244 | # Byte-encode them: 245 | bo = [byteEncode(d) for d in o1d] 246 | # add empty string to that the following join with \0 adds an 247 | # additional zero at the end of the last string! 248 | bo.append(b'') 249 | # Concatenate them as null-terminated strings: 250 | nullTerminatedStrings = b'\0'.join(bo) 251 | 252 | padLength = padLen4(nullTerminatedStrings) 253 | self._buffer.write(nullTerminatedStrings) 254 | self._buffer.write(b'\1\1\1\1'[:padLength]) 255 | 256 | # Update the array header: 257 | self.__s_update_xt_array_header(startPos, rTypeCode) 258 | 259 | @fmap(bool, numpy.bool_) 260 | def s_atom_to_xt_array_boolean(self, o): 261 | """ 262 | Render single boolean items into their corresponding array 263 | counterpart in R. 264 | Always convert a boolean atomic value into a specialized boolean 265 | R vector. 266 | """ 267 | arr = numpy.array([o]) 268 | self.s_xt_array_boolean(arr) 269 | 270 | @fmap(rtypes.XT_ARRAY_BOOL) 271 | def s_xt_array_boolean(self, o): 272 | """ 273 | - o: numpy array or subclass (e.g. TaggedArray) with boolean values 274 | Note: If o is multi-dimensional a tagged array is created. Also if o 275 | is of type TaggedArray. 
276 | """ 277 | startPos = self._buffer.tell() 278 | rTypeCode = self.__s_write_xt_array_tag_data(o) 279 | 280 | # A boolean vector starts with its number of boolean values in the 281 | # vector (as int32): 282 | structCode = '<'+rtypes.structMap[int] 283 | self._buffer.write(struct.pack(structCode, o.size)) 284 | # Then write the boolean values themselves. Note that R expects binary 285 | # array data in Fortran order, so prepare this accordingly: 286 | data = o.tobytes(order='F') 287 | self._buffer.write(data) 288 | # Finally pad the binary data to be of a multiple of four in length: 289 | self._buffer.write(padLen4(data) * b'\xff') 290 | 291 | # Update the array header: 292 | self.__s_update_xt_array_header(startPos, rTypeCode) 293 | 294 | @fmap(int, numpy.int32, long, numpy.int64, numpy.compat.long, float, complex, 295 | numpy.float64, numpy.complex64, numpy.complex128) 296 | def s_atom_to_xt_array_numeric(self, o): 297 | """ 298 | Render single numeric items into their corresponding array counterpart 299 | in R 300 | """ 301 | if isinstance(o, (int, long, numpy.int64, numpy.compat.long)): 302 | if rtypes.MIN_INT32 <= o <= rtypes.MAX_INT32: 303 | # even though this type of data is 'long' it still fits into a 304 | # normal integer. Good! 
305 | o = int(o) 306 | else: 307 | raise ValueError('Cannot serialize long integers larger than ' 308 | 'MAX_INT32 (**31-1)') 309 | 310 | rTypeCode = rtypes.atom2ArrMap[type(o)] 311 | structCode = '<'+rtypes.structMap[type(o)] 312 | length = struct.calcsize(structCode) 313 | if type(o) is complex: 314 | self._writeDataHeader(rTypeCode, length*2) 315 | self._buffer.write(struct.pack(structCode, o.real)) 316 | self._buffer.write(struct.pack(structCode, o.imag)) 317 | else: 318 | self._writeDataHeader(rTypeCode, length) 319 | self._buffer.write(struct.pack(structCode, o)) 320 | 321 | @fmap(rtypes.XT_ARRAY_CPLX, rtypes.XT_ARRAY_DOUBLE, rtypes.XT_ARRAY_INT) 322 | def s_xt_array_numeric(self, o): 323 | """ 324 | @param o: numpy array or subclass (e.g. TaggedArray) 325 | @note: If o is multi-dimensional a tagged array is created. Also if o 326 | is of type TaggedArray. 327 | """ 328 | if o.dtype in (numpy.int64, numpy.compat.long): 329 | # Note: use int instead of compat.long once Py2 is abandoned. 330 | if rtypes.MIN_INT32 <= o.min() and o.max() <= rtypes.MAX_INT32: 331 | # even though this type of array is 'long' its values still 332 | # fit into a normal int32 array. Good! 333 | o = o.astype(numpy.int32) 334 | else: 335 | raise ValueError('Cannot serialize long integer arrays with ' 336 | 'values outside MAX_INT32 (2**31-1) range') 337 | 338 | startPos = self._buffer.tell() 339 | rTypeCode = self.__s_write_xt_array_tag_data(o) 340 | 341 | # TODO: make this also work on big endian machines (data must be 342 | # written in little-endian!!) 
343 | 344 | # Note: R expects binary array data in Fortran order, so prepare this 345 | # accordingly: 346 | self._buffer.write(o.tobytes(order='F')) 347 | 348 | # Update the array header: 349 | self.__s_update_xt_array_header(startPos, rTypeCode) 350 | 351 | # ############## Vectors and Tag lists #################################### 352 | 353 | @fmap(list, TaggedList) 354 | def s_xt_vector(self, o): 355 | """Render all objects of given python list into generic r vector""" 356 | startPos = self._buffer.tell() 357 | # remember start position for calculating length in bytes of entire 358 | # list content 359 | attrFlag = rtypes.XT_HAS_ATTR if o.__class__ == TaggedList else 0 360 | self._writeDataHeader(rtypes.XT_VECTOR | attrFlag, 0) 361 | if attrFlag: 362 | self.s_xt_tag_list([(b'names', numpy.array(o.keys))]) 363 | for v in o: 364 | self.serializeExpr(v) 365 | length = self._buffer.tell() - startPos 366 | self._buffer.seek(startPos) 367 | # now write header again with correct length information 368 | # subtract length of list data header: 369 | self._writeDataHeader(rtypes.XT_VECTOR | attrFlag, 370 | length - rtypes.LARGE_DATA_HEADER_SIZE) 371 | self._buffer.seek(0, os.SEEK_END) 372 | 373 | def s_xt_tag_list(self, o): 374 | startPos = self._buffer.tell() 375 | self._writeDataHeader(rtypes.XT_LIST_TAG, 0) 376 | for tag, data in o: 377 | self.serializeExpr(data) 378 | self.s_string_or_symbol(tag, rTypeCode=rtypes.XT_SYMNAME) 379 | length = self._buffer.tell() - startPos 380 | self._buffer.seek(startPos) 381 | # now write header again with correct length information 382 | # subtract length of list data header: 383 | self._writeDataHeader(rtypes.XT_LIST_TAG, 384 | length - rtypes.LARGE_DATA_HEADER_SIZE) 385 | self._buffer.seek(0, os.SEEK_END) 386 | 387 | # ########################################################## 388 | # ### class methods for calling specific Rserv functions ### 389 | 390 | @classmethod 391 | def rEval(cls, aString, fp=None, void=False): 392 | """ 393 
| Create binary code for evaluating a string expression remotely in 394 | Rserve 395 | """ 396 | cmd = rtypes.CMD_voidEval if void else rtypes.CMD_eval 397 | s = cls(cmd, fp=fp) 398 | s.serialize(aString, dtTypeCode=rtypes.DT_STRING) 399 | return s.finalize() 400 | 401 | @classmethod 402 | def rAssign(cls, varname, o, fp=None): 403 | """ 404 | Create binary code for assigning an expression to a variable remotely 405 | in Rserve 406 | """ 407 | s = cls(rtypes.CMD_setSEXP, fp=fp) 408 | s.serialize(varname, dtTypeCode=rtypes.DT_STRING) 409 | s.serialize(o, dtTypeCode=rtypes.DT_SEXP) 410 | return s.finalize() 411 | 412 | @classmethod 413 | def rShutdown(cls, fp=None): 414 | s = cls(rtypes.CMD_shutdown, fp=fp) 415 | return s.finalize() 416 | 417 | @classmethod 418 | def rSerializeResponse(cls, Rexp, fp=None): 419 | # mainly used for unittesting 420 | s = cls(rtypes.RESP_OK, fp=fp) 421 | s.serialize(Rexp, dtTypeCode=rtypes.DT_SEXP) 422 | return s.finalize() 423 | 424 | 425 | # Some shortcuts: 426 | rEval = RSerializer.rEval 427 | rAssign = RSerializer.rAssign 428 | rSerializeResponse = RSerializer.rSerializeResponse 429 | rShutdown = RSerializer.rShutdown 430 | -------------------------------------------------------------------------------- /pyRserve/rconn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Module providing functionality to connect to a running Rserve instance 4 | """ 5 | import socket 6 | import time 7 | import pydoc 8 | 9 | from . 
class OOBCallback(object):
    """
    Context manager which temporarily installs an OOB callback.

    On entering the `with` block the connection's current ``oobCallback`` is
    saved and replaced by the given callback; on leaving the block (also when
    an exception is raised) the saved callback is put back.
    """
    def __init__(self, conn, callback):
        self.conn, self.callback = conn, callback

    def __enter__(self):
        connection = self.conn
        # Keep the currently installed callback so __exit__ can restore it:
        self.old_callback = connection.oobCallback
        connection.oobCallback = self.callback
        return connection

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so exceptions raised inside the block propagate.
        connection = self.conn
        connection.oobCallback = self.old_callback
def checkIfClosed(func):
    """
    Decorator for connector methods which need an open connection.

    The wrapped method raises PyRserveClosed right away if ``self.isClosed``
    is true. If the call fails with a socket error indicating that the peer
    is gone (connection reset / broken pipe), the connection is closed via
    ``self.close()`` and PyRserveClosed is raised instead. All other socket
    errors are re-raised unchanged.
    """
    # Imported locally so that this function does not depend on additional
    # module level imports.
    import errno
    import functools

    # errno codes are independent of platform and locale, the message texts
    # are kept as a fallback for errors that carry no usable errno.
    deadPeerCodes = (errno.ECONNRESET, errno.EPIPE)
    deadPeerMessages = ('Connection reset by peer', 'Broken pipe')

    @functools.wraps(func)  # keep name and docstring of the wrapped method
    def decoCheckIfClosed(self, *args, **kw):
        if self.isClosed:
            raise PyRserveClosed('Connection to Rserve already closed')
        try:
            return func(self, *args, **kw)
        except socket.error as msg:
            if getattr(msg, 'errno', None) in deadPeerCodes or \
                    getattr(msg, 'strerror', None) in deadPeerMessages:
                # seems like the connection to Rserve has died, so mark
                # the connection as closed
                self.close()
                raise PyRserveClosed('Connection to Rserve already closed')
            else:
                raise
    return decoCheckIfClosed
self.__closed 119 | 120 | def connect(self): 121 | if self.unix_socket: 122 | self.sock = socket.socket(socket.AF_UNIX) 123 | try: 124 | self.sock.connect(self.unix_socket) 125 | except socket.error: 126 | raise RConnectionRefused('Connection denied, server not reachable ' 127 | 'or not accepting connections') 128 | else: 129 | self.sock = socket.socket() 130 | try: 131 | self.sock.connect((self.host, self.port)) 132 | except socket.error: 133 | raise RConnectionRefused('Connection denied, server not reachable ' 134 | 'or not accepting connections') 135 | time.sleep(0.2) 136 | hdr = self.sock.recv(1024) 137 | self.__closed = False 138 | if DEBUG: 139 | print('received hdr %s from rserve' % hdr) 140 | # make sure we are really connected with rserv 141 | assert hdr.startswith(b'Rsrv01'), \ 142 | 'Protocol error with Rserv, obtained invalid header string' 143 | # TODO: possibly also do version checking here to make sure we 144 | # understand the protocol... 145 | 146 | @checkIfClosed 147 | def close(self): 148 | """Close network connection to rserve""" 149 | self.sock.close() 150 | self.__closed = True 151 | 152 | @checkIfClosed 153 | def shutdown(self): 154 | rShutdown(fp=self.sock) 155 | self.close() 156 | 157 | def _reval(self, aString, void): 158 | rEval(aString, fp=self.sock, void=void) 159 | 160 | def _rrespond(self, aObj): 161 | rSerializeResponse(aObj, fp=self.sock) 162 | 163 | @checkIfClosed 164 | def eval(self, aString, atomicArray=None, void=False): 165 | """ 166 | Evaluate a string expression through Rserve and return the result 167 | transformed into python objects 168 | """ 169 | if not type(aString in rtypes.STRING_TYPES): 170 | raise TypeError('Only string evaluation is allowed') 171 | self._reval(aString, void) 172 | if DEBUG: 173 | # Read entire data into memory en bloque, it's easier to debug 174 | src = self._receive() 175 | print('Raw response: %s' % hexString(src)) 176 | else: 177 | src = self.sock 178 | 179 | if atomicArray is None: 180 | # if 
not specified, use the global default: 181 | atomicArray = self.atomicArray 182 | 183 | try: 184 | message = rparse(src, atomicArray=atomicArray) 185 | # Before the result is returned, 0-∞ OOB messages may be sent 186 | while isinstance(message, OOBMessage): 187 | if DEBUG: 188 | print('OOB Message received:', message) 189 | ret = self.oobCallback(message.data, message.userCode) 190 | if message.type == rtypes.OOB_MSG: 191 | self._rrespond(ret) 192 | 193 | if isinstance(src, (str, bytes)): 194 | # This is no stream, so we have to cut off data 195 | src = src[len(message):] 196 | 197 | message = rparse(src, atomicArray=atomicArray) 198 | return message 199 | except REvalError: 200 | # R has reported an evaluation error, so let's obtain a descriptive 201 | # explanation about why the error has occurred. R allows to 202 | # retrieve the error message of the last exception via a built-in 203 | # function called 'geterrmessage()'. 204 | errorMsg = self.eval('geterrmessage()').strip() 205 | raise REvalError(errorMsg) 206 | 207 | @checkIfClosed 208 | def voidEval(self, aString): 209 | """ 210 | Evaluate a string expression through Rserve without returning 211 | any result data 212 | """ 213 | self.eval(aString, void=True) 214 | 215 | @checkIfClosed 216 | def _receive(self): 217 | """Receive the result from a previous call to rserve.""" 218 | raw = self.sock.recv(rtypes.SOCKET_BLOCK_SIZE) 219 | d = [raw] 220 | while len(raw) == rtypes.SOCKET_BLOCK_SIZE: 221 | raw = self.sock.recv(rtypes.SOCKET_BLOCK_SIZE) 222 | d.append(raw) 223 | return ''.join(d) 224 | 225 | # @checkIfClosed 226 | # def _raw(self, *args, **kw): 227 | # self.send(*args) 228 | # return self.receive() 229 | 230 | @checkIfClosed 231 | def setRexp(self, name, o): 232 | """ 233 | Convert a python object into an RExp and bind it to a variable 234 | called "name" in the R namespace 235 | """ 236 | rAssign(name, o, self.sock) 237 | # Rserv sends an emtpy confirmation message, or error message in case 238 | # of 
an error. rparse() will raise an Exception in the latter case. 239 | rparse(self.sock, atomicArray=self.atomicArray) 240 | 241 | @checkIfClosed 242 | def getRexp(self, name): 243 | """Retrieve a Rexp stored in a variable called 'name'""" 244 | return self.eval(name) 245 | 246 | @checkIfClosed 247 | def callFunc(self, name, *args, **kw): 248 | """ 249 | @brief make a call to a function "name" through Rserve 250 | @detail positional and keyword arguments are first stored as local 251 | variables in the R namespace and then delivered to the 252 | function. 253 | @result Whatever the result of the called function is. 254 | """ 255 | if name == 'rm': 256 | # SPECIAL HANDLING FOR "rm()": 257 | # Calling "rm" with real values instead of reference to values 258 | # works, however it doesn't produce the desired effect (it only 259 | # removes temporaily created variables). To avoid confusion for 260 | # the users a check is applied here to make sure that "args" only 261 | # contains variable or function references (proxies) and NOT 262 | # values! 263 | assert [x for x in args if not isinstance(x, RBaseProxy)] == (),\ 264 | 'Only references to variables or functions allowed for "rm()"' 265 | 266 | argNames = [] 267 | for idx, arg in enumerate(args): 268 | if isinstance(arg, RBaseProxy): 269 | argName = arg.__name__ 270 | else: 271 | # a real python value is passed. 
Set a value of an artificial 272 | # variable on the R side, memorize its name for making the 273 | # actual call to the function below 274 | argName = 'arg_%d_' % idx 275 | self.setRexp(argName, arg) 276 | argNames.append(argName) 277 | for key, value in kw.items(): 278 | if isinstance(value, RBaseProxy): 279 | argName = value.__name__ 280 | else: 281 | argName = 'kwarg_%s_' % key 282 | self.setRexp(argName, value) 283 | argNames.append('%s=%s' % (key, argName)) 284 | return self.eval(name+'(%s)' % ', '.join(argNames)) 285 | 286 | @checkIfClosed 287 | def assign(self, aDict): 288 | """Assign all items of the dictionary to the default R namespace""" 289 | for k, v in aDict.items(): 290 | self.setRexp(k, v) 291 | 292 | @checkIfClosed 293 | def isFunction(self, name): 294 | """Check whether given name references an existing function in R""" 295 | return self.eval('is.function(%s)' % name) 296 | 297 | 298 | class RNameSpace(object): 299 | """ 300 | An instance of this class serves as access point to the default namesspace 301 | of an Rserve connection 302 | """ 303 | def __init__(self, rconn): 304 | self.__dict__['_rconn'] = rconn 305 | 306 | def __setattr__(self, name, o): 307 | """Assign an rExpr to a variable called 'name'""" 308 | self._rconn.setRexp(name, o) 309 | 310 | def __getattr__(self, name): 311 | """ 312 | Retrieve either Rexp stored in a variable called "name" or make call 313 | to function called 'name' 314 | """ 315 | realname = name[1:] if name.startswith('_') else name 316 | try: 317 | isFunction = self._rconn.isFunction(realname) 318 | except Exception: 319 | # an error is only raised if neither such a function or variable 320 | # exists at all! 
class RBaseProxy(object):
    """
    Proxy for a reference to a variable or function in R.
    Do not use this directly, only its subclasses.
    """
    def __init__(self, name, rconn):
        # name of the variable or function on the R side
        self.__name__ = name
        # connection used for talking to Rserve
        self._rconn = rconn


class RVarProxy(RBaseProxy):
    """Proxy for a reference to a variable in R"""
    def __repr__(self):
        # The format string needs a placeholder for the name, formatting
        # an empty string with an argument raises a TypeError.
        return '<RVarProxy to variable "%s">' % self.__name__

    def value(self):
        """Retrieve the current value of the referenced variable from R"""
        return self._rconn.getRexp(self.__name__)


class RFuncProxy(RBaseProxy):
    """Proxy for function calls to Rserve"""
    def __repr__(self):
        # see RVarProxy.__repr__ about the placeholder
        return '<RFuncProxy to function "%s">' % self.__name__

    def __call__(self, *args, **kw):
        """Call the referenced R function with the given arguments"""
        return self._rconn.callFunc(self.__name__, *args, **kw)

    # command to send to R in order to get the help for a function in text
    # format:
    R_HELP = "capture.output(tools:::Rd2txt(utils:::.getHelpFile(help(%s))))"

    @property
    def __doc__(self):
        """
        Return the R help text of the function, or None if retrieving it
        raised an REvalError. Every access sends a request to Rserve.

        There are different ways to get the help message from R:
        # The the package db file:
        pkgRdDB = tools:::fetchRdDB(file.path(find.package('base'),
                                    'help', 'base'))
        # show all available topics in the help package:
        names(pkgRdDB)
        # convert the 'lapply' help message to text (from the base package):
        tools::Rd2txt(pkgRdDB[['lapply']])
        # capture this output into a variable:
        a <- capture.output(tools::Rd2txt(pkgRdDB[['lapply']]))
        Disadvantage: One needs to know the package beforehand.

        Better:
        Everything in one line and better (doesn't need to know the pkg):
        a <- capture.output(tools:::Rd2txt(utils:::.getHelpFile(help(sapply))))
        """
        try:
            d = self._rconn.eval(self.R_HELP % self.__name__)
        except REvalError:
            # probably no help available, unfortunately there is no specific
            # code for this...
            return None
        # Join the list of strings:
        helpstring = '\n'.join(d)
        # remove some obscure characters:
        # helpstring = helpstring.replace('_\x08', '')
        return helpstring

    def help(self):
        """Directly page the help message to the terminal (e.g. via less)"""
        # Fetch the text only once, each access triggers a call to Rserve.
        helpText = self.__doc__
        if helpText is None:
            # the pager can only handle text, so tell the user instead of
            # failing inside pydoc
            helpText = 'No help available for "%s"' % self.__name__
        pydoc.pager(helpText)

    def __getattr__(self, name):
        """Allow for nested name space calls, e.g. 't.test' """
        if name == '__name__':
            # this is useful for py.test which does some code inspection
            # during runtime
            return self.__name__

        concatName = "%s.%s" % (self.__name__, name)
        try:
            self._rconn.isFunction(concatName)
        except Exception:
            # an error is only raised if neither such a function or variable
            # exists at all!
            raise NameError('no such variable or function "%s" '
                            'defined in R' % concatName)
        return RFuncProxy(concatName, self._rconn)
"conn(\'1\')" ' 456 | 'for string evaluation.') 457 | # r('x<-1:20; y<-x*2; lm(y~x)') 458 | sc = open('../testData/test-script.R').read() 459 | v = conn.r(sc) 460 | open('r-test-png.png', 'w').write(v[3]) 461 | conn.r.v = 'abc' 462 | conn.r('func0 <- function() { 3 }') 463 | conn.r('func1 <- function(a1) { a1 }') 464 | conn.r('func2 <- function(a1, a2) { list(a1, a2) }') 465 | conn.r('funcKW <- function(a1=1, a2=4) { list(a1, a2) }') 466 | conn.r('squared<-function(t) t^2') 467 | 468 | 469 | if __name__ == '__main__': 470 | _test_main() 471 | -------------------------------------------------------------------------------- /testing/test_rparser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Unittesting module for rparser 4 | """ 5 | import datetime 6 | ### 7 | import numpy 8 | import pytest 9 | ### 10 | from pyRserve import rtypes, rserializer, rparser 11 | from pyRserve.rconn import RVarProxy, OOBCallback 12 | from pyRserve.misc import PY3 13 | from pyRserve.rexceptions import REvalError 14 | from pyRserve.taggedContainers import TaggedList, TaggedArray 15 | ### 16 | from .testtools import compareArrays 17 | 18 | 19 | # ### Test string evaluations in R 20 | 21 | def test_eval_strings(conn): 22 | """ 23 | Test plain string, byte-strings, unicodes (depending on Python version) 24 | """ 25 | assert conn.r("''") == '' 26 | assert conn.r("'abc'") == 'abc' 27 | 28 | # make sure also byte-strings are handled successfully. 
def test_eval_string_arrays(conn):
    """Check string arrays returned from R and sent through ident()"""
    # a single string stays an array when atomicArray is requested:
    singleton = conn.r("'abc'", atomicArray=True)
    assert compareArrays(singleton, numpy.array(['abc']))

    # plain string vector, and one containing NA (None on the Python side):
    plain = conn.r("c('abc', 'def')")
    assert compareArrays(plain, numpy.array(['abc', 'def']))
    withNA = conn.r("c('abc', NA, 'def')")
    assert compareArrays(withNA, numpy.array(['abc', None, 'def']))

    # round trip via call to ident function with single argument:
    echoed = conn.r.ident(numpy.array(['abc', 'def']))
    assert compareArrays(echoed, numpy.array(['abc', 'def']))
def test_eval_long(conn):
    """
    Test long integers. Going beyond MAX_INT32 works with eval() because
    in R all integers are converted to floats right away. However sending a
    long as functional parameter should raise a ValueError if its value is
    outside the normal integer range (i.e. MAX_INT32).
    """
    assert conn.r("%d" % rtypes.MAX_INT32) == rtypes.MAX_INT32
    # Next test for long integers, handled as floats in R via eval():
    assert conn.r("%d" % (rtypes.MAX_INT32*2)) == rtypes.MAX_INT32*2

    # The syntax like 234L only exists in Python2, so use long() in Py2 and
    # int() in Py3.
    # Send a long value which is still below rtypes.MAX_INT32.
    # It is automatically converted to a normal int in the rserializer and
    # hence should work fine:
    toLong = int if PY3 else long   # noqa   No 'long' function in PY3
    # Compare the returned value, a bare truthiness check would also pass
    # for a wrong non-zero result:
    assert conn.r.ident(toLong(123)) == 123

    # Here comes the problem - there is no native 64bit integer on the R side,
    # so this should raise a ValueError
    pytest.raises(ValueError, conn.r.ident, rtypes.MAX_INT32*2)
def test_eval_long_arrays(conn):
    """
    Test calling with a long (int64) array.

    Only the failure case is exercised at the moment: an int64 array holding
    a value outside the int32 range must be rejected with a ValueError.
    The case where all values fit into int32 (such an array is meant to be
    handled as a 32bit integer array) is commented out below.
    """
    toLong = int if PY3 else long   # noqa   No 'long' function in PY3
    # NOTE(review): the in-range round trip is disabled, the reason is not
    # visible here - confirm before re-enabling:
    # arr64 = numpy.array([rtypes.MIN_INT32, toLong(5)], dtype=numpy.int64)
    # assert compareArrays(conn.r.ident(arr64), arr64)

    # Here again comes the problem: an int64 array with values beyond
    # rtypes.MAX_INT32. This should raise a ValueError:
    arr64big = numpy.array([toLong(-rtypes.MAX_INT32 * 2), toLong(5)],
                           dtype=numpy.int64)
    pytest.raises(ValueError, conn.r.ident, arr64big)
def test_large_objects(conn):
    """Test that data objects larger than 2**24 bytes are supported
    Sent array back and forth btw Python and R before comparing them.
    """
    # make an integer (int32) array a little bit larger than 2**24 bytes;
    # integer division keeps the number of elements an int
    arr = numpy.arange(2**24 // 4 + 100, dtype=numpy.int32)
    conn.r.largearr = arr
    # The result of the comparison has to be asserted, otherwise this test
    # can never fail:
    assert compareArrays(arr, conn.r.largearr)
def test_2d_arrays_created_in_python(conn):
    """
    Send 2-d arrays of several dtypes and memory layouts to R and verify
    that shape, rows and columns are still the same when they come back.
    """
    numbers = numpy.arange(6)
    flags = numpy.array([True, False, True, True])
    words = numpy.array(['abc', 'def', 'ghi', 'jkl'])

    candidates = []
    for flat, shape in ((numbers, (2, 3)), (flags, (2, 2)), (words, (2, 2))):
        # order 'C' fills row by row, order 'F' column by column. For the
        # numbers this gives [[0, 1, 2], [3, 4, 5]] and
        # [[0, 2, 4], [1, 3, 5]], respectively.
        for layout in ('C', 'F'):
            candidates.append(flat.reshape(shape, order=layout))

    for expected in candidates:
        echoed = conn.r.ident(expected)
        assert echoed.shape == expected.shape
        assert compareArrays(echoed, expected)

        # store the array in the R namespace and slice it on the R side
        # (R indices are 1-based):
        conn.r.arr = expected
        # 2nd (= last) row:
        assert compareArrays(expected[1], conn.r('arr[2,]'))
        # 2nd column:
        assert compareArrays(expected[:, 1], conn.r('arr[,2]'))
def test_eval_sequence(conn):
    """Evaluate R's seq() via an R string and via a Python-style call."""
    expected = numpy.array(range(1, 6))

    # evaluate the R expression given as a string:
    fromString = conn.r('seq(1, 5)')
    assert compareArrays(fromString, expected)
    assert fromString.dtype == numpy.int32

    # call the same R function through the connector's namespace:
    fromCall = conn.r.seq(1, 5)
    assert compareArrays(fromCall, expected)
def test_eval_very_convoluted_function_result(conn):
    """
    Fit a linear model in R. The value coming back is a deeply nested
    structure of which only the top level and the coefficients are checked.
    """
    expectedTags = ['coefficients', 'residuals', 'effects', 'rank',
                    'fitted.values', 'assign', 'qr', 'df.residual',
                    'xlevels', 'call', 'terms', 'model']
    expectedCoefficients = TaggedArray.new(numpy.array([-0., 2.]),
                                           ['(Intercept)', 'x'])

    model = conn.r('x<-1:20; y<-x*2; lm(y~x)')

    assert model.__class__ == TaggedList
    # the tags of the list, in the order R delivers them:
    assert model.keys == expectedTags
    assert compareArrays(model['coefficients'], expectedCoefficients)
    # ... the remaining tags are not inspected here ...
class Lexeme(list):
    """
    Header information of one data item found in the binary Rserve stream.

    The four header values are kept both as list items (in the order
    rTypeCode, length, hasAttr, lexpos) and as attributes of the same name.
    The lexeme of an attribute and the parsed data are attached later.
    """
    def __init__(self, rTypeCode, length, hasAttr, lexpos):
        super(Lexeme, self).__init__((rTypeCode, length, hasAttr, lexpos))
        self.rTypeCode = rTypeCode
        self.length = length
        self.hasAttr = hasAttr
        self.lexpos = lexpos
        # filled in later, via setAttr() and by the parser respectively:
        self.attrLexeme = None
        self.data = None

    def setAttr(self, attrLexeme):
        """Attach the lexeme describing the attribute of this item"""
        self.attrLexeme = attrLexeme

    @property
    def attr(self):
        """Data of the attribute lexeme, or None if there is no attribute"""
        if self.attrLexeme:
            return self.attrLexeme.data
        return None

    @property
    def attrLength(self):
        """Length of the attribute lexeme (requires the attribute be set)"""
        return self.attrLexeme.length

    @property
    def attrTypeCode(self):
        """Type code of the attribute lexeme (requires the attribute be set)"""
        return self.attrLexeme.rTypeCode

    @property
    def dataLength(self):
        """Return length (in bytes) of actual REXPR data body"""
        if not self.hasAttr:
            return self.length
        if not self.attrLexeme:
            raise RuntimeError('Attribute lexeme not yet set')
        # the attribute and its REXP header (4 bytes) are part of 'length'
        # but do not belong to the data body:
        return self.length - self.attrLength - 4

    def __str__(self):
        return 'Typecode: %s Length: %s hasAttr: %s, Lexpos: %d' % \
            (hex(self.rTypeCode), self.length, self.hasAttr, self.lexpos)
valid binary r data 115 | """ 116 | if type(src) == str: 117 | # this only works for objects implementing the buffer protocol, 118 | # e.g. strings, arrays, ... 119 | # convert string to byte object 120 | self.fp = io.BytesIO(byteEncode(src)) 121 | elif type(src) == bytes: 122 | self.fp = io.BytesIO(src) 123 | else: 124 | self.fp = src 125 | if isinstance(self.fp, socket.socket): 126 | self._read = self.fp.recv 127 | else: 128 | self._read = self.fp.read 129 | # The following attributes will be set thru 'readHeader()': 130 | self.lexpos = None 131 | self.messageSize = None 132 | self.errCode = None 133 | self.responseCode = None 134 | self.responseOK = None 135 | self.isOOB = False 136 | self.oobType = None 137 | self.oobUserCode = None 138 | 139 | def readHeader(self): 140 | """ 141 | Called initially when reading fresh data from an input source 142 | (file or socket). Reads header which contains data like response/error 143 | code and size of data entire package. 144 | 145 | QAP1 header structure parts (16 bytes total): 146 | 147 | [ 0-3 ] (int) command 148 | [ 4-7 ] (int) length of the message (bits 0-31) 149 | [ 8-11] (int) offset of the data part 150 | [12-15] (int) length of the message (bits 32-63) 151 | """ 152 | self.lexpos = 0 153 | 154 | command = Command(struct.unpack(' 0 is not implemented' 159 | # Obtain upper 32bit part of message length: 160 | messageSize2 = self.__unpack(XT_INT) << 32 # shift 32bits to the left 161 | self.messageSize = messageSize2 + messageSize1 162 | 163 | self.isOOB = command.isOOB 164 | if self.isOOB: 165 | # FIXME: Rserve has a bug(?) 
def clearSocketData(self):
    """
    If for any reason the parsing process returns an error, make sure that
    all data from a socket is removed to avoid data pollution with further
    parsing attempts.

    Nothing happens if the input source is not a socket. Otherwise the
    socket is read in non-blocking mode until no more data is pending or
    the peer has closed the connection. Afterwards the socket gets back
    the timeout it had before the call.
    """
    if not isinstance(self.fp, socket.socket):
        # not a socket. Nothing to do here.
        return
    # Remember the current mode (None = blocking, otherwise the timeout in
    # seconds) in order to restore exactly this mode afterwards:
    previousTimeout = self.fp.gettimeout()
    # Switch socket into non-blocking mode and read from it until it
    # is empty (and hence socket.error is raised):
    self.fp.setblocking(False)
    try:
        # recv() returns an empty bytes object - without raising - as soon
        # as the peer has closed the connection. Stop in that case,
        # otherwise the loop would spin forever.
        while self.fp.recv(SOCKET_BLOCK_SIZE):
            pass
    except socket.error:
        # socket has no more data, it can be considered as cleared
        pass
    finally:
        # Now restore the previous mode (no matter what exception):
        self.fp.settimeout(previousTimeout)
def __unpack(self, tCode, num=None):
    """
    Read atomic data items from the input source and convert them into
    python objects.

    @param tCode: Either an integer type code, which is translated into a
                  struct format character via 'structMap', or a struct
                  format character itself
    @param num:   Number of items to read. If None, exactly one item is
                  read and returned as it is, otherwise a list of items
                  is returned.
    """
    if type(tCode) == int:
        structCode = structMap[tCode]
    else:
        structCode = tCode
    # All data from Rserve is stored in little-endian format, hence '<'.
    # The format string is converted into bytes.
    if num is None:
        fmt = byteEncode('<' + structCode)
    else:
        fmt = byteEncode('<' + str(num) + structCode)

    # XT_INT3 and XT_INT7 occupy only 3 or 7 bytes in the stream. One
    # zero byte is appended to the bytes read before they are unpacked.
    if tCode == XT_INT3:
        shortLength = 3
    elif tCode == XT_INT7:
        shortLength = 7
    else:
        shortLength = None

    if shortLength is None:
        rawData = self.read(struct.calcsize(fmt or 1))
    else:
        rawData = self.read(shortLength) + b'\x00'

    items = struct.unpack(fmt, rawData)
    if num is None:
        return items[0]
    return list(items)
@fmap(XT_BOOL)
def xt_bool(self, lexeme):
    """
    Read a single boolean value. Only the first byte of the data read is
    evaluated: 1 is returned as True, 2 (R's NA) as None, and every other
    value as False.
    """
    raw = self.read(lexeme.dataLength)
    firstByte = raw[0]
    if not PY3:
        # Python 2 delivers a one-character string here which still has
        # to be converted into a number (Python 3 already delivers an int):
        firstByte = struct.unpack(byteEncode('<%s' % structMap[XT_BOOL]),
                                  firstByte)[0]
    if firstByte == 2:
        return None
    return firstByte == 1
@fmap(XT_ARRAY_BOOL)
def xt_array_bool(self, lexeme):
    """A boolean array consists of a 4-byte word (i.e. integer)
    determining the number of boolean values in the following dataLength-4
    bytes.
    E.g. a bool array of one TRUE item looks like:
    01 00 00 00 01 ff ff ff

    The first 01 value tells that there is one bool value in the array.
    The other 01 is the TRUE value, the other 3 'ff's are padding bytes.
    Those will be used if the vector has 2,3 or 4 boolean values.
    For a fifth value another 4 bytes are appended.

    Returns a numpy array with one item per boolean value. If the array
    contains NA values the result is an array of dtype object in which
    every NA is replaced by None.
    """
    numBools = self.__unpack(XT_INT, 1)[0]
    # read the actual boolean values, including padding bytes:
    raw = self.read(lexeme.dataLength - 4)
    # From here on only look at the significant bytes. The padding is not
    # part of the data and must not influence the result.
    values = raw[:numBools]
    # Check if the array contains any NA values (encoded as \x02).
    # If so we need to convert the 2's to None's and use a numpy
    # array of type Object otherwise numpy will cast the None's into
    # False's. This is handled for us for numeric types since numpy can use
    # its own nan type, but here we need to help it out.
    # A bytes literal is searched for since this works for the byte strings
    # of both Python 2 and Python 3.
    if b'\x02' in values:
        data = numpy.frombuffer(values, dtype=numpy.int8).astype(object)
        data[data == 2] = None
    else:
        data = numpy.frombuffer(values, dtype=numpyMap[lexeme.rTypeCode])
    return data
@fmap(XT_RAW)
def xt_raw(self, lexeme):
    """
    Raw binary data. The data body starts with a 4-byte integer holding
    the number of raw bytes, followed by the bytes themselves.

    The entire body is consumed from the input, but only the announced
    number of bytes is returned, so any bytes after the payload (padding)
    do not end up in the result.
    """
    numBytes = self.__unpack(XT_INT)
    # always read the complete body to keep the lexer position in sync:
    body = self.read(lexeme.dataLength - 4)
    return body[:numBytes]
def parse(self):
    """
    Parse data stream and return result converted into
    python data structure.

    For an out-of-band message an OOBMessage is returned which wraps the
    parsed payload (None for an empty message). A regular response without
    any payload results in None if it was successful. If it was not,
    RResponseError is raised for error codes listed in ERRORS, and
    REvalError for every other error code.
    """
    self.indentLevel = 1
    self.lexer.readHeader()

    message = None
    if self.lexer.messageSize > 0:
        try:
            message = self._parse()
        except Exception:
            # If any error is raised during lexing and parsing, make sure
            # that the entire data is read from the input source if it is
            # a socket, otherwise following attempts to
            # parse again from a socket will return polluted data:
            self.lexer.clearSocketData()
            raise
    elif not self.lexer.isOOB and not self.lexer.responseOK:
        # The header of an OOB message provides neither response nor error
        # code (both remain None), so only regular responses are checked.
        errCode = self.lexer.errCode
        try:
            rserve_err_msg = ERRORS[errCode]
        except KeyError:
            raise REvalError("R evaluation error (code=%d)" % errCode)
        else:
            raise RResponseError('Response error %s (error code=%d)' %
                                 (rserve_err_msg, errCode))

    if self.lexer.isOOB:
        return OOBMessage(self.lexer.oobType, self.lexer.oobUserCode,
                          message, self.lexer.messageSize)
    return message
self.lexer.nextExprHdr() 481 | self._debugLog(dataLexeme, isRexpr=False) 482 | if dataLexeme.rTypeCode == DT_SEXP: 483 | lexeme = self._parseExpr() 484 | return self._postprocessData(lexeme.data) 485 | else: 486 | raise NotImplementedError() 487 | 488 | def _parseExpr(self): 489 | self.indentLevel += 1 490 | lexeme = self.lexer.nextExprHdr() 491 | self._debugLog(lexeme) 492 | if lexeme.hasAttr: 493 | self.indentLevel += 1 494 | if DEBUG: 495 | print('%s Attribute:' % self.__ind) 496 | lexeme.setAttr(self._parseExpr()) 497 | self.indentLevel -= 1 498 | lexeme.data = self.parserMap.get(lexeme.rTypeCode, 499 | self[None])(self, lexeme) 500 | self.indentLevel -= 1 501 | return lexeme 502 | 503 | def _nextExprData(self, lexeme): 504 | lexpos = self.lexer.lexpos 505 | data = self.lexer.nextExprData(lexeme) 506 | if DEBUG: 507 | print('%s data-lexpos: %d, data-length: %d bytes' % 508 | (self.__ind, lexpos, lexeme.dataLength)) 509 | print('%s data: %s' % (self.__ind, repr(data))) 510 | try: 511 | dataLen = len(data) 512 | print('%s length: %d' % (self.__ind, dataLen)) 513 | except TypeError: 514 | pass 515 | return data 516 | 517 | def _postprocessData(self, data): 518 | """ 519 | Postprocess parsing results depending on configuration parameters 520 | Currently only arrays are effected. 521 | """ 522 | if data.__class__ == numpy.ndarray: 523 | # this does not apply for arrays with attributes 524 | # (__class__ would be TaggedArray)! 525 | if len(data) == 1 and not self.atomicArray: 526 | # if data is a plain numpy array, and has only one element, 527 | # just extract and return this. 
528 | # For convenience reasons type-convert it into a native 529 | # Python data type: 530 | data = data[0] 531 | if isinstance(data, (float, numpy.float64)): 532 | # convert into native python float: 533 | data = float(data) 534 | elif isinstance(data, (int, numpy.int32, numpy.int64)): 535 | # convert into native int or long, depending on value: 536 | data = int(data) 537 | elif isinstance(data, (complex, numpy.complex64, 538 | numpy.complex128)): 539 | # convert into native python complex number: 540 | data = complex(data) 541 | elif isinstance(data, (numpy.string_, str)): 542 | # convert into native python string: 543 | data = str(data) 544 | elif isinstance(data, (bool, numpy.bool_)): 545 | # convert into native python string 546 | data = bool(data) 547 | return data 548 | 549 | @fmap(None) 550 | def xt_(self, lexeme): 551 | # apply this for atomic data 552 | return self._nextExprData(lexeme) 553 | 554 | @fmap(XT_ARRAY_BOOL, XT_ARRAY_INT, XT_ARRAY_DOUBLE, XT_ARRAY_STR) 555 | def xt_array(self, lexeme): 556 | # converts data into a numpy array already: 557 | data = self._nextExprData(lexeme) 558 | if lexeme.hasAttr and lexeme.attrTypeCode == XT_LIST_TAG: 559 | for tag, value in lexeme.attr: 560 | if tag == 'dim': 561 | # the array has a defined shape, and R stores and 562 | # sends arrays in Fortran mode: 563 | data = data.reshape(value, order='F') 564 | elif tag == 'names': 565 | # convert numpy-vector 'value' into list to make 566 | # TaggedArray work properly: 567 | data = asTaggedArray(data, list(value)) 568 | else: 569 | # there are additional tags in the attribute, just collect 570 | # them in a dictionary attached to the array. 571 | try: 572 | data.attr[tag] = value 573 | except AttributeError: 574 | data = asAttrArray(data, {tag: value}) 575 | return data 576 | 577 | @fmap(XT_VECTOR, XT_VECTOR_EXP, XT_LANG_NOTAG, XT_LIST_NOTAG) 578 | def xt_vector(self, lexeme): 579 | """ 580 | A vector is e.g. return when sending "list('abc','def')" to R. 
It can 581 | contain mixed types of data items. 582 | The binary representation of an XT_VECTOR is weird: a vector contains 583 | unknown number of items, with possibly variable length. Only the number 584 | of bytes of the data of a vector is known in advance. 585 | The end of this REXP can only be detected by keeping track of how 586 | many bytes have been consumed (lexeme.length!) until the end of the 587 | REXP has been reached. 588 | 589 | A vector expression (type 0x1a) is according to Rserve docs the same 590 | as XT_VECTOR. For now just a list with the expression content is 591 | returned in this case. 592 | """ 593 | finalLexpos = self.lexer.lexpos + lexeme.dataLength 594 | if DEBUG: 595 | print('%s Vector-lexpos: %d, length %d, finished at: %d' % 596 | (self.__ind, self.lexer.lexpos, 597 | lexeme.dataLength, finalLexpos)) 598 | data = [] 599 | while self.lexer.lexpos < finalLexpos: 600 | # convert single item arrays into atoms (via stripArray) 601 | data.append(self._postprocessData(self._parseExpr().data)) 602 | 603 | if lexeme.hasAttr and lexeme.attrTypeCode == XT_LIST_TAG: 604 | # The vector is actually a tagged list, i.e. a list which allows 605 | # to access its items by name (like in a dictionary). However items 606 | # are ordered, and there is not necessarily a name available for 607 | # every item. 
@fmap(XT_LIST_TAG, XT_LANG_TAG)
def xt_list_tag(self, lexeme):
    """
    Parse a tagged list into a python list of (tag, value) tuples.
    Such a list usually shows up as the attribute of a vector or a list
    (e.g. of a tagged list).
    """
    endPos = self.lexer.lexpos + lexeme.dataLength
    pairs = []
    while self.lexer.lexpos < endPos:
        # In the data stream the value comes first, followed by its tag.
        # In the result the tag is the first item of the tuple.
        itemValue = self._parseExpr().data
        itemTag = self._parseExpr().data
        pairs.append((itemTag, itemValue))
    return pairs
class Closure(object):
    """
    Very simple container to return "something" for a closure.
    Not really usable in Python though.

    It only stores the lexeme of the closure and the two data items
    which have been parsed for it.
    """
    def __init__(self, lexeme, aList1, aList2):
        self.lexeme = lexeme
        self.aList1 = aList1
        self.aList2 = aList2

    def __repr__(self):
        # The format string needs a placeholder for the id. With an empty
        # format string the %-operator raises a TypeError.
        return '<Closure instance %d>' % id(self)


class S4(dict):
    """
    Very simple representation of a S4 instance.

    The attributes of the S4 object are the items of the dictionary. Only
    the entry 'class' is removed from the items and made available via the
    attribute 'classes' instead (an empty list if there is no such entry).
    """
    def __init__(self, source=(), **entries):
        super(S4, self).__init__(source, **entries)
        self.classes = self.pop('class', [])

    def __repr__(self):
        attrs = super(S4, self).__repr__()
        # show the classes followed by the remaining attributes:
        return "<S4 classes={} {}>".format(self.classes, attrs)
By default Rserve listens on port 6311 on localhost (127.0.0.1) only, for security reasons.
This means that no connection to it is possible from any other machine.
For now, and for simplicity, we stick with running everything (Rserve and pyRserve) on the same host.

R puts itself into daemon mode, meaning that your shell comes back, and you have no way to
shut down R via ``ctrl-C`` (you need to call ``kill`` with its process id). However ``Rserve`` can be started in
debug mode during development. In this mode it'll print messages to stdout helping you to see whether your
connection works etc. To do so ``Rserve`` needs to be started like::
By default Rserve tries to load the configuration file from ``/etc/Rserv.conf``. So if you have
root privileges on your host you can enable remote connections with the following command
(``sudo echo ... > file`` would not work, because the redirection is performed by your own
unprivileged shell, not by ``sudo``)::

  $ echo "remote enable" | sudo tee /etc/Rserv.conf
2. In the ``rserveuser``'s home directory, inside the ``~/.ssh`` directory, add the public
   ssh keys of the allowed users to the ``~/.ssh/authorized_keys`` file. This can be done
   in a special way that only enables access to Rserve, without any other
   privilege like opening a remote shell etc.

   To achieve this, a line in ``~/.ssh/authorized_keys`` must look like::
130 | 131 | To enable Unix sockets in Rserve a flag needs to be enabled:: 132 | 133 | R CMD Rserve --RS-socket /tmp/rserve.sock 134 | 135 | That socket can now be used from pyRserve:: 136 | 137 | >>> import pyRserve 138 | >>> conn = pyRserve.connect(unix_socket='/tmp/rserve.sock') 139 | 140 | .. WARNING:: 141 | Just as in Variant 1, communication between pyRserve and Rserve is not encrypted. 142 | The only additional security is that this socket cannot be accessed from the network, 143 | but a user with access to the system can still sniff/manipulate your connection. 144 | 145 | 146 | Shutting down Rserve remotely 147 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 148 | 149 | If you need to shutdown Rserve from your client connection the following command can be called: 150 | 151 | >>> conn.shutdown() 152 | 153 | 154 | String evaluation in R 155 | ------------------------------- 156 | 157 | Having established a connection to Rserve you can run the first commands on it. A valid R command can be executed 158 | by making a call to the R name space via the connection's `eval()` method, providing a string as argument which 159 | contains valid R syntax:: 160 | 161 | >>> conn.eval('3 + 5') 162 | 8.0 163 | 164 | In this example the string ``"3 + 5"`` will be sent to the remote side and evaluated by the R interpreter. 165 | The result is then 166 | delivered back into a native Python object, a floating point number in this case. As an R expert you are 167 | probably aware of the fact that R uses vectors for all numbers internally by default. But why did we receive 168 | a single floating point number? The reason is that pyRserve looks at arrays coming from Rserve and converts 169 | arrays with only one single item into an atomic value. This behaviour is for convenience reasons only. 
170 | 171 | There are two ways to override this behaviour so that the result is a real (numpy) array: 172 | 173 | * Apply `atomicArray=True` to the `eval()`-method: 174 | 175 | >>> conn.eval('3 + 5', atomicArray=True) 176 | array([ 8.]) 177 | 178 | This behaviour is then valid for one single call. 179 | 180 | * Apply `atomicArray=True` to the `connect()`-function to make it the default for all calls to `eval()`: 181 | 182 | ``conn = pyRserve.connect(atomicArray=True)`` 183 | 184 | Then calling `eval()` would return a `numpy` array in every case: 185 | 186 | >>> conn.eval('3 + 5') 187 | array([ 8.]) 188 | 189 | ``conn.atomicArray`` will tell you how the connection handles results. This attribute contains the value of the 190 | ``atomicArray`` kw-argument given to connect. It can also be changed directly for a running connection. 191 | 192 | >>> conn.atomicArray 193 | True 194 | >>> conn.atomicArray = False # change value 195 | 196 | More expression evaluation 197 | ------------------------------ 198 | 199 | Of course also more complex data types can be sent from R to Python, e.g. lists or real arrays. Here are some examples:: 200 | 201 | >>> conn.eval("list(1, 'otto')") 202 | [1, 'otto'] 203 | >>> conn.eval('c(1, 5, 7)') 204 | array([ 1., 5., 7.]) 205 | 206 | As demonstrated here R-lists are converted into plain Python lists whereas R-vectors are converted into numpy 207 | arrays on the Python side. 
208 | 209 | To set a variable inside the R namespace do:: 210 | 211 | >>> conn.eval('aVar <- "abc"') 212 | 'abc' 213 | 214 | and to request its value just do:: 215 | 216 | >>> conn.eval('aVar') 217 | 'abc' 218 | 219 | 220 | Expression evaluation without expecting a result 221 | ---------------------------------------------------- 222 | 223 | In the example above setting a variable in R did not only set the variable but also returned it back to Python:: 224 | 225 | >>> conn.eval('aVar <- "abc"') 226 | 'abc' 227 | 228 | This is usually not something one would expect or need, and especially in the case of very large data this can cause 229 | unnecessary network traffic. The solution to this is to either call `eval()` with another option `void=True`, or to 230 | use `conn.voidEval()` directly. The following two calls are identical and do not return the string `'abc'`: 231 | 232 | >>> conn.eval('aVar <- "abc"', void=True) 233 | >>> conn.voidEval('aVar <- "abc"') 234 | 235 | 236 | Defining functions and calling them through expression evaluation 237 | -------------------------------------------------------------------- 238 | 239 | It is also possible to create functions inside the R interpreter through the connector's namespace, or even to 240 | execute entire scripts. Basically you can do everything which is possible inside a normal R console:: 241 | 242 | # create a function and execute it: 243 | >>> conn.voidEval('doubleit <- function(x) { x*2 }') 244 | >>> conn.eval('doubleit(2)') 245 | 4.0 246 | 247 | # store a mini script definition in a Python string ... 248 | >>> my_r_script = ''' 249 | squareit <- function(x) 250 | { x**2 } 251 | squareit(4) 252 | ''' 253 | # .... 
and execute it in R: 254 | >>> conn.eval(my_r_script) 255 | 16.0 256 | 257 | 258 | 259 | The R namespace - setting and accessing variables in a more Pythonic way 260 | ------------------------------------------------------------------------------ 261 | 262 | Previous sections explained how to set a variable inside R by evaluating a statement in string format:: 263 | 264 | >>> conn.voidEval('aVar <- "abc"') 265 | 266 | This is not very elegant and has limited ways to provide values already stored in Python variables. A much nicer 267 | way to do this is by setting the variable name in R as an attribute to a special variable `conn.r` which points 268 | to the namespace in R directly. The following statement does the same thing as the one above, just "more Pythonic":: 269 | 270 | >>> conn.r.aVar = "abc" 271 | 272 | So of course it is then possible to compute values or copy them from Python variables into R:: 273 | 274 | >>> conn.r.aVar = some_python_number * 1000.505 275 | 276 | To retrieve a variable from R just use it as expected:: 277 | 278 | >>> print('A value from R:', conn.r.aVar) 279 | 280 | In its current implementation pyRserve allows setting and accessing the following base types: 281 | 282 | * None (NULL) 283 | * boolean 284 | * integers (32-bit only) 285 | * floating point numbers (64 bit only), i.e. doubles 286 | * complex numbers 287 | * strings 288 | 289 | Furthermore the following containers are supported: 290 | 291 | * lists 292 | * numpy arrays 293 | * TaggedList 294 | * AttrArray 295 | * TaggedArray 296 | 297 | Lists can be nested arbitrarily, containing other lists, numbers, or arrays. ``TaggedList``, ``AttrArray``, and 298 | ``TaggedArray`` are 299 | special containers to handle very R-specific result types. They will be explained further down in the manual.
300 | 301 | The following example shows how to assign a python list with mixed data types to an R variable called ``aList``, 302 | and then to retrieve it again:: 303 | 304 | >>> conn.r.aList = [1, 'abcde', numpy.array([1, 2, 3], dtype=int)] 305 | >>> conn.r.aList 306 | [1, 'abcde', array([1, 2, 3])] 307 | 308 | Numpy arrays can also contain dimension information which is translated into R matrices when assigned to the R namespace:: 309 | 310 | >>> arr = numpy.array(range(12)) 311 | >>> arr.shape = (3, 4) 312 | >>> conn.r.aMatrix = arr 313 | >>> conn.r('dim(aMatrix)') # give me the dimension of aMatrix on the R-side 314 | array([3, 4]) 315 | 316 | The result of the shape information is - in contrast to what one gets from numpy arrays - an array itself. 317 | There is nothing special about this, this is just the way R internally deals with that information. 318 | 319 | 320 | Expression evaluation through the R namespace 321 | ------------------------------------------------ 322 | 323 | Instead of using `conn.eval('1+1')` expressions can also be evaluated by making a function call on the R namespace 324 | directly. The following calls are producing the same result: 325 | 326 | >>> conn.r('1+1') 327 | >>> conn.eval('1+1') 328 | 329 | `conn.r('...')` also accepts the `void`-option in case you want to suppress that a result is returned. Again the 330 | following three calls are producing the same result: 331 | 332 | >>> conn.r('1+1', void=True) 333 | >>> conn.eval('1+1', void=True) 334 | >>> conn.voidEval('1+1') 335 | 336 | 337 | Calling functions in R 338 | ------------------------ 339 | 340 | Functions defined in R can be called as if they were Python methods, declared in the namespace of R.
341 | 342 | Before the examples below are usable we need to define a couple of very simple functions within the R namespace: 343 | ``func0()`` accepts no parameters and returns a fixed string, ``func1()`` takes exactly one parameter and 344 | ``funcKW()`` takes keyword arguments with default values:: 345 | 346 | conn.voidEval('func0 <- function() { "hello world" }') 347 | conn.voidEval('func1 <- function(v) { v*2 }') 348 | conn.voidEval('funcKW <- function(a1=1.0, a2=4.0) { list(a1, a2) }') 349 | 350 | Now calling R functions is as trivial as calling plain Python functions:: 351 | 352 | >>> conn.r.func0() 353 | "hello world" 354 | >>> conn.r.func1(5) 355 | 10 356 | >>> conn.r.funcKW(a2=6.0) 357 | [1.0, 6.0] 358 | 359 | Of course you can also call functions built-in to R:: 360 | 361 | >>> conn.r.length([1,2,3]) 362 | 3 363 | 364 | 365 | Getting help with functions 366 | ------------------------------ 367 | 368 | If R is properly installed including its help messages those can be retrieved directly. 369 | Also here no surprise - just do it the Python way through the ``__doc__`` attribute:: 370 | 371 | >>> print(conn.r.sapply.__doc__) 372 | lapply package:base R Documentation 373 | 374 | Apply a Function over a List or Vector 375 | 376 | Description: 377 | 378 | 'lapply' returns a list of the same length as 'X', each element of 379 | which is the result of applying 'FUN' to the corresponding element 380 | of 'X'. 381 | [...] 382 | 383 | Of course this only works for functions which provide documentation. For all others ``__doc__`` just returns ``None``. 384 | 385 | 386 | 387 | Applying an R function as argument to another function 388 | --------------------------------------------------------- 389 | 390 | A typical application in R is to apply a vector to a function, especially via ``sapply`` and its brothers (or sisters, 391 | depending on how one sees them).
392 | 393 | Fortunately this is as easy as you would expect:: 394 | 395 | >>> conn.voidEval('double <- function(x) { x*2 }') 396 | >>> conn.r.sapply(numpy.array([1, 2, 3]), conn.r.double) 397 | array([ 2., 4., 6.]) 398 | 399 | Here a Python array and a function defined in R are provided as arguments to the R function ``sapply``. 400 | 401 | Of course the following attempt to provide a Python function as an argument into R makes no sense:: 402 | 403 | >>> def double(v): return v*2 404 | ... 405 | >>> conn.r.sapply(array([1, 2, 3]), double) 406 | Traceback (most recent call last): 407 | File "<stdin>", line 1, in <module> 408 | NameError: name 'double' is not defined 409 | 410 | This will result in a NameError because the connector tries to reference the function 'double' inside the 411 | R namespace. It should be obvious that it is not possible to transfer function implementations from Python to R. 412 | 413 | 414 | Applying a variable already defined in R to a function 415 | ----------------------------------------------------------- 416 | 417 | To understand why this is an interesting feature one has to understand how Python and pyRserve work. The following 418 | code is pretty inefficient:: 419 | 420 | >>> conn.r.arr = numpy.array([1, 2, 3]) 421 | >>> conn.r.sapply(conn.r.arr, conn.r.double) 422 | 423 | To see why it is inefficient it is reproduced here more explicitly, but doing exactly the same thing:: 424 | 425 | >>> conn.r.arr = numpy.array([1, 2, 3]) 426 | >>> arr = conn.r.arr 427 | >>> conn.r.sapply(arr, conn.r.double) 428 | 429 | Now it is clear that the value of ``conn.r.arr`` is first set inside R, then retrieved back to Python 430 | (in the second line) and then again sent back to the ``sapply`` function. This is pretty inefficient, 431 | it would be much better just to set the array in R and then to refer to ``conn.r.arr`` instead of sending 432 | it back and forth.
Here the "reference" namespace called ``ref`` comes into play:: 433 | 434 | >>> conn.ref.arr 435 | 436 | 437 | Through ``conn.ref`` it is possible to only reference a variable (or a function) in the R namespace without actually 438 | bringing it over to Python. Such a reference can then be passed as an argument to every function called 439 | from ``conn.r``. So the proper way to make the call above is:: 440 | 441 | >>> conn.r.arr = numpy.array([1, 2, 3]) 442 | >>> conn.r.sapply(conn.ref.arr, conn.r.double) 443 | 444 | However it is still possible to retrieve the actual content of a variable proxy through its ``value()`` method:: 445 | 446 | >>> conn.ref.arr.value() 447 | array([1., 2., 3.]) 448 | 449 | So using ``conn.ref`` instead of ``conn.r`` primarily returns a reference to the remote variable in the R namespace, 450 | instead of its value. Actually we have done that before with the function ``conn.r.double``. This doesn't return 451 | the R function to Python - something which would be pretty useless. Instead only a proxy to the R function is returned:: 452 | 453 | >>> conn.r.double 454 | 455 | 456 | Actually functions are always returned as proxy references, both in the ``conn.r`` and the ``conn.ref`` namespace, 457 | so ``conn.r.<function>`` is the same as ``conn.ref.<function>``. 458 | 459 | Using references to R variables is indeed absolutely necessary for variable content which is not transferable into 460 | Python, like special types of R classes, complex data frames etc. 461 | 462 | 463 | Handling complex result objects from R functions 464 | --------------------------------------------------- 465 | 466 | Some functions in R (especially those doing statistical calculations) return quite complex result objects. 467 | 468 | The T-test is such an example.
In the R shell you would see something like this (please ignore the silly values 469 | applied to the t test):: 470 | 471 | > t.test(c(1,2,3,1),c(1,6,7,8)) 472 | 473 | Welch Two Sample t-test 474 | 475 | data: c(1, 2, 3, 1) and c(1, 6, 7, 8) 476 | t = -2.3054, df = 3.564, p-value = 0.09053 477 | alternative hypothesis: true difference in means is not equal to 0 478 | 95 percent confidence interval: 479 | -8.4926941 0.9926941 480 | sample estimates: 481 | mean of x mean of y 482 | 1.75 5.50 483 | 484 | This is what you would get to see directly in your R shell. 485 | 486 | Now, how would this convoluted result be transferred into Python objects? For this to be possible 487 | pyRserve has defined three special classes that allow for a mapping from R to Python objects. These classes 488 | are explained in the following sections. Afterwards - with that knowledge - we have a final look at the result 489 | of the t-test again. 490 | 491 | 492 | TaggedLists 493 | ~~~~~~~~~~~~~~~~ 494 | 495 | The first special type of container is called "TaggedList". It reflects a list-type object in R where 496 | items can be accessed in two ways as shown here (this is now pure R code):: 497 | 498 | > t <- list(husband="otto", wife="erna", "5th avenue") 499 | > t[1] 500 | $husband 501 | [1] "otto" 502 | 503 | > t['husband'] 504 | $husband 505 | [1] "otto" 506 | 507 | So items in the list can be either accessed via their index position, or through their "tag". Please note that the 508 | third list item ("5th avenue") is not tagged, so it can only be accessed via its index number, i.e. ``t[3]`` 509 | (indexing in R starts at 1 and not at zero as in Python!). 510 | 511 | There is no direct match to any standard Python construct for a ``TaggedList``. Python dictionaries do not preserve 512 | their elements' order and also don't allow for missing keys (which is why an ``OrderedDict`` also doesn't help).
513 | NamedTuples on the other side would do the job but don't allow items to be appended or deleted since they are 514 | immutable. 515 | 516 | The solution was to provide a special class in Python which is called ``TaggedList``. When accessing the 517 | list ``t`` from the example above you'll obtain an instance of a TaggedList in Python:: 518 | 519 | >>> t = conn.eval('list(husband="otto", wife="erna", "5th avenue")') 520 | >>> t 521 | TaggedList(husband='otto', wife='erna', '5th avenue') 522 | 523 | This ``TaggedList`` instance can be accessed in the same way as its R counterpart, except for the fact that indexing is 524 | starting at zero in the usual Pythonic way:: 525 | 526 | >>> t[0] 527 | 'otto' 528 | >>> t['husband'] 529 | 'otto' 530 | >>> t[2] 531 | '5th avenue' 532 | 533 | To retrieve its data suitable for instantiating another ``TaggedList`` on the Python side get its data as a list of 534 | tuples. This also demonstrates how a ``TaggedList`` can be created directly in Python:: 535 | 536 | >>> from pyRserve import TaggedList 537 | >>> t.astuples 538 | [('husband', 'otto'), ('wife', 'erna'), (None, '5th avenue')] 539 | >>> new_tagged_list = TaggedList(t.astuples) 540 | 541 | .. NOTE:: 542 | ``TaggedList`` does not provide the full list API that one would expect, some methods are just not entirely 543 | implemented yet. However it is useful enough to retrieve all information obtained out of an R result object. 544 | 545 | 546 | AttrArrays 547 | ~~~~~~~~~~~~~~~~~ 548 | 549 | An ``AttrArray`` is simply a normal numpy array, with an additional dictionary attribute called ``attr``. 550 | This dictionary is used to store meta data associated to an array retrieved from R.
551 | 552 | Let's create such an ``AttrArray`` in R, and transfer it to the Python side:: 553 | 554 | >>> conn.voidEval("t <- c(-8.49, 0.99)") 555 | >>> conn.voidEval("attributes(t) <- list(conf.level=0.95)") 556 | >>> conn.r.t 557 | AttrArray([-8.49, 0.99], attr={'conf.level': array([ 0.95])}) 558 | 559 | To create such an array from Python in R is also possible via:: 560 | 561 | >>> from pyRserve import AttrArray 562 | >>> conn.r.t = AttrArray.new([-8.49, 0.99], {'conf.level': numpy.array([ 0.95])}) 563 | 564 | Instead of a list argument the ``new`` function also accepts a numpy array:: 565 | 566 | >>> conn.r.t = AttrArray.new(numpy.array([-8.49, 0.99]), {'conf.level': numpy.array([ 0.95])}) 567 | 568 | 569 | TaggedArrays 570 | ~~~~~~~~~~~~~~~~ 571 | 572 | The third special data type provided by pyRserve is the so called ``TaggedArray``. It provides basically the same 573 | features as ``TaggedList`` above, however the underlying data type is a numpy-Array instead of a Python list. 574 | In fact, a TaggedArray is a direct subclass of ``numpy.ndarray``, enhanced with some new features 575 | like accessing array cells by name as in ``TaggedList``. 576 | 577 | For the moment ``TaggedArrays`` only make real sense if they are 1-dimensional, so please do not change 578 | their shape. The results would not really be predictable. 579 | 580 | To create a ``TaggedArray`` on the R side and transfer it to Python type: 581 | 582 | >>> res = conn.eval('c(a=1.,b=2.,3.)') 583 | >>> res 584 | TaggedArray([ 1., 2., 3.], key=['a', 'b', '']) 585 | >>> res[1] 586 | 2.0 587 | >>> res['b'] 588 | 2.0 589 | 590 | The third element in the array did not obtain a name on the R side, so it is represented by an empty string in 591 | the ``TaggedArray`` object. 592 | 593 | Although ``TaggedArray`` instances are normal numpy arrays they lose their tags when further processed in Python, but still 594 | present themselves (via ``__repr__``) as ``TaggedArray``.
This is a current flaw in their implementation. 595 | 596 | To create a ``TaggedArray`` directly in Python there is a constructor function ``new()`` which takes a normal 597 | 1-d numpy array as the first argument and a list of tags as the second. Both arguments must match in their size:: 598 | 599 | >>> from pyRserve import TaggedArray 600 | >>> arr = TaggedArray.new(numpy.array([1, 2, 3]), ['a', 'b', '']) 601 | >>> arr 602 | TaggedArray([1, 2, 3], key=['a', 'b', '']) 603 | 604 | 605 | Back to the t-test example 606 | -------------------------------- 607 | 608 | After ``TaggedList`` and ``TaggedArray`` have been introduced we can now go back to the t-test mentioned 609 | before. Let's make the same call to the test function, this time just from the Python side, and then 610 | look at the result. Again there are two ways to call it, one via string evaluation by the R interpreter, 611 | one by directly providing native Python parameters. 612 | So:: 613 | 614 | >>> res = conn.eval('t.test(c(1,2,3,1),c(1,6,7,8))') 615 | 616 | and:: 617 | 618 | >>> res = conn.r.test(numpy.array([1,2,3,1]), numpy.array([1,6,7,8])) 619 | 620 | does actually the same thing. 621 | 622 | Looking at the result we get:: 623 | >>> res 624 | 633 | 634 | The result is an instance of a ``TaggedList``, containing different types of list items. 635 | 636 | So to access e.g. 
the confidence interval one would type in Python:: 637 | 638 | >>> res['conf.int'] 639 | AttrArray([-8.49269413, 0.99269413], attr={'conf.level': array([ 0.95])}) 640 | 641 | This returns an AttrArray where the confidence level is stored in an attribute called ``conf.level`` 642 | in the ``attr``-dictionary:: 643 | 644 | >>> res['conf.int'].attr['conf.level'] 645 | array([ 0.95]) 646 | 647 | In the ``res``-result data structure above there are also objects of a container called TaggedArray:: 648 | 649 | >>> res['estimate'] 650 | TaggedArray([ 1.75, 5.5 ], tags=['mean of x', 'mean of y']) 651 | >>> res['estimate'][1] 652 | 5.5 653 | >>> res['estimate']['mean of y'] 654 | 5.5 655 | 656 | Out Of Bounds messages (OOB) 657 | ---------------------------- 658 | 659 | Starting with version 1.7, Rserve allows OOB messages to be sent from R to Rserve clients, i.e. it 660 | allows for nested communication during an ``eval`` call. 661 | 662 | This capability requires starting Rserve with a configuration enabling it, and loading Rserve itself as a 663 | library into the server. Both are easily accomplished in a config file (e.g. ``oob.conf``) like this:: 664 | 665 | oob enable 666 | eval library(Rserve) 667 | 668 | Then start Rserve using this config file:: 669 | 670 | R CMD Rserve --RS-conf oob.conf 671 | 672 | OOB messaging works by calling ``self.oobSend`` or ``self.oobMessage`` in R, e.g.:: 673 | 674 | >>> conn.eval('self.oobSend(1)') 675 | True 676 | 677 | This does nothing but indicate that it works. For real usefulness, one needs to register a callback 678 | that gets called with the sent data and user code as parameters:: 679 | 680 | >>> def printoobmsg(data, code): print(data, code) 681 | ...
682 | >>> conn.oobCallback = printoobmsg 683 | >>> conn.eval('self.oobSend("foo")') # user code is 0 per default 684 | foo 0 685 | True 686 | 687 | The other function, ``self.oobMessage``, executes the callback and gives its return value to R:: 688 | 689 | >>> conn.oobCallback = lambda data, code: data**code 690 | >>> conn.voidEval('dc <- self.oobMessage(2, 3)') 691 | >>> conn.r.dc 692 | 8 693 | 694 | The user code might be useful to create a callback convention used for switching callbacks based 695 | on agreed-upon codes:: 696 | 697 | >>> C_PRINT = conn.r.C_PRINT = 0 698 | >>> C_ECHO = conn.r.C_ECHO = 1 699 | >>> C_STORE = conn.r.C_STORE = 2 700 | >>> store = [] 701 | >>> functions = { 702 | ... C_PRINT: lambda data: print('<<<', data), 703 | ... C_ECHO: lambda data: data, 704 | ... C_STORE: store.append, 705 | ... } 706 | >>> def dispatch(data, code): 707 | ... return functions[code](data) 708 | >>> conn.oobCallback = dispatch 709 | >>> 710 | >>> conn.eval('self.oobMessage("foo", C_PRINT)') 711 | <<< foo 712 | >>> conn.eval('self.oobMessage("foo", C_ECHO)') 713 | 'foo' 714 | >>> conn.eval('self.oobMessage("foo", C_STORE)') 715 | >>> store 716 | ['foo'] 717 | >>> conn.eval('self.oobMessage("foo", 3)') 718 | Traceback (most recent call last): 719 | File "<stdin>", line 1, in <module> 720 | KeyError: 3 721 | 722 | 723 | An example showing how nesting of OOB messages works 724 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 725 | 726 | The previous examples were showing the bare application of OOB messages, 727 | but the real power of it comes when one understands how messages are 728 | getting nested within an ``eval`` call. 729 | 730 | For that first create an R function which returns progress information 731 | during a "complicated" calculation: 732 | 733 | >>> r_func = """ 734 | ... big_job <- function(x) 735 | ... { 736 | ... a <- x*2 737 | ... self.oobSend('25% done') 738 | ... b <- a * a 739 | ... self.oobSend('50% done') 740 | ... c <- a + b 741 | ...
self.oobSend('75% done') 742 | ... d <- c**2 743 | ... self.oobSend('100% done') 744 | ... -1 * d 745 | ... }""" 746 | >>> conn.eval(r_func) 747 | 748 | Then create a progress report function, register it as a callback and 749 | then call the actual R function: 750 | 751 | >>> def progress(msg, code): print(msg) 752 | ... 753 | >>> conn.oobCallback = progress 754 | >>> res = conn.r.big_job(5) 755 | 25% done 756 | 50% done 757 | 75% done 758 | 100% done 759 | >>> res 760 | -12100.0 761 | --------------------------------------------------------------------------------