├── .gitignore ├── LICENSE ├── README.md ├── circle.yml ├── pandas_rs ├── __init__.py ├── rs.py └── version.py ├── setup.py ├── tests ├── db │ └── setup.sql └── test_rs.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | .eggs 13 | parts 14 | bin 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib 20 | lib64 21 | venv*/ 22 | pyvenv*/ 23 | 24 | # Installer logs 25 | pip-log.txt 26 | 27 | # Unit test / coverage reports 28 | .coverage 29 | .tox 30 | .coverage.* 31 | nosetests.xml 32 | coverage.xml 33 | htmlcov 34 | 35 | # Translations 36 | *.mo 37 | 38 | # Mr Developer 39 | .mr.developer.cfg 40 | .project 41 | .pydevproject 42 | .idea 43 | *.iml 44 | *.komodoproject 45 | 46 | # Complexity 47 | output/*.html 48 | output/*/index.html 49 | 50 | # Sphinx 51 | docs/_build 52 | 53 | .DS_Store 54 | *~ 55 | .*.sw[po] 56 | .build 57 | .ve 58 | .env 59 | .cache 60 | .pytest 61 | .bootstrap 62 | .appveyor.token 63 | *.bak 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Tatsuro Yasukawa 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Circle CI](https://circleci.com/gh/SamuraiT/pandas-rs.svg?style=svg)](https://circleci.com/gh/SamuraiT/pandas-rs) 2 | 3 | # pandas-rs 4 | 5 | pandas-rs was originally designed for RedShift but 6 | also works for PostgreSQL. Inspired by [pandas-td](https://github.com/treasure-data/pandas-td) 7 | 8 | I probably should have named the package pandas-pg, since it also works for 9 | PostgreSQL. 10 | 11 | # requirement 12 | To connect to PostgreSQL and RedShift, you need the PostgreSQL client installed. 13 | 14 | ### Mac 15 | 16 | ~~~ 17 | brew update 18 | brew install postgresql 19 | ~~~ 20 | 21 | # installation 22 | 23 | ~~~ 24 | pip install pandas-rs 25 | ~~~ 26 | 27 | 28 | # Usage 29 | 30 | As mentioned above, pandas-rs works for PostgreSQL as well.
31 | First, export the password via the shell (recommended): 32 | 33 | ~~~shell 34 | export REDSHIFT_OR_POSTGRESQL_PASSWORD='password' 35 | ~~~ 36 | 37 | ~~~py 38 | import pandas_rs as rs 39 | import os # use only if you will access password through environment variables 40 | 41 | rs.create_engine( 42 | dbname='dev', 43 | user='test', 44 | password=os.environ['REDSHIFT_OR_POSTGRESQL_PASSWORD'], 45 | host='foobar.redshift.example', 46 | port='5439' 47 | ) 48 | 49 | 50 | print(rs.read_sql("""select 'hello PostgreSQL or redshift' greeting""")) 51 | ~~~ 52 | 53 | result 54 | 55 | ~~~py 56 | greeting 57 | 0 hello PostgreSQL or redshift 58 | ~~~ 59 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | machine: 2 | python: 3 | version: 3.4.3 4 | -------------------------------------------------------------------------------- /pandas_rs/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__ 2 | from .rs import ( 3 | create_engine, 4 | read_sql 5 | ) 6 | -------------------------------------------------------------------------------- /pandas_rs/rs.py: -------------------------------------------------------------------------------- 1 | import psycopg2 2 | import pandas as pd 3 | 4 | class Redshift(object): 5 | """ 6 | Redshift client which connect to redshfit database.
7 | Furthermore, you can read sql from Redshift and 8 | returns the reuslt with pandas dataframe structure 9 | """ 10 | 11 | def __init__(self, config=None): 12 | self.config = config 13 | self.con_pg = None 14 | if config is not None: 15 | self.con_pg = self.connect(config=config) 16 | 17 | def create_engine(self, dbname, user, password, host, port): 18 | self.config = dict( 19 | dbname=dbname, 20 | user=user, 21 | password=password, 22 | host=host, 23 | port=port 24 | ) 25 | self.con_pg = self.connect(config=self.config) 26 | 27 | def connect(self, *args,**kwargs): 28 | config = kwargs['config'] 29 | try: 30 | con_pg=psycopg2.connect( 31 | dbname=config['dbname'], 32 | host=config['host'], 33 | port=config['port'], 34 | user=config['user'], 35 | password=config['password'] 36 | ) 37 | return con_pg 38 | except Exception as err: 39 | print(err) 40 | 41 | def read_sql(self, sql, index_col=None, columns=None, count=0): 42 | return pd.read_sql(sql, self.con_pg, index_col, columns=columns) 43 | 44 | class RedshiftConfigurationError(Exception): 45 | 46 | def __init__(self, 47 | expr="ConfigError", 48 | msg="Config does not exists. create engine"): 49 | self.expr = expr 50 | self.msg = msg 51 | 52 | def __str__(self): 53 | return "{} {}".format(self.expr, self.msg) 54 | 55 | 56 | class RedshiftConnectionError(Exception): 57 | """Exception raised for errors in the Redshift connection. 
58 | 59 | Attributes: 60 | expr -- input expression in which the error occurred 61 | msg -- explanation of the error 62 | """ 63 | 64 | def __init__(self, expr, msg="Failed to connect"): 65 | self.expr = expr 66 | self.msg = msg 67 | 68 | def __str__(self): 69 | return "{} {}".format(self.expr, self.msg) 70 | 71 | 72 | def create_engine(dbname, user, password, host, port): 73 | rs.create_engine(dbname, user, password, host, port) 74 | 75 | def read_sql(sql, index_col=None, columns=None): 76 | if rs.con_pg is None: raise RedshiftConfigurationError() 77 | return rs.read_sql(sql, index_col, columns) 78 | 79 | # HACK:FIX. 80 | # this code is really ugly 81 | # I did so because I wanted to store the configuration, and 82 | # wanted users to use `read_sql` without setting cursor everytime they access to DB 83 | rs = Redshift() 84 | -------------------------------------------------------------------------------- /pandas_rs/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.6" 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | try: 2 | from setuptools import setup 3 | except ImportError: 4 | from distutils.core import setup 5 | 6 | import re 7 | import os 8 | 9 | def pandas_rs_version(): 10 | version_regex = re.compile(r'__version__ = "([^\"]*)"') 11 | return version_regex.match( 12 | open('pandas_rs/version.py').read() 13 | ).group(1) 14 | 15 | def read_file(filename): 16 | filepath = os.path.join( 17 | os.path.dirname( 18 | os.path.dirname(__file__) 19 | ), 20 | filename 21 | ) 22 | if os.path.exists(filepath): 23 | return open(filepath).read() 24 | else: 25 | return '' 26 | 27 | setup( 28 | name = "pandas-rs", 29 | packages = ["pandas_rs"], 30 | install_requires = ["pandas", "psycopg2"], 31 | version = pandas_rs_version(), 32 | description = "pandas extension for PostgreSQL and 
AWS RedShift (Not Officail library)", 33 | author = "Tatsuro Yasukawa", 34 | author_email = "t.yasukawa01@gmail.com", 35 | url = "https://github.com/SamuraiT/pandas-rs", 36 | download_url = "", 37 | keywords = ["pandas", "PostgreSQL","RedShift", "sql"], 38 | classifiers = [ 39 | "Programming Language :: Python", 40 | "Programming Language :: Python :: 3", 41 | "Development Status :: 4 - Beta", 42 | "Environment :: Other Environment", 43 | "Intended Audience :: Developers", 44 | "License :: OSI Approved :: MIT License", 45 | "Topic :: Software Development :: Libraries :: Python Modules", 46 | ], 47 | long_description = read_file('README.md') 48 | ) -------------------------------------------------------------------------------- /tests/db/setup.sql: -------------------------------------------------------------------------------- 1 | drop database if exists test_pandas_rs_db; 2 | drop role if exists test_pandas_rs; 3 | create database test_pandas_rs_db; 4 | create role test_pandas_rs superuser; 5 | alter role test_pandas_rs with login; 6 | -------------------------------------------------------------------------------- /tests/test_rs.py: -------------------------------------------------------------------------------- 1 | import pandas_rs as rs 2 | import pandas as pd 3 | from pandas.util.testing import assert_frame_equal 4 | 5 | class TestPandasRs(object): 6 | 7 | def test_create_engine(self): 8 | assert None is rs.create_engine( 9 | dbname='test_pandas_rs_db', 10 | user='test_pandas_rs', 11 | password='', 12 | host='localhost', 13 | port='5432' 14 | ) 15 | 16 | def test_read_sql(self): 17 | expected = pd.DataFrame(['hello'], columns=['test']) 18 | assert_frame_equal(rs.read_sql("select 'hello' test"), expected) 19 | 20 | 21 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py34, py278 3 | [testenv] 4 | whitelist_externals 
= 5 | psql 6 | pytest 7 | deps = 8 | pytest 9 | commands = 10 | psql -f tests/db/setup.sql 11 | py.test -v tests/test_rs.py 12 | --------------------------------------------------------------------------------