├── .gitignore ├── .travis.yml ├── LICENSE ├── LICENSES └── PANDAS_LICENSE ├── MANIFEST.in ├── README.rst ├── doc ├── Makefile ├── make.bat └── source │ ├── api.rst │ ├── conf.py │ ├── index.rst │ ├── jpdatareader.rst │ ├── jpdatetime.rst │ └── jpstrings.rst ├── japandas ├── __init__.py ├── compat.py ├── core │ ├── __init__.py │ ├── strings.py │ └── tests │ │ ├── __init__.py │ │ └── test_strings.py ├── io │ ├── __init__.py │ ├── data.py │ ├── estat.py │ └── tests │ │ ├── __init__.py │ │ └── test_estat.py ├── tools │ ├── __init__.py │ ├── plotting.py │ └── tests │ │ ├── __init__.py │ │ └── test_plotting.py ├── tseries │ ├── __init__.py │ ├── data │ │ ├── holidays.pkl │ │ └── tseholidays.pkl │ ├── holiday.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_holiday.py │ │ └── test_tools.py │ └── tools.py └── version.py ├── requirements.txt ├── requirements_test.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | env: 3 | global: 4 | secure: dlKlpVXGzCuN1Wc0h48HeT3EYjND2erX6hPeya/c6SR9LTN+ybcWGyZrHDtybOaRw28REVIXqH3unkVtX/X4d46U/tmRcC/Fy7wInhOS82yuFRCB8TinQkpeqRDZuU+HiW737uzmYf3U37NZnm2VGCsvVXUAoZe6/aLgTXMHP6U= 5 | matrix: 6 | - PYTHON=2.7 PANDAS=0.20.2 7 | - PYTHON=2.7 PANDAS=0.23.2 8 | - PYTHON=3.5 PANDAS=0.21.1 9 | - PYTHON=3.6 PANDAS=0.22.0 10 | - PYTHON=3.6 PANDAS=0.23.2 COVERAGE=true 11 | 12 | install: 13 | - if [[ "$PYTHON" == "2.7" ]]; then 14 | wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 15 | else 16 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 17 | fi 18 | - bash miniconda.sh -b -p $HOME/miniconda 19 | - export PATH="$HOME/miniconda/bin:$PATH" 20 | - hash -r 21 | - conda config --set always_yes yes --set changeps1 no 22 | - conda config --add channels pandas 23 | - conda update -q conda 24 | - conda info -a 25 | - conda create -q -n test-environment python=$PYTHON pip numpy scipy pandas=$PANDAS nose matplotlib requests 26 | - source activate test-environment 27 | - "pip install -r requirements_test.txt" 28 | - pip install flake8 pytest pytest-cov codecov 29 | 30 | script: 31 | - if [[ "$PYTHON" == "2.7" ]]; then 32 | nosetests -v -s; 33 | else 34 | nosetests -v -s --with-coverage --cover-package=japandas; 35 | fi 36 | - flake8 --ignore E501 japandas 37 | 38 | after_success: 39 | - if [ "$COVERAGE" ]; then 40 | codecov; 41 | fi 
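# Rough local equivalent of the CI steps above (a sketch only, not part of the CI run;
# assumes conda is installed and the pinned package versions are still available):
#   conda create -n test-environment python=3.6 pip numpy scipy pandas=0.23.2 nose matplotlib requests
#   source activate test-environment
#   pip install -r requirements_test.txt
#   nosetests -v -s --with-coverage --cover-package=japandas
#   flake8 --ignore E501 japandas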
42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Sinhrks 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of japandas nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /LICENSES/PANDAS_LICENSE: -------------------------------------------------------------------------------- 1 | ======= 2 | License 3 | ======= 4 | 5 | pandas is distributed under a 3-clause ("Simplified" or "New") BSD 6 | license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have 7 | BSD-compatible licenses, are included. Their licenses follow the pandas 8 | license. 9 | 10 | pandas license 11 | ============== 12 | 13 | Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team 14 | All rights reserved. 15 | 16 | Copyright (c) 2008-2011 AQR Capital Management, LLC 17 | All rights reserved. 18 | 19 | Redistribution and use in source and binary forms, with or without 20 | modification, are permitted provided that the following conditions are 21 | met: 22 | 23 | * Redistributions of source code must retain the above copyright 24 | notice, this list of conditions and the following disclaimer. 25 | 26 | * Redistributions in binary form must reproduce the above 27 | copyright notice, this list of conditions and the following 28 | disclaimer in the documentation and/or other materials provided 29 | with the distribution. 30 | 31 | * Neither the name of the copyright holder nor the names of any 32 | contributors may be used to endorse or promote products derived 33 | from this software without specific prior written permission. 34 | 35 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 36 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 37 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 38 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 39 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 40 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 41 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 42 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 43 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 44 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 45 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 46 | 47 | About the Copyright Holders 48 | =========================== 49 | 50 | AQR Capital Management began pandas development in 2008. Development was 51 | led by Wes McKinney. AQR released the source under this license in 2009. 52 | Wes is now an employee of Lambda Foundry, and remains the pandas project 53 | lead. 54 | 55 | The PyData Development Team is the collection of developers of the PyData 56 | project. This includes all of the PyData sub-projects, including pandas. The 57 | core team that coordinates development on GitHub can be found here: 58 | http://github.com/pydata. 59 | 60 | Full credits for pandas contributors can be found in the documentation. 61 | 62 | Our Copyright Policy 63 | ==================== 64 | 65 | PyData uses a shared copyright model. Each contributor maintains copyright 66 | over their contributions to PyData. However, it is important to note that 67 | these contributions are typically only changes to the repositories. Thus, 68 | the PyData source code, in its entirety, is not the copyright of any single 69 | person or institution. Instead, it is the collective copyright of the 70 | entire PyData Development Team. If individual contributors want to maintain 71 | a record of what changes/contributions they have specific copyright on, 72 | they should indicate their copyright in the commit message of the change 73 | when they commit the change to one of the PyData repositories. 74 | 75 | With this in mind, the following banner should be used in any source code 76 | file to indicate the copyright and license terms: 77 | 78 | #----------------------------------------------------------------------------- 79 | # Copyright (c) 2012, PyData Development Team 80 | # All rights reserved. 81 | # 82 | # Distributed under the terms of the BSD Simplified License. 83 | # 84 | # The full license is in the LICENSE file, distributed with this software. 85 | #----------------------------------------------------------------------------- 86 | 87 | Other licenses can be found in the LICENSES directory. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include LICENSES/* 3 | include README.rst 4 | include requirements.txt 5 | include setup.py -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | japandas 2 | ======== 3 | 4 | .. image:: https://img.shields.io/pypi/v/japandas.svg 5 | :target: https://pypi.python.org/pypi/japandas/ 6 | .. image:: https://readthedocs.org/projects/japandas/badge/?version=latest 7 | :target: http://japandas.readthedocs.org/en/latest/ 8 | :alt: Latest Docs 9 | .. image:: https://travis-ci.org/sinhrks/japandas.svg?branch=master 10 | :target: https://travis-ci.org/sinhrks/japandas 11 | .. 
image:: https://codecov.io/gh/sinhrks/japandas/branch/master/graph/badge.svg 12 | :target: https://codecov.io/gh/sinhrks/japandas 13 | 14 | Overview 15 | ~~~~~~~~ 16 | 17 | pandas Japanese extension. 18 | 19 | pandas の日本語拡張。以下の機能を提供する。 20 | 21 | - 日本語の日付のパース 22 | - 日本の祝日カレンダーと、それを利用した営業日計算 23 | - 文字列の全角/半角変換 24 | - e-Stat からのデータの取得 25 | - ローソク足チャート 26 | 27 | **補足** このパッケージでは、"日本固有の機能であり本流に実装される可能性が低いもの", もしくは"それらに関係し本流に実装される可能性が低いもの" を実装 / メンテナンスする。 28 | 29 | 30 | インストール 31 | ~~~~~~~~~~ 32 | 33 | .. code-block:: sh 34 | 35 | pip install japandas 36 | 37 | ドキュメント 38 | ~~~~~~~~~~ 39 | 40 | - 開発版: http://japandas.readthedocs.org/en/latest/ 41 | - リリース版: http://japandas.readthedocs.org/en/stable/ 42 | 43 | 機能概要 44 | ~~~~~~~ 45 | 46 | 日本語の日付のパース 47 | ,,,,,,,,,,,,,,,,, 48 | 49 | .. code-block:: python 50 | 51 | >>> import japandas as jpd 52 | >>> jpd.to_datetime('2014年11月30日') 53 | Timestamp('2014-11-30 00:00:00') 54 | 55 | >>> jpd.to_datetime(['2014年11月30日13時25分', '2014年11月30日14時38分']) 56 | 57 | [2014-11-30 13:25:00, 2014-11-30 14:38:00] 58 | Length: 2, Freq: None, Timezone: None 59 | 60 | >>> jpd.date_range(start=u'2013年12月01日', end=u'2014年12月01日', freq='D') 61 | 62 | [2013-12-01, ..., 2014-12-01] 63 | Length: 366, Freq: D, Timezone: None 64 | 65 | 66 | 日本の祝日カレンダーと、それを利用した営業日計算 67 | ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 68 | 69 | .. code-block:: python 70 | 71 | >>> import pandas as pd 72 | >>> import datetime 73 | 74 | >>> calendar = jpd.JapaneseHolidayCalendar() 75 | >>> cday = pd.offsets.CDay(calendar=calendar) 76 | 77 | >>> datetime.datetime(2014, 4, 28) + cday 78 | # 4/29は祝日(昭和の日) 79 | Timestamp('2014-04-30 00:00:00') 80 | 81 | >>> datetime.datetime(2014, 4, 28) - cday 82 | # 4/26は土曜日, 4/27は日曜日 83 | Timestamp('2014-04-25 00:00:00') 84 | 85 | >>> datetime.datetime(2014, 5, 3) + cday 86 | # 5/4は日曜日, 5/5は祝日(こどもの日), 5/6は祝日(みどりの日/振替休日) 87 | Timestamp('2014-05-07 00:00:00') 88 | 89 | >>> datetime.datetime(2014, 5, 3) - cday 90 | # 5/3は土曜日 91 | Timestamp('2014-05-02 00:00:00') 92 | 93 | # 適当なデータを作成 94 | >>> df = pd.DataFrame(np.random.randn(10, 3), 95 | ... index=jpd.date_range(u'2014年5月1日', u'2014年5月10日', freq='D')) 96 | >>> df 97 | 0 1 2 98 | 2014-05-01 0.762453 -1.418762 -0.150073 99 | 2014-05-02 0.966500 -0.473888 0.272871 100 | 2014-05-03 0.473370 -1.282504 0.380449 101 | 2014-05-04 0.215411 0.220587 -1.088699 102 | 2014-05-05 0.286348 -1.069165 -1.471871 103 | 2014-05-06 -0.665438 -0.402046 -1.008051 104 | 2014-05-07 1.173935 2.080087 -2.279285 105 | 2014-05-08 -0.957195 0.746798 0.092214 106 | 2014-05-09 -0.259276 -0.775489 0.572525 107 | 2014-05-10 -0.910188 0.294136 0.020730 108 | 109 | >>> cday = pd.offsets.CDay(calendar=calendar) 110 | >>> indexer = jpd.date_range(u'2014年5月1日', u'2014年5月10日', freq=cday) 111 | 112 | # カレンダー上 営業日のレコードを抽出 113 | >>> df.ix[indexer] 114 | 0 1 2 115 | 2014-05-01 0.762453 -1.418762 -0.150073 116 | 2014-05-02 0.966500 -0.473888 0.272871 117 | 2014-05-07 1.173935 2.080087 -2.279285 118 | 2014-05-08 -0.957195 0.746798 0.092214 119 | 2014-05-09 -0.259276 -0.775489 0.572525 120 | 121 | 122 | 全角/半角変換 123 | ,,,,,,,,,,, 124 | 125 | .. code-block:: python 126 | 127 | >>> s = pd.Series([u'アイウエオ', u'ABC01', u'DE345']) 128 | >>> z = s.str.h2z() 129 | >>> z 130 | 0 アイウエオ 131 | 1 ABC01 132 | 2 DE345 133 | dtype: object 134 | 135 | >>> z.str.z2h() 136 | 0 アイウエオ 137 | 1 ABC01 138 | 2 DE345 139 | dtype: object 140 | 141 | e-Stat からの統計情報取得 142 | ,,,,,,,,,,,,,,,,,,,,,,, 143 | 144 | .. 
code-block:: python 145 | 146 | >>> key = "your application id" 147 | >>> df = jpd.DataReader("0000030001", 'estat', appid=key) 148 | >>> df.head() 149 | value 全国都道府県030001 全域・集中の別030002 年齢5歳階級A030002 男女A030001 150 | 時間軸(年次) 151 | 1980年 117060396 全国 全域 総数 男女総数 152 | 1980年 89187409 全国市部 全域 総数 男女総数 153 | 1980年 27872987 全国郡部 全域 総数 男女総数 154 | 1980年 5575989 北海道 全域 総数 男女総数 155 | 1980年 1523907 青森県 全域 総数 男女総数 156 | 157 | 158 | ローソク足チャート 159 | ,,,,,,,,,,,,,,,,, 160 | 161 | .. code-block:: python 162 | 163 | >>> df.plot(kind='ohlc') 164 | チャート省略 165 | 166 | 167 | License 168 | ~~~~~~~ 169 | 170 | BSD. 171 | 172 | 日本の祝日データソースとして以下を利用。 173 | 174 | - `komagata/holiday_jp `_ 175 | 176 | Copyright (c) 2009 Masaki Komagata. See `LICENSE `_ for details. 177 | 178 | - `holiday_jp `_ 179 | 180 | MIT. 181 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | 
rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/japandas.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/japandas.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/japandas" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/japandas" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 
141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\japandas.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\japandas.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 
165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /doc/source/api.rst: -------------------------------------------------------------------------------- 1 | 2 | API Reference 3 | ============= 4 | 5 | .. _api.functions: 6 | 7 | Base 8 | ~~~~ 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | 13 | DataStore.is_valid 14 | DataStore.get 15 | DataStore.search 16 | DataResource.read 17 | 18 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # japandas documentation build configuration file, created by 4 | # sphinx-quickstart on Sun Feb 8 19:30:56 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. 
If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | 'sphinx.ext.autodoc', 33 | 'sphinx.ext.doctest', 34 | 'sphinx.ext.intersphinx', 35 | 'sphinx.ext.todo', 36 | 'sphinx.ext.coverage', 37 | 'sphinx.ext.viewcode'] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # The suffix of source filenames. 43 | source_suffix = '.rst' 44 | 45 | # The encoding of source files. 46 | source_encoding = 'utf-8-sig' 47 | 48 | # The master toctree document. 49 | master_doc = 'index' 50 | 51 | # General information about the project. 52 | project = u'japandas' 53 | copyright = u'2015, sinhrks' 54 | 55 | # The version info for the project you're documenting, acts as replacement for 56 | # |version| and |release|, also used in various other places throughout the 57 | # built documents. 58 | # 59 | # The short X.Y version. 60 | version = '0.0.1' 61 | # The full version, including alpha/beta/rc tags. 62 | release = '0.0.1' 63 | 64 | # The language for content autogenerated by Sphinx. Refer to documentation 65 | # for a list of supported languages. 66 | #language = None 67 | 68 | # There are two options for replacing |today|: either, you set today to some 69 | # non-false value, then it is used: 70 | #today = '' 71 | # Else, today_fmt is used as the format for a strftime call. 72 | #today_fmt = '%B %d, %Y' 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | exclude_patterns = [] 77 | 78 | # The reST default role (used for this markup: `text`) to use for all 79 | # documents. 80 | #default_role = None 81 | 82 | # If true, '()' will be appended to :func: etc. cross-reference text. 83 | #add_function_parentheses = True 84 | 85 | # If true, the current module name will be prepended to all description 86 | # unit titles (such as .. function::). 87 | #add_module_names = True 88 | 89 | # If true, sectionauthor and moduleauthor directives will be shown in the 90 | # output. They are ignored by default. 91 | #show_authors = False 92 | 93 | # The name of the Pygments (syntax highlighting) style to use. 94 | pygments_style = 'sphinx' 95 | 96 | # A list of ignored prefixes for module index sorting. 97 | #modindex_common_prefix = [] 98 | 99 | # If true, keep warnings as "system message" paragraphs in the built documents. 100 | #keep_warnings = False 101 | 102 | 103 | # -- Options for HTML output ---------------------------------------------- 104 | 105 | # The theme to use for HTML and HTML Help pages. See the documentation for 106 | # a list of builtin themes. 107 | html_theme = 'default' 108 | 109 | # Theme options are theme-specific and customize the look and feel of a theme 110 | # further. For a list of options available for each theme, see the 111 | # documentation. 112 | #html_theme_options = {} 113 | 114 | # Add any paths that contain custom themes here, relative to this directory. 
115 | #html_theme_path = [] 116 | 117 | # The name for this set of Sphinx documents. If None, it defaults to 118 | # " v documentation". 119 | #html_title = None 120 | 121 | # A shorter title for the navigation bar. Default is the same as html_title. 122 | #html_short_title = None 123 | 124 | # The name of an image file (relative to this directory) to place at the top 125 | # of the sidebar. 126 | #html_logo = None 127 | 128 | # The name of an image file (within the static path) to use as favicon of the 129 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 130 | # pixels large. 131 | #html_favicon = None 132 | 133 | # Add any paths that contain custom static files (such as style sheets) here, 134 | # relative to this directory. They are copied after the builtin static files, 135 | # so a file named "default.css" will overwrite the builtin "default.css". 136 | html_static_path = ['_static'] 137 | 138 | # Add any extra paths that contain custom files (such as robots.txt or 139 | # .htaccess) here, relative to this directory. These files are copied 140 | # directly to the root of the documentation. 141 | #html_extra_path = [] 142 | 143 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 144 | # using the given strftime format. 145 | #html_last_updated_fmt = '%b %d, %Y' 146 | 147 | # If true, SmartyPants will be used to convert quotes and dashes to 148 | # typographically correct entities. 149 | #html_use_smartypants = True 150 | 151 | # Custom sidebar templates, maps document names to template names. 152 | #html_sidebars = {} 153 | 154 | # Additional templates that should be rendered to pages, maps page names to 155 | # template names. 156 | #html_additional_pages = {} 157 | 158 | # If false, no module index is generated. 159 | #html_domain_indices = True 160 | 161 | # If false, no index is generated. 162 | #html_use_index = True 163 | 164 | # If true, the index is split into individual pages for each letter. 165 | #html_split_index = False 166 | 167 | # If true, links to the reST sources are added to the pages. 168 | #html_show_sourcelink = True 169 | 170 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 171 | #html_show_sphinx = True 172 | 173 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 174 | #html_show_copyright = True 175 | 176 | # If true, an OpenSearch description file will be output, and all pages will 177 | # contain a tag referring to it. The value of this option must be the 178 | # base URL from which the finished HTML is served. 179 | #html_use_opensearch = '' 180 | 181 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 182 | #html_file_suffix = None 183 | 184 | # Output file base name for HTML help builder. 185 | htmlhelp_basename = 'japandasdoc' 186 | 187 | 188 | # -- Options for LaTeX output --------------------------------------------- 189 | 190 | latex_elements = { 191 | # The paper size ('letterpaper' or 'a4paper'). 192 | #'papersize': 'letterpaper', 193 | 194 | # The font size ('10pt', '11pt' or '12pt'). 195 | #'pointsize': '10pt', 196 | 197 | # Additional stuff for the LaTeX preamble. 198 | #'preamble': '', 199 | } 200 | 201 | # Grouping the document tree into LaTeX files. List of tuples 202 | # (source start file, target name, title, 203 | # author, documentclass [howto, manual, or own class]). 
204 | latex_documents = [ 205 | ('index', 'japandas.tex', u'japandas Documentation', 206 | u'sinhrks', 'manual'), 207 | ] 208 | 209 | # The name of an image file (relative to this directory) to place at the top of 210 | # the title page. 211 | #latex_logo = None 212 | 213 | # For "manual" documents, if this is true, then toplevel headings are parts, 214 | # not chapters. 215 | #latex_use_parts = False 216 | 217 | # If true, show page references after internal links. 218 | #latex_show_pagerefs = False 219 | 220 | # If true, show URL addresses after external links. 221 | #latex_show_urls = False 222 | 223 | # Documents to append as an appendix to all manuals. 224 | #latex_appendices = [] 225 | 226 | # If false, no module index is generated. 227 | #latex_domain_indices = True 228 | 229 | 230 | # -- Options for manual page output --------------------------------------- 231 | 232 | # One entry per manual page. List of tuples 233 | # (source start file, name, description, authors, manual section). 234 | man_pages = [ 235 | ('index', 'japandas', u'japandas Documentation', 236 | [u'sinhrks'], 1) 237 | ] 238 | 239 | # If true, show URL addresses after external links. 240 | #man_show_urls = False 241 | 242 | 243 | # -- Options for Texinfo output ------------------------------------------- 244 | 245 | # Grouping the document tree into Texinfo files. List of tuples 246 | # (source start file, target name, title, author, 247 | # dir menu entry, description, category) 248 | texinfo_documents = [ 249 | ('index', 'japandas', u'japandas Documentation', 250 | u'sinhrks', 'japandas', 'One line description of project.', 251 | 'Miscellaneous'), 252 | ] 253 | 254 | # Documents to append as an appendix to all manuals. 255 | #texinfo_appendices = [] 256 | 257 | # If false, no module index is generated. 258 | #texinfo_domain_indices = True 259 | 260 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 261 | #texinfo_show_urls = 'footnote' 262 | 263 | # If true, do not generate a @detailmenu in the "Top" node's menu. 264 | #texinfo_no_detailmenu = False 265 | 266 | 267 | # Example configuration for intersphinx: refer to the Python standard library. 268 | intersphinx_mapping = {'http://docs.python.org/': None} 269 | 270 | 271 | # -- Build API doc ---------------------------------------------------------- 272 | 273 | numpydoc_show_class_members = False 274 | 275 | fpath = os.path.dirname(__file__) 276 | gen_path = os.path.join(fpath, 'generated') 277 | app_path = os.path.join(os.path.dirname(os.path.dirname(fpath)), 'japandas') 278 | os.system('sphinx-apidoc -f -o {0} {1}'.format(gen_path, app_path)) 279 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. japandas documentation master file, created by 2 | sphinx-quickstart on Sun Feb 8 19:30:56 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | japandas ドキュメント 7 | ======================= 8 | 9 | 目次: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | jpdatetime 15 | jpstrings 16 | jpdatareader 17 | 18 | 19 | API: 20 | 21 | .. 
toctree:: 22 | :maxdepth: 2 23 | 24 | generated/japandas.core 25 | generated/japandas.io 26 | generated/japandas.tools 27 | generated/japandas.tseries 28 | -------------------------------------------------------------------------------- /doc/source/jpdatareader.rst: -------------------------------------------------------------------------------- 1 | 2 | リモートデータアクセス 3 | ====================== 4 | 5 | ``jpd.DataReader`` を利用すると、国内のデータソースの情報を ``DataFrame`` として取得できます。以下のデータソースをサポートしています。 6 | 7 | - ``estat``: e-Stat API から統計情報を取得。 8 | - ほか、``pd.DataReader`` でサポートしているデータソース 9 | 10 | 利用できるオプションは ``pd.DataReader`` とほぼ同一です。ここでは、差異がある点のみ記載します。 11 | 12 | e-Stat API 13 | ---------- 14 | 15 | 以下のコードに対応するデータを取得できます。 16 | 17 | - 政府統計コード (8桁): `e-Stat API 提供データ `_ から確認できる 8桁のコードです。取得したデータには、各統計に付随する "統計表ID" が含まれます。 18 | - 統計表ID: 実データを含む表のIDです。 19 | 20 | まず、取得したいデータを含む政府統計コードから統計表IDの一覧を取得します。 21 | 22 | .. code-block:: python 23 | 24 | >>> key = "your application id" 25 | >>> dlist = jpd.DataReader("00200521", 'estat', appid=key) 26 | >>> dlist.head() 27 | 統計表ID 政府統計名 作成機関名 提供統計名及び提供分類名 \ 28 | 0 0000030001 国勢調査 総務省 昭和55年国勢調査 第1次基本集計 全国編 29 | 1 0000030002 国勢調査 総務省 昭和55年国勢調査 第1次基本集計 全国編 30 | 2 0000030003 国勢調査 総務省 昭和55年国勢調査 第1次基本集計 全国編 31 | 3 0000030004 国勢調査 総務省 昭和55年国勢調査 第1次基本集計 全国編 32 | 4 0000030005 国勢調査 総務省 昭和55年国勢調査 第1次基本集計 全国編 33 | 34 | 統計表題名及び表番号 提供周期 調査年月 公開日 \ 35 | 0 男女の別(性別)(3),年齢5歳階級(23),人口 全国・市部・郡部・都道府県(47),全域... - 198010 2007-10-05 36 | 1 男女の別(性別)(3),年齢各歳階級(103),人口 全国・市部・郡部・都道府県(47),全... - 198010 2007-10-05 37 | 2 総人口・日本人(2),男女の別(性別)(2),年齢各歳階級(122),出生の月(5),人口 ... - 198010 2007-10-05 38 | 3 総人口・日本人(2),男女の別(性別)(2),年齢各歳階級(86),配偶関係(5),15歳以... - 198010 2007-10-05 39 | 4 男女の別(性別)(3),年齢5歳階級(19),国籍(5),外国人数 全国・市部・郡部・都道府... - 198010 2007-10-05 40 | 41 | 小地域属性フラグ 統計大分野名 統計小分野名 総件数 最終更新日 42 | 0 0 人口・世帯 人口 3651 2008-03-19 43 | 1 0 人口・世帯 人口 16365 2008-03-19 44 | 2 0 人口・世帯 人口 123782 2008-03-19 45 | 3 0 人口・世帯 人口 85848 2008-03-19 46 | 4 0 人口・世帯 人口 13082 2008-03-19 47 | 48 | 結果から取得したい統計表IDを確認し、実データを取得します。複数のデータをまとめて取得したい場合は 49 | そのリストを渡します。 50 | 51 | .. code-block:: python 52 | 53 | >>> df = jpd.DataReader("0000030001", 'estat', appid=key) 54 | >>> df.head() 55 | value 全国都道府県030001 全域・集中の別030002 年齢5歳階級A030002 男女A030001 56 | 時間軸(年次) 57 | 1980-01-01 117060396 全国 全域 総数 男女総数 58 | 1980-01-01 89187409 全国市部 全域 総数 男女総数 59 | 1980-01-01 27872987 全国郡部 全域 総数 男女総数 60 | 1980-01-01 5575989 北海道 全域 総数 男女総数 61 | 1980-01-01 1523907 青森県 全域 総数 男女総数 62 | 63 | >>> df = jpd.DataReader(["0000030001", "0000030002"], 'estat', appid=key) 64 | >>> df.head() 65 | value 全国都道府県030001 全域・集中の別030002 年齢各歳階級B030003 年齢5歳階級A030002 \ 66 | 時間軸(年次) 67 | 1980-01-01 117060396 全国 全域 NaN 総数 68 | 1980-01-01 89187409 全国市部 全域 NaN 総数 69 | 1980-01-01 27872987 全国郡部 全域 NaN 総数 70 | 1980-01-01 5575989 北海道 全域 NaN 総数 71 | 1980-01-01 1523907 青森県 全域 NaN 総数 72 | 73 | 男女A030001 74 | 時間軸(年次) 75 | 1980-01-01 男女総数 76 | 1980-01-01 男女総数 77 | 1980-01-01 男女総数 78 | 1980-01-01 男女総数 79 | 1980-01-01 男女総数 80 | 81 | 82 | e-Statでは、一度のリクエストで10万件のレコードまで取得できます。 83 | 取得するレコード数は ``limit`` キーワードで変更できます。 84 | 85 | .. code-block:: python 86 | 87 | >>> df = jpd.DataReader("0003280394", 'estat', appid=key, limit=100) 88 | 89 | 90 | 取得するレコードの開始位置は ``startPosition`` キーワードで変更できます。 91 | 10万件目以降のレコードを取得する際にはこのキーワードを利用してください。 92 | 93 | .. 
code-block:: python 94 | 95 | >>> df = jpd.DataReader("0003280394", 'estat', appid=key, startPosition=100001) 96 | 97 | オプションの詳細は `e-Stat API 仕様 `_ を参照してください。 98 | -------------------------------------------------------------------------------- /doc/source/jpdatetime.rst: -------------------------------------------------------------------------------- 1 | 2 | 日時処理 3 | ======== 4 | 5 | 日本語日付のパース 6 | ------------------ 7 | 8 | ``jpd.to_datetime`` で 日本語の日付をパースできます。引数が単一の文字列の場合、結果は ``Timestamp`` に、リストや ``np.array`` の場合は ``DatetimeIndex`` になります。この挙動は ``pd.to_datetime`` と同様です。 9 | 10 | .. code-block:: python 11 | 12 | >>> import pandas as pd 13 | >>> import japandas as jpd 14 | 15 | >>> jpd.to_datetime(u'2014年11月30日') 16 | Timestamp('2014-11-30 00:00:00') 17 | 18 | >>> jpd.to_datetime([u'2014年11月30日13時25分', u'2014年11月30日14時38分']) 19 | 20 | [2014-11-30 13:25:00, 2014-11-30 14:38:00] 21 | Length: 2, Freq: None, Timezone: None 22 | 23 | 24 | 同様に、``jpd.date_range``, ``jpd.period_range`` でも 日本語の日付をパースすることができます。それ以外の挙動は ``pd.date_range``, ``pd.period_range`` と同様です。 25 | 26 | .. code-block:: python 27 | 28 | >>> jpd.date_range(start=u'2013年12月01日', end=u'2014年12月01日', freq='D') 29 | 30 | [2013-12-01, ..., 2014-12-01] 31 | Length: 366, Freq: D, Timezone: None 32 | 33 | >>> jpd.period_range(start=u'2013年12月01日', end=u'2014年12月01日', freq='M') 34 | 35 | [2013-12, ..., 2014-12] 36 | Length: 13, Freq: M 37 | 38 | 39 | 日本の祝日カレンダー 40 | ----------------- 41 | 42 | `japandas` では以下 2 種類のカレンダークラスを定義しています。 43 | 44 | - ``japandas.JapaneseHolidayCalendar``: 1970 年から 2030 年までの日本の祝日を定義したカレンダークラスです。 45 | - ``japandas.TSEHolidayCalendar``: 1970 年から 2030 年までの東京証券取引所の休業日 (日本の祝日 + 年末年始 12/31 - 1/3) を定義したカレンダークラスです。 46 | 47 | 定義された祝日の一覧は、それぞれ ``Calendar.holidays()`` メソッドで確認することができます。 48 | 49 | .. code-block:: python 50 | 51 | >>> calendar = jpd.JapaneseHolidayCalendar() 52 | >>> calendar.holidays() 53 | DatetimeIndex(['1970-01-01', '1970-01-15', '1970-02-11', '1970-03-21', 54 | '1970-04-29', '1970-05-03', '1970-05-05', '1970-09-15', 55 | '1970-09-23', '1970-10-10', 56 | ... 57 | '2030-05-06', '2030-07-15', '2030-08-11', '2030-09-16', 58 | '2030-09-23', '2030-10-14', '2030-11-03', '2030-11-04', 59 | '2030-11-23', '2030-12-23'], 60 | dtype='datetime64[ns]', length=969, freq=None) 61 | 62 | >>> tse_calendar = jpd.TSEHolidayCalendar() 63 | >>> tse_calendar.holidays() 64 | DatetimeIndex(['1970-01-01', '1970-01-02', '1970-01-03', '1970-01-15', 65 | '1970-02-11', '1970-03-21', '1970-04-29', '1970-05-03', 66 | '1970-05-05', '1970-09-15', 67 | ... 68 | '2030-07-15', '2030-08-11', '2030-09-16', '2030-09-23', 69 | '2030-10-14', '2030-11-03', '2030-11-04', '2030-11-23', 70 | '2030-12-23', '2030-12-31'], 71 | dtype='datetime64[ns]', length=1144, freq=None) 72 | 73 | 74 | このカレンダーと ``pd.offsets.CDay`` クラスを利用すると、カレンダーの定義に従って営業日の計算を行うことができます。 75 | 76 | .. 
code-block:: python 77 | 78 | >>> cday = pd.offsets.CDay(calendar=calendar) 79 | 80 | >>> import datetime 81 | >>> datetime.datetime(2014, 4, 28) + cday 82 | # 4/29は祝日(昭和の日) 83 | Timestamp('2014-04-30 00:00:00') 84 | 85 | >>> datetime.datetime(2014, 4, 28) - cday 86 | # 4/26は土曜日, 4/27は日曜日 87 | Timestamp('2014-04-25 00:00:00') 88 | 89 | >>> datetime.datetime(2014, 5, 3) + cday 90 | # 5/4は日曜日, 5/5は祝日(こどもの日), 5/6は祝日(みどりの日/振替休日) 91 | Timestamp('2014-05-07 00:00:00') 92 | 93 | >>> datetime.datetime(2014, 5, 3) - cday 94 | # 5/3は土曜日 95 | Timestamp('2014-05-02 00:00:00') 96 | 97 | 98 | また、カレンダーの定義を条件として ``DataFrame`` や ``Series`` からレコードを抽出することができます。以下の例では、それぞれカレンダー上で営業日となっているレコードの抽出 / 休日となっているレコードの抽出を行っています。 99 | 100 | **補足** 対象とするデータは ``DatetimeIndex`` を持っている必要があります。 101 | 102 | .. code-block:: python 103 | 104 | >>> df = pd.DataFrame(np.random.randn(10, 3), 105 | ... index=jpd.date_range(u'2014年5月1日', u'2014年5月10日', freq='D')) 106 | >>> df 107 | 0 1 2 108 | 2014-05-01 0.762453 -1.418762 -0.150073 109 | 2014-05-02 0.966500 -0.473888 0.272871 110 | 2014-05-03 0.473370 -1.282504 0.380449 111 | 2014-05-04 0.215411 0.220587 -1.088699 112 | 2014-05-05 0.286348 -1.069165 -1.471871 113 | 2014-05-06 -0.665438 -0.402046 -1.008051 114 | 2014-05-07 1.173935 2.080087 -2.279285 115 | 2014-05-08 -0.957195 0.746798 0.092214 116 | 2014-05-09 -0.259276 -0.775489 0.572525 117 | 2014-05-10 -0.910188 0.294136 0.020730 118 | 119 | >>> cday = pd.offsets.CDay(calendar=calendar) 120 | >>> indexer = jpd.date_range(u'2014年5月1日', u'2014年5月10日', freq=cday) 121 | 122 | # カレンダー上 営業日のレコードを抽出 123 | >>> df.ix[indexer] 124 | 0 1 2 125 | 2014-05-01 0.762453 -1.418762 -0.150073 126 | 2014-05-02 0.966500 -0.473888 0.272871 127 | 2014-05-07 1.173935 2.080087 -2.279285 128 | 2014-05-08 -0.957195 0.746798 0.092214 129 | 2014-05-09 -0.259276 -0.775489 0.572525 130 | 131 | # カレンダー上 休日のレコードを抽出 132 | >>> df[~df.index.isin(indexer)] 133 | 0 1 2 134 | 2014-05-03 0.473370 -1.282504 0.380449 135 | 2014-05-04 0.215411 0.220587 -1.088699 136 | 2014-05-05 0.286348 -1.069165 -1.471871 137 | 2014-05-06 -0.665438 -0.402046 -1.008051 138 | 2014-05-10 -0.910188 0.294136 0.020730 139 | -------------------------------------------------------------------------------- /doc/source/jpstrings.rst: -------------------------------------------------------------------------------- 1 | 2 | 文字列処理 3 | ========== 4 | 5 | Unicode 正規化 6 | -------------- 7 | 8 | **補足** この機能は ``pandas`` 0.16.1 標準にポーティングされました。API は `unicodedata.normalize `_ と同一です。引数 ``form`` を省略することはできなくなるため注意してください。 9 | 10 | ``Series.str.normalize`` は、標準の ``unicodedata.normalize`` と同じ処理を ``Series`` の値に対して行います。 11 | 12 | .. code-block:: python 13 | 14 | >>> import pandas as pd 15 | >>> s = pd.Series([u'アイウエオ', u'カキクケコ', u'ガギグゲゴ', u'ABCDE']) 16 | >>> s 17 | 0 アイウエオ 18 | 1 カキクケコ 19 | 2 ガギグゲゴ 20 | 3 ABCDE 21 | dtype: object 22 | 23 | >>> s.str.normalize('NFKC') 24 | 0 アイウエオ 25 | 1 カキクケコ 26 | 2 ガギグゲゴ 27 | 3 ABCDE 28 | dtype: object 29 | 30 | 31 | 引数として、``unicodedata.normalize`` と同じフォーマットを渡すことができます。 32 | 33 | - ``NFC``: 正規形 C。 34 | - ``NFKC``: 正規形 KC。 35 | - ``NFD``: 正規形 D。 36 | - ``NFKD``: 正規形 KD。 37 | 38 | .. code-block:: python 39 | 40 | >>> s.str.normalize('NFD') 41 | 0 アイウエオ 42 | 1 カキクケコ 43 | 2 ガギグゲゴ 44 | 3 ABCDE 45 | dtype: object 46 | 47 | 48 | 全角/半角変換 49 | ------------- 50 | 51 | ``Series.str.z2h`` で値を 全角文字から半角文字へ変換、 ``Series.str.h2z`` で値を 半角文字から全角文字へ変換できます。 52 | 53 | .. 
code-block:: python 54 | 55 | >>> s = pd.Series([u'アイウエオ', u'ABC01', u'DE345']) 56 | >>> z = s.str.h2z() 57 | >>> z 58 | 0 アイウエオ 59 | 1 ABC01 60 | 2 DE345 61 | dtype: object 62 | 63 | >>> z.str.z2h() 64 | 0 アイウエオ 65 | 1 ABC01 66 | 2 DE345 67 | dtype: object 68 | 69 | 変換の対象とする文字のグループはキーワードオプションで変更できます。それぞれのキーワードについて対象となる文字列は以下の通りです。デフォルトでは全て ``True`` で、全ての文字が変換されます。変換したくないグループがある場合は 対応するキーワードに ``False`` を指定してください。 70 | 71 | **補足** ``kana`` には日本語の記号 (句読点) も含まれることに注意してください。 72 | 73 | - ``kana``: ``ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノ 74 | ハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロワヲンヴー・「」。、`` 75 | - ``alpha``: ``ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz`` 76 | - ``digit``: ``0123456789`` 77 | - ``symbol``: ``!"#$%&'()*+,"-./:;<=>?@[\]^_`~{|}`` 78 | 79 | .. code-block:: python 80 | 81 | >>> s = pd.Series([u'アイウエオ', u'ABC01', u'DE345']) 82 | 83 | # アルファベットは全角にしない 84 | >>> s.str.h2z(alpha=False) 85 | 0 アイウエオ 86 | 1 ABC01 87 | 2 DE345 88 | dtype: object 89 | 90 | # カナ、アルファベットは全角にしない 91 | >>> s.str.h2z(kana=False, alpha=False, digit=True) 92 | 0 アイウエオ 93 | 1 ABC01 94 | 2 DE345 95 | dtype: object 96 | 97 | # カナ、アルファベット、数値は全角にしない = 記号以外は半角のまま 98 | >>> s.str.h2z(kana=False, alpha=False, digit=False) 99 | 0 アイウエオ 100 | 1 ABC01 101 | 2 DE345 102 | dtype: object 103 | 104 | -------------------------------------------------------------------------------- /japandas/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import japandas.core.strings # noqa 5 | import japandas.io.data # noqa 6 | from japandas.io.data import DataReader # noqa 7 | from japandas.tseries.tools import to_datetime, date_range, period_range # noqa 8 | from japandas.tseries.holiday import JapaneseHolidayCalendar, TSEHolidayCalendar # noqa 9 | import japandas.tools.plotting # noqa 10 | 11 | from japandas.version import version as __version__ # noqa 12 | -------------------------------------------------------------------------------- /japandas/compat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import pandas as pd 5 | from distutils.version import LooseVersion 6 | 7 | 8 | PANDAS_VERSION = LooseVersion(pd.__version__) 9 | -------------------------------------------------------------------------------- /japandas/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/core/__init__.py -------------------------------------------------------------------------------- /japandas/core/strings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | from unicodedata import normalize 7 | 8 | from pandas.compat import PY3, iteritems, u_safe 9 | import pandas.core.strings as strings 10 | 11 | 12 | # soundmarks require special handlings 13 | _HKANA = 'ァアィイゥウェエォオカキクケコサシスセソタチッツテトナニヌネノハヒフヘホマミムメモャヤュユョヨラリルレロワヲン゙ー・「」。、' 14 | _ZALPHA = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 15 | 'abcdefghijklmnopqrstuvwxyz') 16 | _ZSYMBOL = '!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ ' 17 | _ZDIGIT = '0123456789' 18 | 19 | # mapping from full-width to half-width 20 | _KANA_MAPPER = {normalize('NFKC', c): c for c in _HKANA} 21 | _ALPHA_MAPPER = {c: normalize('NFKC', c) for c in _ZALPHA} 22 | _DIGIT_MAPPER = {c: normalize('NFKC', 
c) for c in _ZDIGIT} 23 | _SYMBOL_MAPPER = {c: normalize('NFKC', c) for c in _ZSYMBOL} 24 | 25 | 26 | def _reverse_dict(dict): 27 | return {v: k for k, v in iteritems(dict)} 28 | 29 | 30 | def _ord_dict(dict): 31 | return {ord(k): v for k, v in iteritems(dict)} 32 | 33 | 34 | # for unicode.translate 35 | _Z2H_KANA = _ord_dict(_KANA_MAPPER) 36 | _Z2H_ALPHA = _ord_dict(_ALPHA_MAPPER) 37 | _Z2H_DIGIT = _ord_dict(_DIGIT_MAPPER) 38 | _Z2H_SYMBOL = _ord_dict(_SYMBOL_MAPPER) 39 | _H2Z_KANA = _ord_dict(_reverse_dict(_KANA_MAPPER)) 40 | _H2Z_ALPHA = _ord_dict(_reverse_dict(_ALPHA_MAPPER)) 41 | _H2Z_DIGIT = _ord_dict(_reverse_dict(_DIGIT_MAPPER)) 42 | _H2Z_SYMBOL = _ord_dict(_reverse_dict(_SYMBOL_MAPPER)) 43 | 44 | 45 | def _h2z_sm(text): 46 | return (text.replace("ガ", "ガ").replace("ギ", "ギ").replace("グ", "グ").replace("ゲ", "ゲ"). 47 | replace("ゴ", "ゴ").replace("ザ", "ザ").replace("ジ", "ジ").replace("ズ", "ズ"). 48 | replace("ゼ", "ゼ").replace("ゾ", "ゾ").replace("ダ", "ダ").replace("ヂ", "ヂ"). 49 | replace("ヅ", "ヅ").replace("デ", "デ").replace("ド", "ド").replace("バ", "バ"). 50 | replace("ビ", "ビ").replace("ブ", "ブ").replace("ベ", "ベ").replace("ボ", "ボ"). 51 | replace("パ", "パ").replace("ピ", "ピ").replace("プ", "プ").replace("ペ", "ペ"). 52 | replace("ポ", "ポ").replace("ヴ", "ヴ")) 53 | 54 | 55 | def _z2h_sm(text): 56 | return (text.replace("ガ", "ガ").replace("ギ", "ギ").replace("グ", "グ").replace("ゲ", "ゲ"). 57 | replace("ゴ", "ゴ").replace("ザ", "ザ").replace("ジ", "ジ").replace("ズ", "ズ"). 58 | replace("ゼ", "ゼ").replace("ゾ", "ゾ").replace("ダ", "ダ").replace("ヂ", "ヂ"). 59 | replace("ヅ", "ヅ").replace("デ", "デ").replace("ド", "ド").replace("バ", "バ"). 60 | replace("ビ", "ビ").replace("ブ", "ブ").replace("ベ", "ベ").replace("ボ", "ボ"). 61 | replace("パ", "パ").replace("ピ", "ピ").replace("プ", "プ").replace("ペ", "ペ"). 62 | replace("ポ", "ポ").replace("ヴ", "ヴ")) 63 | 64 | 65 | def str_z2h(self, kana=True, alpha=True, digit=True, symbol=True): 66 | mapper = dict() 67 | if kana: 68 | mapper.update(_Z2H_KANA) 69 | if alpha: 70 | mapper.update(_Z2H_ALPHA) 71 | if digit: 72 | mapper.update(_Z2H_DIGIT) 73 | if symbol: 74 | mapper.update(_Z2H_SYMBOL) 75 | 76 | if kana: 77 | if PY3: 78 | def f(x): 79 | return _z2h_sm(x).translate(mapper) 80 | else: 81 | def f(x): 82 | return _z2h_sm(u_safe(x)).translate(mapper) 83 | else: 84 | if PY3: 85 | def f(x): 86 | return x.translate(mapper) 87 | else: 88 | def f(x): 89 | return u_safe(x).translate(mapper) 90 | 91 | try: 92 | target = self.series 93 | except AttributeError: 94 | target = self._data 95 | return self._wrap_result(strings._na_map(f, target)) 96 | 97 | 98 | def str_h2z(self, kana=True, alpha=True, digit=True, symbol=True): 99 | mapper = dict() 100 | if kana: 101 | mapper.update(_H2Z_KANA) 102 | if alpha: 103 | mapper.update(_H2Z_ALPHA) 104 | if digit: 105 | mapper.update(_H2Z_DIGIT) 106 | if symbol: 107 | mapper.update(_H2Z_SYMBOL) 108 | 109 | if kana: 110 | if PY3: 111 | def f(x): 112 | return _h2z_sm(x).translate(mapper) 113 | else: 114 | def f(x): 115 | return _h2z_sm(u_safe(x)).translate(mapper) 116 | else: 117 | if PY3: 118 | def f(x): 119 | return x.translate(mapper) 120 | else: 121 | def f(x): 122 | return u_safe(x).translate(mapper) 123 | 124 | try: 125 | target = self.series 126 | except AttributeError: 127 | target = self._data 128 | return self._wrap_result(strings._na_map(f, target)) 129 | 130 | 131 | # do not overwrite existing func 132 | if not hasattr(strings.StringMethods, 'z2h'): 133 | strings.StringMethods.z2h = str_z2h 134 | 135 | 136 | if not hasattr(strings.StringMethods, 'h2z'): 137 | 
strings.StringMethods.h2z = str_h2z 138 | -------------------------------------------------------------------------------- /japandas/core/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/core/tests/__init__.py -------------------------------------------------------------------------------- /japandas/core/tests/test_strings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # do not import unicode_literals here to test ASCII in Python 2.7 5 | import unittest 6 | 7 | import pandas as pd 8 | import pandas.util.testing as tm 9 | 10 | 11 | class TestStrings(unittest.TestCase): 12 | 13 | def setUp(self): 14 | self.zhiragana_s = pd.Series([u'ぁあぃいぅうぇえぉお', 15 | u'かがきぎくぐけげこご', 16 | u'さざしじすずせぜそぞ', 17 | u'ただちぢっつづてでとど', 18 | u'なにぬねの', 19 | u'はばぱひびぴふぶぷへべぺほぼぽ', 20 | u'まみむめもゃやゅゆょよ', 21 | u'らりるれろわをんゎゐゑゕゖゔ']) 22 | 23 | self.zkatakana_s = pd.Series([u'ァアィイゥウェエォオ', 24 | u'カガキギクグケゲコゴ', 25 | u'サザシジスズセゼソゾ', 26 | u'タダチヂッツヅテデトド', 27 | u'ナニヌネノ', 28 | u'ハバパヒビピフブプヘベペホボポ', 29 | u'マミムメモャヤュユョヨ', 30 | u'ラリルレロワヲンヮヰヱヵヶヴ', 31 | u'ー・「」。、']) 32 | 33 | self.hkatakana_s = pd.Series([u'ァアィイゥウェエォオ', 34 | u'カガキギクグケゲコゴ', 35 | u'サザシジスズセゼソゾ', 36 | u'タダチヂッツヅテデトド', 37 | u'ナニヌネノ', 38 | u'ハバパヒビピフブプヘベペホボポ', 39 | u'マミムメモャヤュユョヨ', 40 | u'ラリルレロワヲンヮヰヱヵヶヴ', 41 | u'ー・「」。、']) 42 | 43 | self.zalpha_s = pd.Series([u'ABCDEFGH', 44 | u'IJKLMNOP', 45 | u'QRSTUVWXYZ', 46 | u'abcdefgh', 47 | u'ijklmnop', 48 | u'qrstuvwxyz']) 49 | self.halpha_s = pd.Series(['ABCDEFGH', 50 | 'IJKLMNOP', 51 | 'QRSTUVWXYZ', 52 | 'abcdefgh', 53 | 'ijklmnop', 54 | 'qrstuvwxyz']) 55 | 56 | self.zdigit_s = pd.Series([u'01234', u'56789']) 57 | self.hdigit_s = pd.Series(['01234', '56789']) 58 | 59 | self.zsymbol_s = pd.Series([u'!"#$%&', 60 | u''()*+,', 61 | u'-./:;<', 62 | u'=>?@[\', 63 | u']^_`~{', 64 | u'|} ']) 65 | self.hsymbol_s = pd.Series([u'!"#$%&', 66 | u"'()*+,", 67 | u'-./:;<', 68 | u'=>?@[\\', 69 | u']^_`~{', 70 | u'|} ']) 71 | 72 | def test_mapper(self): 73 | import japandas.core.strings as s 74 | self.assertEqual(len(s._KANA_MAPPER), len(s._HKANA)) 75 | self.assertEqual(len(s._ALPHA_MAPPER), len(s._ZALPHA)) 76 | self.assertEqual(len(s._DIGIT_MAPPER), len(s._ZDIGIT)) 77 | self.assertEqual(len(s._SYMBOL_MAPPER), len(s._ZSYMBOL)) 78 | 79 | self.assertEqual(len(s._reverse_dict(s._KANA_MAPPER)), len(s._HKANA)) 80 | self.assertEqual(len(s._reverse_dict(s._ALPHA_MAPPER)), len(s._ZALPHA)) 81 | self.assertEqual(len(s._reverse_dict(s._DIGIT_MAPPER)), len(s._ZDIGIT)) 82 | self.assertEqual(len(s._Z2H_SYMBOL), len(s._H2Z_SYMBOL)) 83 | 84 | self.assertEqual(len(s._Z2H_KANA), len(s._H2Z_KANA)) 85 | self.assertEqual(len(s._Z2H_ALPHA), len(s._H2Z_ALPHA)) 86 | self.assertEqual(len(s._Z2H_DIGIT), len(s._H2Z_DIGIT)) 87 | self.assertEqual(len(s._reverse_dict(s._SYMBOL_MAPPER)), len(s._ZSYMBOL)) 88 | 89 | def test_z2h(self): 90 | s = pd.Series([u'aaa', 'bbb', u'アアア', u'1', u'*']) 91 | result = s.str.z2h() 92 | expected = pd.Series(['aaa', 'bbb', u'アアア', '1', '*']) 93 | tm.assert_series_equal(result, expected) 94 | 95 | # full-width kana to half-width kana 96 | result = self.zkatakana_s.str.z2h(kana=True, alpha=False, digit=False, symbol=False) 97 | tm.assert_series_equal(result, self.hkatakana_s) 98 | result = self.zkatakana_s.str.z2h(kana=False, alpha=True, digit=False, symbol=False) 99 | tm.assert_series_equal(result, self.zkatakana_s) 100 | result = 
self.zkatakana_s.str.z2h(kana=False, alpha=False, digit=True, symbol=False) 101 | tm.assert_series_equal(result, self.zkatakana_s) 102 | result = self.zkatakana_s.str.z2h(kana=False, alpha=False, digit=False, symbol=True) 103 | tm.assert_series_equal(result, self.zkatakana_s) 104 | 105 | # full-width kana to half-width alpha 106 | result = self.zalpha_s.str.z2h(kana=True, alpha=False, digit=False, symbol=False) 107 | tm.assert_series_equal(result, self.zalpha_s) 108 | result = self.zalpha_s.str.z2h(kana=False, alpha=True, digit=False, symbol=False) 109 | tm.assert_series_equal(result, self.halpha_s) 110 | result = self.zalpha_s.str.z2h(kana=False, alpha=False, digit=True, symbol=False) 111 | tm.assert_series_equal(result, self.zalpha_s) 112 | result = self.zalpha_s.str.z2h(kana=False, alpha=False, digit=False, symbol=True) 113 | tm.assert_series_equal(result, self.zalpha_s) 114 | 115 | # full-width kana to half-width digit 116 | result = self.zdigit_s.str.z2h(kana=True, alpha=False, digit=False, symbol=False) 117 | tm.assert_series_equal(result, self.zdigit_s) 118 | result = self.zdigit_s.str.z2h(kana=False, alpha=True, digit=False, symbol=False) 119 | tm.assert_series_equal(result, self.zdigit_s) 120 | result = self.zdigit_s.str.z2h(kana=False, alpha=False, digit=True, symbol=False) 121 | tm.assert_series_equal(result, self.hdigit_s) 122 | result = self.zdigit_s.str.z2h(kana=False, alpha=False, digit=False, symbol=True) 123 | tm.assert_series_equal(result, self.zdigit_s) 124 | 125 | # full-width kana to half-width symbol 126 | result = self.zsymbol_s.str.z2h(kana=True, alpha=False, digit=False, symbol=False) 127 | tm.assert_series_equal(result, self.zsymbol_s) 128 | result = self.zsymbol_s.str.z2h(kana=False, alpha=True, digit=False, symbol=False) 129 | tm.assert_series_equal(result, self.zsymbol_s) 130 | result = self.zsymbol_s.str.z2h(kana=False, alpha=False, digit=True, symbol=False) 131 | tm.assert_series_equal(result, self.zsymbol_s) 132 | result = self.zsymbol_s.str.z2h(kana=False, alpha=False, digit=False, symbol=True) 133 | tm.assert_series_equal(result, self.hsymbol_s) 134 | 135 | # half-width to half-width 136 | result = self.hkatakana_s.str.z2h() 137 | tm.assert_series_equal(result, self.hkatakana_s) 138 | result = self.halpha_s.str.z2h() 139 | tm.assert_series_equal(result, self.halpha_s) 140 | result = self.hdigit_s.str.z2h() 141 | tm.assert_series_equal(result, self.hdigit_s) 142 | result = self.hsymbol_s.str.z2h() 143 | tm.assert_series_equal(result, self.hsymbol_s) 144 | 145 | def test_h2z(self): 146 | s = pd.Series(['aaa', 'bbb', u'アアア', u'1', '*']) 147 | result = s.str.h2z() 148 | expected = pd.Series([u'aaa', u'bbb', u'アアア', u'1', u'*']) 149 | tm.assert_series_equal(result, expected) 150 | 151 | # half-width kana to full-width kana 152 | result = self.hkatakana_s.str.h2z(kana=True, alpha=False, digit=False, symbol=False) 153 | tm.assert_series_equal(result, self.zkatakana_s) 154 | result = self.hkatakana_s.str.h2z(kana=False, alpha=True, digit=False, symbol=False) 155 | tm.assert_series_equal(result, self.hkatakana_s) 156 | result = self.hkatakana_s.str.h2z(kana=False, alpha=False, digit=True, symbol=False) 157 | tm.assert_series_equal(result, self.hkatakana_s) 158 | result = self.hkatakana_s.str.h2z(kana=False, alpha=False, digit=False, symbol=True) 159 | tm.assert_series_equal(result, self.hkatakana_s) 160 | 161 | # half-width kana to full-width alpha 162 | result = self.halpha_s.str.h2z(kana=True, alpha=False, digit=False, symbol=False) 163 | 
tm.assert_series_equal(result, self.halpha_s) 164 | result = self.halpha_s.str.h2z(kana=False, alpha=True, digit=False, symbol=False) 165 | tm.assert_series_equal(result, self.zalpha_s) 166 | result = self.halpha_s.str.h2z(kana=False, alpha=False, digit=True, symbol=False) 167 | tm.assert_series_equal(result, self.halpha_s) 168 | result = self.halpha_s.str.h2z(kana=False, alpha=False, digit=False, symbol=True) 169 | tm.assert_series_equal(result, self.halpha_s) 170 | 171 | # half-width kana to full-width digit 172 | result = self.hdigit_s.str.h2z(kana=True, alpha=False, digit=False, symbol=False) 173 | tm.assert_series_equal(result, self.hdigit_s) 174 | result = self.hdigit_s.str.h2z(kana=False, alpha=True, digit=False, symbol=False) 175 | tm.assert_series_equal(result, self.hdigit_s) 176 | result = self.hdigit_s.str.h2z(kana=False, alpha=False, digit=True, symbol=False) 177 | tm.assert_series_equal(result, self.zdigit_s) 178 | result = self.hdigit_s.str.h2z(kana=False, alpha=False, digit=False, symbol=True) 179 | tm.assert_series_equal(result, self.hdigit_s) 180 | 181 | # half-width kana to full-width symbol 182 | result = self.hsymbol_s.str.h2z(kana=True, alpha=False, digit=False, symbol=False) 183 | tm.assert_series_equal(result, self.hsymbol_s) 184 | result = self.hsymbol_s.str.h2z(kana=False, alpha=True, digit=False, symbol=False) 185 | tm.assert_series_equal(result, self.hsymbol_s) 186 | result = self.hsymbol_s.str.h2z(kana=False, alpha=False, digit=True, symbol=False) 187 | tm.assert_series_equal(result, self.hsymbol_s) 188 | result = self.hsymbol_s.str.h2z(kana=False, alpha=False, digit=False, symbol=True) 189 | tm.assert_series_equal(result, self.zsymbol_s) 190 | 191 | # full-width to full-width 192 | result = self.zkatakana_s.str.h2z() 193 | tm.assert_series_equal(result, self.zkatakana_s) 194 | result = self.zalpha_s.str.h2z() 195 | tm.assert_series_equal(result, self.zalpha_s) 196 | result = self.zdigit_s.str.h2z() 197 | tm.assert_series_equal(result, self.zdigit_s) 198 | result = self.zkatakana_s.str.h2z() 199 | tm.assert_series_equal(result, self.zkatakana_s) 200 | 201 | def test_z2h_obj(self): 202 | s = pd.Series(['aaa', None, u'アアア', u'あああ', u'1', 3]) 203 | result = s.str.z2h() 204 | expected = pd.Series(['aaa', None, u'アアア', u'あああ', '1', None]) 205 | tm.assert_series_equal(result, expected) 206 | 207 | empty_str = pd.Series(dtype=str) 208 | tm.assert_series_equal(empty_str.str.h2z(), empty_str) 209 | 210 | def test_h2z_obj(self): 211 | s = pd.Series(['aaa', None, u'アアア', u'あああ', u'1', 3]) 212 | result = s.str.h2z() 213 | expected = pd.Series([u'aaa', None, u'アアア', u'あああ', u'1', None]) 214 | tm.assert_series_equal(result, expected) 215 | 216 | empty_str = pd.Series(dtype=str) 217 | tm.assert_series_equal(empty_str.str.h2z(), empty_str) 218 | 219 | def test_normalize(self): 220 | s = pd.Series([u'aaa', 'bbb', u'アアア', u'1', u'*']) 221 | result = s.str.normalize('NFKC') 222 | expected = pd.Series(['aaa', 'bbb', u'アアア', '1', '*']) 223 | tm.assert_series_equal(result, expected) 224 | 225 | s = pd.Series([u'aaa', None, 'bbb', u'アアア', u'1', 5, u'*']) 226 | result = s.str.normalize('NFKC') 227 | expected = pd.Series(['aaa', None, 'bbb', u'アアア', '1', None, '*']) 228 | tm.assert_series_equal(result, expected) 229 | 230 | empty_str = pd.Series(dtype=str) 231 | tm.assert_series_equal(empty_str.str.normalize('NFKC'), empty_str) 232 | 233 | def test_normalize_format(self): 234 | import unicodedata 235 | values = [u'アイウエオ', u'カキクケコ', u'ガギグゲゴ', u'ABCDE'] 236 | for format in ['NFD', 'NFC', 
'NFKD', 'NFKC']: 237 | result = pd.Series(values).str.normalize(format).tolist() 238 | expected = [unicodedata.normalize(format, v) for v in values] 239 | self.assertEqual(result, expected) 240 | 241 | 242 | if __name__ == '__main__': 243 | import nose 244 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], 245 | exit=False) 246 | -------------------------------------------------------------------------------- /japandas/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/io/__init__.py -------------------------------------------------------------------------------- /japandas/io/data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | from japandas.io.estat import EStatReader 7 | 8 | from pandas_datareader import data 9 | 10 | 11 | _ohlc_columns_jp = ['始値', '高値', '安値', '終値', '出来高', '調整後終値*'] 12 | _ohlc_columns_en = ['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'] 13 | 14 | 15 | def DataReader(symbols, data_source=None, start=None, end=None, appid=None, **kwargs): 16 | if data_source == 'yahoojp': 17 | msg = "YahooJPReaderは削除されました https://www.yahoo-help.jp/app/answers/detail/p/546/a_id/93575" 18 | raise NotImplementedError(msg) 19 | elif data_source == 'estat': 20 | return EStatReader(symbols=symbols, appid=appid, **kwargs).read() 21 | else: 22 | return data.DataReader(name=symbols, data_source=data_source, 23 | start=start, end=end, **kwargs) 24 | 25 | 26 | DataReader.__doc__ = data.DataReader.__doc__ 27 | -------------------------------------------------------------------------------- /japandas/io/estat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import xml.etree.ElementTree as ET 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from pandas_datareader.base import _BaseReader 12 | from japandas.tseries.tools import to_datetime 13 | 14 | # http://www.e-stat.go.jp/api/e-stat-manual/ 15 | 16 | METADATA_MAPPER = { 17 | # 'TABLE_INF': '統計表ID', 18 | 'STAT_NAME': '政府統計名', 19 | 'GOV_ORG': '作成機関名', 20 | 'STATISTICS_NAME': '提供統計名及び提供分類名', 21 | 'TITLE': '統計表題名及び表番号', 22 | 'CYCLE': '提供周期', 23 | 'SURVEY_DATE': '調査年月', 24 | 'OPEN_DATE': '公開日', 25 | 'SMALL_AREA': '小地域属性フラグ', 26 | 'MAIN_CATEGORY': '統計大分野名', 27 | 'SUB_CATEGORY': '統計小分野名', 28 | 'OVERALL_TOTAL_NUMBER': '総件数', 29 | 'UPDATED_DATE': '最終更新日', 30 | 'id': '統計表ID' 31 | } 32 | 33 | 34 | class EStatReader(_BaseReader): 35 | 36 | def __init__(self, symbols=None, appid=None, 37 | limit=None, startPosition=None, **kwargs): 38 | if isinstance(symbols, pd.DataFrame): 39 | if '統計表ID' in symbols.columns: 40 | symbols = symbols.loc[:, '統計表ID'] 41 | else: 42 | raise ValueError('DataFrame 中に "統計表ID" カラムがありません') 43 | 44 | super(EStatReader, self).__init__(symbols=symbols, **kwargs) 45 | 46 | if appid is None: 47 | raise ValueError('アプリケーションID "appid" を文字列で指定してください') 48 | self.appid = appid 49 | 50 | # e-Stat attrs 51 | self.limit = limit 52 | self.startPosition = startPosition 53 | 54 | @property 55 | def url(self): 56 | return 'http://api.e-stat.go.jp/rest/2.0/app/getStatsData' 57 | 58 | @property 59 | def params(self): 60 | params = {'appId': self.appid, 'lang': 'J'} 61 | 62 | for attr in ['limit', 
'startPosition']: 63 | value = getattr(self, attr, None) 64 | if value is not None: 65 | params[attr] = value 66 | return params 67 | 68 | def read(self): 69 | """ read data """ 70 | if isinstance(self.symbols, pd.compat.string_types): 71 | if len(self.symbols) == 8: 72 | return self.get_estat_list() 73 | 74 | params = self.params 75 | params['statsDataId'] = self.symbols 76 | return self._read_one_data(self.url, params) 77 | 78 | elif pd.api.types.is_list_like(self.symbols): 79 | dfs = [] 80 | for symbol in self.symbols: 81 | params = self.params 82 | params['statsDataId'] = symbol 83 | df = self._read_one_data(self.url, params) 84 | dfs.append(df) 85 | 86 | if len(dfs) == 0: 87 | raise ValueError('取得するIDがありません') 88 | elif len(dfs) == 1: 89 | return dfs[0] 90 | else: 91 | return dfs[0].append(dfs[1:]) 92 | else: 93 | raise ValueError('IDは文字列もしくはそのリストで指定してください') 94 | 95 | def _read_lines(self, out): 96 | root = ET.fromstring(out.getvalue()) 97 | # retrieve class 98 | class_names = {} # mapping from class id to name 99 | class_codes = {} # mapping from class id to codes 100 | for c in root.findall('.//CLASS_OBJ'): 101 | class_id = c.attrib['id'] 102 | class_names[class_id] = c.attrib['name'] 103 | 104 | mapper = {} 105 | for code in c.findall('CLASS'): 106 | mapper[code.attrib['code']] = code.attrib['name'] 107 | class_codes[class_id] = mapper 108 | 109 | # retrieve values 110 | values = [] 111 | for value in root.findall('.//VALUE'): 112 | row = {} 113 | for cat in class_codes: 114 | name = class_names[cat] 115 | code = value.attrib[cat] 116 | row[name] = class_codes[cat][code] 117 | 118 | if value.text in ('-', ): 119 | # avoid to_numeric fails 120 | row['value'] = np.nan 121 | else: 122 | row['value'] = value.text 123 | values.append(row) 124 | 125 | df = pd.DataFrame(values) 126 | df.loc[:, 'value'] = pd.to_numeric(df['value'], errors='ignore') 127 | 128 | if 'time' in class_names: 129 | df = df.set_index(class_names['time']) 130 | df.index = to_datetime(df.index) 131 | return df 132 | 133 | def get_estat_list(self): 134 | url = 'http://api.e-stat.go.jp/rest/2.0/app/getStatsList' 135 | params = {'appId': self.appid, 'lang': 'J', 'statsCode': self.symbols} 136 | 137 | out = self._read_url_as_StringIO(url, params=params) 138 | root = ET.fromstring(out.getvalue()) 139 | 140 | values = [] 141 | columns = [] 142 | for table in root.findall('.//TABLE_INF'): 143 | columns = ['統計表ID'] 144 | row = {'統計表ID': table.get('id')} 145 | for elem in table.iter(): 146 | if elem.tag == 'TABLE_INF': 147 | continue 148 | 149 | if elem.tag in ('UPDATED_DATE', 'OPEN_DATE'): 150 | val = pd.to_datetime(elem.text) 151 | elif elem.tag == 'SURVEY_DATE': 152 | # Almost impossible to parse SURVEY_DATE as Timestamp... 
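# (for example, SURVEY_DATE can hold values such as '200801', a span like
#  '200801-200812', or a plain 0, so the raw text is kept instead of a Timestamp)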
153 | val = elem.text 154 | elif elem.tag == 'OVERALL_TOTAL_NUMVER': 155 | val = pd.to_numeric(elem.text) 156 | else: 157 | val = elem.text 158 | label = METADATA_MAPPER.get(elem.tag, elem.tag) 159 | columns.append(label) 160 | row[label] = val 161 | values.append(row) 162 | 163 | if len(values) == 0: 164 | 165 | try: 166 | # if msg can be extracted from XML, raise it 167 | root = ET.fromstring(out.getvalue()) 168 | msg = root.find('RESULT').find('ERROR_MSG').text 169 | except Exception: 170 | # otherwie, raise all XML content 171 | raise ValueError(out.getvalue()) 172 | raise ValueError(msg.encode('utf-8', 'replace')) 173 | 174 | df = pd.DataFrame(values, columns=columns) 175 | return df 176 | -------------------------------------------------------------------------------- /japandas/io/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/io/tests/__init__.py -------------------------------------------------------------------------------- /japandas/io/tests/test_estat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | import unittest 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import pandas.util.testing as tm 12 | import japandas as jpd 13 | 14 | 15 | class TestEstat(unittest.TestCase): 16 | 17 | def test_data_estat_error(self): 18 | with self.assertRaises(ValueError): 19 | # no app ID 20 | jpd.DataReader('00200521', 'estat', appid=None) 21 | 22 | ESTAT_KEY = os.environ['ESTAT'] 23 | 24 | with self.assertRaises(ValueError): 25 | # blank list 26 | jpd.DataReader([], 'estat', appid=ESTAT_KEY) 27 | 28 | with self.assertRaises(ValueError): 29 | # invalid type 30 | jpd.DataReader(1, 'estat', appid=ESTAT_KEY) 31 | 32 | def test_data_estat_list(self): 33 | 34 | ESTAT_KEY = os.environ['ESTAT'] 35 | df = jpd.DataReader('00200521', 'estat', appid=ESTAT_KEY) 36 | 37 | exp_columns = pd.Index(['統計表ID', '政府統計名', 38 | '作成機関名', '提供統計名及び提供分類名', 39 | '統計表題名及び表番号', '提供周期', '調査年月', 40 | '公開日', '小地域属性フラグ', '統計大分野名', 41 | '統計小分野名', '総件数', '最終更新日'],) 42 | tm.assert_index_equal(df.columns, exp_columns) 43 | 44 | target = df.head(n=3) 45 | df = jpd.DataReader(target, 'estat', appid=ESTAT_KEY) 46 | self.assertIsInstance(df, pd.DataFrame) 47 | 48 | df = jpd.DataReader('00200523', 'estat', appid=ESTAT_KEY) 49 | tm.assert_index_equal(df.columns, exp_columns) 50 | 51 | def test_data_estat_list_all(self): 52 | # 以下 すべての提供データをテスト 53 | # http://www.e-stat.go.jp/api/api-data/ 54 | 55 | targets = ['00200521', '00200522', '00200523', '00200524', '00200531', 56 | '00200532', '00200533', '00200541', '00200543', '00200544', 57 | '00200545', '00200551', '00200552', '00200553', '00200561', 58 | '00200563', '00200564', '00200565', '00200566', '00200571', 59 | '00200572', '00200573', # '00200511', '00200502', (no data found) 60 | '00250011'] 61 | for target in targets: 62 | self._assert_target(target) 63 | 64 | def test_data_estat_list_all2(self): 65 | # Travis CI でのタイムアウトを防ぐため分割 66 | targets = ['00350600', '00350620', '00351000', '00400001', '00400002', 67 | '00400003', '00400004', '00400202', '00450011', '00450012', 68 | '00450021', '00450022', '00450061', '00450071', '00450091', 69 | '00450151', '00500201', '00500209', '00500215', '00500216', 70 | '00500217', '00500225', '00550010', '00550020', '00550030', 71 | '00550040', 
'00550100', '00550200', '00550210', '00551020', 72 | '00551130', '00600330', '00600470', '00600480'] 73 | for target in targets: 74 | self._assert_target(target) 75 | 76 | def _assert_target(self, target): 77 | ESTAT_KEY = os.environ['ESTAT'] 78 | 79 | df = jpd.DataReader(target, 'estat', appid=ESTAT_KEY) 80 | exp_columns = pd.Index(['統計表ID', '政府統計名', 81 | '作成機関名', '提供統計名及び提供分類名', 82 | '統計表題名及び表番号', '提供周期', '調査年月', 83 | '公開日', '小地域属性フラグ', '統計大分野名', 84 | '統計小分野名', '総件数', '最終更新日'],) 85 | tm.assert_index_equal(df.columns, exp_columns) 86 | 87 | target = df.head(n=3) 88 | df = jpd.DataReader(target, 'estat', appid=ESTAT_KEY) 89 | self.assertIsInstance(df, pd.DataFrame) 90 | 91 | def test_data_estat_data(self): 92 | 93 | ESTAT_KEY = os.environ['ESTAT'] 94 | df = jpd.DataReader('0000030001', 'estat', appid=ESTAT_KEY) 95 | 96 | exp = pd.DataFrame({'value': [117060396, 89187409, 27872987, 5575989, 1523907], 97 | '全国都道府県030001': ['全国', '全国市部', '全国郡部', '北海道', '青森県'], 98 | '全域・集中の別030002': ['全域'] * 5, 99 | '年齢5歳階級A030002': ['総数'] * 5, 100 | '男女A030001': ['男女総数'] * 5}, 101 | index=pd.DatetimeIndex(['1980-01-01'] * 5, name='時間軸(年次)')) 102 | tm.assert_frame_equal(df.head(), exp) 103 | 104 | df = jpd.DataReader(['0000030001', '0000030002'], 'estat', appid=ESTAT_KEY) 105 | self.assertIsInstance(df, pd.DataFrame) 106 | 107 | df = jpd.DataReader("0002180001", 'estat', appid=ESTAT_KEY) 108 | exp = pd.DataFrame({'value': [445007, 194243, 199623, 203464, 190711], 109 | '全国・都道府県・大都市': ['全国'] * 5, 110 | '性別': ['総数'] * 5, 111 | '表章項目': ['都道府県(自都市)内移動者数'] * 5}, 112 | index=pd.DatetimeIndex(['2009-03-01', '2009-02-01', '2009-01-01', 113 | '2008-12-01', '2008-11-01'], name='時間軸(月次)')) 114 | tm.assert_frame_equal(df.head(), exp) 115 | 116 | def test_data_estat_data_numeric(self): 117 | ESTAT_KEY = os.environ['ESTAT'] 118 | df = jpd.DataReader('0003109612', 'estat', appid=ESTAT_KEY) 119 | self.assertEqual(df['value'].dtype, np.float64) 120 | 121 | def test_data_limit(self): 122 | ESTAT_KEY = os.environ['ESTAT'] 123 | df = jpd.DataReader('0003280394', 'estat', appid=ESTAT_KEY) 124 | assert len(df) == 100000 125 | self.assertEqual(df['value'].dtype, np.float64) 126 | 127 | df = jpd.DataReader('0003280394', 'estat', appid=ESTAT_KEY, 128 | limit=20) 129 | assert len(df) == 20 130 | self.assertEqual(df['value'].dtype, np.float64) 131 | 132 | def test_data_position(self): 133 | ESTAT_KEY = os.environ['ESTAT'] 134 | df = jpd.DataReader('0003280394', 'estat', appid=ESTAT_KEY, limit=100) 135 | assert len(df) == 100 136 | self.assertEqual(df['value'].dtype, np.float64) 137 | 138 | df2 = jpd.DataReader('0003280394', 'estat', appid=ESTAT_KEY, 139 | startPosition=11, limit=90) 140 | tm.assert_frame_equal(df.iloc[10:], df2) 141 | self.assertEqual(df2['value'].dtype, np.float64) 142 | 143 | 144 | if __name__ == '__main__': 145 | import nose 146 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], 147 | exit=False) 148 | -------------------------------------------------------------------------------- /japandas/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/tools/__init__.py -------------------------------------------------------------------------------- /japandas/tools/plotting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | 
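# This module defines OhlcPlot, a candlestick-style variant of pandas' LinePlot,
# and registers it under kind='ohlc' (see the block at the bottom of this file),
# so that Series/DataFrame .plot(kind='ohlc') works once japandas is imported.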
import pandas as pd 7 | try: 8 | import pandas.plotting._core as plotting 9 | except ImportError: 10 | import pandas.tools.plotting as plotting 11 | 12 | from japandas.io.data import _ohlc_columns_jp, _ohlc_columns_en 13 | 14 | 15 | class OhlcPlot(plotting.LinePlot): 16 | ohlc_cols = pd.Index(['open', 'high', 'low', 'close']) 17 | reader_cols_en = pd.Index(_ohlc_columns_en) 18 | reader_cols_jp = pd.Index(_ohlc_columns_jp) 19 | 20 | def __init__(self, data, **kwargs): 21 | data = data.copy() 22 | self.freq = kwargs.pop('freq', 'B') 23 | 24 | if isinstance(data, pd.Series): 25 | data = data.resample(self.freq).ohlc() 26 | assert isinstance(data, pd.DataFrame) 27 | assert isinstance(data.index, pd.DatetimeIndex) 28 | 29 | if data.columns.equals(self.ohlc_cols): 30 | data.columns = [c.title() for c in data.columns] 31 | elif data.columns.equals(self.reader_cols_jp): 32 | data.columns = self.reader_cols_en 33 | elif data.columns.equals(self.reader_cols_en): 34 | pass 35 | else: 36 | 37 | raise ValueError('data is not ohlc-like:') 38 | data = data[['Open', 'Close', 'High', 'Low']] 39 | plotting.LinePlot.__init__(self, data, **kwargs) 40 | 41 | def _get_plot_function(self): 42 | try: 43 | from mpl_finance import candlestick_ohlc 44 | except ImportError as e: 45 | try: 46 | from matplotlib.finance import candlestick_ohlc 47 | except ImportError: 48 | raise ImportError(e) 49 | 50 | def _plot(data, ax, **kwds): 51 | candles = candlestick_ohlc(ax, data.values, **kwds) 52 | return candles 53 | 54 | return _plot 55 | 56 | def _make_plot(self): 57 | try: 58 | from pandas.plotting._timeseries import (_decorate_axes, 59 | format_dateaxis) 60 | except ImportError: 61 | from pandas.tseries.plotting import _decorate_axes, format_dateaxis 62 | plotf = self._get_plot_function() 63 | ax = self._get_ax(0) 64 | 65 | data = self.data 66 | data.index.name = 'Date' 67 | data = data.to_period(freq=self.freq) 68 | index = data.index 69 | data = data.reset_index(level=0) 70 | 71 | if self._is_ts_plot(): 72 | data['Date'] = data['Date'].apply(lambda x: x.ordinal) 73 | _decorate_axes(ax, self.freq, self.kwds) 74 | candles = plotf(data, ax, **self.kwds) 75 | format_dateaxis(ax, self.freq, index) 76 | else: 77 | from matplotlib.dates import date2num, AutoDateFormatter, AutoDateLocator 78 | 79 | data['Date'] = data['Date'].apply(lambda x: date2num(x.to_timestamp())) 80 | candles = plotf(data, ax, **self.kwds) 81 | 82 | locator = AutoDateLocator() 83 | ax.xaxis.set_major_locator(locator) 84 | ax.xaxis.set_major_formatter(AutoDateFormatter(locator)) 85 | 86 | return candles 87 | 88 | 89 | if 'ohlc' not in plotting._plot_klass: 90 | plotting._all_kinds.append('ohlc') 91 | plotting._common_kinds.append('ohlc') 92 | plotting._plot_klass['ohlc'] = OhlcPlot 93 | -------------------------------------------------------------------------------- /japandas/tools/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/tools/tests/__init__.py -------------------------------------------------------------------------------- /japandas/tools/tests/test_plotting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | from pandas.tests.plotting.common import TestPlotBase, _check_plot_works 10 | 11 | 12 | class 
TestTools(TestPlotBase): 13 | 14 | def test_to_ohlc(self): 15 | n = 50 16 | idx = pd.date_range(start='2014-10-01 09:00', freq='H', periods=n) 17 | s = pd.Series(np.random.randn(n), index=idx) 18 | _check_plot_works(s.plot, kind='ohlc') 19 | _check_plot_works(s.plot, kind='ohlc', x_compat=True) 20 | 21 | ohlc = s.resample('B').ohlc() 22 | _check_plot_works(ohlc.plot, kind='ohlc') 23 | _check_plot_works(ohlc.plot, kind='ohlc', x_compat=True) 24 | 25 | 26 | if __name__ == '__main__': 27 | import nose 28 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], 29 | exit=False) 30 | -------------------------------------------------------------------------------- /japandas/tseries/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/tseries/__init__.py -------------------------------------------------------------------------------- /japandas/tseries/data/holidays.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/tseries/data/holidays.pkl -------------------------------------------------------------------------------- /japandas/tseries/data/tseholidays.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/tseries/data/tseholidays.pkl -------------------------------------------------------------------------------- /japandas/tseries/holiday.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import datetime 7 | import os 8 | 9 | import pandas.compat as compat 10 | import pandas.tseries.holiday as holiday 11 | 12 | current_dir = os.path.dirname(__file__) 13 | data_path = os.path.join(current_dir, 'data', 'holidays.pkl') 14 | tse_data_path = os.path.join(current_dir, 'data', 'tseholidays.pkl') 15 | 16 | 17 | def _read_rules(path): 18 | if os.path.exists(path): 19 | with open(path, mode='rb') as f: 20 | rules = compat.cPickle.load(f) 21 | elif __name__ != '__main__': 22 | raise ImportError("Unable to load '{0}'".format(path)) 23 | else: 24 | rules = None 25 | return rules 26 | 27 | 28 | rules = _read_rules(data_path) 29 | tse_rules = _read_rules(tse_data_path) 30 | 31 | 32 | class JapaneseHolidayCalendar(holiday.AbstractHolidayCalendar): 33 | rules = rules 34 | 35 | 36 | class TSEHolidayCalendar(holiday.AbstractHolidayCalendar): 37 | rules = tse_rules 38 | 39 | 40 | # register to pandas factory 41 | holiday.register(JapaneseHolidayCalendar) 42 | holiday.register(TSEHolidayCalendar) 43 | 44 | 45 | if __name__ == '__main__': 46 | 47 | # Procedure 48 | # cd japandas/tseries 49 | # Open https://github.com/holiday-jp/holiday_jp 50 | # Download holidays.yaml to data directory 51 | # python holiday.py 52 | 53 | import yaml 54 | 55 | def to_pickle(dates, path): 56 | rules = [] 57 | keys = sorted(compat.iterkeys(dates)) 58 | for dt in keys: 59 | name = dates[dt] 60 | h = holiday.Holiday( 61 | name, dt.year, month=dt.month, day=dt.day) 62 | rules.append(h) 63 | 64 | with open(path, mode='wb') as w: 65 | compat.cPickle.dump(rules, w, protocol=2) 66 | print('pickled {0} data'.format(len(dates))) 67 | 68 | with open(os.path.join('data', 'holidays.yml'), 
mode='rb') as f: 69 | data = yaml.load(f) 70 | # JapaneseHolidayCalendar 71 | to_pickle(data, data_path) 72 | 73 | tse_data = data.copy() 74 | for y in range(1970, 2031): 75 | for m, d in [(1, 1), (1, 2), (1, 3), (12, 31)]: 76 | dt = datetime.date(y, m, d) 77 | if dt not in tse_data: 78 | tse_data[dt] = {'name': '年末年始休業日', 'date': dt} 79 | 80 | # TSEHolidayCalendar 81 | to_pickle(tse_data, tse_data_path) 82 | -------------------------------------------------------------------------------- /japandas/tseries/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/tseries/tests/__init__.py -------------------------------------------------------------------------------- /japandas/tseries/tests/test_holiday.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import datetime 7 | import unittest 8 | 9 | import pandas as pd 10 | 11 | import japandas as jpd 12 | 13 | 14 | class TestCalendar(unittest.TestCase): 15 | 16 | def setUp(self): 17 | self.expected = [datetime.datetime(2014, 1, 1, 0, 0), 18 | datetime.datetime(2014, 1, 13, 0, 0), 19 | datetime.datetime(2014, 2, 11, 0, 0), 20 | datetime.datetime(2014, 3, 21, 0, 0), 21 | datetime.datetime(2014, 4, 29, 0, 0), 22 | datetime.datetime(2014, 5, 3, 0, 0), 23 | datetime.datetime(2014, 5, 4, 0, 0), 24 | datetime.datetime(2014, 5, 5, 0, 0), 25 | datetime.datetime(2014, 5, 6, 0, 0), 26 | datetime.datetime(2014, 7, 21, 0, 0), 27 | datetime.datetime(2014, 9, 15, 0, 0), 28 | datetime.datetime(2014, 9, 23, 0, 0), 29 | datetime.datetime(2014, 10, 13, 0, 0), 30 | datetime.datetime(2014, 11, 3, 0, 0), 31 | datetime.datetime(2014, 11, 23, 0, 0), 32 | datetime.datetime(2014, 11, 24, 0, 0), 33 | datetime.datetime(2014, 12, 23, 0, 0)] 34 | 35 | self.start_date = datetime.datetime(2014, 1, 1) 36 | self.end_date = datetime.datetime(2014, 12, 31) 37 | 38 | def test_calendar(self): 39 | 40 | calendar = jpd.JapaneseHolidayCalendar() 41 | holidays_0 = calendar.holidays(self.start_date, 42 | self.end_date) 43 | 44 | holidays_1 = calendar.holidays(self.start_date.strftime('%Y-%m-%d'), 45 | self.end_date.strftime('%Y-%m-%d')) 46 | holidays_2 = calendar.holidays(pd.Timestamp(self.start_date), 47 | pd.Timestamp(self.end_date)) 48 | 49 | self.assertEqual(holidays_0.to_pydatetime().tolist(), self.expected) 50 | self.assertEqual(holidays_1.to_pydatetime().tolist(), self.expected) 51 | self.assertEqual(holidays_2.to_pydatetime().tolist(), self.expected) 52 | 53 | def test_cday(self): 54 | calendar = jpd.JapaneseHolidayCalendar() 55 | cday = pd.offsets.CDay(calendar=calendar) 56 | 57 | dt = datetime.datetime(2014, 1, 12) 58 | self.assertEqual(dt - cday, datetime.datetime(2014, 1, 10)) 59 | self.assertEqual(dt + cday, datetime.datetime(2014, 1, 14)) 60 | 61 | dt = datetime.datetime(2014, 1, 10) 62 | self.assertEqual(dt - cday, datetime.datetime(2014, 1, 9)) 63 | self.assertEqual(dt + cday, datetime.datetime(2014, 1, 14)) 64 | 65 | dt = datetime.datetime(2014, 4, 28) 66 | self.assertEqual(dt - cday, datetime.datetime(2014, 4, 25)) 67 | self.assertEqual(dt + cday, datetime.datetime(2014, 4, 30)) 68 | 69 | dt = datetime.datetime(2014, 5, 3) 70 | self.assertEqual(dt - cday, datetime.datetime(2014, 5, 2)) 71 | self.assertEqual(dt + cday, datetime.datetime(2014, 5, 7)) 72 | 73 | dt = datetime.datetime(2014, 5, 
6) 74 | self.assertEqual(dt - cday, datetime.datetime(2014, 5, 2)) 75 | self.assertEqual(dt + cday, datetime.datetime(2014, 5, 7)) 76 | 77 | def test_factory(self): 78 | calendar = pd.tseries.holiday.get_calendar('JapaneseHolidayCalendar') 79 | self.assertTrue(isinstance(calendar, jpd.JapaneseHolidayCalendar)) 80 | 81 | calendar = pd.tseries.holiday.get_calendar('TSEHolidayCalendar') 82 | self.assertTrue(isinstance(calendar, jpd.TSEHolidayCalendar)) 83 | 84 | def test_holiday_attributes(self): 85 | calendar = jpd.JapaneseHolidayCalendar() 86 | self.assertEqual(calendar.rules[0].name, '元日') 87 | self.assertEqual(calendar.rules[0].year, 1970) 88 | self.assertEqual(calendar.rules[0].month, 1) 89 | self.assertEqual(calendar.rules[0].day, 1) 90 | 91 | def test_jpholiday_holidays(self): 92 | calendar = jpd.JapaneseHolidayCalendar() 93 | holidays = calendar.holidays() 94 | for y in range(1970, 2030): 95 | for m, d in [(1, 1)]: 96 | dt = datetime.date(y, m, d) 97 | self.assertTrue(dt in holidays) 98 | 99 | for e in self.expected: 100 | self.assertTrue(dt in holidays) 101 | 102 | def test_tseholiday_holidays(self): 103 | calendar = jpd.TSEHolidayCalendar() 104 | holidays = calendar.holidays() 105 | for y in range(1970, 2031): 106 | for m, d in [(1, 1), (1, 2), (1, 3), (12, 31)]: 107 | dt = datetime.date(y, m, d) 108 | self.assertTrue(dt in holidays) 109 | 110 | # test initial / final date explicitly 111 | self.assertTrue(datetime.date(1970, 1, 1) in holidays) 112 | self.assertTrue(datetime.date(2030, 12, 31) in holidays) 113 | for e in self.expected: 114 | self.assertTrue(dt in holidays) 115 | 116 | def test_holiday_bug(self): 117 | # GH 42 118 | 119 | for calendar in [jpd.TSEHolidayCalendar(), 120 | jpd.JapaneseHolidayCalendar()]: 121 | holidays = calendar.holidays() 122 | 123 | self.assertFalse(datetime.datetime(1993, 9, 5) in holidays) 124 | self.assertTrue(datetime.datetime(1993, 9, 15) in holidays) 125 | 126 | self.assertFalse(datetime.datetime(2020, 8, 12) in holidays) 127 | # http://www8.cao.go.jp/chosei/shukujitsu/gaiyou.html#tokurei 128 | self.assertFalse(datetime.datetime(2020, 8, 11) in holidays) 129 | 130 | def test_heisei_emperor_abdication_holiday(self): 131 | 132 | for calendar in [jpd.TSEHolidayCalendar(), 133 | jpd.JapaneseHolidayCalendar()]: 134 | holidays = calendar.holidays() 135 | 136 | self.assertTrue(datetime.datetime(2018, 12, 23) in holidays) 137 | self.assertFalse(datetime.datetime(2019, 12, 23) in holidays) 138 | 139 | self.assertFalse(datetime.datetime(2019, 2, 23) in holidays) 140 | self.assertTrue(datetime.datetime(2020, 2, 23) in holidays) 141 | 142 | def test_tokurei(self): 143 | # http://www8.cao.go.jp/chosei/shukujitsu/gaiyou.html#tokurei 144 | 145 | for calendar in [jpd.TSEHolidayCalendar(), 146 | jpd.JapaneseHolidayCalendar()]: 147 | holidays = calendar.holidays() 148 | 149 | # 海の日 150 | self.assertTrue(datetime.datetime(2020, 7, 23) in holidays) 151 | self.assertFalse(datetime.datetime(2020, 7, 20) in holidays) 152 | self.assertTrue(datetime.datetime(2021, 7, 19) in holidays) 153 | 154 | # 山の日 155 | self.assertTrue(datetime.datetime(2020, 8, 10) in holidays) 156 | self.assertFalse(datetime.datetime(2020, 8, 11) in holidays) 157 | self.assertTrue(datetime.datetime(2021, 8, 11) in holidays) 158 | 159 | # スポーツの日 160 | self.assertTrue(datetime.datetime(2020, 7, 24) in holidays) 161 | self.assertFalse(datetime.datetime(2020, 10, 12) in holidays) 162 | self.assertTrue(datetime.datetime(2021, 10, 11) in holidays) 163 | 164 | def test_new_era(self): 165 | 166 | for 
calendar in [jpd.TSEHolidayCalendar(), 167 | jpd.JapaneseHolidayCalendar()]: 168 | holidays = calendar.holidays() 169 | 170 | self.assertFalse(datetime.datetime(2019, 4, 26) in holidays) 171 | self.assertFalse(datetime.datetime(2019, 4, 27) in holidays) 172 | self.assertFalse(datetime.datetime(2019, 4, 28) in holidays) 173 | self.assertTrue(datetime.datetime(2019, 4, 29) in holidays) 174 | self.assertTrue(datetime.datetime(2019, 4, 30) in holidays) 175 | self.assertTrue(datetime.datetime(2019, 5, 1) in holidays) 176 | self.assertTrue(datetime.datetime(2019, 5, 2) in holidays) 177 | self.assertTrue(datetime.datetime(2019, 5, 3) in holidays) 178 | self.assertTrue(datetime.datetime(2019, 5, 4) in holidays) 179 | self.assertTrue(datetime.datetime(2019, 5, 5) in holidays) 180 | self.assertTrue(datetime.datetime(2019, 5, 6) in holidays) 181 | self.assertFalse(datetime.datetime(2019, 5, 7) in holidays) 182 | self.assertFalse(datetime.datetime(2019, 12, 23) in holidays) 183 | 184 | 185 | if __name__ == '__main__': 186 | import nose 187 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], 188 | exit=False) 189 | -------------------------------------------------------------------------------- /japandas/tseries/tests/test_tools.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import unittest 7 | 8 | import pandas as pd 9 | import pandas.compat as compat 10 | import pandas.util.testing as tm 11 | 12 | import japandas as jpd 13 | 14 | 15 | class TestTools(unittest.TestCase): 16 | 17 | def test_to_datetime(self): 18 | cases = {'2011年10月30日': ('2011-10-30', '%Y-%m-%d'), 19 | '2012年12月': ('2012-12', '%Y-%m'), 20 | '12月3日': ('12-03', '%m-%d'), 21 | '2013年9月4日10時15分': ('2013-09-04 10:15', '%Y-%m-%d %H:%M'), 22 | '10月5日13時25分': ('10-05 13:25', '%m-%d %H:%M'), 23 | '2014年3月8日20時45分8秒': ('2014-03-08 20:45:08', '%Y-%m-%d %H:%M:%S'), 24 | '3月8日20時45分8秒': ('03-08 20:45:08', '%m-%d %H:%M:%S'), 25 | '11年10月30日': ('11-10-30', '%y-%m-%d'), 26 | '09年12月': ('09-12', '%y-%m'), 27 | '13年9月4日10時15分': ('13-09-04 10:15', '%y-%m-%d %H:%M'), 28 | '14年3月8日20時45分8秒': ('14-03-08 20:45:08', '%y-%m-%d %H:%M:%S'), 29 | '14年': ('2014-01-01', '%Y-%m-%d'), 30 | '2014年': ('2014-01-01', '%Y-%m-%d') 31 | } 32 | 33 | for k, (s, f) in compat.iteritems(cases): 34 | result = jpd.to_datetime(k) 35 | expected = pd.to_datetime(s, format=f) 36 | self.assertEqual(result, expected) 37 | 38 | result = jpd.to_datetime([k]) 39 | expected = pd.to_datetime([s], format=f) 40 | tm.assert_index_equal(result, expected) 41 | 42 | result = jpd.to_datetime([k], box=False) 43 | expected = pd.to_datetime([s], format=f, box=False) 44 | tm.assert_numpy_array_equal(result, expected) 45 | 46 | def test_date_range(self): 47 | result = jpd.date_range(start='2013年11月15日', end='2014年12月18日', freq='D') 48 | expected = pd.date_range(start='2013-11-15', end='2014-12-18', freq='D') 49 | tm.assert_index_equal(result, expected) 50 | 51 | result = jpd.date_range(start='2014年1月15日09時25分', end='2014年1月18日10時45分', freq='M') 52 | expected = pd.date_range(start='2014-01-15 09:25', end='2014-01-18 10:45', freq='M') 53 | tm.assert_index_equal(result, expected) 54 | 55 | def test_period_range(self): 56 | result = jpd.period_range(start='2013年11月15日', end='2014年12月18日', freq='D') 57 | expected = pd.period_range(start='2013-11-15', end='2014-12-18', freq='D') 58 | tm.assert_index_equal(result, expected) 59 | 60 | result = 
jpd.period_range(start='2014年1月15日09時25分', end='2014年1月18日10時45分', freq='M') 61 | expected = pd.period_range(start='2014-01-15 09:25', end='2014-01-18 10:45', freq='M') 62 | tm.assert_index_equal(result, expected) 63 | 64 | 65 | if __name__ == '__main__': 66 | import nose 67 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], 68 | exit=False) 69 | -------------------------------------------------------------------------------- /japandas/tseries/tools.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import numpy as np 7 | import pandas as pd 8 | import pandas.compat as compat 9 | 10 | 11 | _formats = ['%Y年', '%Y年%m月%d日', '%Y年%m月', 12 | '%Y年%m月%d日%H時%M分', '%Y年%m月%d日%H時%M分%S秒', 13 | 14 | '%y年', '%y年%m月%d日', '%y年%m月', 15 | '%y年%m月%d日%H時%M分', '%y年%m月%d日%H時%M分%S秒', 16 | 17 | '%m月%d日', '%m月%d日%H時%M分', '%m月%d日%H時%M分%S秒'] 18 | 19 | 20 | def to_datetime(arg, box=True, format=None, **kwargs): 21 | 22 | try: 23 | result = pd.to_datetime(arg, box=box, format=format, **kwargs) 24 | 25 | if format is not None: 26 | # if format is specified, return pd.to_datetime as it is 27 | return result 28 | 29 | if result is None: 30 | return result 31 | elif isinstance(result, (pd.Timestamp, pd.DatetimeIndex)): 32 | return result 33 | except ValueError: 34 | # as of pandas 0.17, to_datetime raises when parsing fails 35 | result = arg 36 | 37 | def _convert_listlike(arg, box): 38 | for format in _formats: 39 | try: 40 | return pd.to_datetime(arg, box=box, format=format, **kwargs) 41 | except ValueError: 42 | pass 43 | return arg 44 | 45 | if isinstance(result, compat.string_types): 46 | arg = np.array([arg], dtype='O') 47 | result = _convert_listlike(arg, box) 48 | return result[0] 49 | 50 | if isinstance(result, pd.Series): 51 | values = _convert_listlike(arg.values, False) 52 | return pd.Series(values, index=arg.index, name=arg.name) 53 | elif pd.api.types.is_list_like(result): 54 | return _convert_listlike(result, box) 55 | return result 56 | 57 | 58 | def date_range(start=None, end=None, **kwargs): 59 | start = to_datetime(start) 60 | end = to_datetime(end) 61 | return pd.date_range(start=start, end=end, **kwargs) 62 | 63 | 64 | def period_range(start=None, end=None, **kwargs): 65 | start = to_datetime(start) 66 | end = to_datetime(end) 67 | return pd.period_range(start=start, end=end, **kwargs) 68 | 69 | 70 | to_datetime.__doc__ = pd.to_datetime.__doc__ 71 | date_range.__doc__ = pd.date_range.__doc__ 72 | period_range.__doc__ = pd.period_range.__doc__ 73 | 74 | 75 | """ 76 | try: 77 | import pandas.tseries.timedeltas as timedeltas 78 | abbrevs = [('d' ,'days|d|day|日'), 79 | ('h' ,'hours|h|hour|時間'), 80 | ('m' ,'minutes|min|minute|m|分'), 81 | ('s' ,'seconds|sec|second|s|秒'), 82 | ('ms','milliseconds|milli|millis|millisecond|ms'), 83 | ('us','microseconds|micro|micros|microsecond|us'), 84 | ('ns','nanoseconds|nano|nanos|nanosecond|ns')] 85 | timedeltas.abbrevs = abbrevs 86 | except Exception: 87 | pass 88 | """ 89 | -------------------------------------------------------------------------------- /japandas/version.py: -------------------------------------------------------------------------------- 1 | version = '0.5.1' 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas >= 0.20.0 2 | pandas-datareader >= 0.7.0 3 | 
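# matplotlib and mpl_finance (listed in requirements_test.txt) are additionally
# needed for the candlestick plotting support in japandas.tools.plotting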
-------------------------------------------------------------------------------- /requirements_test.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | html5lib 3 | beautifulsoup4 4 | pandas >= 0.20.0 5 | pandas-datareader >= 0.7.0 6 | IPython>=2.3 7 | matplotlib>=1.4.0 8 | mpl_finance 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/env python 3 | 4 | import codecs 5 | import os 6 | from setuptools import setup, find_packages 7 | 8 | PACKAGE = 'japandas' 9 | README = 'README.rst' 10 | REQUIREMENTS = 'requirements.txt' 11 | 12 | VERSION = '0.6.0.dev0' 13 | 14 | def read(fname): 15 | # file must be read as utf-8 in py3 to avoid to be bytes 16 | return codecs.open(os.path.join(os.path.dirname(__file__), fname), encoding='utf-8').read() 17 | 18 | def write_version_py(filename=None): 19 | cnt = """\ 20 | version = '%s' 21 | """ 22 | a = open(filename, 'w') 23 | try: 24 | a.write(cnt % VERSION) 25 | finally: 26 | a.close() 27 | 28 | version_file = os.path.join(os.path.dirname(__file__), PACKAGE, 'version.py') 29 | write_version_py(filename=version_file) 30 | 31 | setup(name=PACKAGE, 32 | version=VERSION, 33 | description='pandas japanese extension', 34 | long_description=read(README), 35 | author='sinhrks', 36 | author_email='sinhrks@gmail.com', 37 | url='http://japandas.readthedocs.org/en/stable', 38 | license = 'BSD', 39 | packages=find_packages(), 40 | package_data = {'japandas.tseries': ['data/*.pkl']}, 41 | install_requires=list(read(REQUIREMENTS).splitlines()) 42 | ) 43 | --------------------------------------------------------------------------------
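A minimal end-to-end sketch of the pieces above (illustrative only — it assumes the package has been installed into the current environment, e.g. with ``pip install -e .``, so that ``setup.py`` has rewritten ``japandas/version.py`` and ``japandas/__init__.py`` re-exports it):

.. code-block:: python

    >>> import japandas as jpd
    >>> jpd.__version__      # written by setup.py's write_version_py(), e.g. '0.6.0.dev0'

    # importing japandas also registers the pandas extensions shown earlier
    >>> import pandas as pd
    >>> pd.Series([u'ｱｲｳｴｵ']).str.h2z()
    0    アイウエオ
    dtype: object

    >>> jpd.to_datetime('2014年11月30日')
    Timestamp('2014-11-30 00:00:00')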