├── .gitignore ├── .travis.yml ├── LICENSE ├── LICENSES └── PANDAS_LICENSE ├── MANIFEST.in ├── README.rst ├── doc ├── Makefile ├── make.bat └── source │ ├── api.rst │ ├── conf.py │ ├── index.rst │ ├── jpdatareader.rst │ ├── jpdatetime.rst │ └── jpstrings.rst ├── japandas ├── __init__.py ├── compat.py ├── core │ ├── __init__.py │ ├── strings.py │ └── tests │ │ ├── __init__.py │ │ └── test_strings.py ├── io │ ├── __init__.py │ ├── data.py │ ├── estat.py │ └── tests │ │ ├── __init__.py │ │ └── test_estat.py ├── tools │ ├── __init__.py │ ├── plotting.py │ └── tests │ │ ├── __init__.py │ │ └── test_plotting.py ├── tseries │ ├── __init__.py │ ├── data │ │ ├── holidays.pkl │ │ └── tseholidays.pkl │ ├── holiday.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_holiday.py │ │ └── test_tools.py │ └── tools.py └── version.py ├── requirements.txt ├── requirements_test.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | env: 3 | global: 4 | secure: dlKlpVXGzCuN1Wc0h48HeT3EYjND2erX6hPeya/c6SR9LTN+ybcWGyZrHDtybOaRw28REVIXqH3unkVtX/X4d46U/tmRcC/Fy7wInhOS82yuFRCB8TinQkpeqRDZuU+HiW737uzmYf3U37NZnm2VGCsvVXUAoZe6/aLgTXMHP6U= 5 | matrix: 6 | - PYTHON=2.7 PANDAS=0.20.2 7 | - PYTHON=2.7 PANDAS=0.23.2 8 | - PYTHON=3.5 PANDAS=0.21.1 9 | - PYTHON=3.6 PANDAS=0.22.0 10 | - PYTHON=3.6 PANDAS=0.23.2 COVERAGE=true 11 | 12 | install: 13 | - if [[ "$PYTHON" == "2.7" ]]; then 14 | wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 15 | else 16 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 17 | fi 18 | - bash miniconda.sh -b -p $HOME/miniconda 19 | - export PATH="$HOME/miniconda/bin:$PATH" 20 | - hash -r 21 | - conda config --set always_yes yes --set changeps1 no 22 | - conda config --add channels pandas 23 | - conda update -q conda 24 | - conda info -a 25 | - conda create -q -n test-environment python=$PYTHON pip numpy scipy pandas=$PANDAS nose matplotlib requests 26 | - source activate test-environment 27 | - "pip install -r requirements_test.txt" 28 | - pip install flake8 pytest pytest-cov codecov 29 | 30 | script: 31 | - if [[ "$PYTHON" == "2.7" ]]; then 32 | nosetests -v -s; 33 | else 34 | nosetests -v -s --with-coverage --cover-package=japandas; 35 | fi 36 | - flake8 --ignore E501 japandas 37 | 38 | after_success: 39 | - if [ "$COVERAGE" ]; then 40 | codecov; 41 | fi 
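# Rough local equivalent of the CI steps above (a sketch only, not part of the CI run;
# assumes conda is installed and the pinned package versions are still available):
#   conda create -n test-environment python=3.6 pip numpy scipy pandas=0.23.2 nose matplotlib requests
#   source activate test-environment
#   pip install -r requirements_test.txt
#   nosetests -v -s --with-coverage --cover-package=japandas
#   flake8 --ignore E501 japandas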
42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Sinhrks 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of japandas nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /LICENSES/PANDAS_LICENSE: -------------------------------------------------------------------------------- 1 | ======= 2 | License 3 | ======= 4 | 5 | pandas is distributed under a 3-clause ("Simplified" or "New") BSD 6 | license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have 7 | BSD-compatible licenses, are included. Their licenses follow the pandas 8 | license. 9 | 10 | pandas license 11 | ============== 12 | 13 | Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team 14 | All rights reserved. 15 | 16 | Copyright (c) 2008-2011 AQR Capital Management, LLC 17 | All rights reserved. 18 | 19 | Redistribution and use in source and binary forms, with or without 20 | modification, are permitted provided that the following conditions are 21 | met: 22 | 23 | * Redistributions of source code must retain the above copyright 24 | notice, this list of conditions and the following disclaimer. 25 | 26 | * Redistributions in binary form must reproduce the above 27 | copyright notice, this list of conditions and the following 28 | disclaimer in the documentation and/or other materials provided 29 | with the distribution. 30 | 31 | * Neither the name of the copyright holder nor the names of any 32 | contributors may be used to endorse or promote products derived 33 | from this software without specific prior written permission. 34 | 35 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 36 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 37 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 38 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 39 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 40 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 41 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 42 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 43 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 44 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 45 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 46 | 47 | About the Copyright Holders 48 | =========================== 49 | 50 | AQR Capital Management began pandas development in 2008. Development was 51 | led by Wes McKinney. AQR released the source under this license in 2009. 52 | Wes is now an employee of Lambda Foundry, and remains the pandas project 53 | lead. 54 | 55 | The PyData Development Team is the collection of developers of the PyData 56 | project. This includes all of the PyData sub-projects, including pandas. The 57 | core team that coordinates development on GitHub can be found here: 58 | http://github.com/pydata. 59 | 60 | Full credits for pandas contributors can be found in the documentation. 61 | 62 | Our Copyright Policy 63 | ==================== 64 | 65 | PyData uses a shared copyright model. Each contributor maintains copyright 66 | over their contributions to PyData. However, it is important to note that 67 | these contributions are typically only changes to the repositories. Thus, 68 | the PyData source code, in its entirety, is not the copyright of any single 69 | person or institution. Instead, it is the collective copyright of the 70 | entire PyData Development Team. If individual contributors want to maintain 71 | a record of what changes/contributions they have specific copyright on, 72 | they should indicate their copyright in the commit message of the change 73 | when they commit the change to one of the PyData repositories. 74 | 75 | With this in mind, the following banner should be used in any source code 76 | file to indicate the copyright and license terms: 77 | 78 | #----------------------------------------------------------------------------- 79 | # Copyright (c) 2012, PyData Development Team 80 | # All rights reserved. 81 | # 82 | # Distributed under the terms of the BSD Simplified License. 83 | # 84 | # The full license is in the LICENSE file, distributed with this software. 85 | #----------------------------------------------------------------------------- 86 | 87 | Other licenses can be found in the LICENSES directory. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include LICENSES/* 3 | include README.rst 4 | include requirements.txt 5 | include setup.py -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | japandas 2 | ======== 3 | 4 | .. image:: https://img.shields.io/pypi/v/japandas.svg 5 | :target: https://pypi.python.org/pypi/japandas/ 6 | .. image:: https://readthedocs.org/projects/japandas/badge/?version=latest 7 | :target: http://japandas.readthedocs.org/en/latest/ 8 | :alt: Latest Docs 9 | .. image:: https://travis-ci.org/sinhrks/japandas.svg?branch=master 10 | :target: https://travis-ci.org/sinhrks/japandas 11 | .. 
image:: https://codecov.io/gh/sinhrks/japandas/branch/master/graph/badge.svg 12 | :target: https://codecov.io/gh/sinhrks/japandas 13 | 14 | Overview 15 | ~~~~~~~~ 16 | 17 | pandas Japanese extension. 18 | 19 | pandas の日本語拡張。以下の機能を提供する。 20 | 21 | - 日本語の日付のパース 22 | - 日本の祝日カレンダーと、それを利用した営業日計算 23 | - 文字列の全角/半角変換 24 | - e-Stat からのデータの取得 25 | - ローソク足チャート 26 | 27 | **補足** このパッケージでは、"日本固有の機能であり本流に実装される可能性が低いもの", もしくは"それらに関係し本流に実装される可能性が低いもの" を実装 / メンテナンスする。 28 | 29 | 30 | インストール 31 | ~~~~~~~~~~ 32 | 33 | .. code-block:: sh 34 | 35 | pip install japandas 36 | 37 | ドキュメント 38 | ~~~~~~~~~~ 39 | 40 | - 開発版: http://japandas.readthedocs.org/en/latest/ 41 | - リリース版: http://japandas.readthedocs.org/en/stable/ 42 | 43 | 機能概要 44 | ~~~~~~~ 45 | 46 | 日本語の日付のパース 47 | ,,,,,,,,,,,,,,,,, 48 | 49 | .. code-block:: python 50 | 51 | >>> import japandas as jpd 52 | >>> jpd.to_datetime('2014年11月30日') 53 | Timestamp('2014-11-30 00:00:00') 54 | 55 | >>> jpd.to_datetime(['2014年11月30日13時25分', '2014年11月30日14時38分']) 56 | 57 | [2014-11-30 13:25:00, 2014-11-30 14:38:00] 58 | Length: 2, Freq: None, Timezone: None 59 | 60 | >>> jpd.date_range(start=u'2013年12月01日', end=u'2014年12月01日', freq='D') 61 | 62 | [2013-12-01, ..., 2014-12-01] 63 | Length: 366, Freq: D, Timezone: None 64 | 65 | 66 | 日本の祝日カレンダーと、それを利用した営業日計算 67 | ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 68 | 69 | .. code-block:: python 70 | 71 | >>> import pandas as pd 72 | >>> import datetime 73 | 74 | >>> calendar = jpd.JapaneseHolidayCalendar() 75 | >>> cday = pd.offsets.CDay(calendar=calendar) 76 | 77 | >>> datetime.datetime(2014, 4, 28) + cday 78 | # 4/29は祝日(昭和の日) 79 | Timestamp('2014-04-30 00:00:00') 80 | 81 | >>> datetime.datetime(2014, 4, 28) - cday 82 | # 4/26は土曜日, 4/27は日曜日 83 | Timestamp('2014-04-25 00:00:00') 84 | 85 | >>> datetime.datetime(2014, 5, 3) + cday 86 | # 5/4は日曜日, 5/5は祝日(こどもの日), 5/6は祝日(みどりの日/振替休日) 87 | Timestamp('2014-05-07 00:00:00') 88 | 89 | >>> datetime.datetime(2014, 5, 3) - cday 90 | # 5/3は土曜日 91 | Timestamp('2014-05-02 00:00:00') 92 | 93 | # 適当なデータを作成 94 | >>> df = pd.DataFrame(np.random.randn(10, 3), 95 | ... index=jpd.date_range(u'2014年5月1日', u'2014年5月10日', freq='D')) 96 | >>> df 97 | 0 1 2 98 | 2014-05-01 0.762453 -1.418762 -0.150073 99 | 2014-05-02 0.966500 -0.473888 0.272871 100 | 2014-05-03 0.473370 -1.282504 0.380449 101 | 2014-05-04 0.215411 0.220587 -1.088699 102 | 2014-05-05 0.286348 -1.069165 -1.471871 103 | 2014-05-06 -0.665438 -0.402046 -1.008051 104 | 2014-05-07 1.173935 2.080087 -2.279285 105 | 2014-05-08 -0.957195 0.746798 0.092214 106 | 2014-05-09 -0.259276 -0.775489 0.572525 107 | 2014-05-10 -0.910188 0.294136 0.020730 108 | 109 | >>> cday = pd.offsets.CDay(calendar=calendar) 110 | >>> indexer = jpd.date_range(u'2014年5月1日', u'2014年5月10日', freq=cday) 111 | 112 | # カレンダー上 営業日のレコードを抽出 113 | >>> df.ix[indexer] 114 | 0 1 2 115 | 2014-05-01 0.762453 -1.418762 -0.150073 116 | 2014-05-02 0.966500 -0.473888 0.272871 117 | 2014-05-07 1.173935 2.080087 -2.279285 118 | 2014-05-08 -0.957195 0.746798 0.092214 119 | 2014-05-09 -0.259276 -0.775489 0.572525 120 | 121 | 122 | 全角/半角変換 123 | ,,,,,,,,,,, 124 | 125 | .. code-block:: python 126 | 127 | >>> s = pd.Series([u'アイウエオ', u'ABC01', u'DE345']) 128 | >>> z = s.str.h2z() 129 | >>> z 130 | 0 アイウエオ 131 | 1 ABC01 132 | 2 DE345 133 | dtype: object 134 | 135 | >>> z.str.z2h() 136 | 0 アイウエオ 137 | 1 ABC01 138 | 2 DE345 139 | dtype: object 140 | 141 | e-Stat からの統計情報取得 142 | ,,,,,,,,,,,,,,,,,,,,,,, 143 | 144 | .. 
code-block:: python 145 | 146 | >>> key = "your application id" 147 | >>> df = jpd.DataReader("0000030001", 'estat', appid=key) 148 | >>> df.head() 149 | value 全国都道府県030001 全域・集中の別030002 年齢5歳階級A030002 男女A030001 150 | 時間軸(年次) 151 | 1980年 117060396 全国 全域 総数 男女総数 152 | 1980年 89187409 全国市部 全域 総数 男女総数 153 | 1980年 27872987 全国郡部 全域 総数 男女総数 154 | 1980年 5575989 北海道 全域 総数 男女総数 155 | 1980年 1523907 青森県 全域 総数 男女総数 156 | 157 | 158 | ローソク足チャート 159 | ,,,,,,,,,,,,,,,,, 160 | 161 | .. code-block:: python 162 | 163 | >>> df.plot(kind='ohlc') 164 | チャート省略 165 | 166 | 167 | License 168 | ~~~~~~~ 169 | 170 | BSD. 171 | 172 | 日本の祝日データソースとして以下を利用。 173 | 174 | - `komagata/holiday_jp `_ 175 | 176 | Copyright (c) 2009 Masaki Komagata. See `LICENSE `_ for details. 177 | 178 | - `holiday_jp `_ 179 | 180 | MIT. 181 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | 
rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/japandas.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/japandas.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/japandas" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/japandas" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 
141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\japandas.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\japandas.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 
165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /doc/source/api.rst: -------------------------------------------------------------------------------- 1 | 2 | API Reference 3 | ============= 4 | 5 | .. _api.functions: 6 | 7 | Base 8 | ~~~~ 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | 13 | DataStore.is_valid 14 | DataStore.get 15 | DataStore.search 16 | DataResource.read 17 | 18 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # japandas documentation build configuration file, created by 4 | # sphinx-quickstart on Sun Feb 8 19:30:56 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. 
If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | 'sphinx.ext.autodoc', 33 | 'sphinx.ext.doctest', 34 | 'sphinx.ext.intersphinx', 35 | 'sphinx.ext.todo', 36 | 'sphinx.ext.coverage', 37 | 'sphinx.ext.viewcode'] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # The suffix of source filenames. 43 | source_suffix = '.rst' 44 | 45 | # The encoding of source files. 46 | source_encoding = 'utf-8-sig' 47 | 48 | # The master toctree document. 49 | master_doc = 'index' 50 | 51 | # General information about the project. 52 | project = u'japandas' 53 | copyright = u'2015, sinhrks' 54 | 55 | # The version info for the project you're documenting, acts as replacement for 56 | # |version| and |release|, also used in various other places throughout the 57 | # built documents. 58 | # 59 | # The short X.Y version. 60 | version = '0.0.1' 61 | # The full version, including alpha/beta/rc tags. 62 | release = '0.0.1' 63 | 64 | # The language for content autogenerated by Sphinx. Refer to documentation 65 | # for a list of supported languages. 66 | #language = None 67 | 68 | # There are two options for replacing |today|: either, you set today to some 69 | # non-false value, then it is used: 70 | #today = '' 71 | # Else, today_fmt is used as the format for a strftime call. 72 | #today_fmt = '%B %d, %Y' 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | exclude_patterns = [] 77 | 78 | # The reST default role (used for this markup: `text`) to use for all 79 | # documents. 80 | #default_role = None 81 | 82 | # If true, '()' will be appended to :func: etc. cross-reference text. 83 | #add_function_parentheses = True 84 | 85 | # If true, the current module name will be prepended to all description 86 | # unit titles (such as .. function::). 87 | #add_module_names = True 88 | 89 | # If true, sectionauthor and moduleauthor directives will be shown in the 90 | # output. They are ignored by default. 91 | #show_authors = False 92 | 93 | # The name of the Pygments (syntax highlighting) style to use. 94 | pygments_style = 'sphinx' 95 | 96 | # A list of ignored prefixes for module index sorting. 97 | #modindex_common_prefix = [] 98 | 99 | # If true, keep warnings as "system message" paragraphs in the built documents. 100 | #keep_warnings = False 101 | 102 | 103 | # -- Options for HTML output ---------------------------------------------- 104 | 105 | # The theme to use for HTML and HTML Help pages. See the documentation for 106 | # a list of builtin themes. 107 | html_theme = 'default' 108 | 109 | # Theme options are theme-specific and customize the look and feel of a theme 110 | # further. For a list of options available for each theme, see the 111 | # documentation. 112 | #html_theme_options = {} 113 | 114 | # Add any paths that contain custom themes here, relative to this directory. 
115 | #html_theme_path = [] 116 | 117 | # The name for this set of Sphinx documents. If None, it defaults to 118 | # " v documentation". 119 | #html_title = None 120 | 121 | # A shorter title for the navigation bar. Default is the same as html_title. 122 | #html_short_title = None 123 | 124 | # The name of an image file (relative to this directory) to place at the top 125 | # of the sidebar. 126 | #html_logo = None 127 | 128 | # The name of an image file (within the static path) to use as favicon of the 129 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 130 | # pixels large. 131 | #html_favicon = None 132 | 133 | # Add any paths that contain custom static files (such as style sheets) here, 134 | # relative to this directory. They are copied after the builtin static files, 135 | # so a file named "default.css" will overwrite the builtin "default.css". 136 | html_static_path = ['_static'] 137 | 138 | # Add any extra paths that contain custom files (such as robots.txt or 139 | # .htaccess) here, relative to this directory. These files are copied 140 | # directly to the root of the documentation. 141 | #html_extra_path = [] 142 | 143 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 144 | # using the given strftime format. 145 | #html_last_updated_fmt = '%b %d, %Y' 146 | 147 | # If true, SmartyPants will be used to convert quotes and dashes to 148 | # typographically correct entities. 149 | #html_use_smartypants = True 150 | 151 | # Custom sidebar templates, maps document names to template names. 152 | #html_sidebars = {} 153 | 154 | # Additional templates that should be rendered to pages, maps page names to 155 | # template names. 156 | #html_additional_pages = {} 157 | 158 | # If false, no module index is generated. 159 | #html_domain_indices = True 160 | 161 | # If false, no index is generated. 162 | #html_use_index = True 163 | 164 | # If true, the index is split into individual pages for each letter. 165 | #html_split_index = False 166 | 167 | # If true, links to the reST sources are added to the pages. 168 | #html_show_sourcelink = True 169 | 170 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 171 | #html_show_sphinx = True 172 | 173 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 174 | #html_show_copyright = True 175 | 176 | # If true, an OpenSearch description file will be output, and all pages will 177 | # contain a tag referring to it. The value of this option must be the 178 | # base URL from which the finished HTML is served. 179 | #html_use_opensearch = '' 180 | 181 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 182 | #html_file_suffix = None 183 | 184 | # Output file base name for HTML help builder. 185 | htmlhelp_basename = 'japandasdoc' 186 | 187 | 188 | # -- Options for LaTeX output --------------------------------------------- 189 | 190 | latex_elements = { 191 | # The paper size ('letterpaper' or 'a4paper'). 192 | #'papersize': 'letterpaper', 193 | 194 | # The font size ('10pt', '11pt' or '12pt'). 195 | #'pointsize': '10pt', 196 | 197 | # Additional stuff for the LaTeX preamble. 198 | #'preamble': '', 199 | } 200 | 201 | # Grouping the document tree into LaTeX files. List of tuples 202 | # (source start file, target name, title, 203 | # author, documentclass [howto, manual, or own class]). 
204 | latex_documents = [ 205 | ('index', 'japandas.tex', u'japandas Documentation', 206 | u'sinhrks', 'manual'), 207 | ] 208 | 209 | # The name of an image file (relative to this directory) to place at the top of 210 | # the title page. 211 | #latex_logo = None 212 | 213 | # For "manual" documents, if this is true, then toplevel headings are parts, 214 | # not chapters. 215 | #latex_use_parts = False 216 | 217 | # If true, show page references after internal links. 218 | #latex_show_pagerefs = False 219 | 220 | # If true, show URL addresses after external links. 221 | #latex_show_urls = False 222 | 223 | # Documents to append as an appendix to all manuals. 224 | #latex_appendices = [] 225 | 226 | # If false, no module index is generated. 227 | #latex_domain_indices = True 228 | 229 | 230 | # -- Options for manual page output --------------------------------------- 231 | 232 | # One entry per manual page. List of tuples 233 | # (source start file, name, description, authors, manual section). 234 | man_pages = [ 235 | ('index', 'japandas', u'japandas Documentation', 236 | [u'sinhrks'], 1) 237 | ] 238 | 239 | # If true, show URL addresses after external links. 240 | #man_show_urls = False 241 | 242 | 243 | # -- Options for Texinfo output ------------------------------------------- 244 | 245 | # Grouping the document tree into Texinfo files. List of tuples 246 | # (source start file, target name, title, author, 247 | # dir menu entry, description, category) 248 | texinfo_documents = [ 249 | ('index', 'japandas', u'japandas Documentation', 250 | u'sinhrks', 'japandas', 'One line description of project.', 251 | 'Miscellaneous'), 252 | ] 253 | 254 | # Documents to append as an appendix to all manuals. 255 | #texinfo_appendices = [] 256 | 257 | # If false, no module index is generated. 258 | #texinfo_domain_indices = True 259 | 260 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 261 | #texinfo_show_urls = 'footnote' 262 | 263 | # If true, do not generate a @detailmenu in the "Top" node's menu. 264 | #texinfo_no_detailmenu = False 265 | 266 | 267 | # Example configuration for intersphinx: refer to the Python standard library. 268 | intersphinx_mapping = {'http://docs.python.org/': None} 269 | 270 | 271 | # -- Build API doc ---------------------------------------------------------- 272 | 273 | numpydoc_show_class_members = False 274 | 275 | fpath = os.path.dirname(__file__) 276 | gen_path = os.path.join(fpath, 'generated') 277 | app_path = os.path.join(os.path.dirname(os.path.dirname(fpath)), 'japandas') 278 | os.system('sphinx-apidoc -f -o {0} {1}'.format(gen_path, app_path)) 279 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. japandas documentation master file, created by 2 | sphinx-quickstart on Sun Feb 8 19:30:56 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | japandas ドキュメント 7 | ======================= 8 | 9 | 目次: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | jpdatetime 15 | jpstrings 16 | jpdatareader 17 | 18 | 19 | API: 20 | 21 | .. 
toctree:: 22 | :maxdepth: 2 23 | 24 | generated/japandas.core 25 | generated/japandas.io 26 | generated/japandas.tools 27 | generated/japandas.tseries 28 | -------------------------------------------------------------------------------- /doc/source/jpdatareader.rst: -------------------------------------------------------------------------------- 1 | 2 | リモートデータアクセス 3 | ====================== 4 | 5 | ``jpd.DataReader`` を利用すると、国内のデータソースの情報を ``DataFrame`` として取得できます。以下のデータソースをサポートしています。 6 | 7 | - ``estat``: e-Stat API から統計情報を取得。 8 | - ほか、``pd.DataReader`` でサポートしているデータソース 9 | 10 | 利用できるオプションは ``pd.DataReader`` とほぼ同一です。ここでは、差異がある点のみ記載します。 11 | 12 | e-Stat API 13 | ---------- 14 | 15 | 以下のコードに対応するデータを取得できます。 16 | 17 | - 政府統計コード (8桁): `e-Stat API 提供データ `_ から確認できる 8桁のコードです。取得したデータには、各統計に付随する "統計表ID" が含まれます。 18 | - 統計表ID: 実データを含む表のIDです。 19 | 20 | まず、取得したいデータを含む政府統計コードから統計表IDの一覧を取得します。 21 | 22 | .. code-block:: python 23 | 24 | >>> key = "your application id" 25 | >>> dlist = jpd.DataReader("00200521", 'estat', appid=key) 26 | >>> dlist.head() 27 | 統計表ID 政府統計名 作成機関名 提供統計名及び提供分類名 \ 28 | 0 0000030001 国勢調査 総務省 昭和55年国勢調査 第1次基本集計 全国編 29 | 1 0000030002 国勢調査 総務省 昭和55年国勢調査 第1次基本集計 全国編 30 | 2 0000030003 国勢調査 総務省 昭和55年国勢調査 第1次基本集計 全国編 31 | 3 0000030004 国勢調査 総務省 昭和55年国勢調査 第1次基本集計 全国編 32 | 4 0000030005 国勢調査 総務省 昭和55年国勢調査 第1次基本集計 全国編 33 | 34 | 統計表題名及び表番号 提供周期 調査年月 公開日 \ 35 | 0 男女の別(性別)(3),年齢5歳階級(23),人口 全国・市部・郡部・都道府県(47),全域... - 198010 2007-10-05 36 | 1 男女の別(性別)(3),年齢各歳階級(103),人口 全国・市部・郡部・都道府県(47),全... - 198010 2007-10-05 37 | 2 総人口・日本人(2),男女の別(性別)(2),年齢各歳階級(122),出生の月(5),人口 ... - 198010 2007-10-05 38 | 3 総人口・日本人(2),男女の別(性別)(2),年齢各歳階級(86),配偶関係(5),15歳以... - 198010 2007-10-05 39 | 4 男女の別(性別)(3),年齢5歳階級(19),国籍(5),外国人数 全国・市部・郡部・都道府... - 198010 2007-10-05 40 | 41 | 小地域属性フラグ 統計大分野名 統計小分野名 総件数 最終更新日 42 | 0 0 人口・世帯 人口 3651 2008-03-19 43 | 1 0 人口・世帯 人口 16365 2008-03-19 44 | 2 0 人口・世帯 人口 123782 2008-03-19 45 | 3 0 人口・世帯 人口 85848 2008-03-19 46 | 4 0 人口・世帯 人口 13082 2008-03-19 47 | 48 | 結果から取得したい統計表IDを確認し、実データを取得します。複数のデータをまとめて取得したい場合は 49 | そのリストを渡します。 50 | 51 | .. code-block:: python 52 | 53 | >>> df = jpd.DataReader("0000030001", 'estat', appid=key) 54 | >>> df.head() 55 | value 全国都道府県030001 全域・集中の別030002 年齢5歳階級A030002 男女A030001 56 | 時間軸(年次) 57 | 1980-01-01 117060396 全国 全域 総数 男女総数 58 | 1980-01-01 89187409 全国市部 全域 総数 男女総数 59 | 1980-01-01 27872987 全国郡部 全域 総数 男女総数 60 | 1980-01-01 5575989 北海道 全域 総数 男女総数 61 | 1980-01-01 1523907 青森県 全域 総数 男女総数 62 | 63 | >>> df = jpd.DataReader(["0000030001", "0000030002"], 'estat', appid=key) 64 | >>> df.head() 65 | value 全国都道府県030001 全域・集中の別030002 年齢各歳階級B030003 年齢5歳階級A030002 \ 66 | 時間軸(年次) 67 | 1980-01-01 117060396 全国 全域 NaN 総数 68 | 1980-01-01 89187409 全国市部 全域 NaN 総数 69 | 1980-01-01 27872987 全国郡部 全域 NaN 総数 70 | 1980-01-01 5575989 北海道 全域 NaN 総数 71 | 1980-01-01 1523907 青森県 全域 NaN 総数 72 | 73 | 男女A030001 74 | 時間軸(年次) 75 | 1980-01-01 男女総数 76 | 1980-01-01 男女総数 77 | 1980-01-01 男女総数 78 | 1980-01-01 男女総数 79 | 1980-01-01 男女総数 80 | 81 | 82 | e-Statでは、一度のリクエストで10万件のレコードまで取得できます。 83 | 取得するレコード数は ``limit`` キーワードで変更できます。 84 | 85 | .. code-block:: python 86 | 87 | >>> df = jpd.DataReader("0003280394", 'estat', appid=key, limit=100) 88 | 89 | 90 | 取得するレコードの開始位置は ``startPosition`` キーワードで変更できます。 91 | 10万件目以降のレコードを取得する際にはこのキーワードを利用してください。 92 | 93 | .. 
code-block:: python 94 | 95 | >>> df = jpd.DataReader("0003280394", 'estat', appid=key, startPosition=100001) 96 | 97 | オプションの詳細は `e-Stat API 仕様 `_ を参照してください。 98 | -------------------------------------------------------------------------------- /doc/source/jpdatetime.rst: -------------------------------------------------------------------------------- 1 | 2 | 日時処理 3 | ======== 4 | 5 | 日本語日付のパース 6 | ------------------ 7 | 8 | ``jpd.to_datetime`` で 日本語の日付をパースできます。引数が単一の文字列の場合、結果は ``Timestamp`` に、リストや ``np.array`` の場合は ``DatetimeIndex`` になります。この挙動は ``pd.to_datetime`` と同様です。 9 | 10 | .. code-block:: python 11 | 12 | >>> import pandas as pd 13 | >>> import japandas as jpd 14 | 15 | >>> jpd.to_datetime(u'2014年11月30日') 16 | Timestamp('2014-11-30 00:00:00') 17 | 18 | >>> jpd.to_datetime([u'2014年11月30日13時25分', u'2014年11月30日14時38分']) 19 | 20 | [2014-11-30 13:25:00, 2014-11-30 14:38:00] 21 | Length: 2, Freq: None, Timezone: None 22 | 23 | 24 | 同様に、``jpd.date_range``, ``jpd.period_range`` でも 日本語の日付をパースすることができます。それ以外の挙動は ``pd.date_range``, ``pd.period_range`` と同様です。 25 | 26 | .. code-block:: python 27 | 28 | >>> jpd.date_range(start=u'2013年12月01日', end=u'2014年12月01日', freq='D') 29 | 30 | [2013-12-01, ..., 2014-12-01] 31 | Length: 366, Freq: D, Timezone: None 32 | 33 | >>> jpd.period_range(start=u'2013年12月01日', end=u'2014年12月01日', freq='M') 34 | 35 | [2013-12, ..., 2014-12] 36 | Length: 13, Freq: M 37 | 38 | 39 | 日本の祝日カレンダー 40 | ----------------- 41 | 42 | `japandas` では以下 2 種類のカレンダークラスを定義しています。 43 | 44 | - ``japandas.JapaneseHolidayCalendar``: 1970 年から 2030 年までの日本の祝日を定義したカレンダークラスです。 45 | - ``japandas.TSEHolidayCalendar``: 1970 年から 2030 年までの東京証券取引所の休業日 (日本の祝日 + 年末年始 12/31 - 1/3) を定義したカレンダークラスです。 46 | 47 | 定義された祝日の一覧は、それぞれ ``Calendar.holidays()`` メソッドで確認することができます。 48 | 49 | .. code-block:: python 50 | 51 | >>> calendar = jpd.JapaneseHolidayCalendar() 52 | >>> calendar.holidays() 53 | DatetimeIndex(['1970-01-01', '1970-01-15', '1970-02-11', '1970-03-21', 54 | '1970-04-29', '1970-05-03', '1970-05-05', '1970-09-15', 55 | '1970-09-23', '1970-10-10', 56 | ... 57 | '2030-05-06', '2030-07-15', '2030-08-11', '2030-09-16', 58 | '2030-09-23', '2030-10-14', '2030-11-03', '2030-11-04', 59 | '2030-11-23', '2030-12-23'], 60 | dtype='datetime64[ns]', length=969, freq=None) 61 | 62 | >>> tse_calendar = jpd.TSEHolidayCalendar() 63 | >>> tse_calendar.holidays() 64 | DatetimeIndex(['1970-01-01', '1970-01-02', '1970-01-03', '1970-01-15', 65 | '1970-02-11', '1970-03-21', '1970-04-29', '1970-05-03', 66 | '1970-05-05', '1970-09-15', 67 | ... 68 | '2030-07-15', '2030-08-11', '2030-09-16', '2030-09-23', 69 | '2030-10-14', '2030-11-03', '2030-11-04', '2030-11-23', 70 | '2030-12-23', '2030-12-31'], 71 | dtype='datetime64[ns]', length=1144, freq=None) 72 | 73 | 74 | このカレンダーと ``pd.offsets.CDay`` クラスを利用すると、カレンダーの定義に従って営業日の計算を行うことができます。 75 | 76 | .. 
code-block:: python 77 | 78 | >>> cday = pd.offsets.CDay(calendar=calendar) 79 | 80 | >>> import datetime 81 | >>> datetime.datetime(2014, 4, 28) + cday 82 | # 4/29は祝日(昭和の日) 83 | Timestamp('2014-04-30 00:00:00') 84 | 85 | >>> datetime.datetime(2014, 4, 28) - cday 86 | # 4/26は土曜日, 4/27は日曜日 87 | Timestamp('2014-04-25 00:00:00') 88 | 89 | >>> datetime.datetime(2014, 5, 3) + cday 90 | # 5/4は日曜日, 5/5は祝日(こどもの日), 5/6は祝日(みどりの日/振替休日) 91 | Timestamp('2014-05-07 00:00:00') 92 | 93 | >>> datetime.datetime(2014, 5, 3) - cday 94 | # 5/3は土曜日 95 | Timestamp('2014-05-02 00:00:00') 96 | 97 | 98 | また、カレンダーの定義を条件として ``DataFrame`` や ``Series`` からレコードを抽出することができます。以下の例では、それぞれカレンダー上で営業日となっているレコードの抽出 / 休日となっているレコードの抽出を行っています。 99 | 100 | **補足** 対象とするデータは ``DatetimeIndex`` を持っている必要があります。 101 | 102 | .. code-block:: python 103 | 104 | >>> df = pd.DataFrame(np.random.randn(10, 3), 105 | ... index=jpd.date_range(u'2014年5月1日', u'2014年5月10日', freq='D')) 106 | >>> df 107 | 0 1 2 108 | 2014-05-01 0.762453 -1.418762 -0.150073 109 | 2014-05-02 0.966500 -0.473888 0.272871 110 | 2014-05-03 0.473370 -1.282504 0.380449 111 | 2014-05-04 0.215411 0.220587 -1.088699 112 | 2014-05-05 0.286348 -1.069165 -1.471871 113 | 2014-05-06 -0.665438 -0.402046 -1.008051 114 | 2014-05-07 1.173935 2.080087 -2.279285 115 | 2014-05-08 -0.957195 0.746798 0.092214 116 | 2014-05-09 -0.259276 -0.775489 0.572525 117 | 2014-05-10 -0.910188 0.294136 0.020730 118 | 119 | >>> cday = pd.offsets.CDay(calendar=calendar) 120 | >>> indexer = jpd.date_range(u'2014年5月1日', u'2014年5月10日', freq=cday) 121 | 122 | # カレンダー上 営業日のレコードを抽出 123 | >>> df.ix[indexer] 124 | 0 1 2 125 | 2014-05-01 0.762453 -1.418762 -0.150073 126 | 2014-05-02 0.966500 -0.473888 0.272871 127 | 2014-05-07 1.173935 2.080087 -2.279285 128 | 2014-05-08 -0.957195 0.746798 0.092214 129 | 2014-05-09 -0.259276 -0.775489 0.572525 130 | 131 | # カレンダー上 休日のレコードを抽出 132 | >>> df[~df.index.isin(indexer)] 133 | 0 1 2 134 | 2014-05-03 0.473370 -1.282504 0.380449 135 | 2014-05-04 0.215411 0.220587 -1.088699 136 | 2014-05-05 0.286348 -1.069165 -1.471871 137 | 2014-05-06 -0.665438 -0.402046 -1.008051 138 | 2014-05-10 -0.910188 0.294136 0.020730 139 | -------------------------------------------------------------------------------- /doc/source/jpstrings.rst: -------------------------------------------------------------------------------- 1 | 2 | 文字列処理 3 | ========== 4 | 5 | Unicode 正規化 6 | -------------- 7 | 8 | **補足** この機能は ``pandas`` 0.16.1 標準にポーティングされました。API は `unicodedata.normalize `_ と同一です。引数 ``form`` を省略することはできなくなるため注意してください。 9 | 10 | ``Series.str.normalize`` は、標準の ``unicodedata.normalize`` と同じ処理を ``Series`` の値に対して行います。 11 | 12 | .. code-block:: python 13 | 14 | >>> import pandas as pd 15 | >>> s = pd.Series([u'アイウエオ', u'カキクケコ', u'ガギグゲゴ', u'ABCDE']) 16 | >>> s 17 | 0 アイウエオ 18 | 1 カキクケコ 19 | 2 ガギグゲゴ 20 | 3 ABCDE 21 | dtype: object 22 | 23 | >>> s.str.normalize('NFKC') 24 | 0 アイウエオ 25 | 1 カキクケコ 26 | 2 ガギグゲゴ 27 | 3 ABCDE 28 | dtype: object 29 | 30 | 31 | 引数として、``unicodedata.normalize`` と同じフォーマットを渡すことができます。 32 | 33 | - ``NFC``: 正規形 C。 34 | - ``NFKC``: 正規形 KC。 35 | - ``NFD``: 正規形 D。 36 | - ``NFKD``: 正規形 KD。 37 | 38 | .. code-block:: python 39 | 40 | >>> s.str.normalize('NFD') 41 | 0 アイウエオ 42 | 1 カキクケコ 43 | 2 ガギグゲゴ 44 | 3 ABCDE 45 | dtype: object 46 | 47 | 48 | 全角/半角変換 49 | ------------- 50 | 51 | ``Series.str.z2h`` で値を 全角文字から半角文字へ変換、 ``Series.str.h2z`` で値を 半角文字から全角文字へ変換できます。 52 | 53 | .. 
code-block:: python 54 | 55 | >>> s = pd.Series([u'アイウエオ', u'ABC01', u'DE345']) 56 | >>> z = s.str.h2z() 57 | >>> z 58 | 0 アイウエオ 59 | 1 ABC01 60 | 2 DE345 61 | dtype: object 62 | 63 | >>> z.str.z2h() 64 | 0 アイウエオ 65 | 1 ABC01 66 | 2 DE345 67 | dtype: object 68 | 69 | 変換の対象とする文字のグループはキーワードオプションで変更できます。それぞれのキーワードについて対象となる文字列は以下の通りです。デフォルトでは全て ``True`` で、全ての文字が変換されます。変換したくないグループがある場合は 対応するキーワードに ``False`` を指定してください。 70 | 71 | **補足** ``kana`` には日本語の記号 (句読点) も含まれることに注意してください。 72 | 73 | - ``kana``: ``ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノ 74 | ハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロワヲンヴー・「」。、`` 75 | - ``alpha``: ``ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz`` 76 | - ``digit``: ``0123456789`` 77 | - ``symbol``: ``!"#$%&'()*+,"-./:;<=>?@[\]^_`~{|}`` 78 | 79 | .. code-block:: python 80 | 81 | >>> s = pd.Series([u'アイウエオ', u'ABC01', u'DE345']) 82 | 83 | # アルファベットは全角にしない 84 | >>> s.str.h2z(alpha=False) 85 | 0 アイウエオ 86 | 1 ABC01 87 | 2 DE345 88 | dtype: object 89 | 90 | # カナ、アルファベットは全角にしない 91 | >>> s.str.h2z(kana=False, alpha=False, digit=True) 92 | 0 アイウエオ 93 | 1 ABC01 94 | 2 DE345 95 | dtype: object 96 | 97 | # カナ、アルファベット、数値は全角にしない = 記号以外は半角のまま 98 | >>> s.str.h2z(kana=False, alpha=False, digit=False) 99 | 0 アイウエオ 100 | 1 ABC01 101 | 2 DE345 102 | dtype: object 103 | 104 | -------------------------------------------------------------------------------- /japandas/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import japandas.core.strings # noqa 5 | import japandas.io.data # noqa 6 | from japandas.io.data import DataReader # noqa 7 | from japandas.tseries.tools import to_datetime, date_range, period_range # noqa 8 | from japandas.tseries.holiday import JapaneseHolidayCalendar, TSEHolidayCalendar # noqa 9 | import japandas.tools.plotting # noqa 10 | 11 | from japandas.version import version as __version__ # noqa 12 | -------------------------------------------------------------------------------- /japandas/compat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import pandas as pd 5 | from distutils.version import LooseVersion 6 | 7 | 8 | PANDAS_VERSION = LooseVersion(pd.__version__) 9 | -------------------------------------------------------------------------------- /japandas/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/core/__init__.py -------------------------------------------------------------------------------- /japandas/core/strings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | from unicodedata import normalize 7 | 8 | from pandas.compat import PY3, iteritems, u_safe 9 | import pandas.core.strings as strings 10 | 11 | 12 | # soundmarks require special handlings 13 | _HKANA = 'ァアィイゥウェエォオカキクケコサシスセソタチッツテトナニヌネノハヒフヘホマミムメモャヤュユョヨラリルレロワヲン゙ー・「」。、' 14 | _ZALPHA = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 15 | 'abcdefghijklmnopqrstuvwxyz') 16 | _ZSYMBOL = '!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ ' 17 | _ZDIGIT = '0123456789' 18 | 19 | # mapping from full-width to half-width 20 | _KANA_MAPPER = {normalize('NFKC', c): c for c in _HKANA} 21 | _ALPHA_MAPPER = {c: normalize('NFKC', c) for c in _ZALPHA} 22 | _DIGIT_MAPPER = {c: normalize('NFKC', 
c) for c in _ZDIGIT} 23 | _SYMBOL_MAPPER = {c: normalize('NFKC', c) for c in _ZSYMBOL} 24 | 25 | 26 | def _reverse_dict(dict): 27 | return {v: k for k, v in iteritems(dict)} 28 | 29 | 30 | def _ord_dict(dict): 31 | return {ord(k): v for k, v in iteritems(dict)} 32 | 33 | 34 | # for unicode.translate 35 | _Z2H_KANA = _ord_dict(_KANA_MAPPER) 36 | _Z2H_ALPHA = _ord_dict(_ALPHA_MAPPER) 37 | _Z2H_DIGIT = _ord_dict(_DIGIT_MAPPER) 38 | _Z2H_SYMBOL = _ord_dict(_SYMBOL_MAPPER) 39 | _H2Z_KANA = _ord_dict(_reverse_dict(_KANA_MAPPER)) 40 | _H2Z_ALPHA = _ord_dict(_reverse_dict(_ALPHA_MAPPER)) 41 | _H2Z_DIGIT = _ord_dict(_reverse_dict(_DIGIT_MAPPER)) 42 | _H2Z_SYMBOL = _ord_dict(_reverse_dict(_SYMBOL_MAPPER)) 43 | 44 | 45 | def _h2z_sm(text): 46 | return (text.replace("ガ", "ガ").replace("ギ", "ギ").replace("グ", "グ").replace("ゲ", "ゲ"). 47 | replace("ゴ", "ゴ").replace("ザ", "ザ").replace("ジ", "ジ").replace("ズ", "ズ"). 48 | replace("ゼ", "ゼ").replace("ゾ", "ゾ").replace("ダ", "ダ").replace("ヂ", "ヂ"). 49 | replace("ヅ", "ヅ").replace("デ", "デ").replace("ド", "ド").replace("バ", "バ"). 50 | replace("ビ", "ビ").replace("ブ", "ブ").replace("ベ", "ベ").replace("ボ", "ボ"). 51 | replace("パ", "パ").replace("ピ", "ピ").replace("プ", "プ").replace("ペ", "ペ"). 52 | replace("ポ", "ポ").replace("ヴ", "ヴ")) 53 | 54 | 55 | def _z2h_sm(text): 56 | return (text.replace("ガ", "ガ").replace("ギ", "ギ").replace("グ", "グ").replace("ゲ", "ゲ"). 57 | replace("ゴ", "ゴ").replace("ザ", "ザ").replace("ジ", "ジ").replace("ズ", "ズ"). 58 | replace("ゼ", "ゼ").replace("ゾ", "ゾ").replace("ダ", "ダ").replace("ヂ", "ヂ"). 59 | replace("ヅ", "ヅ").replace("デ", "デ").replace("ド", "ド").replace("バ", "バ"). 60 | replace("ビ", "ビ").replace("ブ", "ブ").replace("ベ", "ベ").replace("ボ", "ボ"). 61 | replace("パ", "パ").replace("ピ", "ピ").replace("プ", "プ").replace("ペ", "ペ"). 62 | replace("ポ", "ポ").replace("ヴ", "ヴ")) 63 | 64 | 65 | def str_z2h(self, kana=True, alpha=True, digit=True, symbol=True): 66 | mapper = dict() 67 | if kana: 68 | mapper.update(_Z2H_KANA) 69 | if alpha: 70 | mapper.update(_Z2H_ALPHA) 71 | if digit: 72 | mapper.update(_Z2H_DIGIT) 73 | if symbol: 74 | mapper.update(_Z2H_SYMBOL) 75 | 76 | if kana: 77 | if PY3: 78 | def f(x): 79 | return _z2h_sm(x).translate(mapper) 80 | else: 81 | def f(x): 82 | return _z2h_sm(u_safe(x)).translate(mapper) 83 | else: 84 | if PY3: 85 | def f(x): 86 | return x.translate(mapper) 87 | else: 88 | def f(x): 89 | return u_safe(x).translate(mapper) 90 | 91 | try: 92 | target = self.series 93 | except AttributeError: 94 | target = self._data 95 | return self._wrap_result(strings._na_map(f, target)) 96 | 97 | 98 | def str_h2z(self, kana=True, alpha=True, digit=True, symbol=True): 99 | mapper = dict() 100 | if kana: 101 | mapper.update(_H2Z_KANA) 102 | if alpha: 103 | mapper.update(_H2Z_ALPHA) 104 | if digit: 105 | mapper.update(_H2Z_DIGIT) 106 | if symbol: 107 | mapper.update(_H2Z_SYMBOL) 108 | 109 | if kana: 110 | if PY3: 111 | def f(x): 112 | return _h2z_sm(x).translate(mapper) 113 | else: 114 | def f(x): 115 | return _h2z_sm(u_safe(x)).translate(mapper) 116 | else: 117 | if PY3: 118 | def f(x): 119 | return x.translate(mapper) 120 | else: 121 | def f(x): 122 | return u_safe(x).translate(mapper) 123 | 124 | try: 125 | target = self.series 126 | except AttributeError: 127 | target = self._data 128 | return self._wrap_result(strings._na_map(f, target)) 129 | 130 | 131 | # do not overwrite existing func 132 | if not hasattr(strings.StringMethods, 'z2h'): 133 | strings.StringMethods.z2h = str_z2h 134 | 135 | 136 | if not hasattr(strings.StringMethods, 'h2z'): 137 | 
strings.StringMethods.h2z = str_h2z 138 | -------------------------------------------------------------------------------- /japandas/core/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/core/tests/__init__.py -------------------------------------------------------------------------------- /japandas/core/tests/test_strings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # do not import unicode_literals here to test ASCII in Python 2.7 5 | import unittest 6 | 7 | import pandas as pd 8 | import pandas.util.testing as tm 9 | 10 | 11 | class TestStrings(unittest.TestCase): 12 | 13 | def setUp(self): 14 | self.zhiragana_s = pd.Series([u'ぁあぃいぅうぇえぉお', 15 | u'かがきぎくぐけげこご', 16 | u'さざしじすずせぜそぞ', 17 | u'ただちぢっつづてでとど', 18 | u'なにぬねの', 19 | u'はばぱひびぴふぶぷへべぺほぼぽ', 20 | u'まみむめもゃやゅゆょよ', 21 | u'らりるれろわをんゎゐゑゕゖゔ']) 22 | 23 | self.zkatakana_s = pd.Series([u'ァアィイゥウェエォオ', 24 | u'カガキギクグケゲコゴ', 25 | u'サザシジスズセゼソゾ', 26 | u'タダチヂッツヅテデトド', 27 | u'ナニヌネノ', 28 | u'ハバパヒビピフブプヘベペホボポ', 29 | u'マミムメモャヤュユョヨ', 30 | u'ラリルレロワヲンヮヰヱヵヶヴ', 31 | u'ー・「」。、']) 32 | 33 | self.hkatakana_s = pd.Series([u'ァアィイゥウェエォオ', 34 | u'カガキギクグケゲコゴ', 35 | u'サザシジスズセゼソゾ', 36 | u'タダチヂッツヅテデトド', 37 | u'ナニヌネノ', 38 | u'ハバパヒビピフブプヘベペホボポ', 39 | u'マミムメモャヤュユョヨ', 40 | u'ラリルレロワヲンヮヰヱヵヶヴ', 41 | u'ー・「」。、']) 42 | 43 | self.zalpha_s = pd.Series([u'ABCDEFGH', 44 | u'IJKLMNOP', 45 | u'QRSTUVWXYZ', 46 | u'abcdefgh', 47 | u'ijklmnop', 48 | u'qrstuvwxyz']) 49 | self.halpha_s = pd.Series(['ABCDEFGH', 50 | 'IJKLMNOP', 51 | 'QRSTUVWXYZ', 52 | 'abcdefgh', 53 | 'ijklmnop', 54 | 'qrstuvwxyz']) 55 | 56 | self.zdigit_s = pd.Series([u'01234', u'56789']) 57 | self.hdigit_s = pd.Series(['01234', '56789']) 58 | 59 | self.zsymbol_s = pd.Series([u'!"#$%&', 60 | u''()*+,', 61 | u'-./:;<', 62 | u'=>?@[\', 63 | u']^_`~{', 64 | u'|} ']) 65 | self.hsymbol_s = pd.Series([u'!"#$%&', 66 | u"'()*+,", 67 | u'-./:;<', 68 | u'=>?@[\\', 69 | u']^_`~{', 70 | u'|} ']) 71 | 72 | def test_mapper(self): 73 | import japandas.core.strings as s 74 | self.assertEqual(len(s._KANA_MAPPER), len(s._HKANA)) 75 | self.assertEqual(len(s._ALPHA_MAPPER), len(s._ZALPHA)) 76 | self.assertEqual(len(s._DIGIT_MAPPER), len(s._ZDIGIT)) 77 | self.assertEqual(len(s._SYMBOL_MAPPER), len(s._ZSYMBOL)) 78 | 79 | self.assertEqual(len(s._reverse_dict(s._KANA_MAPPER)), len(s._HKANA)) 80 | self.assertEqual(len(s._reverse_dict(s._ALPHA_MAPPER)), len(s._ZALPHA)) 81 | self.assertEqual(len(s._reverse_dict(s._DIGIT_MAPPER)), len(s._ZDIGIT)) 82 | self.assertEqual(len(s._Z2H_SYMBOL), len(s._H2Z_SYMBOL)) 83 | 84 | self.assertEqual(len(s._Z2H_KANA), len(s._H2Z_KANA)) 85 | self.assertEqual(len(s._Z2H_ALPHA), len(s._H2Z_ALPHA)) 86 | self.assertEqual(len(s._Z2H_DIGIT), len(s._H2Z_DIGIT)) 87 | self.assertEqual(len(s._reverse_dict(s._SYMBOL_MAPPER)), len(s._ZSYMBOL)) 88 | 89 | def test_z2h(self): 90 | s = pd.Series([u'aaa', 'bbb', u'アアア', u'1', u'*']) 91 | result = s.str.z2h() 92 | expected = pd.Series(['aaa', 'bbb', u'アアア', '1', '*']) 93 | tm.assert_series_equal(result, expected) 94 | 95 | # full-width kana to half-width kana 96 | result = self.zkatakana_s.str.z2h(kana=True, alpha=False, digit=False, symbol=False) 97 | tm.assert_series_equal(result, self.hkatakana_s) 98 | result = self.zkatakana_s.str.z2h(kana=False, alpha=True, digit=False, symbol=False) 99 | tm.assert_series_equal(result, self.zkatakana_s) 100 | result = 
self.zkatakana_s.str.z2h(kana=False, alpha=False, digit=True, symbol=False) 101 | tm.assert_series_equal(result, self.zkatakana_s) 102 | result = self.zkatakana_s.str.z2h(kana=False, alpha=False, digit=False, symbol=True) 103 | tm.assert_series_equal(result, self.zkatakana_s) 104 | 105 | # full-width kana to half-width alpha 106 | result = self.zalpha_s.str.z2h(kana=True, alpha=False, digit=False, symbol=False) 107 | tm.assert_series_equal(result, self.zalpha_s) 108 | result = self.zalpha_s.str.z2h(kana=False, alpha=True, digit=False, symbol=False) 109 | tm.assert_series_equal(result, self.halpha_s) 110 | result = self.zalpha_s.str.z2h(kana=False, alpha=False, digit=True, symbol=False) 111 | tm.assert_series_equal(result, self.zalpha_s) 112 | result = self.zalpha_s.str.z2h(kana=False, alpha=False, digit=False, symbol=True) 113 | tm.assert_series_equal(result, self.zalpha_s) 114 | 115 | # full-width kana to half-width digit 116 | result = self.zdigit_s.str.z2h(kana=True, alpha=False, digit=False, symbol=False) 117 | tm.assert_series_equal(result, self.zdigit_s) 118 | result = self.zdigit_s.str.z2h(kana=False, alpha=True, digit=False, symbol=False) 119 | tm.assert_series_equal(result, self.zdigit_s) 120 | result = self.zdigit_s.str.z2h(kana=False, alpha=False, digit=True, symbol=False) 121 | tm.assert_series_equal(result, self.hdigit_s) 122 | result = self.zdigit_s.str.z2h(kana=False, alpha=False, digit=False, symbol=True) 123 | tm.assert_series_equal(result, self.zdigit_s) 124 | 125 | # full-width kana to half-width symbol 126 | result = self.zsymbol_s.str.z2h(kana=True, alpha=False, digit=False, symbol=False) 127 | tm.assert_series_equal(result, self.zsymbol_s) 128 | result = self.zsymbol_s.str.z2h(kana=False, alpha=True, digit=False, symbol=False) 129 | tm.assert_series_equal(result, self.zsymbol_s) 130 | result = self.zsymbol_s.str.z2h(kana=False, alpha=False, digit=True, symbol=False) 131 | tm.assert_series_equal(result, self.zsymbol_s) 132 | result = self.zsymbol_s.str.z2h(kana=False, alpha=False, digit=False, symbol=True) 133 | tm.assert_series_equal(result, self.hsymbol_s) 134 | 135 | # half-width to half-width 136 | result = self.hkatakana_s.str.z2h() 137 | tm.assert_series_equal(result, self.hkatakana_s) 138 | result = self.halpha_s.str.z2h() 139 | tm.assert_series_equal(result, self.halpha_s) 140 | result = self.hdigit_s.str.z2h() 141 | tm.assert_series_equal(result, self.hdigit_s) 142 | result = self.hsymbol_s.str.z2h() 143 | tm.assert_series_equal(result, self.hsymbol_s) 144 | 145 | def test_h2z(self): 146 | s = pd.Series(['aaa', 'bbb', u'アアア', u'1', '*']) 147 | result = s.str.h2z() 148 | expected = pd.Series([u'aaa', u'bbb', u'アアア', u'1', u'*']) 149 | tm.assert_series_equal(result, expected) 150 | 151 | # half-width kana to full-width kana 152 | result = self.hkatakana_s.str.h2z(kana=True, alpha=False, digit=False, symbol=False) 153 | tm.assert_series_equal(result, self.zkatakana_s) 154 | result = self.hkatakana_s.str.h2z(kana=False, alpha=True, digit=False, symbol=False) 155 | tm.assert_series_equal(result, self.hkatakana_s) 156 | result = self.hkatakana_s.str.h2z(kana=False, alpha=False, digit=True, symbol=False) 157 | tm.assert_series_equal(result, self.hkatakana_s) 158 | result = self.hkatakana_s.str.h2z(kana=False, alpha=False, digit=False, symbol=True) 159 | tm.assert_series_equal(result, self.hkatakana_s) 160 | 161 | # half-width kana to full-width alpha 162 | result = self.halpha_s.str.h2z(kana=True, alpha=False, digit=False, symbol=False) 163 | 
tm.assert_series_equal(result, self.halpha_s) 164 | result = self.halpha_s.str.h2z(kana=False, alpha=True, digit=False, symbol=False) 165 | tm.assert_series_equal(result, self.zalpha_s) 166 | result = self.halpha_s.str.h2z(kana=False, alpha=False, digit=True, symbol=False) 167 | tm.assert_series_equal(result, self.halpha_s) 168 | result = self.halpha_s.str.h2z(kana=False, alpha=False, digit=False, symbol=True) 169 | tm.assert_series_equal(result, self.halpha_s) 170 | 171 | # half-width kana to full-width digit 172 | result = self.hdigit_s.str.h2z(kana=True, alpha=False, digit=False, symbol=False) 173 | tm.assert_series_equal(result, self.hdigit_s) 174 | result = self.hdigit_s.str.h2z(kana=False, alpha=True, digit=False, symbol=False) 175 | tm.assert_series_equal(result, self.hdigit_s) 176 | result = self.hdigit_s.str.h2z(kana=False, alpha=False, digit=True, symbol=False) 177 | tm.assert_series_equal(result, self.zdigit_s) 178 | result = self.hdigit_s.str.h2z(kana=False, alpha=False, digit=False, symbol=True) 179 | tm.assert_series_equal(result, self.hdigit_s) 180 | 181 | # half-width kana to full-width symbol 182 | result = self.hsymbol_s.str.h2z(kana=True, alpha=False, digit=False, symbol=False) 183 | tm.assert_series_equal(result, self.hsymbol_s) 184 | result = self.hsymbol_s.str.h2z(kana=False, alpha=True, digit=False, symbol=False) 185 | tm.assert_series_equal(result, self.hsymbol_s) 186 | result = self.hsymbol_s.str.h2z(kana=False, alpha=False, digit=True, symbol=False) 187 | tm.assert_series_equal(result, self.hsymbol_s) 188 | result = self.hsymbol_s.str.h2z(kana=False, alpha=False, digit=False, symbol=True) 189 | tm.assert_series_equal(result, self.zsymbol_s) 190 | 191 | # full-width to full-width 192 | result = self.zkatakana_s.str.h2z() 193 | tm.assert_series_equal(result, self.zkatakana_s) 194 | result = self.zalpha_s.str.h2z() 195 | tm.assert_series_equal(result, self.zalpha_s) 196 | result = self.zdigit_s.str.h2z() 197 | tm.assert_series_equal(result, self.zdigit_s) 198 | result = self.zkatakana_s.str.h2z() 199 | tm.assert_series_equal(result, self.zkatakana_s) 200 | 201 | def test_z2h_obj(self): 202 | s = pd.Series(['aaa', None, u'アアア', u'あああ', u'1', 3]) 203 | result = s.str.z2h() 204 | expected = pd.Series(['aaa', None, u'アアア', u'あああ', '1', None]) 205 | tm.assert_series_equal(result, expected) 206 | 207 | empty_str = pd.Series(dtype=str) 208 | tm.assert_series_equal(empty_str.str.h2z(), empty_str) 209 | 210 | def test_h2z_obj(self): 211 | s = pd.Series(['aaa', None, u'アアア', u'あああ', u'1', 3]) 212 | result = s.str.h2z() 213 | expected = pd.Series([u'aaa', None, u'アアア', u'あああ', u'1', None]) 214 | tm.assert_series_equal(result, expected) 215 | 216 | empty_str = pd.Series(dtype=str) 217 | tm.assert_series_equal(empty_str.str.h2z(), empty_str) 218 | 219 | def test_normalize(self): 220 | s = pd.Series([u'aaa', 'bbb', u'アアア', u'1', u'*']) 221 | result = s.str.normalize('NFKC') 222 | expected = pd.Series(['aaa', 'bbb', u'アアア', '1', '*']) 223 | tm.assert_series_equal(result, expected) 224 | 225 | s = pd.Series([u'aaa', None, 'bbb', u'アアア', u'1', 5, u'*']) 226 | result = s.str.normalize('NFKC') 227 | expected = pd.Series(['aaa', None, 'bbb', u'アアア', '1', None, '*']) 228 | tm.assert_series_equal(result, expected) 229 | 230 | empty_str = pd.Series(dtype=str) 231 | tm.assert_series_equal(empty_str.str.normalize('NFKC'), empty_str) 232 | 233 | def test_normalize_format(self): 234 | import unicodedata 235 | values = [u'アイウエオ', u'カキクケコ', u'ガギグゲゴ', u'ABCDE'] 236 | for format in ['NFD', 'NFC', 
'NFKD', 'NFKC']: 237 | result = pd.Series(values).str.normalize(format).tolist() 238 | expected = [unicodedata.normalize(format, v) for v in values] 239 | self.assertEqual(result, expected) 240 | 241 | 242 | if __name__ == '__main__': 243 | import nose 244 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], 245 | exit=False) 246 | -------------------------------------------------------------------------------- /japandas/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/io/__init__.py -------------------------------------------------------------------------------- /japandas/io/data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | from japandas.io.estat import EStatReader 7 | 8 | from pandas_datareader import data 9 | 10 | 11 | _ohlc_columns_jp = ['始値', '高値', '安値', '終値', '出来高', '調整後終値*'] 12 | _ohlc_columns_en = ['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'] 13 | 14 | 15 | def DataReader(symbols, data_source=None, start=None, end=None, appid=None, **kwargs): 16 | if data_source == 'yahoojp': 17 | msg = "YahooJPReaderは削除されました https://www.yahoo-help.jp/app/answers/detail/p/546/a_id/93575" 18 | raise NotImplementedError(msg) 19 | elif data_source == 'estat': 20 | return EStatReader(symbols=symbols, appid=appid, **kwargs).read() 21 | else: 22 | return data.DataReader(name=symbols, data_source=data_source, 23 | start=start, end=end, **kwargs) 24 | 25 | 26 | DataReader.__doc__ = data.DataReader.__doc__ 27 | -------------------------------------------------------------------------------- /japandas/io/estat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import xml.etree.ElementTree as ET 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | from pandas_datareader.base import _BaseReader 12 | from japandas.tseries.tools import to_datetime 13 | 14 | # http://www.e-stat.go.jp/api/e-stat-manual/ 15 | 16 | METADATA_MAPPER = { 17 | # 'TABLE_INF': '統計表ID', 18 | 'STAT_NAME': '政府統計名', 19 | 'GOV_ORG': '作成機関名', 20 | 'STATISTICS_NAME': '提供統計名及び提供分類名', 21 | 'TITLE': '統計表題名及び表番号', 22 | 'CYCLE': '提供周期', 23 | 'SURVEY_DATE': '調査年月', 24 | 'OPEN_DATE': '公開日', 25 | 'SMALL_AREA': '小地域属性フラグ', 26 | 'MAIN_CATEGORY': '統計大分野名', 27 | 'SUB_CATEGORY': '統計小分野名', 28 | 'OVERALL_TOTAL_NUMBER': '総件数', 29 | 'UPDATED_DATE': '最終更新日', 30 | 'id': '統計表ID' 31 | } 32 | 33 | 34 | class EStatReader(_BaseReader): 35 | 36 | def __init__(self, symbols=None, appid=None, 37 | limit=None, startPosition=None, **kwargs): 38 | if isinstance(symbols, pd.DataFrame): 39 | if '統計表ID' in symbols.columns: 40 | symbols = symbols.loc[:, '統計表ID'] 41 | else: 42 | raise ValueError('DataFrame 中に "統計表ID" カラムがありません') 43 | 44 | super(EStatReader, self).__init__(symbols=symbols, **kwargs) 45 | 46 | if appid is None: 47 | raise ValueError('アプリケーションID "appid" を文字列で指定してください') 48 | self.appid = appid 49 | 50 | # e-Stat attrs 51 | self.limit = limit 52 | self.startPosition = startPosition 53 | 54 | @property 55 | def url(self): 56 | return 'http://api.e-stat.go.jp/rest/2.0/app/getStatsData' 57 | 58 | @property 59 | def params(self): 60 | params = {'appId': self.appid, 'lang': 'J'} 61 | 62 | for attr in ['limit', 
'startPosition']: 63 | value = getattr(self, attr, None) 64 | if value is not None: 65 | params[attr] = value 66 | return params 67 | 68 | def read(self): 69 | """ read data """ 70 | if isinstance(self.symbols, pd.compat.string_types): 71 | if len(self.symbols) == 8: 72 | return self.get_estat_list() 73 | 74 | params = self.params 75 | params['statsDataId'] = self.symbols 76 | return self._read_one_data(self.url, params) 77 | 78 | elif pd.api.types.is_list_like(self.symbols): 79 | dfs = [] 80 | for symbol in self.symbols: 81 | params = self.params 82 | params['statsDataId'] = symbol 83 | df = self._read_one_data(self.url, params) 84 | dfs.append(df) 85 | 86 | if len(dfs) == 0: 87 | raise ValueError('取得するIDがありません') 88 | elif len(dfs) == 1: 89 | return dfs[0] 90 | else: 91 | return dfs[0].append(dfs[1:]) 92 | else: 93 | raise ValueError('IDは文字列もしくはそのリストで指定してください') 94 | 95 | def _read_lines(self, out): 96 | root = ET.fromstring(out.getvalue()) 97 | # retrieve class 98 | class_names = {} # mapping from class id to name 99 | class_codes = {} # mapping from class id to codes 100 | for c in root.findall('.//CLASS_OBJ'): 101 | class_id = c.attrib['id'] 102 | class_names[class_id] = c.attrib['name'] 103 | 104 | mapper = {} 105 | for code in c.findall('CLASS'): 106 | mapper[code.attrib['code']] = code.attrib['name'] 107 | class_codes[class_id] = mapper 108 | 109 | # retrieve values 110 | values = [] 111 | for value in root.findall('.//VALUE'): 112 | row = {} 113 | for cat in class_codes: 114 | name = class_names[cat] 115 | code = value.attrib[cat] 116 | row[name] = class_codes[cat][code] 117 | 118 | if value.text in ('-', ): 119 | # avoid to_numeric fails 120 | row['value'] = np.nan 121 | else: 122 | row['value'] = value.text 123 | values.append(row) 124 | 125 | df = pd.DataFrame(values) 126 | df.loc[:, 'value'] = pd.to_numeric(df['value'], errors='ignore') 127 | 128 | if 'time' in class_names: 129 | df = df.set_index(class_names['time']) 130 | df.index = to_datetime(df.index) 131 | return df 132 | 133 | def get_estat_list(self): 134 | url = 'http://api.e-stat.go.jp/rest/2.0/app/getStatsList' 135 | params = {'appId': self.appid, 'lang': 'J', 'statsCode': self.symbols} 136 | 137 | out = self._read_url_as_StringIO(url, params=params) 138 | root = ET.fromstring(out.getvalue()) 139 | 140 | values = [] 141 | columns = [] 142 | for table in root.findall('.//TABLE_INF'): 143 | columns = ['統計表ID'] 144 | row = {'統計表ID': table.get('id')} 145 | for elem in table.iter(): 146 | if elem.tag == 'TABLE_INF': 147 | continue 148 | 149 | if elem.tag in ('UPDATED_DATE', 'OPEN_DATE'): 150 | val = pd.to_datetime(elem.text) 151 | elif elem.tag == 'SURVEY_DATE': 152 | # Almost impossible to parse SURVEY_DATE as Timestamp... 
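# (for example, SURVEY_DATE can hold values such as '200801', a span like
#  '200801-200812', or a plain 0, so the raw text is kept instead of a Timestamp)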
153 | val = elem.text 154 | elif elem.tag == 'OVERALL_TOTAL_NUMVER': 155 | val = pd.to_numeric(elem.text) 156 | else: 157 | val = elem.text 158 | label = METADATA_MAPPER.get(elem.tag, elem.tag) 159 | columns.append(label) 160 | row[label] = val 161 | values.append(row) 162 | 163 | if len(values) == 0: 164 | 165 | try: 166 | # if msg can be extracted from XML, raise it 167 | root = ET.fromstring(out.getvalue()) 168 | msg = root.find('RESULT').find('ERROR_MSG').text 169 | except Exception: 170 | # otherwie, raise all XML content 171 | raise ValueError(out.getvalue()) 172 | raise ValueError(msg.encode('utf-8', 'replace')) 173 | 174 | df = pd.DataFrame(values, columns=columns) 175 | return df 176 | -------------------------------------------------------------------------------- /japandas/io/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/io/tests/__init__.py -------------------------------------------------------------------------------- /japandas/io/tests/test_estat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | import unittest 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import pandas.util.testing as tm 12 | import japandas as jpd 13 | 14 | 15 | class TestEstat(unittest.TestCase): 16 | 17 | def test_data_estat_error(self): 18 | with self.assertRaises(ValueError): 19 | # no app ID 20 | jpd.DataReader('00200521', 'estat', appid=None) 21 | 22 | ESTAT_KEY = os.environ['ESTAT'] 23 | 24 | with self.assertRaises(ValueError): 25 | # blank list 26 | jpd.DataReader([], 'estat', appid=ESTAT_KEY) 27 | 28 | with self.assertRaises(ValueError): 29 | # invalid type 30 | jpd.DataReader(1, 'estat', appid=ESTAT_KEY) 31 | 32 | def test_data_estat_list(self): 33 | 34 | ESTAT_KEY = os.environ['ESTAT'] 35 | df = jpd.DataReader('00200521', 'estat', appid=ESTAT_KEY) 36 | 37 | exp_columns = pd.Index(['統計表ID', '政府統計名', 38 | '作成機関名', '提供統計名及び提供分類名', 39 | '統計表題名及び表番号', '提供周期', '調査年月', 40 | '公開日', '小地域属性フラグ', '統計大分野名', 41 | '統計小分野名', '総件数', '最終更新日'],) 42 | tm.assert_index_equal(df.columns, exp_columns) 43 | 44 | target = df.head(n=3) 45 | df = jpd.DataReader(target, 'estat', appid=ESTAT_KEY) 46 | self.assertIsInstance(df, pd.DataFrame) 47 | 48 | df = jpd.DataReader('00200523', 'estat', appid=ESTAT_KEY) 49 | tm.assert_index_equal(df.columns, exp_columns) 50 | 51 | def test_data_estat_list_all(self): 52 | # 以下 すべての提供データをテスト 53 | # http://www.e-stat.go.jp/api/api-data/ 54 | 55 | targets = ['00200521', '00200522', '00200523', '00200524', '00200531', 56 | '00200532', '00200533', '00200541', '00200543', '00200544', 57 | '00200545', '00200551', '00200552', '00200553', '00200561', 58 | '00200563', '00200564', '00200565', '00200566', '00200571', 59 | '00200572', '00200573', # '00200511', '00200502', (no data found) 60 | '00250011'] 61 | for target in targets: 62 | self._assert_target(target) 63 | 64 | def test_data_estat_list_all2(self): 65 | # Travis CI でのタイムアウトを防ぐため分割 66 | targets = ['00350600', '00350620', '00351000', '00400001', '00400002', 67 | '00400003', '00400004', '00400202', '00450011', '00450012', 68 | '00450021', '00450022', '00450061', '00450071', '00450091', 69 | '00450151', '00500201', '00500209', '00500215', '00500216', 70 | '00500217', '00500225', '00550010', '00550020', '00550030', 71 | '00550040', 
'00550100', '00550200', '00550210', '00551020', 72 | '00551130', '00600330', '00600470', '00600480'] 73 | for target in targets: 74 | self._assert_target(target) 75 | 76 | def _assert_target(self, target): 77 | ESTAT_KEY = os.environ['ESTAT'] 78 | 79 | df = jpd.DataReader(target, 'estat', appid=ESTAT_KEY) 80 | exp_columns = pd.Index(['統計表ID', '政府統計名', 81 | '作成機関名', '提供統計名及び提供分類名', 82 | '統計表題名及び表番号', '提供周期', '調査年月', 83 | '公開日', '小地域属性フラグ', '統計大分野名', 84 | '統計小分野名', '総件数', '最終更新日'],) 85 | tm.assert_index_equal(df.columns, exp_columns) 86 | 87 | target = df.head(n=3) 88 | df = jpd.DataReader(target, 'estat', appid=ESTAT_KEY) 89 | self.assertIsInstance(df, pd.DataFrame) 90 | 91 | def test_data_estat_data(self): 92 | 93 | ESTAT_KEY = os.environ['ESTAT'] 94 | df = jpd.DataReader('0000030001', 'estat', appid=ESTAT_KEY) 95 | 96 | exp = pd.DataFrame({'value': [117060396, 89187409, 27872987, 5575989, 1523907], 97 | '全国都道府県030001': ['全国', '全国市部', '全国郡部', '北海道', '青森県'], 98 | '全域・集中の別030002': ['全域'] * 5, 99 | '年齢5歳階級A030002': ['総数'] * 5, 100 | '男女A030001': ['男女総数'] * 5}, 101 | index=pd.DatetimeIndex(['1980-01-01'] * 5, name='時間軸(年次)')) 102 | tm.assert_frame_equal(df.head(), exp) 103 | 104 | df = jpd.DataReader(['0000030001', '0000030002'], 'estat', appid=ESTAT_KEY) 105 | self.assertIsInstance(df, pd.DataFrame) 106 | 107 | df = jpd.DataReader("0002180001", 'estat', appid=ESTAT_KEY) 108 | exp = pd.DataFrame({'value': [445007, 194243, 199623, 203464, 190711], 109 | '全国・都道府県・大都市': ['全国'] * 5, 110 | '性別': ['総数'] * 5, 111 | '表章項目': ['都道府県(自都市)内移動者数'] * 5}, 112 | index=pd.DatetimeIndex(['2009-03-01', '2009-02-01', '2009-01-01', 113 | '2008-12-01', '2008-11-01'], name='時間軸(月次)')) 114 | tm.assert_frame_equal(df.head(), exp) 115 | 116 | def test_data_estat_data_numeric(self): 117 | ESTAT_KEY = os.environ['ESTAT'] 118 | df = jpd.DataReader('0003109612', 'estat', appid=ESTAT_KEY) 119 | self.assertEqual(df['value'].dtype, np.float64) 120 | 121 | def test_data_limit(self): 122 | ESTAT_KEY = os.environ['ESTAT'] 123 | df = jpd.DataReader('0003280394', 'estat', appid=ESTAT_KEY) 124 | assert len(df) == 100000 125 | self.assertEqual(df['value'].dtype, np.float64) 126 | 127 | df = jpd.DataReader('0003280394', 'estat', appid=ESTAT_KEY, 128 | limit=20) 129 | assert len(df) == 20 130 | self.assertEqual(df['value'].dtype, np.float64) 131 | 132 | def test_data_position(self): 133 | ESTAT_KEY = os.environ['ESTAT'] 134 | df = jpd.DataReader('0003280394', 'estat', appid=ESTAT_KEY, limit=100) 135 | assert len(df) == 100 136 | self.assertEqual(df['value'].dtype, np.float64) 137 | 138 | df2 = jpd.DataReader('0003280394', 'estat', appid=ESTAT_KEY, 139 | startPosition=11, limit=90) 140 | tm.assert_frame_equal(df.iloc[10:], df2) 141 | self.assertEqual(df2['value'].dtype, np.float64) 142 | 143 | 144 | if __name__ == '__main__': 145 | import nose 146 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], 147 | exit=False) 148 | -------------------------------------------------------------------------------- /japandas/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/tools/__init__.py -------------------------------------------------------------------------------- /japandas/tools/plotting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | 
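# This module defines OhlcPlot, a candlestick-style variant of pandas' LinePlot,
# and registers it under kind='ohlc' (see the block at the bottom of this file),
# so that Series/DataFrame .plot(kind='ohlc') works once japandas is imported.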
import pandas as pd 7 | try: 8 | import pandas.plotting._core as plotting 9 | except ImportError: 10 | import pandas.tools.plotting as plotting 11 | 12 | from japandas.io.data import _ohlc_columns_jp, _ohlc_columns_en 13 | 14 | 15 | class OhlcPlot(plotting.LinePlot): 16 | ohlc_cols = pd.Index(['open', 'high', 'low', 'close']) 17 | reader_cols_en = pd.Index(_ohlc_columns_en) 18 | reader_cols_jp = pd.Index(_ohlc_columns_jp) 19 | 20 | def __init__(self, data, **kwargs): 21 | data = data.copy() 22 | self.freq = kwargs.pop('freq', 'B') 23 | 24 | if isinstance(data, pd.Series): 25 | data = data.resample(self.freq).ohlc() 26 | assert isinstance(data, pd.DataFrame) 27 | assert isinstance(data.index, pd.DatetimeIndex) 28 | 29 | if data.columns.equals(self.ohlc_cols): 30 | data.columns = [c.title() for c in data.columns] 31 | elif data.columns.equals(self.reader_cols_jp): 32 | data.columns = self.reader_cols_en 33 | elif data.columns.equals(self.reader_cols_en): 34 | pass 35 | else: 36 | 37 | raise ValueError('data is not ohlc-like:') 38 | data = data[['Open', 'Close', 'High', 'Low']] 39 | plotting.LinePlot.__init__(self, data, **kwargs) 40 | 41 | def _get_plot_function(self): 42 | try: 43 | from mpl_finance import candlestick_ohlc 44 | except ImportError as e: 45 | try: 46 | from matplotlib.finance import candlestick_ohlc 47 | except ImportError: 48 | raise ImportError(e) 49 | 50 | def _plot(data, ax, **kwds): 51 | candles = candlestick_ohlc(ax, data.values, **kwds) 52 | return candles 53 | 54 | return _plot 55 | 56 | def _make_plot(self): 57 | try: 58 | from pandas.plotting._timeseries import (_decorate_axes, 59 | format_dateaxis) 60 | except ImportError: 61 | from pandas.tseries.plotting import _decorate_axes, format_dateaxis 62 | plotf = self._get_plot_function() 63 | ax = self._get_ax(0) 64 | 65 | data = self.data 66 | data.index.name = 'Date' 67 | data = data.to_period(freq=self.freq) 68 | index = data.index 69 | data = data.reset_index(level=0) 70 | 71 | if self._is_ts_plot(): 72 | data['Date'] = data['Date'].apply(lambda x: x.ordinal) 73 | _decorate_axes(ax, self.freq, self.kwds) 74 | candles = plotf(data, ax, **self.kwds) 75 | format_dateaxis(ax, self.freq, index) 76 | else: 77 | from matplotlib.dates import date2num, AutoDateFormatter, AutoDateLocator 78 | 79 | data['Date'] = data['Date'].apply(lambda x: date2num(x.to_timestamp())) 80 | candles = plotf(data, ax, **self.kwds) 81 | 82 | locator = AutoDateLocator() 83 | ax.xaxis.set_major_locator(locator) 84 | ax.xaxis.set_major_formatter(AutoDateFormatter(locator)) 85 | 86 | return candles 87 | 88 | 89 | if 'ohlc' not in plotting._plot_klass: 90 | plotting._all_kinds.append('ohlc') 91 | plotting._common_kinds.append('ohlc') 92 | plotting._plot_klass['ohlc'] = OhlcPlot 93 | -------------------------------------------------------------------------------- /japandas/tools/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/tools/tests/__init__.py -------------------------------------------------------------------------------- /japandas/tools/tests/test_plotting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | from pandas.tests.plotting.common import TestPlotBase, _check_plot_works 10 | 11 | 12 | class 
TestTools(TestPlotBase): 13 | 14 | def test_to_ohlc(self): 15 | n = 50 16 | idx = pd.date_range(start='2014-10-01 09:00', freq='H', periods=n) 17 | s = pd.Series(np.random.randn(n), index=idx) 18 | _check_plot_works(s.plot, kind='ohlc') 19 | _check_plot_works(s.plot, kind='ohlc', x_compat=True) 20 | 21 | ohlc = s.resample('B').ohlc() 22 | _check_plot_works(ohlc.plot, kind='ohlc') 23 | _check_plot_works(ohlc.plot, kind='ohlc', x_compat=True) 24 | 25 | 26 | if __name__ == '__main__': 27 | import nose 28 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], 29 | exit=False) 30 | -------------------------------------------------------------------------------- /japandas/tseries/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/tseries/__init__.py -------------------------------------------------------------------------------- /japandas/tseries/data/holidays.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/tseries/data/holidays.pkl -------------------------------------------------------------------------------- /japandas/tseries/data/tseholidays.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/tseries/data/tseholidays.pkl -------------------------------------------------------------------------------- /japandas/tseries/holiday.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import datetime 7 | import os 8 | 9 | import pandas.compat as compat 10 | import pandas.tseries.holiday as holiday 11 | 12 | current_dir = os.path.dirname(__file__) 13 | data_path = os.path.join(current_dir, 'data', 'holidays.pkl') 14 | tse_data_path = os.path.join(current_dir, 'data', 'tseholidays.pkl') 15 | 16 | 17 | def _read_rules(path): 18 | if os.path.exists(path): 19 | with open(path, mode='rb') as f: 20 | rules = compat.cPickle.load(f) 21 | elif __name__ != '__main__': 22 | raise ImportError("Unable to load '{0}'".format(path)) 23 | else: 24 | rules = None 25 | return rules 26 | 27 | 28 | rules = _read_rules(data_path) 29 | tse_rules = _read_rules(tse_data_path) 30 | 31 | 32 | class JapaneseHolidayCalendar(holiday.AbstractHolidayCalendar): 33 | rules = rules 34 | 35 | 36 | class TSEHolidayCalendar(holiday.AbstractHolidayCalendar): 37 | rules = tse_rules 38 | 39 | 40 | # register to pandas factory 41 | holiday.register(JapaneseHolidayCalendar) 42 | holiday.register(TSEHolidayCalendar) 43 | 44 | 45 | if __name__ == '__main__': 46 | 47 | # Procedure 48 | # cd japandas/tseries 49 | # Open https://github.com/holiday-jp/holiday_jp 50 | # Download holidays.yaml to data directory 51 | # python holiday.py 52 | 53 | import yaml 54 | 55 | def to_pickle(dates, path): 56 | rules = [] 57 | keys = sorted(compat.iterkeys(dates)) 58 | for dt in keys: 59 | name = dates[dt] 60 | h = holiday.Holiday( 61 | name, dt.year, month=dt.month, day=dt.day) 62 | rules.append(h) 63 | 64 | with open(path, mode='wb') as w: 65 | compat.cPickle.dump(rules, w, protocol=2) 66 | print('pickled {0} data'.format(len(dates))) 67 | 68 | with open(os.path.join('data', 'holidays.yml'), 
mode='rb') as f: 69 | data = yaml.load(f) 70 | # JapaneseHolidayCalendar 71 | to_pickle(data, data_path) 72 | 73 | tse_data = data.copy() 74 | for y in range(1970, 2031): 75 | for m, d in [(1, 1), (1, 2), (1, 3), (12, 31)]: 76 | dt = datetime.date(y, m, d) 77 | if dt not in tse_data: 78 | tse_data[dt] = {'name': '年末年始休業日', 'date': dt} 79 | 80 | # TSEHolidayCalendar 81 | to_pickle(tse_data, tse_data_path) 82 | -------------------------------------------------------------------------------- /japandas/tseries/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sinhrks/japandas/787bc6427cc641e3c7a590f1ab57a2b840f471bc/japandas/tseries/tests/__init__.py -------------------------------------------------------------------------------- /japandas/tseries/tests/test_holiday.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import datetime 7 | import unittest 8 | 9 | import pandas as pd 10 | 11 | import japandas as jpd 12 | 13 | 14 | class TestCalendar(unittest.TestCase): 15 | 16 | def setUp(self): 17 | self.expected = [datetime.datetime(2014, 1, 1, 0, 0), 18 | datetime.datetime(2014, 1, 13, 0, 0), 19 | datetime.datetime(2014, 2, 11, 0, 0), 20 | datetime.datetime(2014, 3, 21, 0, 0), 21 | datetime.datetime(2014, 4, 29, 0, 0), 22 | datetime.datetime(2014, 5, 3, 0, 0), 23 | datetime.datetime(2014, 5, 4, 0, 0), 24 | datetime.datetime(2014, 5, 5, 0, 0), 25 | datetime.datetime(2014, 5, 6, 0, 0), 26 | datetime.datetime(2014, 7, 21, 0, 0), 27 | datetime.datetime(2014, 9, 15, 0, 0), 28 | datetime.datetime(2014, 9, 23, 0, 0), 29 | datetime.datetime(2014, 10, 13, 0, 0), 30 | datetime.datetime(2014, 11, 3, 0, 0), 31 | datetime.datetime(2014, 11, 23, 0, 0), 32 | datetime.datetime(2014, 11, 24, 0, 0), 33 | datetime.datetime(2014, 12, 23, 0, 0)] 34 | 35 | self.start_date = datetime.datetime(2014, 1, 1) 36 | self.end_date = datetime.datetime(2014, 12, 31) 37 | 38 | def test_calendar(self): 39 | 40 | calendar = jpd.JapaneseHolidayCalendar() 41 | holidays_0 = calendar.holidays(self.start_date, 42 | self.end_date) 43 | 44 | holidays_1 = calendar.holidays(self.start_date.strftime('%Y-%m-%d'), 45 | self.end_date.strftime('%Y-%m-%d')) 46 | holidays_2 = calendar.holidays(pd.Timestamp(self.start_date), 47 | pd.Timestamp(self.end_date)) 48 | 49 | self.assertEqual(holidays_0.to_pydatetime().tolist(), self.expected) 50 | self.assertEqual(holidays_1.to_pydatetime().tolist(), self.expected) 51 | self.assertEqual(holidays_2.to_pydatetime().tolist(), self.expected) 52 | 53 | def test_cday(self): 54 | calendar = jpd.JapaneseHolidayCalendar() 55 | cday = pd.offsets.CDay(calendar=calendar) 56 | 57 | dt = datetime.datetime(2014, 1, 12) 58 | self.assertEqual(dt - cday, datetime.datetime(2014, 1, 10)) 59 | self.assertEqual(dt + cday, datetime.datetime(2014, 1, 14)) 60 | 61 | dt = datetime.datetime(2014, 1, 10) 62 | self.assertEqual(dt - cday, datetime.datetime(2014, 1, 9)) 63 | self.assertEqual(dt + cday, datetime.datetime(2014, 1, 14)) 64 | 65 | dt = datetime.datetime(2014, 4, 28) 66 | self.assertEqual(dt - cday, datetime.datetime(2014, 4, 25)) 67 | self.assertEqual(dt + cday, datetime.datetime(2014, 4, 30)) 68 | 69 | dt = datetime.datetime(2014, 5, 3) 70 | self.assertEqual(dt - cday, datetime.datetime(2014, 5, 2)) 71 | self.assertEqual(dt + cday, datetime.datetime(2014, 5, 7)) 72 | 73 | dt = datetime.datetime(2014, 5, 
6) 74 | self.assertEqual(dt - cday, datetime.datetime(2014, 5, 2)) 75 | self.assertEqual(dt + cday, datetime.datetime(2014, 5, 7)) 76 | 77 | def test_factory(self): 78 | calendar = pd.tseries.holiday.get_calendar('JapaneseHolidayCalendar') 79 | self.assertTrue(isinstance(calendar, jpd.JapaneseHolidayCalendar)) 80 | 81 | calendar = pd.tseries.holiday.get_calendar('TSEHolidayCalendar') 82 | self.assertTrue(isinstance(calendar, jpd.TSEHolidayCalendar)) 83 | 84 | def test_holiday_attributes(self): 85 | calendar = jpd.JapaneseHolidayCalendar() 86 | self.assertEqual(calendar.rules[0].name, '元日') 87 | self.assertEqual(calendar.rules[0].year, 1970) 88 | self.assertEqual(calendar.rules[0].month, 1) 89 | self.assertEqual(calendar.rules[0].day, 1) 90 | 91 | def test_jpholiday_holidays(self): 92 | calendar = jpd.JapaneseHolidayCalendar() 93 | holidays = calendar.holidays() 94 | for y in range(1970, 2030): 95 | for m, d in [(1, 1)]: 96 | dt = datetime.date(y, m, d) 97 | self.assertTrue(dt in holidays) 98 | 99 | for e in self.expected: 100 | self.assertTrue(dt in holidays) 101 | 102 | def test_tseholiday_holidays(self): 103 | calendar = jpd.TSEHolidayCalendar() 104 | holidays = calendar.holidays() 105 | for y in range(1970, 2031): 106 | for m, d in [(1, 1), (1, 2), (1, 3), (12, 31)]: 107 | dt = datetime.date(y, m, d) 108 | self.assertTrue(dt in holidays) 109 | 110 | # test initial / final date explicitly 111 | self.assertTrue(datetime.date(1970, 1, 1) in holidays) 112 | self.assertTrue(datetime.date(2030, 12, 31) in holidays) 113 | for e in self.expected: 114 | self.assertTrue(dt in holidays) 115 | 116 | def test_holiday_bug(self): 117 | # GH 42 118 | 119 | for calendar in [jpd.TSEHolidayCalendar(), 120 | jpd.JapaneseHolidayCalendar()]: 121 | holidays = calendar.holidays() 122 | 123 | self.assertFalse(datetime.datetime(1993, 9, 5) in holidays) 124 | self.assertTrue(datetime.datetime(1993, 9, 15) in holidays) 125 | 126 | self.assertFalse(datetime.datetime(2020, 8, 12) in holidays) 127 | # http://www8.cao.go.jp/chosei/shukujitsu/gaiyou.html#tokurei 128 | self.assertFalse(datetime.datetime(2020, 8, 11) in holidays) 129 | 130 | def test_heisei_emperor_abdication_holiday(self): 131 | 132 | for calendar in [jpd.TSEHolidayCalendar(), 133 | jpd.JapaneseHolidayCalendar()]: 134 | holidays = calendar.holidays() 135 | 136 | self.assertTrue(datetime.datetime(2018, 12, 23) in holidays) 137 | self.assertFalse(datetime.datetime(2019, 12, 23) in holidays) 138 | 139 | self.assertFalse(datetime.datetime(2019, 2, 23) in holidays) 140 | self.assertTrue(datetime.datetime(2020, 2, 23) in holidays) 141 | 142 | def test_tokurei(self): 143 | # http://www8.cao.go.jp/chosei/shukujitsu/gaiyou.html#tokurei 144 | 145 | for calendar in [jpd.TSEHolidayCalendar(), 146 | jpd.JapaneseHolidayCalendar()]: 147 | holidays = calendar.holidays() 148 | 149 | # 海の日 150 | self.assertTrue(datetime.datetime(2020, 7, 23) in holidays) 151 | self.assertFalse(datetime.datetime(2020, 7, 20) in holidays) 152 | self.assertTrue(datetime.datetime(2021, 7, 19) in holidays) 153 | 154 | # 山の日 155 | self.assertTrue(datetime.datetime(2020, 8, 10) in holidays) 156 | self.assertFalse(datetime.datetime(2020, 8, 11) in holidays) 157 | self.assertTrue(datetime.datetime(2021, 8, 11) in holidays) 158 | 159 | # スポーツの日 160 | self.assertTrue(datetime.datetime(2020, 7, 24) in holidays) 161 | self.assertFalse(datetime.datetime(2020, 10, 12) in holidays) 162 | self.assertTrue(datetime.datetime(2021, 10, 11) in holidays) 163 | 164 | def test_new_era(self): 165 | 166 | for 
calendar in [jpd.TSEHolidayCalendar(), 167 | jpd.JapaneseHolidayCalendar()]: 168 | holidays = calendar.holidays() 169 | 170 | self.assertFalse(datetime.datetime(2019, 4, 26) in holidays) 171 | self.assertFalse(datetime.datetime(2019, 4, 27) in holidays) 172 | self.assertFalse(datetime.datetime(2019, 4, 28) in holidays) 173 | self.assertTrue(datetime.datetime(2019, 4, 29) in holidays) 174 | self.assertTrue(datetime.datetime(2019, 4, 30) in holidays) 175 | self.assertTrue(datetime.datetime(2019, 5, 1) in holidays) 176 | self.assertTrue(datetime.datetime(2019, 5, 2) in holidays) 177 | self.assertTrue(datetime.datetime(2019, 5, 3) in holidays) 178 | self.assertTrue(datetime.datetime(2019, 5, 4) in holidays) 179 | self.assertTrue(datetime.datetime(2019, 5, 5) in holidays) 180 | self.assertTrue(datetime.datetime(2019, 5, 6) in holidays) 181 | self.assertFalse(datetime.datetime(2019, 5, 7) in holidays) 182 | self.assertFalse(datetime.datetime(2019, 12, 23) in holidays) 183 | 184 | 185 | if __name__ == '__main__': 186 | import nose 187 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], 188 | exit=False) 189 | -------------------------------------------------------------------------------- /japandas/tseries/tests/test_tools.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import unittest 7 | 8 | import pandas as pd 9 | import pandas.compat as compat 10 | import pandas.util.testing as tm 11 | 12 | import japandas as jpd 13 | 14 | 15 | class TestTools(unittest.TestCase): 16 | 17 | def test_to_datetime(self): 18 | cases = {'2011年10月30日': ('2011-10-30', '%Y-%m-%d'), 19 | '2012年12月': ('2012-12', '%Y-%m'), 20 | '12月3日': ('12-03', '%m-%d'), 21 | '2013年9月4日10時15分': ('2013-09-04 10:15', '%Y-%m-%d %H:%M'), 22 | '10月5日13時25分': ('10-05 13:25', '%m-%d %H:%M'), 23 | '2014年3月8日20時45分8秒': ('2014-03-08 20:45:08', '%Y-%m-%d %H:%M:%S'), 24 | '3月8日20時45分8秒': ('03-08 20:45:08', '%m-%d %H:%M:%S'), 25 | '11年10月30日': ('11-10-30', '%y-%m-%d'), 26 | '09年12月': ('09-12', '%y-%m'), 27 | '13年9月4日10時15分': ('13-09-04 10:15', '%y-%m-%d %H:%M'), 28 | '14年3月8日20時45分8秒': ('14-03-08 20:45:08', '%y-%m-%d %H:%M:%S'), 29 | '14年': ('2014-01-01', '%Y-%m-%d'), 30 | '2014年': ('2014-01-01', '%Y-%m-%d') 31 | } 32 | 33 | for k, (s, f) in compat.iteritems(cases): 34 | result = jpd.to_datetime(k) 35 | expected = pd.to_datetime(s, format=f) 36 | self.assertEqual(result, expected) 37 | 38 | result = jpd.to_datetime([k]) 39 | expected = pd.to_datetime([s], format=f) 40 | tm.assert_index_equal(result, expected) 41 | 42 | result = jpd.to_datetime([k], box=False) 43 | expected = pd.to_datetime([s], format=f, box=False) 44 | tm.assert_numpy_array_equal(result, expected) 45 | 46 | def test_date_range(self): 47 | result = jpd.date_range(start='2013年11月15日', end='2014年12月18日', freq='D') 48 | expected = pd.date_range(start='2013-11-15', end='2014-12-18', freq='D') 49 | tm.assert_index_equal(result, expected) 50 | 51 | result = jpd.date_range(start='2014年1月15日09時25分', end='2014年1月18日10時45分', freq='M') 52 | expected = pd.date_range(start='2014-01-15 09:25', end='2014-01-18 10:45', freq='M') 53 | tm.assert_index_equal(result, expected) 54 | 55 | def test_period_range(self): 56 | result = jpd.period_range(start='2013年11月15日', end='2014年12月18日', freq='D') 57 | expected = pd.period_range(start='2013-11-15', end='2014-12-18', freq='D') 58 | tm.assert_index_equal(result, expected) 59 | 60 | result = 
jpd.period_range(start='2014年1月15日09時25分', end='2014年1月18日10時45分', freq='M') 61 | expected = pd.period_range(start='2014-01-15 09:25', end='2014-01-18 10:45', freq='M') 62 | tm.assert_index_equal(result, expected) 63 | 64 | 65 | if __name__ == '__main__': 66 | import nose 67 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], 68 | exit=False) 69 | -------------------------------------------------------------------------------- /japandas/tseries/tools.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from __future__ import unicode_literals 5 | 6 | import numpy as np 7 | import pandas as pd 8 | import pandas.compat as compat 9 | 10 | 11 | _formats = ['%Y年', '%Y年%m月%d日', '%Y年%m月', 12 | '%Y年%m月%d日%H時%M分', '%Y年%m月%d日%H時%M分%S秒', 13 | 14 | '%y年', '%y年%m月%d日', '%y年%m月', 15 | '%y年%m月%d日%H時%M分', '%y年%m月%d日%H時%M分%S秒', 16 | 17 | '%m月%d日', '%m月%d日%H時%M分', '%m月%d日%H時%M分%S秒'] 18 | 19 | 20 | def to_datetime(arg, box=True, format=None, **kwargs): 21 | 22 | try: 23 | result = pd.to_datetime(arg, box=box, format=format, **kwargs) 24 | 25 | if format is not None: 26 | # if format is specified, return pd.to_datetime as it is 27 | return result 28 | 29 | if result is None: 30 | return result 31 | elif isinstance(result, (pd.Timestamp, pd.DatetimeIndex)): 32 | return result 33 | except ValueError: 34 | # as of pandas 0.17, to_datetime raises when parsing fails 35 | result = arg 36 | 37 | def _convert_listlike(arg, box): 38 | for format in _formats: 39 | try: 40 | return pd.to_datetime(arg, box=box, format=format, **kwargs) 41 | except ValueError: 42 | pass 43 | return arg 44 | 45 | if isinstance(result, compat.string_types): 46 | arg = np.array([arg], dtype='O') 47 | result = _convert_listlike(arg, box) 48 | return result[0] 49 | 50 | if isinstance(result, pd.Series): 51 | values = _convert_listlike(arg.values, False) 52 | return pd.Series(values, index=arg.index, name=arg.name) 53 | elif pd.api.types.is_list_like(result): 54 | return _convert_listlike(result, box) 55 | return result 56 | 57 | 58 | def date_range(start=None, end=None, **kwargs): 59 | start = to_datetime(start) 60 | end = to_datetime(end) 61 | return pd.date_range(start=start, end=end, **kwargs) 62 | 63 | 64 | def period_range(start=None, end=None, **kwargs): 65 | start = to_datetime(start) 66 | end = to_datetime(end) 67 | return pd.period_range(start=start, end=end, **kwargs) 68 | 69 | 70 | to_datetime.__doc__ = pd.to_datetime.__doc__ 71 | date_range.__doc__ = pd.date_range.__doc__ 72 | period_range.__doc__ = pd.period_range.__doc__ 73 | 74 | 75 | """ 76 | try: 77 | import pandas.tseries.timedeltas as timedeltas 78 | abbrevs = [('d' ,'days|d|day|日'), 79 | ('h' ,'hours|h|hour|時間'), 80 | ('m' ,'minutes|min|minute|m|分'), 81 | ('s' ,'seconds|sec|second|s|秒'), 82 | ('ms','milliseconds|milli|millis|millisecond|ms'), 83 | ('us','microseconds|micro|micros|microsecond|us'), 84 | ('ns','nanoseconds|nano|nanos|nanosecond|ns')] 85 | timedeltas.abbrevs = abbrevs 86 | except Exception: 87 | pass 88 | """ 89 | -------------------------------------------------------------------------------- /japandas/version.py: -------------------------------------------------------------------------------- 1 | version = '0.5.1' 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas >= 0.20.0 2 | pandas-datareader >= 0.7.0 3 | 
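# matplotlib and mpl_finance (listed in requirements_test.txt) are additionally
# needed for the candlestick plotting support in japandas.tools.plotting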
-------------------------------------------------------------------------------- /requirements_test.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | html5lib 3 | beautifulsoup4 4 | pandas >= 0.20.0 5 | pandas-datareader >= 0.7.0 6 | IPython>=2.3 7 | matplotlib>=1.4.0 8 | mpl_finance 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/env python 3 | 4 | import codecs 5 | import os 6 | from setuptools import setup, find_packages 7 | 8 | PACKAGE = 'japandas' 9 | README = 'README.rst' 10 | REQUIREMENTS = 'requirements.txt' 11 | 12 | VERSION = '0.6.0.dev0' 13 | 14 | def read(fname): 15 | # file must be read as utf-8 in py3 to avoid to be bytes 16 | return codecs.open(os.path.join(os.path.dirname(__file__), fname), encoding='utf-8').read() 17 | 18 | def write_version_py(filename=None): 19 | cnt = """\ 20 | version = '%s' 21 | """ 22 | a = open(filename, 'w') 23 | try: 24 | a.write(cnt % VERSION) 25 | finally: 26 | a.close() 27 | 28 | version_file = os.path.join(os.path.dirname(__file__), PACKAGE, 'version.py') 29 | write_version_py(filename=version_file) 30 | 31 | setup(name=PACKAGE, 32 | version=VERSION, 33 | description='pandas japanese extension', 34 | long_description=read(README), 35 | author='sinhrks', 36 | author_email='sinhrks@gmail.com', 37 | url='http://japandas.readthedocs.org/en/stable', 38 | license = 'BSD', 39 | packages=find_packages(), 40 | package_data = {'japandas.tseries': ['data/*.pkl']}, 41 | install_requires=list(read(REQUIREMENTS).splitlines()) 42 | ) 43 | --------------------------------------------------------------------------------
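A minimal end-to-end sketch of the pieces above (illustrative only — it assumes the package has been installed into the current environment, e.g. with ``pip install -e .``, so that ``setup.py`` has rewritten ``japandas/version.py`` and ``japandas/__init__.py`` re-exports it):

.. code-block:: python

    >>> import japandas as jpd
    >>> jpd.__version__      # written by setup.py's write_version_py(), e.g. '0.6.0.dev0'

    # importing japandas also registers the pandas extensions shown earlier
    >>> import pandas as pd
    >>> pd.Series([u'ｱｲｳｴｵ']).str.h2z()
    0    アイウエオ
    dtype: object

    >>> jpd.to_datetime('2014年11月30日')
    Timestamp('2014-11-30 00:00:00')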