├── .coveragerc ├── .gitattributes ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── LICENSES ├── MSGPACK_LICENSE ├── MSGPACK_NUMPY_LICENSE ├── NUMPY_LICENSE ├── OTHER ├── PSF_LICENSE ├── SCIPY_LICENSE ├── SIX └── ULTRAJSON_LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── RELEASE.md ├── bench ├── alignment.py ├── bench_dense_to_sparse.py ├── bench_get_put_value.py ├── bench_groupby.py ├── bench_join_panel.py ├── bench_khash_dict.py ├── bench_merge.R ├── bench_merge.py ├── bench_merge_sqlite.py ├── bench_pivot.R ├── bench_pivot.py ├── bench_sparse.py ├── bench_take_indexing.py ├── bench_unique.py ├── bench_with_subset.R ├── bench_with_subset.py ├── better_unique.py ├── duplicated.R ├── io_roundtrip.py ├── larry.py ├── serialize.py ├── test.py ├── zoo_bench.R └── zoo_bench.py ├── ci ├── README.txt ├── after_script.sh ├── before_install.sh ├── build_docs.sh ├── cron │ └── go_doc.sh ├── install.sh ├── ironcache │ ├── get.py │ └── put.py ├── prep_ccache.sh ├── print_skipped.py ├── print_versions.py ├── requirements-2.6.txt ├── requirements-2.7.txt ├── requirements-2.7_LOCALE.txt ├── requirements-2.7_NUMPY_DEV_1_8_x.txt ├── requirements-2.7_NUMPY_DEV_master.txt ├── requirements-3.2.txt ├── requirements-3.3.txt ├── requirements-3.4.txt ├── script.sh ├── speedpack │ ├── Vagrantfile │ ├── build.sh │ └── nginx │ │ └── nginx.conf.template └── submit_ccache.sh ├── doc ├── README.rst ├── _templates │ └── autosummary │ │ └── class.rst ├── data │ ├── baseball.csv │ ├── fx_prices │ ├── iris.data │ ├── mindex_ex.csv │ ├── test.xls │ └── tips.csv ├── make.py ├── plots │ └── stats │ │ ├── moment_plots.py │ │ ├── moments_ewma.py │ │ ├── moments_ewmvol.py │ │ ├── moments_expw.py │ │ ├── moments_rolling.py │ │ └── moments_rolling_binary.py ├── source │ ├── 10min.rst │ ├── _static │ │ ├── banklist.html │ │ ├── df_repr_truncated.png │ │ ├── eval-perf-small.png │ │ ├── eval-perf.png │ │ ├── legacy_0.10.h5 │ │ ├── query-perf-small.png │ │ ├── query-perf.png │ │ ├── 
stub │ │ ├── trunc_after.png │ │ └── trunc_before.png │ ├── api.rst │ ├── basics.rst │ ├── categorical.rst │ ├── comparison_with_r.rst │ ├── comparison_with_sql.rst │ ├── computation.rst │ ├── conf.py │ ├── contributing.rst │ ├── cookbook.rst │ ├── dsintro.rst │ ├── ecosystem.rst │ ├── enhancingperf.rst │ ├── faq.rst │ ├── gotchas.rst │ ├── groupby.rst │ ├── index.rst.template │ ├── indexing.rst │ ├── install.rst │ ├── io.rst │ ├── merging.rst │ ├── missing_data.rst │ ├── options.rst │ ├── overview.rst │ ├── r_interface.rst │ ├── release.rst │ ├── remote_data.rst │ ├── reshaping.rst │ ├── rplot.rst │ ├── sparse.rst │ ├── themes │ │ └── nature_with_gtoc │ │ │ ├── layout.html │ │ │ ├── static │ │ │ └── nature.css_t │ │ │ └── theme.conf │ ├── timeseries.rst │ ├── tutorials.rst │ ├── v0.10.0.txt │ ├── v0.10.1.txt │ ├── v0.11.0.txt │ ├── v0.12.0.txt │ ├── v0.13.0.txt │ ├── v0.13.1.txt │ ├── v0.14.0.txt │ ├── v0.14.1.txt │ ├── v0.15.0.txt │ ├── v0.4.x.txt │ ├── v0.5.0.txt │ ├── v0.6.0.txt │ ├── v0.6.1.txt │ ├── v0.7.0.txt │ ├── v0.7.1.txt │ ├── v0.7.2.txt │ ├── v0.7.3.txt │ ├── v0.8.0.txt │ ├── v0.8.1.txt │ ├── v0.9.0.txt │ ├── v0.9.1.txt │ ├── visualization.rst │ └── whatsnew.rst └── sphinxext │ ├── README.rst │ ├── ipython_sphinxext │ ├── __init__.py │ ├── ipython_console_highlighting.py │ └── ipython_directive.py │ └── numpydoc │ ├── LICENSE.txt │ ├── README.rst │ ├── __init__.py │ ├── comment_eater.py │ ├── compiler_unparse.py │ ├── docscrape.py │ ├── docscrape_sphinx.py │ ├── linkcode.py │ ├── numpydoc.py │ ├── phantom_import.py │ ├── plot_directive.py │ ├── tests │ ├── test_docscrape.py │ ├── test_linkcode.py │ ├── test_phantom_import.py │ ├── test_plot_directive.py │ └── test_traitsdoc.py │ └── traitsdoc.py ├── examples ├── data │ └── SOURCES ├── finance.py └── regressions.py ├── ez_setup.py ├── fake_pyrex └── Pyrex │ ├── Distutils │ ├── __init__.py │ └── build_ext.py │ └── __init__.py ├── pandas ├── __init__.py ├── algos.pyx ├── compat │ ├── __init__.py │ ├── 
chainmap.py │ ├── chainmap_impl.py │ ├── openpyxl_compat.py │ └── pickle_compat.py ├── computation │ ├── __init__.py │ ├── align.py │ ├── api.py │ ├── common.py │ ├── engines.py │ ├── eval.py │ ├── expr.py │ ├── expressions.py │ ├── ops.py │ ├── pytables.py │ ├── scope.py │ └── tests │ │ ├── __init__.py │ │ └── test_eval.py ├── core │ ├── __init__.py │ ├── algorithms.py │ ├── api.py │ ├── array.py │ ├── base.py │ ├── categorical.py │ ├── common.py │ ├── config.py │ ├── config_init.py │ ├── datetools.py │ ├── format.py │ ├── frame.py │ ├── generic.py │ ├── groupby.py │ ├── index.py │ ├── indexing.py │ ├── internals.py │ ├── matrix.py │ ├── nanops.py │ ├── ops.py │ ├── panel.py │ ├── panel4d.py │ ├── panelnd.py │ ├── reshape.py │ ├── series.py │ ├── sparse.py │ └── strings.py ├── hashtable.pxd ├── hashtable.pyx ├── index.pyx ├── info.py ├── io │ ├── __init__.py │ ├── api.py │ ├── auth.py │ ├── clipboard.py │ ├── common.py │ ├── data.py │ ├── date_converters.py │ ├── excel.py │ ├── ga.py │ ├── gbq.py │ ├── html.py │ ├── json.py │ ├── packers.py │ ├── parsers.py │ ├── pickle.py │ ├── pytables.py │ ├── sql.py │ ├── stata.py │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ ├── banklist.csv │ │ │ ├── banklist.html │ │ │ ├── computer_sales_page.html │ │ │ ├── gbq_fake_job.txt │ │ │ ├── html_encoding │ │ │ │ ├── chinese_utf-16.html │ │ │ │ ├── chinese_utf-32.html │ │ │ │ ├── chinese_utf-8.html │ │ │ │ └── letz_latin1.html │ │ │ ├── iris.csv │ │ │ ├── legacy_hdf │ │ │ │ ├── legacy.h5 │ │ │ │ ├── legacy_0.10.h5 │ │ │ │ ├── legacy_table.h5 │ │ │ │ ├── legacy_table_0.11.h5 │ │ │ │ ├── pytables_native.h5 │ │ │ │ └── pytables_native2.h5 │ │ │ ├── legacy_pickle │ │ │ │ ├── 0.10.1 │ │ │ │ │ ├── AMD64_windows_2.7.3.pickle │ │ │ │ │ └── x86_64_linux_2.7.3.pickle │ │ │ │ ├── 0.11.0 │ │ │ │ │ ├── 0.11.0_x86_64_linux_3.3.0.pickle │ │ │ │ │ ├── x86_64_linux_2.7.3.pickle │ │ │ │ │ └── x86_64_linux_3.3.0.pickle │ │ │ │ ├── 0.12.0 │ │ │ │ │ ├── 0.12.0_AMD64_windows_2.7.3.pickle │ │ │ │ │ 
└── 0.12.0_x86_64_linux_2.7.3.pickle │ │ │ │ ├── 0.13.0 │ │ │ │ │ ├── 0.13.0_AMD64_windows_2.7.3.pickle │ │ │ │ │ ├── 0.13.0_i686_linux_2.6.5.pickle │ │ │ │ │ ├── 0.13.0_i686_linux_2.7.3.pickle │ │ │ │ │ ├── 0.13.0_i686_linux_3.2.3.pickle │ │ │ │ │ ├── 0.13.0_x86_64_darwin_2.7.5.pickle │ │ │ │ │ ├── 0.13.0_x86_64_darwin_2.7.6.pickle │ │ │ │ │ ├── 0.13.0_x86_64_linux_2.7.3.pickle │ │ │ │ │ ├── 0.13.0_x86_64_linux_2.7.8.pickle │ │ │ │ │ └── 0.13.0_x86_64_linux_3.3.0.pickle │ │ │ │ ├── 0.14.0 │ │ │ │ │ ├── 0.14.0_x86_64_darwin_2.7.6.pickle │ │ │ │ │ └── 0.14.0_x86_64_linux_2.7.8.pickle │ │ │ │ └── 0.14.1 │ │ │ │ │ └── 0.14.1_x86_64_linux_2.7.8.pickle │ │ │ ├── macau.html │ │ │ ├── nyse_wsj.html │ │ │ ├── salary.table │ │ │ ├── spam.html │ │ │ ├── stata1_114.dta │ │ │ ├── stata1_117.dta │ │ │ ├── stata1_encoding.dta │ │ │ ├── stata2_113.dta │ │ │ ├── stata2_114.dta │ │ │ ├── stata2_115.dta │ │ │ ├── stata2_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats │ │ │ ├── stata2_117.dta │ │ │ ├── stata3.csv │ │ │ ├── stata3_113.dta │ │ │ ├── stata3_114.dta │ │ │ ├── stata3_115.dta │ │ │ ├── stata3_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats │ │ │ ├── stata3_117.dta │ │ │ ├── stata4_113.dta │ │ │ ├── stata4_114.dta │ │ │ ├── stata4_115.dta │ │ │ ├── stata4_115.dta~1dc157c... 
Added additional data files for testing alternative Stata file formats │ │ │ ├── stata4_117.dta │ │ │ ├── stata5.csv │ │ │ ├── stata5_113.dta │ │ │ ├── stata5_114.dta │ │ │ ├── stata5_115.dta │ │ │ ├── stata5_117.dta │ │ │ ├── stata6.csv │ │ │ ├── stata6_113.dta │ │ │ ├── stata6_114.dta │ │ │ ├── stata6_115.dta │ │ │ ├── stata6_117.dta │ │ │ ├── stata7_115.dta │ │ │ ├── stata7_117.dta │ │ │ ├── test.xls │ │ │ ├── test.xlsm │ │ │ ├── test.xlsx │ │ │ ├── test1.csv │ │ │ ├── test2.csv │ │ │ ├── test2.xls │ │ │ ├── test2.xlsx │ │ │ ├── test3.xls │ │ │ ├── test_types.xls │ │ │ ├── test_types.xlsx │ │ │ ├── times_1900.xls │ │ │ ├── times_1904.xls │ │ │ ├── tips.csv │ │ │ ├── unicode_series.csv │ │ │ ├── utf16_ex.txt │ │ │ ├── valid_markup.html │ │ │ ├── wikipedia_states.html │ │ │ ├── yahoo_options1.html │ │ │ └── yahoo_options2.html │ │ ├── generate_legacy_pickles.py │ │ ├── test_clipboard.py │ │ ├── test_cparser.py │ │ ├── test_data.py │ │ ├── test_date_converters.py │ │ ├── test_excel.py │ │ ├── test_ga.py │ │ ├── test_gbq.py │ │ ├── test_html.py │ │ ├── test_json │ │ │ ├── __init__.py │ │ │ ├── data │ │ │ │ ├── tsframe_iso_v012.json │ │ │ │ └── tsframe_v012.json │ │ │ ├── test_pandas.py │ │ │ └── test_ujson.py │ │ ├── test_json_norm.py │ │ ├── test_packers.py │ │ ├── test_parsers.py │ │ ├── test_pickle.py │ │ ├── test_pytables.py │ │ ├── test_sql.py │ │ ├── test_stata.py │ │ └── test_wb.py │ └── wb.py ├── lib.pyx ├── msgpack.pyx ├── parser.pyx ├── rpy │ ├── __init__.py │ ├── base.py │ ├── common.py │ ├── mass.py │ ├── tests │ │ ├── __init__.py │ │ └── test_common.py │ └── vars.py ├── sandbox │ ├── __init__.py │ └── qtpandas.py ├── sparse │ ├── __init__.py │ ├── api.py │ ├── array.py │ ├── frame.py │ ├── list.py │ ├── panel.py │ ├── series.py │ └── tests │ │ ├── __init__.py │ │ ├── test_array.py │ │ ├── test_libsparse.py │ │ ├── test_list.py │ │ └── test_sparse.py ├── src │ ├── datetime.pxd │ ├── datetime │ │ ├── np_datetime.c │ │ ├── np_datetime.h │ │ ├── 
np_datetime_strings.c │ │ └── np_datetime_strings.h │ ├── datetime_helper.h │ ├── generate_code.py │ ├── generated.pyx │ ├── headers │ │ ├── math.h │ │ ├── ms_inttypes.h │ │ ├── ms_stdint.h │ │ ├── portable.h │ │ └── stdint.h │ ├── helper.h │ ├── inference.pyx │ ├── join.pyx │ ├── khash.pxd │ ├── klib │ │ ├── khash.h │ │ ├── khash_python.h │ │ ├── ktypes.h │ │ └── kvec.h │ ├── msgpack │ │ ├── pack.h │ │ ├── pack_template.h │ │ ├── sysdep.h │ │ ├── unpack.h │ │ ├── unpack_define.h │ │ └── unpack_template.h │ ├── numpy.pxd │ ├── numpy_helper.h │ ├── offsets.pyx │ ├── parse_helper.h │ ├── parser │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── io.c │ │ ├── io.h │ │ ├── tokenizer.c │ │ └── tokenizer.h │ ├── period.c │ ├── period.h │ ├── properties.pyx │ ├── reduce.pyx │ ├── skiplist.h │ ├── skiplist.pxd │ ├── skiplist.pyx │ ├── sparse.pyx │ ├── testing.pyx │ ├── ujson │ │ ├── lib │ │ │ ├── ultrajson.h │ │ │ ├── ultrajsondec.c │ │ │ └── ultrajsonenc.c │ │ └── python │ │ │ ├── JSONtoObj.c │ │ │ ├── objToJSON.c │ │ │ ├── py_defines.h │ │ │ ├── ujson.c │ │ │ └── version.h │ └── util.pxd ├── stats │ ├── __init__.py │ ├── api.py │ ├── common.py │ ├── fama_macbeth.py │ ├── interface.py │ ├── math.py │ ├── misc.py │ ├── moments.py │ ├── ols.py │ ├── plm.py │ ├── tests │ │ ├── __init__.py │ │ ├── common.py │ │ ├── test_fama_macbeth.py │ │ ├── test_math.py │ │ ├── test_moments.py │ │ ├── test_ols.py │ │ └── test_var.py │ └── var.py ├── tests │ ├── __init__.py │ ├── data │ │ ├── iris.csv │ │ ├── mindex_073.pickle │ │ ├── multiindex_v1.pickle │ │ ├── tips.csv │ │ └── unicode_series.csv │ ├── test_algos.py │ ├── test_base.py │ ├── test_categorical.py │ ├── test_common.py │ ├── test_compat.py │ ├── test_config.py │ ├── test_expressions.py │ ├── test_format.py │ ├── test_frame.py │ ├── test_generic.py │ ├── test_graphics.py │ ├── test_groupby.py │ ├── test_index.py │ ├── test_indexing.py │ ├── test_internals.py │ ├── test_msgpack │ │ ├── __init__.py │ │ ├── test_buffer.py │ │ ├── 
test_case.py │ │ ├── test_except.py │ │ ├── test_format.py │ │ ├── test_obj.py │ │ ├── test_pack.py │ │ ├── test_read_size.py │ │ ├── test_seq.py │ │ ├── test_sequnpack.py │ │ ├── test_subtype.py │ │ └── test_unpack_raw.py │ ├── test_multilevel.py │ ├── test_nanops.py │ ├── test_panel.py │ ├── test_panel4d.py │ ├── test_panelnd.py │ ├── test_reshape.py │ ├── test_rplot.py │ ├── test_series.py │ ├── test_stats.py │ ├── test_strings.py │ ├── test_testing.py │ └── test_tseries.py ├── tools │ ├── __init__.py │ ├── describe.py │ ├── merge.py │ ├── pivot.py │ ├── plotting.py │ ├── rplot.py │ ├── tests │ │ ├── __init__.py │ │ ├── cut_data.csv │ │ ├── test_merge.py │ │ ├── test_pivot.py │ │ ├── test_tile.py │ │ ├── test_tools.py │ │ └── test_util.py │ ├── tile.py │ └── util.py ├── tseries │ ├── __init__.py │ ├── api.py │ ├── common.py │ ├── converter.py │ ├── frequencies.py │ ├── holiday.py │ ├── index.py │ ├── interval.py │ ├── offsets.py │ ├── period.py │ ├── plotting.py │ ├── resample.py │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ ├── daterange_073.pickle │ │ │ ├── frame.pickle │ │ │ ├── series.pickle │ │ │ └── series_daterange0.pickle │ │ ├── test_converter.py │ │ ├── test_daterange.py │ │ ├── test_frequencies.py │ │ ├── test_holiday.py │ │ ├── test_offsets.py │ │ ├── test_period.py │ │ ├── test_plotting.py │ │ ├── test_resample.py │ │ ├── test_timedeltas.py │ │ ├── test_timeseries.py │ │ ├── test_timeseries_legacy.py │ │ ├── test_timezones.py │ │ ├── test_tslib.py │ │ └── test_util.py │ ├── timedeltas.py │ ├── tools.py │ └── util.py ├── tslib.pxd ├── tslib.pyx └── util │ ├── __init__.py │ ├── clipboard.py │ ├── decorators.py │ ├── misc.py │ ├── print_versions.py │ ├── terminal.py │ └── testing.py ├── scripts ├── bench_join.R ├── bench_join.py ├── bench_join_multi.py ├── bench_refactor.py ├── boxplot_test.py ├── count_code.sh ├── faster_xs.py ├── file_sizes.py ├── find_commits_touching_func.py ├── find_undoc_args.py ├── gen_release_notes.py ├── git-mrb ├── 
git_code_churn.py ├── groupby_sample.py ├── groupby_speed.py ├── groupby_test.py ├── hdfstore_panel_perf.py ├── json_manip.py ├── leak.py ├── parser_magic.py ├── preepoch_test.py ├── pypistats.py ├── roll_median_leak.py ├── runtests.py ├── test_py25.bat ├── test_py26.bat ├── test_py27.bat ├── test_py31.bat ├── test_py32.bat ├── testmed.py ├── touchup_gh_issues.py ├── use_build_cache.py ├── winbuild_py25.bat ├── winbuild_py27.bat └── windows_builder │ ├── build_26-32.bat │ ├── build_26-64.bat │ ├── build_27-32.bat │ ├── build_27-64.bat │ ├── build_33-32.bat │ ├── build_33-64.bat │ ├── build_34-32.bat │ ├── build_34-64.bat │ ├── check_and_build.bat │ ├── check_and_build.py │ └── readme.txt ├── setup.py ├── test.sh ├── test_fast.sh ├── test_multi.sh ├── test_perf.sh ├── test_rebuild.sh ├── tox.ini └── vb_suite ├── .gitignore ├── attrs_caching.py ├── binary_ops.py ├── ctors.py ├── eval.py ├── frame_ctor.py ├── frame_methods.py ├── generate_rst_files.py ├── groupby.py ├── hdfstore_bench.py ├── index_object.py ├── indexing.py ├── inference.py ├── io_bench.py ├── join_merge.py ├── make.py ├── measure_memory_consumption.py ├── miscellaneous.py ├── packers.py ├── pandas_vb_common.py ├── panel_ctor.py ├── panel_methods.py ├── parser_vb.py ├── perf_HEAD.py ├── plotting.py ├── reindex.py ├── replace.py ├── reshape.py ├── run_suite.py ├── series_methods.py ├── source ├── _static │ └── stub ├── conf.py └── themes │ └── agogo │ ├── layout.html │ ├── static │ ├── agogo.css_t │ ├── bgfooter.png │ └── bgtop.png │ └── theme.conf ├── sparse.py ├── stat_ops.py ├── strings.py ├── suite.py ├── test.py ├── test_perf.py ├── timedelta.py └── timeseries.py /.coveragerc: -------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | [run] 3 | branch = False 4 | 5 | [report] 6 | # Regexes for lines to exclude from consideration 7 | exclude_lines = 8 | # Have to re-enable the standard pragma 9 | pragma: no cover 10 | 11 | # Don't 
complain about missing debug-only code: 12 | def __repr__ 13 | if self\.debug 14 | 15 | # Don't complain if tests don't hit defensive assertion code: 16 | raise AssertionError 17 | raise NotImplementedError 18 | 19 | # Don't complain if non-runnable code isn't run: 20 | if 0: 21 | if __name__ == .__main__.: 22 | 23 | ignore_errors = False 24 | 25 | [html] 26 | directory = coverage_html_report -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | # enforce text on certain files 3 | *.py text 4 | *.pyx text 5 | *.pyd text 6 | *.c text 7 | *.h text 8 | *.html text 9 | *.csv text 10 | *.json text 11 | *.pickle binary 12 | *.h5 binary 13 | *.dta binary 14 | *.xls binary 15 | *.xlsx binary 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ######################################### 2 | # Editor temporary/working/backup files # 3 | .#* 4 | *\#*\# 5 | [#]*# 6 | *~ 7 | *$ 8 | *.bak 9 | *flymake* 10 | *.kdev4 11 | *.log 12 | *.swp 13 | *.pdb 14 | .project 15 | .pydevproject 16 | .settings 17 | .idea 18 | .vagrant 19 | .noseids 20 | 21 | # Compiled source # 22 | ################### 23 | *.a 24 | *.com 25 | *.class 26 | *.dll 27 | *.exe 28 | *.o 29 | *.py[ocd] 30 | *.so 31 | .build_cache_dir 32 | MANIFEST 33 | 34 | # Python files # 35 | ################ 36 | # setup.py working directory 37 | build 38 | # sphinx build directory 39 | doc/_build 40 | # setup.py dist directory 41 | dist 42 | # Egg metadata 43 | *.egg-info 44 | # tox testing tool 45 | .tox 46 | # rope 47 | .ropeproject 48 | # wheel files 49 | *.whl 50 | **/wheelhouse/* 51 | # coverage 52 | .coverage 53 | 54 | # OS generated files # 55 | ###################### 56 | .directory 57 | .gdb_history 58 | .DS_Store? 59 | ehthumbs.db 60 | Icon? 
61 | Thumbs.db 62 | 63 | # Data files # 64 | ############## 65 | *.dta 66 | *.h5 67 | pandas/io/*.dat 68 | pandas/io/*.json 69 | scikits 70 | 71 | # Generated Sources # 72 | ##################### 73 | !skts.c 74 | !np_datetime.c 75 | !np_datetime_strings.c 76 | *.c 77 | *.cpp 78 | 79 | # Things specific to this project # 80 | ################################### 81 | pandas/version.py 82 | 83 | # Documentation generated files # 84 | ################################# 85 | doc/source/generated 86 | doc/source/_static 87 | doc/source/vbench 88 | doc/source/vbench.rst 89 | doc/source/index.rst 90 | doc/build/html/index.html 91 | # Windows specific leftover: 92 | doc/tmp.sv 93 | -------------------------------------------------------------------------------- /LICENSES/MSGPACK_LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2008-2011 INADA Naoki 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /LICENSES/MSGPACK_NUMPY_LICENSE: -------------------------------------------------------------------------------- 1 | .. -*- rst -*- 2 | 3 | License 4 | ======= 5 | 6 | Copyright (c) 2013, Lev Givon. 7 | All rights reserved. 
8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | 13 | * Redistributions of source code must retain the above copyright 14 | notice, this list of conditions and the following disclaimer. 15 | * Redistributions in binary form must reproduce the above 16 | copyright notice, this list of conditions and the following 17 | disclaimer in the documentation and/or other materials provided 18 | with the distribution. 19 | * Neither the name of Lev Givon nor the names of any 20 | contributors may be used to endorse or promote products derived 21 | from this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 29 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 30 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 31 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 32 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 33 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 | -------------------------------------------------------------------------------- /LICENSES/NUMPY_LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2005-2011, NumPy Developers. 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of the NumPy Developers nor the names of any 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /LICENSES/SCIPY_LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2001, 2002 Enthought, Inc. 2 | All rights reserved. 3 | 4 | Copyright (c) 2003-2012 SciPy Developers. 5 | All rights reserved. 
6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | a. Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | b. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | c. Neither the name of Enthought nor the names of the SciPy Developers 16 | may be used to endorse or promote products derived from this software 17 | without specific prior written permission. 18 | 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 30 | DAMAGE. 
31 | 32 | -------------------------------------------------------------------------------- /LICENSES/SIX: -------------------------------------------------------------------------------- 1 | six license (substantial portions used in the python 3 compatibility module) 2 | =========================================================================== 3 | Copyright (c) 2010-2013 Benjamin Peterson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | # 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | # 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LICENSES/ULTRAJSON_LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the ESN Social Software AB nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | 26 | 27 | Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) 28 | http://code.google.com/p/stringencoders/ 29 | Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. 30 | 31 | Numeric decoder derived from from TCL library 32 | http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms 33 | * Copyright (c) 1988-1993 The Regents of the University of California. 
34 | * Copyright (c) 1994 Sun Microsystems, Inc. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include LICENSE 3 | include RELEASE.md 4 | include README.rst 5 | include setup.py 6 | 7 | graft doc 8 | prune doc/build 9 | 10 | graft examples 11 | graft pandas 12 | 13 | global-exclude *.so 14 | global-exclude *.pyd 15 | global-exclude *.pyc 16 | global-exclude *~ 17 | global-exclude \#* 18 | global-exclude .git* 19 | global-exclude .DS_Store 20 | global-exclude *.png 21 | 22 | # include examples/data/* 23 | # recursive-include examples *.py 24 | # recursive-include doc/source * 25 | # recursive-include doc/sphinxext * 26 | # recursive-include LICENSES * 27 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | tseries: pandas/lib.pyx pandas/tslib.pyx pandas/hashtable.pyx 2 | python setup.py build_ext --inplace 3 | 4 | .PHONY : develop build clean clean_pyc tseries doc 5 | 6 | clean: 7 | -python setup.py clean 8 | 9 | clean_pyc: 10 | -find . -name '*.py[co]' -exec rm {} \; 11 | 12 | sparse: pandas/src/sparse.pyx 13 | python setup.py build_ext --inplace 14 | 15 | build: clean_pyc 16 | python setup.py build_ext --inplace 17 | 18 | develop: build 19 | -python setup.py develop 20 | 21 | doc: 22 | -rm -rf doc/build doc/source/generated 23 | cd doc; \ 24 | python make.py clean; \ 25 | python make.py html 26 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | Release Notes 2 | ============= 3 | 4 | The list of changes to pandas between each release can be found 5 | [here](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html). 
For full 6 | details, see the commit logs at http://github.com/pydata/pandas. 7 | -------------------------------------------------------------------------------- /bench/alignment.py: -------------------------------------------------------------------------------- 1 | # Setup 2 | from pandas.compat import range, lrange 3 | import numpy as np 4 | import pandas 5 | import la 6 | N = 1000 7 | K = 50 8 | arr1 = np.random.randn(N, K) 9 | arr2 = np.random.randn(N, K) 10 | idx1 = lrange(N) 11 | idx2 = lrange(K) 12 | 13 | # pandas 14 | dma1 = pandas.DataFrame(arr1, idx1, idx2) 15 | dma2 = pandas.DataFrame(arr2, idx1[::-1], idx2[::-1]) 16 | 17 | # larry 18 | lar1 = la.larry(arr1, [idx1, idx2]) 19 | lar2 = la.larry(arr2, [idx1[::-1], idx2[::-1]]) 20 | 21 | for i in range(100): 22 | result = lar1 + lar2 23 | -------------------------------------------------------------------------------- /bench/bench_dense_to_sparse.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | 3 | K = 100 4 | N = 100000 5 | rng = DatetimeIndex('1/1/2000', periods=N, offset=datetools.Minute()) 6 | 7 | rng2 = np.asarray(rng).astype('M8[us]').astype('i8') 8 | 9 | series = {} 10 | for i in range(1, K + 1): 11 | data = np.random.randn(N)[:-i] 12 | this_rng = rng2[:-i] 13 | data[100:] = np.nan 14 | series[i] = SparseSeries(data, index=this_rng) 15 | -------------------------------------------------------------------------------- /bench/bench_get_put_value.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | from pandas.util.testing import rands 3 | from pandas.compat import range 4 | 5 | N = 1000 6 | K = 50 7 | 8 | 9 | def _random_index(howmany): 10 | return Index([rands(10) for _ in range(howmany)]) 11 | 12 | df = DataFrame(np.random.randn(N, K), index=_random_index(N), 13 | columns=_random_index(K)) 14 | 15 | 16 | def get1(): 17 | for col in df.columns: 18 | for row in df.index: 19 | _ 
= df[col][row] 20 | 21 | 22 | def get2(): 23 | for col in df.columns: 24 | for row in df.index: 25 | _ = df.get_value(row, col) 26 | 27 | 28 | def put1(): 29 | for col in df.columns: 30 | for row in df.index: 31 | df[col][row] = 0 32 | 33 | 34 | def put2(): 35 | for col in df.columns: 36 | for row in df.index: 37 | df.set_value(row, col, 0) 38 | 39 | 40 | def resize1(): 41 | buf = DataFrame() 42 | for col in df.columns: 43 | for row in df.index: 44 | buf = buf.set_value(row, col, 5.) 45 | return buf 46 | 47 | 48 | def resize2(): 49 | from collections import defaultdict 50 | 51 | buf = defaultdict(dict) 52 | for col in df.columns: 53 | for row in df.index: 54 | buf[col][row] = 5. 55 | 56 | return DataFrame(buf) 57 | -------------------------------------------------------------------------------- /bench/bench_groupby.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | from pandas.util.testing import rands 3 | from pandas.compat import range 4 | 5 | import string 6 | import random 7 | 8 | k = 20000 9 | n = 10 10 | 11 | foo = np.tile(np.array([rands(10) for _ in range(k)], dtype='O'), n) 12 | foo2 = list(foo) 13 | random.shuffle(foo) 14 | random.shuffle(foo2) 15 | 16 | df = DataFrame({'A': foo, 17 | 'B': foo2, 18 | 'C': np.random.randn(n * k)}) 19 | 20 | import pandas._sandbox as sbx 21 | 22 | 23 | def f(): 24 | table = sbx.StringHashTable(len(df)) 25 | ret = table.factorize(df['A']) 26 | return ret 27 | 28 | 29 | def g(): 30 | table = sbx.PyObjectHashTable(len(df)) 31 | ret = table.factorize(df['A']) 32 | return ret 33 | 34 | ret = f() 35 | 36 | """ 37 | import pandas._tseries as lib 38 | 39 | f = np.std 40 | 41 | 42 | grouped = df.groupby(['A', 'B']) 43 | 44 | label_list = [ping.labels for ping in grouped.groupings] 45 | shape = [len(ping.ids) for ping in grouped.groupings] 46 | 47 | from pandas.core.groupby import get_group_index 48 | 49 | 50 | group_index = get_group_index(label_list, shape).astype('i4') 
51 | 52 | ngroups = np.prod(shape) 53 | 54 | indexer = lib.groupsort_indexer(group_index, ngroups) 55 | 56 | values = df['C'].values.take(indexer) 57 | group_index = group_index.take(indexer) 58 | 59 | f = lambda x: x.std(ddof=1) 60 | 61 | grouper = lib.Grouper(df['C'], np.ndarray.std, group_index, ngroups) 62 | result = grouper.get_result() 63 | 64 | expected = grouped.std() 65 | """ 66 | -------------------------------------------------------------------------------- /bench/bench_khash_dict.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some comparisons of khash.h to Python dict 3 | """ 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | import os 8 | 9 | from vbench.api import Benchmark 10 | from pandas.util.testing import rands 11 | from pandas.compat import range 12 | import pandas._tseries as lib 13 | import pandas._sandbox as sbx 14 | import time 15 | 16 | import psutil 17 | 18 | pid = os.getpid() 19 | proc = psutil.Process(pid) 20 | 21 | 22 | def object_test_data(n): 23 | pass 24 | 25 | 26 | def string_test_data(n): 27 | return np.array([rands(10) for _ in range(n)], dtype='O') 28 | 29 | 30 | def int_test_data(n): 31 | return np.arange(n, dtype='i8') 32 | 33 | N = 1000000 34 | 35 | #---------------------------------------------------------------------- 36 | # Benchmark 1: map_locations 37 | 38 | 39 | def map_locations_python_object(): 40 | arr = string_test_data(N) 41 | return _timeit(lambda: lib.map_indices_object(arr)) 42 | 43 | 44 | def map_locations_khash_object(): 45 | arr = string_test_data(N) 46 | 47 | def f(): 48 | table = sbx.PyObjectHashTable(len(arr)) 49 | table.map_locations(arr) 50 | return _timeit(f) 51 | 52 | 53 | def _timeit(f, iterations=10): 54 | start = time.time() 55 | for _ in range(iterations): 56 | foo = f() 57 | elapsed = time.time() - start 58 | return elapsed 59 | 60 | #---------------------------------------------------------------------- 61 | # Benchmark 2: 
lookup_locations 62 | 63 | 64 | def lookup_python(values): 65 | table = lib.map_indices_object(values) 66 | return _timeit(lambda: lib.merge_indexer_object(values, table)) 67 | 68 | 69 | def lookup_khash(values): 70 | table = sbx.PyObjectHashTable(len(values)) 71 | table.map_locations(values) 72 | locs = table.lookup_locations(values) 73 | # elapsed = _timeit(lambda: table.lookup_locations2(values)) 74 | return table 75 | 76 | 77 | def leak(values): 78 | for _ in range(100): 79 | print(proc.get_memory_info()) 80 | table = lookup_khash(values) 81 | # table.destroy() 82 | 83 | arr = string_test_data(N) 84 | 85 | #---------------------------------------------------------------------- 86 | # Benchmark 3: unique 87 | 88 | #---------------------------------------------------------------------- 89 | # Benchmark 4: factorize 90 | -------------------------------------------------------------------------------- /bench/bench_pivot.R: -------------------------------------------------------------------------------- 1 | library(reshape2) 2 | 3 | 4 | n <- 100000 5 | a.size <- 5 6 | b.size <- 5 7 | 8 | data <- data.frame(a=sample(letters[1:a.size], n, replace=T), 9 | b=sample(letters[1:b.size], n, replace=T), 10 | c=rnorm(n), 11 | d=rnorm(n)) 12 | 13 | timings <- numeric() 14 | 15 | # acast(melt(data, id=c("a", "b")), a ~ b, mean) 16 | # acast(melt(data, id=c("a", "b")), a + b ~ variable, mean) 17 | 18 | for (i in 1:10) { 19 | gc() 20 | tim <- system.time(acast(melt(data, id=c("a", "b")), a ~ b, mean, 21 | subset=.(variable=="c"))) 22 | timings[i] = tim[3] 23 | } 24 | 25 | mean(timings) 26 | 27 | acast(melt(data, id=c("a", "b")), a ~ b, mean, subset=.(variable="c")) 28 | -------------------------------------------------------------------------------- /bench/bench_pivot.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | import string 3 | 4 | 5 | n = 100000 6 | asize = 5 7 | bsize = 5 8 | 9 | letters = 
np.asarray(list(string.letters), dtype=object) 10 | 11 | data = DataFrame(dict(foo=letters[:asize][np.random.randint(0, asize, n)], 12 | bar=letters[:bsize][np.random.randint(0, bsize, n)], 13 | baz=np.random.randn(n), 14 | qux=np.random.randn(n))) 15 | 16 | table = pivot_table(data, xby=['foo', 'bar']) 17 | -------------------------------------------------------------------------------- /bench/bench_sparse.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | from pandas import * 5 | import pandas.core.sparse as spm 6 | import pandas.compat as compat 7 | reload(spm) 8 | from pandas.core.sparse import * 9 | 10 | N = 10000. 11 | 12 | arr1 = np.arange(N) 13 | index = Index(np.arange(N)) 14 | 15 | off = N // 10 16 | arr1[off: 2 * off] = np.NaN 17 | arr1[4 * off: 5 * off] = np.NaN 18 | arr1[8 * off: 9 * off] = np.NaN 19 | 20 | arr2 = np.arange(N) 21 | arr2[3 * off // 2: 2 * off + off // 2] = np.NaN 22 | arr2[8 * off + off // 2: 9 * off + off // 2] = np.NaN 23 | 24 | s1 = SparseSeries(arr1, index=index) 25 | s2 = SparseSeries(arr2, index=index) 26 | 27 | is1 = SparseSeries(arr1, kind='integer', index=index) 28 | is2 = SparseSeries(arr2, kind='integer', index=index) 29 | 30 | s1_dense = s1.to_dense() 31 | s2_dense = s2.to_dense() 32 | 33 | if 'linux' in sys.platform: 34 | pth = '/home/wesm/code/pandas/example' 35 | else: 36 | pth = '/Users/wesm/code/pandas/example' 37 | 38 | dm = DataFrame.load(pth) 39 | 40 | sdf = dm.to_sparse() 41 | 42 | 43 | def new_data_like(sdf): 44 | new_data = {} 45 | for col, series in compat.iteritems(sdf): 46 | new_data[col] = SparseSeries(np.random.randn(len(series.sp_values)), 47 | index=sdf.index, 48 | sparse_index=series.sp_index, 49 | fill_value=series.fill_value) 50 | 51 | return SparseDataFrame(new_data) 52 | 53 | # data = {} 54 | # for col, ser in dm.iteritems(): 55 | # data[col] = SparseSeries(ser) 56 | 57 | dwp = Panel.fromDict({'foo': dm}) 58 | # sdf = 
SparseDataFrame(data) 59 | 60 | 61 | lp = stack_sparse_frame(sdf) 62 | 63 | 64 | swp = SparsePanel({'A': sdf}) 65 | swp = SparsePanel({'A': sdf, 66 | 'B': sdf, 67 | 'C': sdf, 68 | 'D': sdf}) 69 | 70 | y = sdf 71 | x = SparsePanel({'x1': sdf + new_data_like(sdf) / 10, 72 | 'x2': sdf + new_data_like(sdf) / 10}) 73 | 74 | dense_y = sdf 75 | dense_x = x.to_dense() 76 | 77 | # import hotshot, hotshot.stats 78 | # prof = hotshot.Profile('test.prof') 79 | 80 | # benchtime, stones = prof.runcall(ols, y=y, x=x) 81 | 82 | # prof.close() 83 | 84 | # stats = hotshot.stats.load('test.prof') 85 | 86 | dense_model = ols(y=dense_y, x=dense_x) 87 | 88 | import pandas.stats.plm as plm 89 | import pandas.stats.interface as face 90 | reload(plm) 91 | reload(face) 92 | 93 | # model = face.ols(y=y, x=x) 94 | -------------------------------------------------------------------------------- /bench/bench_take_indexing.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | 4 | from pandas import * 5 | import pandas._tseries as lib 6 | 7 | from pandas import DataFrame 8 | import timeit 9 | from pandas.compat import zip 10 | 11 | setup = """ 12 | from pandas import Series 13 | import pandas._tseries as lib 14 | import random 15 | import numpy as np 16 | 17 | import random 18 | n = %d 19 | k = %d 20 | arr = np.random.randn(n, k) 21 | indexer = np.arange(n, dtype=np.int32) 22 | indexer = indexer[::-1] 23 | """ 24 | 25 | sizes = [100, 1000, 10000, 100000] 26 | iters = [1000, 1000, 100, 1] 27 | 28 | fancy_2d = [] 29 | take_2d = [] 30 | cython_2d = [] 31 | 32 | n = 1000 33 | 34 | 35 | def _timeit(stmt, size, k=5, iters=1000): 36 | timer = timeit.Timer(stmt=stmt, setup=setup % (sz, k)) 37 | return timer.timeit(n) / n 38 | 39 | for sz, its in zip(sizes, iters): 40 | print(sz) 41 | fancy_2d.append(_timeit('arr[indexer]', sz, iters=its)) 42 | take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, 
iters=its)) 43 | cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its)) 44 | 45 | df = DataFrame({'fancy': fancy_2d, 46 | 'take': take_2d, 47 | 'cython': cython_2d}) 48 | 49 | print(df) 50 | 51 | from pandas.rpy.common import r 52 | r('mat <- matrix(rnorm(50000), nrow=10000, ncol=5)') 53 | r('set.seed(12345)') 54 | r('indexer <- sample(1:10000)') 55 | r('mat[indexer,]') 56 | -------------------------------------------------------------------------------- /bench/bench_with_subset.R: -------------------------------------------------------------------------------- 1 | library(microbenchmark) 2 | library(data.table) 3 | 4 | 5 | data.frame.subset.bench <- function (n=1e7, times=30) { 6 | df <- data.frame(a=rnorm(n), b=rnorm(n), c=rnorm(n)) 7 | print(microbenchmark(subset(df, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c), 8 | times=times)) 9 | } 10 | 11 | 12 | # data.table allows something very similar to query with an expression 13 | # but we have chained comparisons AND we're faster BOO YAH!
14 | data.table.subset.expression.bench <- function (n=1e7, times=30) { 15 | dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) 16 | print(microbenchmark(dt[, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c], 17 | times=times)) 18 | } 19 | 20 | 21 | # compare against subset with data.table for good measure 22 | data.table.subset.bench <- function (n=1e7, times=30) { 23 | dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) 24 | print(microbenchmark(subset(dt, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c), 25 | times=times)) 26 | } 27 | 28 | 29 | data.frame.with.bench <- function (n=1e7, times=30) { 30 | df <- data.frame(a=rnorm(n), b=rnorm(n), c=rnorm(n)) 31 | 32 | print(microbenchmark(with(df, a + b * (c ^ 2 + b ^ 2 - a) / (a * c) ^ 3), 33 | times=times)) 34 | } 35 | 36 | 37 | data.table.with.bench <- function (n=1e7, times=30) { 38 | dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) 39 | print(microbenchmark(with(dt, a + b * (c ^ 2 + b ^ 2 - a) / (a * c) ^ 3), 40 | times=times)) 41 | } 42 | 43 | 44 | bench <- function () { 45 | data.frame.subset.bench() 46 | data.table.subset.expression.bench() 47 | data.table.subset.bench() 48 | data.frame.with.bench() 49 | data.table.with.bench() 50 | } 51 | 52 | 53 | bench() 54 | -------------------------------------------------------------------------------- /bench/duplicated.R: -------------------------------------------------------------------------------- 1 | N <- 100000 2 | 3 | k1 = rep(NA, N) 4 | k2 = rep(NA, N) 5 | for (i in 1:N){ 6 | k1[i] <- paste(sample(letters, 1), collapse="") 7 | k2[i] <- paste(sample(letters, 1), collapse="") 8 | } 9 | df <- data.frame(a=k1, b=k2, c=rep(1:100, N / 100)) 10 | df2 <- data.frame(a=k1, b=k2) 11 | 12 | timings <- numeric() 13 | timings2 <- numeric() 14 | for (i in 1:50) { 15 | gc() 16 | timings[i] = system.time(deduped <- df[!duplicated(df),])[3] 17 | gc() 18 | timings2[i] = system.time(deduped <- df[!duplicated(df[,c("a", "b")]),])[3] 19 | } 20 | 21 | mean(timings) 22 | mean(timings2) 23 
| -------------------------------------------------------------------------------- /bench/larry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/bench/larry.py -------------------------------------------------------------------------------- /bench/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import itertools 3 | import collections 4 | import scipy.ndimage as ndi 5 | from pandas.compat import zip, range 6 | 7 | N = 10000 8 | 9 | lat = np.random.randint(0, 360, N) 10 | lon = np.random.randint(0, 360, N) 11 | data = np.random.randn(N) 12 | 13 | 14 | def groupby1(lat, lon, data): 15 | indexer = np.lexsort((lon, lat)) 16 | lat = lat.take(indexer) 17 | lon = lon.take(indexer) 18 | sorted_data = data.take(indexer) 19 | 20 | keys = 1000. * lat + lon 21 | unique_keys = np.unique(keys) 22 | bounds = keys.searchsorted(unique_keys) 23 | 24 | result = group_agg(sorted_data, bounds, lambda x: x.mean()) 25 | 26 | decoder = keys.searchsorted(unique_keys) 27 | 28 | return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result)) 29 | 30 | 31 | def group_mean(lat, lon, data): 32 | indexer = np.lexsort((lon, lat)) 33 | lat = lat.take(indexer) 34 | lon = lon.take(indexer) 35 | sorted_data = data.take(indexer) 36 | 37 | keys = 1000 * lat + lon 38 | unique_keys = np.unique(keys) 39 | 40 | result = ndi.mean(sorted_data, labels=keys, index=unique_keys) 41 | decoder = keys.searchsorted(unique_keys) 42 | 43 | return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result)) 44 | 45 | 46 | def group_mean_naive(lat, lon, data): 47 | grouped = collections.defaultdict(list) 48 | for lt, ln, da in zip(lat, lon, data): 49 | grouped[(lt, ln)].append(da) 50 | 51 | averaged = dict((ltln, np.mean(da)) for ltln, da in grouped.items()) 52 | 53 | return averaged 54 | 55 | 56 | def group_agg(values, 
bounds, f): 57 | N = len(values) 58 | result = np.empty(len(bounds), dtype=float) 59 | for i, left_bound in enumerate(bounds): 60 | if i == len(bounds) - 1: 61 | right_bound = N 62 | else: 63 | right_bound = bounds[i + 1] 64 | 65 | result[i] = f(values[left_bound: right_bound]) 66 | 67 | return result 68 | 69 | # for i in range(10): 70 | # groupby1(lat, lon, data) 71 | -------------------------------------------------------------------------------- /bench/zoo_bench.R: -------------------------------------------------------------------------------- 1 | library(zoo) 2 | library(xts) 3 | library(fts) 4 | library(tseries) 5 | library(its) 6 | library(xtable) 7 | 8 | ## indices = rep(NA, 100000) 9 | ## for (i in 1:100000) 10 | ## indices[i] <- paste(sample(letters, 10), collapse="") 11 | 12 | 13 | 14 | ## x <- zoo(rnorm(100000), indices) 15 | ## y <- zoo(rnorm(90000), indices[sample(1:100000, 90000)]) 16 | 17 | ## indices <- as.POSIXct(1:100000) 18 | 19 | indices <- as.POSIXct(Sys.Date()) + seq(1, 100000000, 100) 20 | 21 | sz <- 500000 22 | 23 | ## x <- xts(rnorm(sz), sample(indices, sz)) 24 | ## y <- xts(rnorm(sz), sample(indices, sz)) 25 | 26 | zoo.bench <- function(){ 27 | x <- zoo(rnorm(sz), sample(indices, sz)) 28 | y <- zoo(rnorm(sz), sample(indices, sz)) 29 | timeit(function() {x + y}) 30 | } 31 | 32 | xts.bench <- function(){ 33 | x <- xts(rnorm(sz), sample(indices, sz)) 34 | y <- xts(rnorm(sz), sample(indices, sz)) 35 | timeit(function() {x + y}) 36 | } 37 | 38 | fts.bench <- function(){ 39 | x <- fts(rnorm(sz), sort(sample(indices, sz))) 40 | y <- fts(rnorm(sz), sort(sample(indices, sz))) 41 | timeit(function() {x + y}) 42 | } 43 | 44 | its.bench <- function(){ 45 | x <- its(rnorm(sz), sort(sample(indices, sz))) 46 | y <- its(rnorm(sz), sort(sample(indices, sz))) 47 | timeit(function() {x + y}) 48 | } 49 | 50 | irts.bench <- function(){ 51 | x <- irts(sort(sample(indices, sz)), rnorm(sz)) 52 | y <- irts(sort(sample(indices, sz)), rnorm(sz)) 53 |
timeit(function() {x + y}) 54 | } 55 | 56 | timeit <- function(f){ 57 | timings <- numeric() 58 | for (i in 1:10) { 59 | gc() 60 | timings[i] = system.time(f())[3] 61 | } 62 | mean(timings) 63 | } 64 | 65 | bench <- function(){ 66 | results <- c(xts.bench(), fts.bench(), its.bench(), zoo.bench()) 67 | names <- c("xts", "fts", "its", "zoo") 68 | data.frame(results, names) 69 | } 70 | 71 | result <- bench() 72 | -------------------------------------------------------------------------------- /bench/zoo_bench.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | from pandas.util.testing import rands 3 | 4 | n = 1000000 5 | # indices = Index([rands(10) for _ in xrange(n)]) 6 | 7 | 8 | def sample(values, k): 9 | sampler = np.random.permutation(len(values)) 10 | return values.take(sampler[:k]) 11 | sz = 500000 12 | rng = np.arange(0, 10000000000000, 10000000) 13 | stamps = np.datetime64(datetime.now()).view('i8') + rng 14 | idx1 = np.sort(sample(stamps, sz)) 15 | idx2 = np.sort(sample(stamps, sz)) 16 | ts1 = Series(np.random.randn(sz), idx1) 17 | ts2 = Series(np.random.randn(sz), idx2) 18 | 19 | 20 | # subsample_size = 90000 21 | 22 | # x = Series(np.random.randn(100000), indices) 23 | # y = Series(np.random.randn(subsample_size), 24 | # index=sample(indices, subsample_size)) 25 | 26 | 27 | # lx = larry(np.random.randn(100000), [list(indices)]) 28 | # ly = larry(np.random.randn(subsample_size), [list(y.index)]) 29 | 30 | # Benchmark 1: Two 1-million length time series (int64-based index) with 31 | # randomly chosen timestamps 32 | 33 | # Benchmark 2: Join two 5-variate time series DataFrames (outer and inner join) 34 | 35 | # df1 = DataFrame(np.random.randn(1000000, 5), idx1, columns=range(5)) 36 | # df2 = DataFrame(np.random.randn(1000000, 5), idx2, columns=range(5, 10)) 37 | -------------------------------------------------------------------------------- /ci/README.txt: 
-------------------------------------------------------------------------------- 1 | Travis is a ci service that's well-integrated with GitHub. 2 | The following types of breakage should be detected 3 | by Travis builds: 4 | 5 | 1) Failing tests on any supported version of Python. 6 | 2) Pandas should install and the tests should run if no optional deps are installed. 7 | That also means tests which rely on optional deps need to raise SkipTest() 8 | if the dep is missing. 9 | 3) unicode related fails when running under exotic locales. 10 | 11 | We tried running the vbench suite for a while, but with varying load 12 | on Travis machines, that wasn't useful. 13 | 14 | Travis currently (4/2013) has a 5-job concurrency limit. Exceeding it 15 | basically doubles the total runtime for a commit through travis, and 16 | since dep+pandas installation is already quite long, this should become 17 | a hard limit on concurrent travis runs. 18 | -------------------------------------------------------------------------------- /ci/after_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #wget https://raw.github.com/y-p/ScatterCI-CLI/master/scatter_cli.py 4 | #chmod u+x scatter_cli.py 5 | 6 | pip install -I requests==2.1.0 7 | echo "${TRAVIS_PYTHON_VERSION:0:4}" 8 | if [ x"${TRAVIS_PYTHON_VERSION:0:4}" == x"2.6" ]; then 9 | pip install simplejson; 10 | fi 11 | 12 | # ScatterCI accepts a build log, but currently does nothing with it. 
13 | echo '' > /tmp/build.log 14 | 15 | # not exposed in the build logs 16 | #export SCATTERCI_ACCESS_KEY= 17 | #export SCATTERCI_HOST= 18 | 19 | # Generate a json file describing system and dep versions 20 | ci/print_versions.py -j /tmp/env.json 21 | 22 | # nose ran using "--with-xunit --xunit-file nosetest.xml" and generated /tmp/nosetest.xml 23 | # Will timeout if server not available, and should not fail the build 24 | #python scatter_cli.py --xunit-file /tmp/nosetests.xml --log-file /tmp/build.log --env-file /tmp/env.json --build-name "$JOB_NAME" --succeed 25 | 26 | true # never fail because bad things happened here 27 | -------------------------------------------------------------------------------- /ci/before_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If envars.sh determined we're running in an authorized fork 4 | # and the user opted in to the network cache, and that cached versions 5 | # are available on the cache server, download and deploy the cached 6 | # files to the local filesystem 7 | 8 | echo "inside $0" 9 | 10 | # overview 11 | sudo apt-get update $APT_ARGS # run apt-get update for all versions 12 | 13 | true # never fail because bad things happened here 14 | -------------------------------------------------------------------------------- /ci/build_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | cd "$TRAVIS_BUILD_DIR" 5 | 6 | git show --pretty="format:" --name-only HEAD~5.. --first-parent | grep -P "rst|txt|doc" 7 | 8 | if [ "$?" 
!= "0" ]; then 9 | echo "Skipping doc build, none were modified" 10 | # nope, skip docs build 11 | exit 0 12 | fi 13 | 14 | 15 | if [ x"$DOC_BUILD" != x"" ]; then 16 | # we're running network tests, let's build the docs in the meantim 17 | echo "Will build docs" 18 | pip install sphinx==1.1.3 ipython==1.1.0 19 | 20 | mv "$TRAVIS_BUILD_DIR"/doc /tmp 21 | cd /tmp/doc 22 | 23 | rm /tmp/doc/source/api.rst # no R 24 | rm /tmp/doc/source/r_interface.rst # no R 25 | 26 | echo ############################### > /tmp/doc.log 27 | echo # Log file for the doc build # > /tmp/doc.log 28 | echo ############################### > /tmp/doc.log 29 | echo "" > /tmp/doc.log 30 | echo -e "y\n" | ./make.py --no-api 2>&1 31 | 32 | cd /tmp/doc/build/html 33 | git config --global user.email "pandas-docs-bot@localhost.foo" 34 | git config --global user.name "pandas-docs-bot" 35 | 36 | git init 37 | touch README 38 | git add README 39 | git commit -m "Initial commit" --allow-empty 40 | git branch gh-pages 41 | git checkout gh-pages 42 | touch .nojekyll 43 | git add --all . 44 | git commit -m "Version" --allow-empty 45 | git remote add origin https://$GH_TOKEN@github.com/pandas-docs/pandas-docs-travis 46 | git push origin gh-pages -f 47 | fi 48 | 49 | exit 0 50 | -------------------------------------------------------------------------------- /ci/ironcache/get.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | import re 6 | import os 7 | import time 8 | import json 9 | import base64 10 | from hashlib import sha1 11 | from iron_cache import * 12 | import traceback as tb 13 | 14 | key='KEY.%s.%s' %(os.environ.get('TRAVIS_REPO_SLUG','unk'), 15 | os.environ.get('JOB_NAME','unk')) 16 | print(key) 17 | 18 | if sys.version_info[0] > 2: 19 | key = bytes(key,encoding='utf8') 20 | 21 | key = sha1(key).hexdigest()[:8]+'.' 
22 | 23 | b = b'' 24 | cache = IronCache() 25 | for i in range(20): 26 | print("getting %s" % key+str(i)) 27 | try: 28 | item = cache.get(cache="travis", key=key+str(i)) 29 | v = item.value 30 | if sys.version_info[0] > 2: 31 | v = bytes(v,encoding='utf8') 32 | b += bytes(base64.b64decode(v)) 33 | except Exception as e: 34 | try: 35 | print(tb.format_exc(e)) 36 | except: 37 | print("exception during exception, oh my") 38 | break 39 | 40 | with open(os.path.join(os.environ.get('HOME',''),"ccache.7z"),'wb') as f: 41 | f.write(b) 42 | -------------------------------------------------------------------------------- /ci/ironcache/put.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | import re 6 | import os 7 | import time 8 | import json 9 | import base64 10 | from hashlib import sha1 11 | from iron_cache import * 12 | 13 | key='KEY.%s.%s' %(os.environ.get('TRAVIS_REPO_SLUG','unk'), 14 | os.environ.get('JOB_NAME','unk')) 15 | 16 | key='KEY.%s.%s' %(os.environ.get('TRAVIS_REPO_SLUG','unk'), 17 | os.environ.get('JOB_NAME','unk')) 18 | print(key) 19 | 20 | if sys.version_info[0] > 2: 21 | key = bytes(key,encoding='utf8') 22 | 23 | key = sha1(key).hexdigest()[:8]+'.' 
24 | 25 | os.chdir(os.environ.get('HOME')) 26 | 27 | cache = IronCache() 28 | 29 | i=0 30 | 31 | for i, fname in enumerate(sorted([x for x in os.listdir('.') if re.match("ccache.\d+$",x)])): 32 | print("Putting %s" % key+str(i)) 33 | with open(fname,"rb") as f: 34 | s= f.read() 35 | value=base64.b64encode(s) 36 | if isinstance(value, bytes): 37 | value = value.decode('ascii') 38 | item = cache.put(cache="travis", key=key+str(i), value=value,options=dict(expires_in=24*60*60)) 39 | 40 | # print("foo") 41 | for i in range(i+1,20): 42 | 43 | try: 44 | item = cache.delete(key+str(i),cache='travis') 45 | print("Deleted %s" % key+str(i)) 46 | except: 47 | break 48 | pass 49 | -------------------------------------------------------------------------------- /ci/prep_ccache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$IRON_TOKEN" ]; then 4 | 5 | home_dir=$(pwd) 6 | 7 | # install the compiler cache 8 | sudo apt-get $APT_ARGS install ccache p7zip-full 9 | # iron_cache, pending py3 fixes upstream 10 | pip install -I --allow-external --allow-insecure git+https://github.com/iron-io/iron_cache_python.git@8a451c7d7e4d16e0c3bedffd0f280d5d9bd4fe59#egg=iron_cache 11 | 12 | python ci/ironcache/get.py 13 | ccache -C 14 | 15 | clear_cache=0 16 | if [ -f ~/ccache.7z ]; then 17 | echo "Cache retrieved" 18 | clear_cache=1 19 | cd $HOME 20 | 7za e $HOME/ccache.7z 21 | # ls -l $HOME 22 | cd / 23 | tar xvf $HOME/ccache 24 | rm -rf $HOME/ccache.7z 25 | rm -rf $HOME/ccache 26 | 27 | fi 28 | 29 | # did the last commit change cython files? 
30 | cd $home_dir 31 | 32 | retval=$(git diff HEAD~3 --numstat | grep -P "pyx|pxd"|wc -l) 33 | echo "number of cython files changed: $retval" 34 | 35 | if [ $clear_cache -eq 1 ] && [ $retval -eq 0 ] 36 | then 37 | # nope, reuse cython files 38 | echo "Will reuse cached cython file" 39 | touch "$TRAVIS_BUILD_DIR"/pandas/*.c 40 | touch "$TRAVIS_BUILD_DIR"/pandas/src/*.c 41 | touch "$TRAVIS_BUILD_DIR"/pandas/*.cpp 42 | else 43 | echo "Rebuilding cythonized files" 44 | fi 45 | fi 46 | 47 | exit 0 48 | -------------------------------------------------------------------------------- /ci/print_skipped.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import math 5 | import xml.etree.ElementTree as et 6 | 7 | 8 | def parse_results(filename): 9 | tree = et.parse(filename) 10 | root = tree.getroot() 11 | skipped = [] 12 | 13 | current_class = old_class = '' 14 | i = 1 15 | assert i - 1 == len(skipped) 16 | for el in root.findall('testcase'): 17 | cn = el.attrib['classname'] 18 | for sk in el.findall('skipped'): 19 | old_class = current_class 20 | current_class = cn 21 | name = '{classname}.{name}'.format(classname=current_class, 22 | name=el.attrib['name']) 23 | msg = sk.attrib['message'] 24 | out = '' 25 | if old_class != current_class: 26 | ndigits = int(math.log(i, 10) + 1) 27 | out += ('-' * (len(name + msg) + 4 + ndigits) + '\n') # 4 for : + space + # + space 28 | out += '#{i} {name}: {msg}'.format(i=i, name=name, msg=msg) 29 | skipped.append(out) 30 | i += 1 31 | assert i - 1 == len(skipped) 32 | assert i - 1 == len(skipped) 33 | assert len(skipped) == int(root.attrib['skip']) 34 | return '\n'.join(skipped) 35 | 36 | 37 | def main(args): 38 | print('SKIPPED TESTS:') 39 | print(parse_results(args.filename)) 40 | return 0 41 | 42 | 43 | def parse_args(): 44 | import argparse 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument('filename', help='XUnit file to parse') 47 | 
return parser.parse_args() 48 | 49 | 50 | if __name__ == '__main__': 51 | sys.exit(main(parse_args())) 52 | -------------------------------------------------------------------------------- /ci/print_versions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def show_versions(as_json=False): 5 | import imp 6 | import os 7 | fn = __file__ 8 | this_dir = os.path.dirname(fn) 9 | pandas_dir = os.path.abspath(os.path.join(this_dir, "..")) 10 | sv_path = os.path.join(pandas_dir, 'pandas', 'util') 11 | mod = imp.load_module( 12 | 'pvmod', *imp.find_module('print_versions', [sv_path])) 13 | return mod.show_versions(as_json) 14 | 15 | 16 | if __name__ == '__main__': 17 | # optparse is 2.6-safe 18 | from optparse import OptionParser 19 | parser = OptionParser() 20 | parser.add_option("-j", "--json", metavar="FILE", nargs=1, 21 | help="Save output as JSON into file, pass in '-' to output to stdout") 22 | 23 | (options, args) = parser.parse_args() 24 | 25 | if options.json == "-": 26 | options.json = True 27 | 28 | show_versions(as_json=options.json) 29 | -------------------------------------------------------------------------------- /ci/requirements-2.6.txt: -------------------------------------------------------------------------------- 1 | numpy==1.6.1 2 | cython==0.19.1 3 | python-dateutil==1.5 4 | pytz==2013b 5 | http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz 6 | html5lib==1.0b2 7 | numexpr==1.4.2 8 | sqlalchemy==0.7.1 9 | pymysql==0.6.0 10 | psycopg2==2.5 11 | scipy==0.11.0 12 | statsmodels==0.4.3 13 | xlwt==0.7.5 14 | openpyxl==2.0.3 15 | xlsxwriter==0.4.6 16 | xlrd==0.9.2 17 | -------------------------------------------------------------------------------- /ci/requirements-2.7.txt: -------------------------------------------------------------------------------- 1 | python-dateutil==2.1 2 | pytz==2013b 3 | xlwt==0.7.5 4 | numpy==1.8.1 5 | cython==0.19.1 6 
| bottleneck==0.6.0 7 | numexpr==2.2.2 8 | tables==2.3.1 9 | matplotlib==1.3.1 10 | openpyxl==1.6.2 11 | xlsxwriter==0.4.6 12 | xlrd==0.9.2 13 | patsy==0.1.0 14 | sqlalchemy==0.9.6 15 | pymysql==0.6.1 16 | psycopg2==2.5.2 17 | html5lib==1.0b2 18 | lxml==3.2.1 19 | scipy==0.13.3 20 | beautifulsoup4==4.2.1 21 | statsmodels==0.5.0 22 | boto==2.26.1 23 | httplib2==0.8 24 | python-gflags==2.0 25 | google-api-python-client==1.2 26 | -------------------------------------------------------------------------------- /ci/requirements-2.7_LOCALE.txt: -------------------------------------------------------------------------------- 1 | python-dateutil 2 | pytz==2013b 3 | xlwt==0.7.5 4 | openpyxl==1.6.2 5 | xlsxwriter==0.4.6 6 | xlrd==0.9.2 7 | numpy==1.6.1 8 | cython==0.19.1 9 | bottleneck==0.6.0 10 | matplotlib==1.3.0 11 | patsy==0.1.0 12 | sqlalchemy==0.8.1 13 | html5lib==1.0b2 14 | lxml==3.2.1 15 | scipy==0.10.0 16 | beautifulsoup4==4.2.1 17 | statsmodels==0.4.3 18 | bigquery==2.0.17 19 | -------------------------------------------------------------------------------- /ci/requirements-2.7_NUMPY_DEV_1_8_x.txt: -------------------------------------------------------------------------------- 1 | python-dateutil 2 | pytz==2013b 3 | cython==0.19.1 4 | -------------------------------------------------------------------------------- /ci/requirements-2.7_NUMPY_DEV_master.txt: -------------------------------------------------------------------------------- 1 | python-dateutil 2 | pytz 3 | cython==0.19.1 4 | -------------------------------------------------------------------------------- /ci/requirements-3.2.txt: -------------------------------------------------------------------------------- 1 | python-dateutil==2.1 2 | pytz==2013b 3 | xlsxwriter==0.4.6 4 | xlrd==0.9.2 5 | numpy==1.7.1 6 | cython==0.19.1 7 | numexpr==2.1 8 | tables==3.0.0 9 | matplotlib==1.2.1 10 | patsy==0.1.0 11 | lxml==3.2.1 12 | scipy==0.12.0 13 | beautifulsoup4==4.2.1 14 | statsmodels==0.5.0 15 | 
-------------------------------------------------------------------------------- /ci/requirements-3.3.txt: -------------------------------------------------------------------------------- 1 | python-dateutil==2.2 2 | pytz==2013b 3 | openpyxl==1.6.2 4 | xlsxwriter==0.4.6 5 | xlrd==0.9.2 6 | html5lib==1.0b2 7 | numpy==1.8.0 8 | cython==0.19.1 9 | numexpr==2.3 10 | tables==3.1.0 11 | bottleneck==0.8.0 12 | matplotlib==1.2.1 13 | patsy==0.1.0 14 | lxml==3.2.1 15 | scipy==0.13.3 16 | beautifulsoup4==4.2.1 17 | statsmodels==0.5.0 18 | -------------------------------------------------------------------------------- /ci/requirements-3.4.txt: -------------------------------------------------------------------------------- 1 | python-dateutil 2 | pytz 3 | openpyxl 4 | xlsxwriter 5 | xlrd 6 | html5lib 7 | numpy==1.8.0 8 | cython==0.20.2 9 | scipy==0.13.3 10 | numexpr==2.4 11 | tables==3.1.0 12 | bottleneck==0.8.0 13 | matplotlib==1.3.1 14 | patsy 15 | lxml==3.3.5 16 | sqlalchemy==0.9.6 17 | pymysql==0.6.1 18 | psycopg2==2.5.2 19 | beautifulsoup4 20 | -------------------------------------------------------------------------------- /ci/script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "inside $0" 4 | 5 | if [ -n "$LOCALE_OVERRIDE" ]; then 6 | export LC_ALL="$LOCALE_OVERRIDE"; 7 | echo "Setting LC_ALL to $LOCALE_OVERRIDE" 8 | curdir="$(pwd)" 9 | cd /tmp 10 | pycmd='import pandas; print("pandas detected console encoding: %s" % pandas.get_option("display.encoding"))' 11 | python -c "$pycmd" 12 | cd "$curdir" 13 | fi 14 | 15 | # conditionally build and upload docs to GH/pandas-docs/pandas-docs/travis 16 | "$TRAVIS_BUILD_DIR"/ci/build_docs.sh 2>&1 > /tmp/doc.log & 17 | # doc build log will be shown after tests 18 | 19 | echo nosetests --exe -w /tmp -A "$NOSE_ARGS" pandas --with-xunit --xunit-file=/tmp/nosetests.xml 20 | nosetests --exe -w /tmp -A "$NOSE_ARGS" pandas --with-xunit --xunit-file=/tmp/nosetests.xml 21 
| 22 | RET="$?" 23 | 24 | # wait until subprocesses finish (build_docs.sh) 25 | wait 26 | 27 | exit "$RET" 28 | -------------------------------------------------------------------------------- /ci/speedpack/Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | Vagrant.configure("2") do |config| 4 | config.vm.box = "precise64" 5 | config.vm.box_url = "http://files.vagrantup.com/precise64.box" 6 | 7 | # config.vbguest.auto_update = true 8 | # config.vbguest.no_remote = true 9 | 10 | config.vm.synced_folder File.expand_path("..", Dir.pwd), "/reqf" 11 | config.vm.synced_folder "wheelhouse", "/wheelhouse" 12 | 13 | config.vm.provider :virtualbox do |vb| 14 | vb.customize ["modifyvm", :id, "--cpus", "4"] 15 | vb.customize ["modifyvm", :id, "--memory", "2048"] 16 | vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"] 17 | vb.customize ["modifyvm", :id, "--natdnsproxy1", "on"] 18 | end 19 | 20 | config.vm.provision :shell, :path => "build.sh" 21 | 22 | end 23 | -------------------------------------------------------------------------------- /ci/speedpack/nginx/nginx.conf.template: -------------------------------------------------------------------------------- 1 | #user nobody; 2 | worker_processes 1; 3 | 4 | #error_log logs/error.log; 5 | #error_log logs/error.log notice; 6 | #error_log logs/error.log info; 7 | 8 | #pid logs/nginx.pid; 9 | 10 | 11 | events { 12 | worker_connections 1024; 13 | } 14 | 15 | 16 | http { 17 | include mime.types; 18 | default_type application/octet-stream; 19 | 20 | #log_format main '$remote_addr - $remote_user [$time_local] "$request" ' 21 | # '$status $body_bytes_sent "$http_referer" ' 22 | # '"$http_user_agent" "$http_x_forwarded_for"'; 23 | 24 | #access_log logs/access.log on; 25 | 26 | sendfile on; 27 | #tcp_nopush on; 28 | 29 | #keepalive_timeout 0; 30 | keepalive_timeout 65; 31 | 32 | #gzip on; 33 | 34 | server { 35 | listen 
$OPENSHIFT_IP:$OPENSHIFT_PORT; 36 | 37 | access_log access.log ; 38 | sendfile on; 39 | 40 | location / { 41 | root ../../app-root/data/store/; 42 | autoindex on; 43 | } 44 | 45 | 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /ci/submit_ccache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | home_dir=$(pwd) 4 | ccache -s 5 | 6 | MISSES=$(ccache -s | grep "cache miss" | grep -Po "\d+") 7 | echo "MISSES: $MISSES" 8 | 9 | if [ x"$MISSES" == x"0" ]; then 10 | echo "No cache misses detected, skipping upload" 11 | exit 0 12 | fi 13 | 14 | if [ "$IRON_TOKEN" ]; then 15 | 16 | rm -rf $HOME/ccache.7z 17 | 18 | tar cf - $HOME/.ccache \ 19 | "$TRAVIS_BUILD_DIR"/pandas/{index,algos,lib,tslib,parser,hashtable}.c \ 20 | "$TRAVIS_BUILD_DIR"/pandas/src/{sparse,testing}.c \ 21 | "$TRAVIS_BUILD_DIR"/pandas/msgpack.cpp \ 22 | | 7za a -si $HOME/ccache.7z 23 | 24 | split -b 500000 -d $HOME/ccache.7z $HOME/ccache. 25 | 26 | python ci/ironcache/put.py 27 | fi; 28 | 29 | exit 0 30 | -------------------------------------------------------------------------------- /doc/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | {% extends "!autosummary/class.rst" %} 2 | 3 | {% block methods %} 4 | {% if methods %} 5 | 6 | .. 7 | HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages. 8 | .. autosummary:: 9 | :toctree: 10 | {% for item in all_methods %} 11 | {%- if not item.startswith('_') or item in ['__call__'] %} 12 | {{ name }}.{{ item }} 13 | {%- endif -%} 14 | {%- endfor %} 15 | 16 | {% endif %} 17 | {% endblock %} 18 | 19 | {% block attributes %} 20 | {% if attributes %} 21 | 22 | .. 23 | HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages. 24 | .. 
autosummary:: 25 | :toctree: 26 | {% for item in all_attributes %} 27 | {%- if not item.startswith('_') %} 28 | {{ name }}.{{ item }} 29 | {%- endif -%} 30 | {%- endfor %} 31 | 32 | {% endif %} 33 | {% endblock %} 34 | -------------------------------------------------------------------------------- /doc/data/fx_prices: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/data/fx_prices -------------------------------------------------------------------------------- /doc/data/mindex_ex.csv: -------------------------------------------------------------------------------- 1 | year,indiv,zit,xit 2 | 1977,"A",1.2,.6 3 | 1977,"B",1.5,.5 4 | 1977,"C",1.7,.8 5 | 1978,"A",.2,.06 6 | 1978,"B",.7,.2 7 | 1978,"C",.8,.3 8 | 1978,"D",.9,.5 9 | 1978,"E",1.4,.9 10 | 1979,"C",.2,.15 11 | 1979,"D",.14,.05 12 | 1979,"E",.5,.15 13 | 1979,"F",1.2,.5 14 | 1979,"G",3.4,1.9 15 | 1979,"H",5.4,2.7 16 | 1979,"I",6.4,1.2 17 | -------------------------------------------------------------------------------- /doc/data/test.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/data/test.xls -------------------------------------------------------------------------------- /doc/plots/stats/moment_plots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import matplotlib.pyplot as plt 4 | import pandas.util.testing as t 5 | import pandas.stats.moments as m 6 | 7 | 8 | def test_series(n=1000): 9 | t.N = n 10 | s = t.makeTimeSeries() 11 | return s 12 | 13 | 14 | def plot_timeseries(*args, **kwds): 15 | n = len(args) 16 | 17 | fig, axes = plt.subplots(n, 1, figsize=kwds.get('size', (10, 5)), 18 | sharex=True) 19 | titles = kwds.get('titles', None) 20 | 21 | for k in range(1, n + 1): 22 | ax = 
axes[k - 1] 23 | ts = args[k - 1] 24 | ax.plot(ts.index, ts.values) 25 | 26 | if titles: 27 | ax.set_title(titles[k - 1]) 28 | 29 | fig.autofmt_xdate() 30 | fig.subplots_adjust(bottom=0.10, top=0.95) 31 | -------------------------------------------------------------------------------- /doc/plots/stats/moments_ewma.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas.util.testing as t 3 | import pandas.stats.moments as m 4 | 5 | t.N = 200 6 | s = t.makeTimeSeries().cumsum() 7 | 8 | plt.figure(figsize=(10, 5)) 9 | plt.plot(s.index, s.values) 10 | plt.plot(s.index, m.ewma(s, 20, min_periods=1).values) 11 | f = plt.gcf() 12 | f.autofmt_xdate() 13 | 14 | plt.show() 15 | plt.close('all') 16 | -------------------------------------------------------------------------------- /doc/plots/stats/moments_ewmvol.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas.util.testing as t 3 | import pandas.stats.moments as m 4 | 5 | t.N = 500 6 | ts = t.makeTimeSeries() 7 | ts[::100] = 20 8 | 9 | s = ts.cumsum() 10 | 11 | 12 | plt.figure(figsize=(10, 5)) 13 | plt.plot(s.index, m.ewmvol(s, span=50, min_periods=1).values, color='b') 14 | plt.plot(s.index, m.rolling_std(s, 50, min_periods=1).values, color='r') 15 | 16 | plt.title('Exp-weighted std with shocks') 17 | plt.legend(('Exp-weighted', 'Equal-weighted')) 18 | 19 | f = plt.gcf() 20 | f.autofmt_xdate() 21 | 22 | plt.show() 23 | plt.close('all') 24 | -------------------------------------------------------------------------------- /doc/plots/stats/moments_expw.py: -------------------------------------------------------------------------------- 1 | from moment_plots import * 2 | 3 | np.random.seed(1) 4 | 5 | ts = test_series(500) * 10 6 | 7 | # ts[::100] = 20 8 | 9 | s = ts.cumsum() 10 | 11 | fig, axes = plt.subplots(3, 1, figsize=(8, 10), sharex=True) 12 | 13 | ax0, 
ax1, ax2 = axes 14 | 15 | ax0.plot(s.index, s.values) 16 | ax0.set_title('time series') 17 | 18 | ax1.plot(s.index, m.ewma(s, span=50, min_periods=1).values, color='b') 19 | ax1.plot(s.index, m.rolling_mean(s, 50, min_periods=1).values, color='r') 20 | ax1.set_title('rolling_mean vs. ewma') 21 | 22 | line1 = ax2.plot( 23 | s.index, m.ewmstd(s, span=50, min_periods=1).values, color='b') 24 | line2 = ax2.plot( 25 | s.index, m.rolling_std(s, 50, min_periods=1).values, color='r') 26 | ax2.set_title('rolling_std vs. ewmstd') 27 | 28 | fig.legend((line1, line2), 29 | ('Exp-weighted', 'Equal-weighted'), 30 | loc='upper right') 31 | fig.autofmt_xdate() 32 | fig.subplots_adjust(bottom=0.10, top=0.95) 33 | 34 | plt.show() 35 | plt.close('all') 36 | -------------------------------------------------------------------------------- /doc/plots/stats/moments_rolling.py: -------------------------------------------------------------------------------- 1 | from moment_plots import * 2 | 3 | ts = test_series() 4 | s = ts.cumsum() 5 | 6 | s[20:50] = np.NaN 7 | s[120:150] = np.NaN 8 | plot_timeseries(s, 9 | m.rolling_count(s, 50), 10 | m.rolling_sum(s, 50, min_periods=10), 11 | m.rolling_mean(s, 50, min_periods=10), 12 | m.rolling_std(s, 50, min_periods=10), 13 | m.rolling_skew(s, 50, min_periods=10), 14 | m.rolling_kurt(s, 50, min_periods=10), 15 | size=(10, 12), 16 | titles=('time series', 17 | 'rolling_count', 18 | 'rolling_sum', 19 | 'rolling_mean', 20 | 'rolling_std', 21 | 'rolling_skew', 22 | 'rolling_kurt')) 23 | plt.show() 24 | plt.close('all') 25 | -------------------------------------------------------------------------------- /doc/plots/stats/moments_rolling_binary.py: -------------------------------------------------------------------------------- 1 | from moment_plots import * 2 | 3 | np.random.seed(1) 4 | 5 | ts = test_series() 6 | s = ts.cumsum() 7 | ts2 = test_series() 8 | s2 = ts2.cumsum() 9 | 10 | s[20:50] = np.NaN 11 | s[120:150] = np.NaN 12 | fig, axes = 
plt.subplots(3, 1, figsize=(8, 10), sharex=True) 13 | 14 | ax0, ax1, ax2 = axes 15 | 16 | ax0.plot(s.index, s.values) 17 | ax0.plot(s2.index, s2.values) 18 | ax0.set_title('time series') 19 | 20 | ax1.plot(s.index, m.rolling_corr(s, s2, 50, min_periods=1).values) 21 | ax1.set_title('rolling_corr') 22 | 23 | ax2.plot(s.index, m.rolling_cov(s, s2, 50, min_periods=1).values) 24 | ax2.set_title('rolling_cov') 25 | 26 | fig.autofmt_xdate() 27 | fig.subplots_adjust(bottom=0.10, top=0.95) 28 | 29 | plt.show() 30 | plt.close('all') 31 | -------------------------------------------------------------------------------- /doc/source/_static/df_repr_truncated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/df_repr_truncated.png -------------------------------------------------------------------------------- /doc/source/_static/eval-perf-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/eval-perf-small.png -------------------------------------------------------------------------------- /doc/source/_static/eval-perf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/eval-perf.png -------------------------------------------------------------------------------- /doc/source/_static/legacy_0.10.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/legacy_0.10.h5 -------------------------------------------------------------------------------- /doc/source/_static/query-perf-small.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/query-perf-small.png -------------------------------------------------------------------------------- /doc/source/_static/query-perf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/query-perf.png -------------------------------------------------------------------------------- /doc/source/_static/stub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/stub -------------------------------------------------------------------------------- /doc/source/_static/trunc_after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/trunc_after.png -------------------------------------------------------------------------------- /doc/source/_static/trunc_before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/trunc_before.png -------------------------------------------------------------------------------- /doc/source/contributing.rst: -------------------------------------------------------------------------------- 1 | .. _contributing: 2 | 3 | ********************** 4 | Contributing to pandas 5 | ********************** 6 | 7 | See the following links: 8 | 9 | - `The developer pages on the website 10 | `_ 11 | - `Guidelines on bug reports and pull requests 12 | `_ 13 | - `Some extra tips on using git 14 | `_ 15 | 16 | .. 
include:: ../README.rst 17 | -------------------------------------------------------------------------------- /doc/source/themes/nature_with_gtoc/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | stylesheet = nature.css 4 | pygments_style = tango 5 | -------------------------------------------------------------------------------- /doc/source/v0.7.1.txt: -------------------------------------------------------------------------------- 1 | .. _whatsnew_0701: 2 | 3 | v.0.7.1 (February 29, 2012) 4 | --------------------------- 5 | 6 | This release includes a few new features and addresses over a dozen bugs in 7 | 0.7.0. 8 | 9 | New features 10 | ~~~~~~~~~~~~ 11 | 12 | - Add ``to_clipboard`` function to pandas namespace for writing objects to 13 | the system clipboard (:issue:`774`) 14 | - Add ``itertuples`` method to DataFrame for iterating through the rows of a 15 | dataframe as tuples (:issue:`818`) 16 | - Add ability to pass fill_value and method to DataFrame and Series align 17 | method (:issue:`806`, :issue:`807`) 18 | - Add fill_value option to reindex, align methods (:issue:`784`) 19 | - Enable concat to produce DataFrame from Series (:issue:`787`) 20 | - Add ``between`` method to Series (:issue:`802`) 21 | - Add HTML representation hook to DataFrame for the IPython HTML notebook 22 | (:issue:`773`) 23 | - Support for reading Excel 2007 XML documents using openpyxl 24 | 25 | Performance improvements 26 | ~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | - Improve performance and memory usage of fillna on DataFrame 29 | - Can concatenate a list of Series along axis=1 to obtain a DataFrame (:issue:`787`) 30 | 31 | -------------------------------------------------------------------------------- /doc/source/v0.7.2.txt: -------------------------------------------------------------------------------- 1 | .. 
_whatsnew_0702: 2 | 3 | v.0.7.2 (March 16, 2012) 4 | --------------------------- 5 | 6 | This release targets bugs in 0.7.1, and adds a few minor features. 7 | 8 | New features 9 | ~~~~~~~~~~~~ 10 | 11 | - Add additional tie-breaking methods in DataFrame.rank (:issue:`874`) 12 | - Add ascending parameter to rank in Series, DataFrame (:issue:`875`) 13 | - Add coerce_float option to DataFrame.from_records (:issue:`893`) 14 | - Add sort_columns parameter to allow unsorted plots (:issue:`918`) 15 | - Enable column access via attributes on GroupBy (:issue:`882`) 16 | - Can pass dict of values to DataFrame.fillna (:issue:`661`) 17 | - Can select multiple hierarchical groups by passing list of values in .ix 18 | (:issue:`134`) 19 | - Add ``axis`` option to DataFrame.fillna (:issue:`174`) 20 | - Add level keyword to ``drop`` for dropping values from a level (:issue:`159`) 21 | 22 | Performance improvements 23 | ~~~~~~~~~~~~~~~~~~~~~~~~ 24 | 25 | - Use khash for Series.value_counts, add raw function to algorithms.py (:issue:`861`) 26 | - Intercept __builtin__.sum in groupby (:issue:`885`) 27 | 28 | -------------------------------------------------------------------------------- /doc/source/v0.8.1.txt: -------------------------------------------------------------------------------- 1 | .. _whatsnew_0801: 2 | 3 | v0.8.1 (July 22, 2012) 4 | ---------------------- 5 | 6 | This release includes a few new features, performance enhancements, and over 30 7 | bug fixes from 0.8.0. New features include notably NA friendly string 8 | processing functionality and a series of new plot types and options. 
9 | 10 | New features 11 | ~~~~~~~~~~~~ 12 | 13 | - Add :ref:`vectorized string processing methods ` 14 | accessible via Series.str (:issue:`620`) 15 | - Add option to disable adjustment in EWMA (:issue:`1584`) 16 | - :ref:`Radviz plot ` (:issue:`1566`) 17 | - :ref:`Parallel coordinates plot ` 18 | - :ref:`Bootstrap plot ` 19 | - Per column styles and secondary y-axis plotting (:issue:`1559`) 20 | - New datetime converters millisecond plotting (:issue:`1599`) 21 | - Add option to disable "sparse" display of hierarchical indexes (:issue:`1538`) 22 | - Series/DataFrame's ``set_index`` method can :ref:`append levels 23 | ` to an existing Index/MultiIndex (:issue:`1569`, :issue:`1577`) 24 | 25 | Performance improvements 26 | ~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | - Improved implementation of rolling min and max (thanks to `Bottleneck 29 | `__ !) 30 | - Add accelerated ``'median'`` GroupBy option (:issue:`1358`) 31 | - Significantly improve the performance of parsing ISO8601-format date 32 | strings with ``DatetimeIndex`` or ``to_datetime`` (:issue:`1571`) 33 | - Improve the performance of GroupBy on single-key aggregations and use with 34 | Categorical types 35 | - Significant datetime parsing performance improvments 36 | 37 | -------------------------------------------------------------------------------- /doc/source/whatsnew.rst: -------------------------------------------------------------------------------- 1 | .. _whatsnew: 2 | 3 | .. currentmodule:: pandas 4 | 5 | .. ipython:: python 6 | :suppress: 7 | 8 | import numpy as np 9 | from pandas import * 10 | randn = np.random.randn 11 | np.set_printoptions(precision=4, suppress=True) 12 | options.display.max_rows = 15 13 | 14 | ********** 15 | What's New 16 | ********** 17 | 18 | These are new features and improvements of note in each release. 19 | 20 | .. include:: v0.15.0.txt 21 | 22 | .. include:: v0.14.1.txt 23 | 24 | .. include:: v0.14.0.txt 25 | 26 | .. include:: v0.13.1.txt 27 | 28 | .. 
include:: v0.13.0.txt 29 | 30 | .. include:: v0.12.0.txt 31 | 32 | .. include:: v0.11.0.txt 33 | 34 | .. include:: v0.10.1.txt 35 | 36 | .. include:: v0.10.0.txt 37 | 38 | .. include:: v0.9.1.txt 39 | 40 | .. include:: v0.9.0.txt 41 | 42 | .. include:: v0.8.1.txt 43 | 44 | .. include:: v0.8.0.txt 45 | 46 | .. include:: v0.7.3.txt 47 | 48 | .. include:: v0.7.2.txt 49 | 50 | .. include:: v0.7.1.txt 51 | 52 | .. include:: v0.7.0.txt 53 | 54 | .. include:: v0.6.1.txt 55 | 56 | .. include:: v0.6.0.txt 57 | 58 | .. include:: v0.5.0.txt 59 | 60 | .. include:: v0.4.x.txt 61 | -------------------------------------------------------------------------------- /doc/sphinxext/README.rst: -------------------------------------------------------------------------------- 1 | sphinxext 2 | ========= 3 | 4 | This directory contains copies of different sphinx extensions in use in the 5 | pandas documentation. These copies originate from other projects: 6 | 7 | - ``numpydoc`` - Numpy's Sphinx extensions: this can be found at its own 8 | repository: https://github.com/numpy/numpydoc 9 | - ``ipython_directive`` and ``ipython_console_highlighting`` in the folder 10 | `ipython_sphinxext` - Sphinx extensions from IPython: these are included 11 | in IPython: https://github.com/ipython/ipython/tree/master/IPython/sphinxext 12 | 13 | .. note:: 14 | 15 | These copies are maintained at the respective projects, so fixes should, 16 | to the extent possible, be pushed upstream instead of only adapting our 17 | local copy to avoid divergence between the the local and upstream version. 
18 | -------------------------------------------------------------------------------- /doc/sphinxext/ipython_sphinxext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/sphinxext/ipython_sphinxext/__init__.py -------------------------------------------------------------------------------- /doc/sphinxext/numpydoc/README.rst: -------------------------------------------------------------------------------- 1 | ===================================== 2 | numpydoc -- Numpy's Sphinx extensions 3 | ===================================== 4 | 5 | Numpy's documentation uses several custom extensions to Sphinx. These 6 | are shipped in this ``numpydoc`` package, in case you want to make use 7 | of them in third-party projects. 8 | 9 | The following extensions are available: 10 | 11 | - ``numpydoc``: support for the Numpy docstring format in Sphinx, and add 12 | the code description directives ``np:function``, ``np-c:function``, etc. 13 | that support the Numpy docstring syntax. 14 | 15 | - ``numpydoc.traitsdoc``: For gathering documentation about Traits attributes. 16 | 17 | - ``numpydoc.plot_directive``: Adaptation of Matplotlib's ``plot::`` 18 | directive. Note that this implementation may still undergo severe 19 | changes or eventually be deprecated. 20 | 21 | 22 | numpydoc 23 | ======== 24 | 25 | Numpydoc inserts a hook into Sphinx's autodoc that converts docstrings 26 | following the Numpy/Scipy format to a form palatable to Sphinx. 27 | 28 | Options 29 | ------- 30 | 31 | The following options can be set in conf.py: 32 | 33 | - numpydoc_use_plots: bool 34 | 35 | Whether to produce ``plot::`` directives for Examples sections that 36 | contain ``import matplotlib``. 37 | 38 | - numpydoc_show_class_members: bool 39 | 40 | Whether to show all members of a class in the Methods and Attributes 41 | sections automatically. 
42 | 43 | - numpydoc_class_members_toctree: bool 44 | 45 | Whether to create a Sphinx table of contents for the lists of class 46 | methods and attributes. If a table of contents is made, Sphinx expects 47 | each entry to have a separate page. 48 | 49 | - numpydoc_edit_link: bool (DEPRECATED -- edit your HTML template instead) 50 | 51 | Whether to insert an edit link after docstrings. 52 | -------------------------------------------------------------------------------- /doc/sphinxext/numpydoc/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | 3 | from .numpydoc import setup 4 | -------------------------------------------------------------------------------- /doc/sphinxext/numpydoc/tests/test_linkcode.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | 3 | import numpydoc.linkcode 4 | 5 | # No tests at the moment... 6 | -------------------------------------------------------------------------------- /doc/sphinxext/numpydoc/tests/test_phantom_import.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | 3 | import numpydoc.phantom_import 4 | 5 | # No tests at the moment... 6 | -------------------------------------------------------------------------------- /doc/sphinxext/numpydoc/tests/test_plot_directive.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | 3 | import numpydoc.plot_directive 4 | 5 | # No tests at the moment... 
6 | -------------------------------------------------------------------------------- /doc/sphinxext/numpydoc/tests/test_traitsdoc.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | 3 | import numpydoc.traitsdoc 4 | 5 | # No tests at the moment... 6 | -------------------------------------------------------------------------------- /examples/data/SOURCES: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/examples/data/SOURCES -------------------------------------------------------------------------------- /examples/finance.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some examples playing around with yahoo finance data 3 | """ 4 | 5 | from datetime import datetime 6 | from pandas.compat import zip 7 | 8 | import matplotlib.finance as fin 9 | import numpy as np 10 | from pylab import show 11 | 12 | 13 | from pandas import Index, DataFrame 14 | from pandas.core.datetools import BMonthEnd 15 | from pandas import ols 16 | 17 | startDate = datetime(2008, 1, 1) 18 | endDate = datetime(2009, 9, 1) 19 | 20 | 21 | def getQuotes(symbol, start, end): 22 | quotes = fin.quotes_historical_yahoo(symbol, start, end) 23 | dates, open, close, high, low, volume = zip(*quotes) 24 | 25 | data = { 26 | 'open': open, 27 | 'close': close, 28 | 'high': high, 29 | 'low': low, 30 | 'volume': volume 31 | } 32 | 33 | dates = Index([datetime.fromordinal(int(d)) for d in dates]) 34 | return DataFrame(data, index=dates) 35 | 36 | msft = getQuotes('MSFT', startDate, endDate) 37 | aapl = getQuotes('AAPL', startDate, endDate) 38 | goog = getQuotes('GOOG', startDate, endDate) 39 | ibm = getQuotes('IBM', startDate, endDate) 40 | 41 | px = DataFrame({'MSFT': msft['close'], 42 | 'IBM': ibm['close'], 43 | 'GOOG': goog['close'], 44 | 
'AAPL': aapl['close']}) 45 | returns = px / px.shift(1) - 1 46 | 47 | # Select dates 48 | 49 | subIndex = ibm.index[(ibm['close'] > 95) & (ibm['close'] < 100)] 50 | msftOnSameDates = msft.reindex(subIndex) 51 | 52 | # Insert columns 53 | 54 | msft['hi-lo spread'] = msft['high'] - msft['low'] 55 | ibm['hi-lo spread'] = ibm['high'] - ibm['low'] 56 | 57 | # Aggregate monthly 58 | 59 | 60 | def toMonthly(frame, how): 61 | offset = BMonthEnd() 62 | 63 | return frame.groupby(offset.rollforward).aggregate(how) 64 | 65 | msftMonthly = toMonthly(msft, np.mean) 66 | ibmMonthly = toMonthly(ibm, np.mean) 67 | 68 | # Statistics 69 | 70 | stdev = DataFrame({ 71 | 'MSFT': msft.std(), 72 | 'IBM': ibm.std() 73 | }) 74 | 75 | # Arithmetic 76 | 77 | ratios = ibm / msft 78 | 79 | # Works with different indices 80 | 81 | ratio = ibm / ibmMonthly 82 | monthlyRatio = ratio.reindex(ibmMonthly.index) 83 | 84 | # Ratio relative to past month average 85 | 86 | filledRatio = ibm / ibmMonthly.reindex(ibm.index, method='pad') 87 | -------------------------------------------------------------------------------- /examples/regressions.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import string 3 | 4 | import numpy as np 5 | 6 | from pandas.core.api import Series, DataFrame, DatetimeIndex 7 | from pandas.stats.api import ols 8 | 9 | N = 100 10 | 11 | start = datetime(2009, 9, 2) 12 | dateRange = DatetimeIndex(start, periods=N) 13 | 14 | 15 | def makeDataFrame(): 16 | data = DataFrame(np.random.randn(N, 7), 17 | columns=list(string.ascii_uppercase[:7]), 18 | index=dateRange) 19 | 20 | return data 21 | 22 | 23 | def makeSeries(): 24 | return Series(np.random.randn(N), index=dateRange) 25 | 26 | #------------------------------------------------------------------------------- 27 | # Standard rolling linear regression 28 | 29 | X = makeDataFrame() 30 | Y = makeSeries() 31 | 32 | model = ols(y=Y, x=X) 33 | 34 | print(model) 35 | 
36 | #------------------------------------------------------------------------------- 37 | # Panel regression 38 | 39 | data = { 40 | 'A': makeDataFrame(), 41 | 'B': makeDataFrame(), 42 | 'C': makeDataFrame() 43 | } 44 | 45 | Y = makeDataFrame() 46 | 47 | panelModel = ols(y=Y, x=data, window=50) 48 | 49 | model = ols(y=Y, x=data) 50 | 51 | print(panelModel) 52 | -------------------------------------------------------------------------------- /fake_pyrex/Pyrex/Distutils/__init__.py: -------------------------------------------------------------------------------- 1 | # work around broken setuptools monkey patching 2 | -------------------------------------------------------------------------------- /fake_pyrex/Pyrex/Distutils/build_ext.py: -------------------------------------------------------------------------------- 1 | build_ext = "yes, it's there!" 2 | -------------------------------------------------------------------------------- /fake_pyrex/Pyrex/__init__.py: -------------------------------------------------------------------------------- 1 | # work around broken setuptools monkey patching 2 | -------------------------------------------------------------------------------- /pandas/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable-msg=W0614,W0401,W0611,W0622 2 | 3 | __docformat__ = 'restructuredtext' 4 | 5 | try: 6 | from . 
import hashtable, tslib, lib 7 | except Exception: # pragma: no cover 8 | import sys 9 | e = sys.exc_info()[1] # Py25 and Py3 current exception syntax conflict 10 | print(e) 11 | if 'No module named lib' in str(e): 12 | raise ImportError('C extensions not built: if you installed already ' 13 | 'verify that you are not importing from the source ' 14 | 'directory') 15 | else: 16 | raise 17 | 18 | from datetime import datetime 19 | import numpy as np 20 | 21 | # XXX: HACK for NumPy 1.5.1 to suppress warnings 22 | try: 23 | np.seterr(all='ignore') 24 | except Exception: # pragma: no cover 25 | pass 26 | 27 | # numpy versioning 28 | from distutils.version import LooseVersion 29 | _np_version = np.version.short_version 30 | _np_version_under1p6 = LooseVersion(_np_version) < '1.6' 31 | _np_version_under1p7 = LooseVersion(_np_version) < '1.7' 32 | _np_version_under1p8 = LooseVersion(_np_version) < '1.8' 33 | _np_version_under1p9 = LooseVersion(_np_version) < '1.9' 34 | 35 | from pandas.version import version as __version__ 36 | from pandas.info import __doc__ 37 | 38 | # let init-time option registration happen 39 | import pandas.core.config_init 40 | 41 | from pandas.core.api import * 42 | from pandas.sparse.api import * 43 | from pandas.stats.api import * 44 | from pandas.tseries.api import * 45 | from pandas.io.api import * 46 | from pandas.computation.api import * 47 | 48 | from pandas.tools.describe import value_range 49 | from pandas.tools.merge import merge, concat, ordered_merge 50 | from pandas.tools.pivot import pivot_table, crosstab 51 | from pandas.tools.plotting import scatter_matrix, plot_params 52 | from pandas.tools.tile import cut, qcut 53 | from pandas.core.reshape import melt 54 | from pandas.util.print_versions import show_versions 55 | import pandas.util.testing 56 | -------------------------------------------------------------------------------- /pandas/compat/chainmap.py: 
class DeepChainMap(ChainMap):
    """ChainMap variant for writing/deleting *in place*.

    Unlike ``ChainMap``, assignment and deletion target the first mapping
    in the chain that already contains the key, rather than always the
    front mapping.
    """

    def __setitem__(self, key, value):
        # Update the key where it already lives; if it is new, insert it
        # into the front map, matching ChainMap's behaviour.
        target = next((m for m in self.maps if key in m), None)
        if target is not None:
            target[key] = value
        else:
            self.maps[0][key] = value

    def __delitem__(self, key):
        # Remove the first occurrence of the key across the stacked maps.
        for layer in self.maps:
            if key in layer:
                del layer[key]
                return
        raise KeyError(key)

    # override because the m parameter is introduced in Python 3.4
    def new_child(self, m=None):
        child = {} if m is None else m
        return self.__class__(child, *self.maps)
21 | """ 22 | import openpyxl 23 | ver = LooseVersion(openpyxl.__version__) 24 | return LooseVersion(start_ver) < ver <= LooseVersion(stop_ver) 25 | -------------------------------------------------------------------------------- /pandas/computation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/computation/__init__.py -------------------------------------------------------------------------------- /pandas/computation/api.py: -------------------------------------------------------------------------------- 1 | from pandas.computation.eval import eval 2 | from pandas.computation.expr import Expr 3 | -------------------------------------------------------------------------------- /pandas/computation/common.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pandas.compat import reduce 4 | 5 | 6 | def _ensure_decoded(s): 7 | """ if we have bytes, decode them to unicode """ 8 | if isinstance(s, (np.bytes_, bytes)): 9 | s = s.decode(pd.get_option('display.encoding')) 10 | return s 11 | 12 | 13 | def _result_type_many(*arrays_and_dtypes): 14 | """ wrapper around numpy.result_type which overcomes the NPY_MAXARGS (32) 15 | argument limit """ 16 | try: 17 | return np.result_type(*arrays_and_dtypes) 18 | except ValueError: 19 | # we have > NPY_MAXARGS terms in our expression 20 | return reduce(np.result_type, arrays_and_dtypes) 21 | 22 | 23 | class NameResolutionError(NameError): 24 | pass 25 | -------------------------------------------------------------------------------- /pandas/computation/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/computation/tests/__init__.py 
-------------------------------------------------------------------------------- /pandas/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/core/__init__.py -------------------------------------------------------------------------------- /pandas/core/api.py: -------------------------------------------------------------------------------- 1 | 2 | # pylint: disable=W0614,W0401,W0611 3 | 4 | import numpy as np 5 | 6 | from pandas.core.algorithms import factorize, match, unique, value_counts 7 | from pandas.core.common import isnull, notnull 8 | from pandas.core.categorical import Categorical 9 | from pandas.core.groupby import Grouper 10 | from pandas.core.format import set_eng_float_format 11 | from pandas.core.index import Index, Int64Index, Float64Index, MultiIndex 12 | 13 | from pandas.core.series import Series, TimeSeries 14 | from pandas.core.frame import DataFrame 15 | from pandas.core.panel import Panel 16 | from pandas.core.panel4d import Panel4D 17 | from pandas.core.groupby import groupby 18 | from pandas.core.reshape import (pivot_simple as pivot, get_dummies, 19 | lreshape, wide_to_long) 20 | 21 | WidePanel = Panel 22 | 23 | from pandas.core.indexing import IndexSlice 24 | from pandas.tseries.offsets import DateOffset 25 | from pandas.tseries.tools import to_datetime 26 | from pandas.tseries.index import (DatetimeIndex, Timestamp, 27 | date_range, bdate_range) 28 | from pandas.tseries.period import Period, PeriodIndex 29 | 30 | # legacy 31 | from pandas.core.common import save, load # deprecated, remove in 0.13 32 | import pandas.core.datetools as datetools 33 | 34 | from pandas.core.config import (get_option, set_option, reset_option, 35 | describe_option, option_context, options) 36 | -------------------------------------------------------------------------------- /pandas/core/array.py: 
"""
Isolate pandas's exposure to NumPy
"""

import numpy as np

# Canonical array type and boolean scalar type re-exported under local names.
Array = np.ndarray

bool = np.bool_

# Sized numeric scalar types to lift into this module's namespace.
_dtypes = {
    'int': [8, 16, 32, 64],
    'uint': [8, 16, 32, 64],
    'float': [16, 32, 64]
}

# Lift int8 ... float64 from numpy into this module.
_lift_types = [_k + str(_i) for _k, _v in _dtypes.items() for _i in _v]
for _t in _lift_types:
    globals()[_t] = getattr(np, _t)

# Re-export a handful of array constructors/utilities under the same names.
_lift_function = ['empty', 'arange', 'array', 'putmask', 'where']
for _f in _lift_function:
    globals()[_f] = getattr(np, _f)

# Common random generators, lifted from numpy.random.
_lift_random = ['randn', 'rand']
for _f in _lift_random:
    globals()[_f] = getattr(np.random, _f)

# Missing-value sentinel.
NA = np.nan
thisMonthEnd = MonthEnd(0) 34 | thisBMonthEnd = BMonthEnd(0) 35 | thisYearEnd = YearEnd(0) 36 | thisYearBegin = YearBegin(0) 37 | thisBQuarterEnd = BQuarterEnd(0) 38 | thisQuarterEnd = QuarterEnd(0) 39 | 40 | # Functions to check where a date lies 41 | isBusinessDay = BDay().onOffset 42 | isMonthEnd = MonthEnd().onOffset 43 | isBMonthEnd = BMonthEnd().onOffset 44 | 45 | 46 | def _resolve_offset(freq, kwds): 47 | if 'timeRule' in kwds or 'offset' in kwds: 48 | offset = kwds.get('offset', None) 49 | offset = kwds.get('timeRule', offset) 50 | if isinstance(offset, compat.string_types): 51 | offset = getOffset(offset) 52 | warn = True 53 | else: 54 | offset = freq 55 | warn = False 56 | 57 | if warn: 58 | import warnings 59 | warnings.warn("'timeRule' and 'offset' parameters are deprecated," 60 | " please use 'freq' instead", 61 | FutureWarning) 62 | 63 | return offset 64 | -------------------------------------------------------------------------------- /pandas/core/matrix.py: -------------------------------------------------------------------------------- 1 | from pandas.core.frame import DataFrame as DataMatrix 2 | -------------------------------------------------------------------------------- /pandas/core/panel4d.py: -------------------------------------------------------------------------------- 1 | """ Panel4D: a 4-d dict like collection of panels """ 2 | 3 | from pandas.core.panelnd import create_nd_panel_factory 4 | from pandas.core.panel import Panel 5 | 6 | Panel4D = create_nd_panel_factory( 7 | klass_name='Panel4D', 8 | orders=['labels', 'items', 'major_axis', 'minor_axis'], 9 | slices={'labels': 'labels', 'items': 'items', 'major_axis': 'major_axis', 10 | 'minor_axis': 'minor_axis'}, 11 | slicer=Panel, 12 | aliases={'major': 'major_axis', 'minor': 'minor_axis'}, 13 | stat_axis=2, 14 | ns=dict(__doc__=""" 15 | Represents a 4 dimensional structured 16 | 17 | Parameters 18 | ---------- 19 | data : ndarray (labels x items x major x minor), or dict of Panels 20 
def panel4d_init(self, data=None, labels=None, items=None, major_axis=None,
                 minor_axis=None, copy=False, dtype=None):
    """Initializer bound onto Panel4D.

    Forwards the four axis arguments plus copy/dtype straight through to
    the generic ``_init_data`` machinery.
    """
    axis_kwargs = dict(data=data, labels=labels, items=items,
                       major_axis=major_axis, minor_axis=minor_axis,
                       copy=copy, dtype=dtype)
    self._init_data(**axis_kwargs)
get_item(self, object val) 24 | cpdef set_item(self, object key, Py_ssize_t val) 25 | -------------------------------------------------------------------------------- /pandas/info.py: -------------------------------------------------------------------------------- 1 | """ 2 | pandas - a powerful data analysis and manipulation library for Python 3 | ===================================================================== 4 | 5 | See http://pandas.sourceforge.net for full documentation. Otherwise, see the 6 | docstrings of the various objects in the pandas namespace: 7 | 8 | Series 9 | DataFrame 10 | Panel 11 | Index 12 | DatetimeIndex 13 | HDFStore 14 | bdate_range 15 | date_range 16 | read_csv 17 | read_fwf 18 | read_table 19 | ols 20 | """ 21 | -------------------------------------------------------------------------------- /pandas/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/__init__.py -------------------------------------------------------------------------------- /pandas/io/api.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data IO api 3 | """ 4 | 5 | from pandas.io.parsers import read_csv, read_table, read_fwf 6 | from pandas.io.clipboard import read_clipboard 7 | from pandas.io.excel import ExcelFile, ExcelWriter, read_excel 8 | from pandas.io.pytables import HDFStore, Term, get_store, read_hdf 9 | from pandas.io.json import read_json 10 | from pandas.io.html import read_html 11 | from pandas.io.sql import read_sql, read_sql_table, read_sql_query 12 | from pandas.io.stata import read_stata 13 | from pandas.io.pickle import read_pickle, to_pickle 14 | from pandas.io.packers import read_msgpack, to_msgpack 15 | from pandas.io.gbq import read_gbq 16 | -------------------------------------------------------------------------------- /pandas/io/date_converters.py: 
-------------------------------------------------------------------------------- 1 | """This module is designed for community supported date conversion functions""" 2 | from pandas.compat import range, map 3 | import numpy as np 4 | import pandas.lib as lib 5 | 6 | 7 | def parse_date_time(date_col, time_col): 8 | date_col = _maybe_cast(date_col) 9 | time_col = _maybe_cast(time_col) 10 | return lib.try_parse_date_and_time(date_col, time_col) 11 | 12 | 13 | def parse_date_fields(year_col, month_col, day_col): 14 | year_col = _maybe_cast(year_col) 15 | month_col = _maybe_cast(month_col) 16 | day_col = _maybe_cast(day_col) 17 | return lib.try_parse_year_month_day(year_col, month_col, day_col) 18 | 19 | 20 | def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, 21 | second_col): 22 | year_col = _maybe_cast(year_col) 23 | month_col = _maybe_cast(month_col) 24 | day_col = _maybe_cast(day_col) 25 | hour_col = _maybe_cast(hour_col) 26 | minute_col = _maybe_cast(minute_col) 27 | second_col = _maybe_cast(second_col) 28 | return lib.try_parse_datetime_components(year_col, month_col, day_col, 29 | hour_col, minute_col, second_col) 30 | 31 | 32 | def generic_parser(parse_func, *cols): 33 | N = _check_columns(cols) 34 | results = np.empty(N, dtype=object) 35 | 36 | for i in range(N): 37 | args = [c[i] for c in cols] 38 | results[i] = parse_func(*args) 39 | 40 | return results 41 | 42 | 43 | def _maybe_cast(arr): 44 | if not arr.dtype.type == np.object_: 45 | arr = np.array(arr, dtype=object) 46 | return arr 47 | 48 | 49 | def _check_columns(cols): 50 | if not len(cols): 51 | raise AssertionError("There must be at least 1 column") 52 | 53 | head, tail = cols[0], cols[1:] 54 | 55 | N = len(head) 56 | 57 | for i, n in enumerate(map(len, tail)): 58 | if n != N: 59 | raise AssertionError('All columns must have the same length: {0}; ' 60 | 'column {1} has length {2}'.format(N, i, n)) 61 | 62 | return N 63 | 
def read_pickle(path):
    """
    Load pickled pandas object (or any other pickled object) from the specified
    file path

    Warning: Loading pickled data received from untrusted sources can be
    unsafe. See: http://docs.python.org/2.7/library/pickle.html

    Parameters
    ----------
    path : string
        File path

    Returns
    -------
    unpickled : type of object stored in file
    """

    def try_read(path, encoding=None):
        # Try plain (c)pickle first; if that fails (e.g. classes were
        # renamed/moved between pandas versions) fall back to the compat
        # pickle, first without and then with class-remapping enabled.
        # Encoding is only passed when not None since py2 doesn't accept
        # the param.

        # cpickle
        # GH 6899
        # NOTE: narrowed from bare `except:` — a bare except also swallows
        # KeyboardInterrupt/SystemExit; Exception is the intended scope here.
        try:
            with open(path, 'rb') as fh:
                return pkl.load(fh)
        except Exception:
            # reg/patched pickle
            try:
                with open(path, 'rb') as fh:
                    return pc.load(fh, encoding=encoding, compat=False)
            # compat pickle
            except Exception:
                with open(path, 'rb') as fh:
                    return pc.load(fh, encoding=encoding, compat=True)

    try:
        return try_read(path)
    except Exception:
        # On py3, retry with latin1 to decode py2-written pickles that
        # contain encoded bytes/datetime payloads.
        if PY3:
            return try_read(path, encoding='latin1')
        raise
socket 4 | socket.setdefaulttimeout(5) 5 | -------------------------------------------------------------------------------- /pandas/io/tests/data/gbq_fake_job.txt: -------------------------------------------------------------------------------- 1 | {u'status': {u'state': u'DONE'}, u'kind': u'bigquery#job', u'statistics': {u'query': {u'cacheHit': True, u'totalBytesProcessed': u'0'}, u'endTime': u'1377668744674', u'totalBytesProcessed': u'0', u'startTime': u'1377668744466'}, u'jobReference': {u'projectId': u'57288129629', u'jobId': u'bqjob_r5f956972f0190bdf_00000140c374bf42_2'}, u'etag': u'"4PTsVxg68bQkQs1RJ1Ndewqkgg4/oO4VmgFrAku4N6FWci9s7iFIftc"', u'configuration': {u'query': {u'createDisposition': u'CREATE_IF_NEEDED', u'query': u'SELECT * FROM [publicdata:samples.shakespeare]', u'writeDisposition': u'WRITE_TRUNCATE', u'destinationTable': {u'projectId': u'57288129629', u'tableId': u'anonb5ec450da88eeeb78a27784ea482ee75a146d442', u'datasetId': u'_d0b4f5f0d50dc68a3eb0fa6cba66a9a8687d9253'}}}, u'id': u'57288129629:bqjob_r5f956972f0190bdf_00000140c374bf42_2', u'selfLink': u'https://www.googleapis.com/bigquery/v2/projects/57288129629/jobs/bqjob_r5f956972f0190bdf_00000140c374bf42_2'} -------------------------------------------------------------------------------- /pandas/io/tests/data/html_encoding/chinese_utf-16.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/html_encoding/chinese_utf-16.html -------------------------------------------------------------------------------- /pandas/io/tests/data/html_encoding/chinese_utf-32.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/html_encoding/chinese_utf-32.html 
-------------------------------------------------------------------------------- /pandas/io/tests/data/html_encoding/chinese_utf-8.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
01
0 漊煻獌 漊煻獌
1 袟袘觕 袟袘觕
2 埱娵徖 埱娵徖
-------------------------------------------------------------------------------- /pandas/io/tests/data/html_encoding/letz_latin1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/html_encoding/letz_latin1.html -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_hdf/legacy.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_hdf/legacy.h5 -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_hdf/legacy_0.10.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_hdf/legacy_0.10.h5 -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_hdf/legacy_table.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_hdf/legacy_table.h5 -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5 -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_hdf/pytables_native.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_hdf/pytables_native.h5 -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_hdf/pytables_native2.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_hdf/pytables_native2.h5 -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle -------------------------------------------------------------------------------- 
/pandas/io/tests/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/salary.table: -------------------------------------------------------------------------------- 1 | S X E M 2 | 13876 1 1 1 3 | 11608 1 3 0 4 | 18701 1 3 1 5 | 11283 1 2 0 6 | 11767 1 3 0 7 | 20872 2 2 1 8 | 11772 2 2 0 9 | 10535 2 1 0 10 | 12195 2 3 0 11 | 12313 3 2 0 12 | 14975 3 1 1 13 | 21371 3 2 1 14 | 19800 3 3 1 15 | 11417 4 1 0 16 | 20263 4 3 1 17 | 13231 4 3 0 18 | 12884 4 2 0 19 | 13245 5 2 0 20 | 13677 5 3 0 21 | 15965 5 1 1 22 | 12336 6 1 0 23 | 21352 6 3 1 24 | 13839 6 2 0 25 | 22884 6 2 1 26 | 16978 7 1 1 27 | 14803 8 2 0 28 | 17404 8 1 1 29 | 22184 8 3 1 30 | 13548 8 1 0 31 | 14467 10 1 0 32 | 15942 10 2 0 33 | 23174 10 3 1 34 | 23780 10 2 1 35 | 25410 11 2 1 36 | 14861 11 1 0 37 | 16882 12 2 0 38 | 24170 12 3 1 39 | 15990 13 1 0 40 | 26330 13 2 1 41 | 17949 14 2 0 42 | 25685 15 3 1 43 | 27837 16 2 1 44 | 18838 16 2 0 45 | 17483 16 1 0 46 | 19207 17 2 0 47 | 19346 20 1 0 48 | -------------------------------------------------------------------------------- /pandas/io/tests/data/stata1_114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata1_114.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata1_117.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata1_117.dta 
-------------------------------------------------------------------------------- /pandas/io/tests/data/stata1_encoding.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata1_encoding.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata2_113.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata2_113.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata2_114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata2_114.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata2_115.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata2_115.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata2_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata2_115.dta~1dc157c... 
Added additional data files for testing alternative Stata file formats -------------------------------------------------------------------------------- /pandas/io/tests/data/stata2_117.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata2_117.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata3_113.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata3_113.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata3_114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata3_114.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata3_115.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata3_115.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata3_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata3_115.dta~1dc157c... 
Added additional data files for testing alternative Stata file formats -------------------------------------------------------------------------------- /pandas/io/tests/data/stata3_117.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata3_117.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata4_113.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata4_113.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata4_114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata4_114.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata4_115.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata4_115.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata4_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata4_115.dta~1dc157c... 
Added additional data files for testing alternative Stata file formats -------------------------------------------------------------------------------- /pandas/io/tests/data/stata4_117.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata4_117.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata5.csv: -------------------------------------------------------------------------------- 1 | byte_,int_,long_,float_,double_,date_td,string_,string_1 2 | 0,0,0,0,0,,"a","a" 3 | 1,1,1,1,1,,"ab","b" 4 | -1,-1,-1,-1,-1,,"abc","c" 5 | 100,32740,-2147483647,-1.70100000027769e+38,-2.0000000000000e+307,1970-01-01,"abcdefghijklmnop","d" 6 | -127,-32767,2147483620,1.70100000027769e+38,8.0000000000000e+307,1970-01-02,"abcdefghijklmnopqrstuvwxyz","e" 7 | ,0,,,,2014-01-01,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","f" 8 | 0,,,,,2114-01-01,"1234567890","1" 9 | ,,0,,,2014-12-31,"This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted","2" 10 | .a,.a,.a,.a,.a,2012-02-29,"!","A" 11 | 100,32740,-2.15e+09,-1.70e+38,-2.0e+307,01jan1970,"abcdefghijklmnop","d" 12 | -127,-32767,2.15e+09,1.70e+38,8.0e+307,02jan1970,"abcdefghijklmnopqrstuvwxyz","e" 13 | ,0,,,,01jan2014,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","f" 14 | 0,,,,,01jan2114,"1234567890","1" 15 | ,,0,,,31dec2014,"This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted","2" 16 | .a,.a,.a,.a,.a,29feb2012,"!","A" 17 | .z,.z,.z,.z,.z,,"&","Z" 18 | ,,,0,,,"1.23","!" 19 | ,,,,0,,"10jan1970","." 
20 | -------------------------------------------------------------------------------- /pandas/io/tests/data/stata5_113.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata5_113.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata5_114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata5_114.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata5_115.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata5_115.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata5_117.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata5_117.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata6.csv: -------------------------------------------------------------------------------- 1 | byte_,int_,long_,float_,double_,date_td,string_,string_1 2 | 0,0,0,0,0,1960-01-01,"a","a" 3 | 1,1,1,1,1,3014-12-31,"ab","b" 4 | -1,-1,-1,-1,-1,2014-12-31,"abc","c" 5 | 100,32740,-2147483647,-1.7010000002777e+38,-2.000000000000e+307,1970-01-01,"This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted by Stata. 
This string has 244 characters, so that ir is the maximum length permitted","d" 6 | -127,-32767,2147483620,1.7010000002777e+38,8.000000000000e+307,1970-01-02,"abcdefghijklmnopqrstuvwxyz","e" 7 | -------------------------------------------------------------------------------- /pandas/io/tests/data/stata6_113.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata6_113.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata6_114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata6_114.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata6_115.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata6_115.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata6_117.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata6_117.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata7_115.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata7_115.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata7_117.dta: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata7_117.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/test.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test.xls -------------------------------------------------------------------------------- /pandas/io/tests/data/test.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test.xlsm -------------------------------------------------------------------------------- /pandas/io/tests/data/test.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test.xlsx -------------------------------------------------------------------------------- /pandas/io/tests/data/test1.csv: -------------------------------------------------------------------------------- 1 | index,A,B,C,D 2 | 2000-01-03 00:00:00,0.980268513777,3.68573087906,-0.364216805298,-1.15973806169 3 | 2000-01-04 00:00:00,1.04791624281,-0.0412318367011,-0.16181208307,0.212549316967 4 | 2000-01-05 00:00:00,0.498580885705,0.731167677815,-0.537677223318,1.34627041952 5 | 2000-01-06 00:00:00,1.12020151869,1.56762092543,0.00364077397681,0.67525259227 6 | 2000-01-07 00:00:00,-0.487094399463,0.571454623474,-1.6116394093,0.103468562917 7 | 2000-01-10 00:00:00,0.836648671666,0.246461918642,0.588542635376,1.0627820613 8 | 2000-01-11 00:00:00,-0.157160753327,1.34030689438,1.19577795622,-1.09700699751 
-------------------------------------------------------------------------------- /pandas/io/tests/data/test2.csv: -------------------------------------------------------------------------------- 1 | A,B,C,D,E 2 | 2000-01-03 00:00:00,0.980268513777,3.68573087906,-0.364216805298,-1.15973806169,foo 3 | 2000-01-04 00:00:00,1.04791624281,-0.0412318367011,-0.16181208307,0.212549316967,bar 4 | 2000-01-05 00:00:00,0.498580885705,0.731167677815,-0.537677223318,1.34627041952,baz 5 | 2000-01-06 00:00:00,1.12020151869,1.56762092543,0.00364077397681,0.67525259227,qux 6 | 2000-01-07 00:00:00,-0.487094399463,0.571454623474,-1.6116394093,0.103468562917,foo2 7 | -------------------------------------------------------------------------------- /pandas/io/tests/data/test2.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test2.xls -------------------------------------------------------------------------------- /pandas/io/tests/data/test2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test2.xlsx -------------------------------------------------------------------------------- /pandas/io/tests/data/test3.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test3.xls -------------------------------------------------------------------------------- /pandas/io/tests/data/test_types.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test_types.xls 
-------------------------------------------------------------------------------- /pandas/io/tests/data/test_types.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test_types.xlsx -------------------------------------------------------------------------------- /pandas/io/tests/data/times_1900.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/times_1900.xls -------------------------------------------------------------------------------- /pandas/io/tests/data/times_1904.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/times_1904.xls -------------------------------------------------------------------------------- /pandas/io/tests/data/unicode_series.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/unicode_series.csv -------------------------------------------------------------------------------- /pandas/io/tests/data/utf16_ex.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/utf16_ex.txt -------------------------------------------------------------------------------- /pandas/io/tests/data/valid_markup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 
| 37 | 38 | 39 |
ab
067
140
294
370
40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 |
ab
067
140
61 | 62 | 63 | -------------------------------------------------------------------------------- /pandas/io/tests/test_json/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/test_json/__init__.py -------------------------------------------------------------------------------- /pandas/io/tests/test_json/data/tsframe_iso_v012.json: -------------------------------------------------------------------------------- 1 | {"A":{"2000-01-03T00:00:00":1.56808523,"2000-01-04T00:00:00":-0.2550111,"2000-01-05T00:00:00":1.51493992,"2000-01-06T00:00:00":-0.02765498,"2000-01-07T00:00:00":0.05951614},"B":{"2000-01-03T00:00:00":0.65727391,"2000-01-04T00:00:00":-0.08072427,"2000-01-05T00:00:00":0.11805825,"2000-01-06T00:00:00":0.44679743,"2000-01-07T00:00:00":-2.69652057},"C":{"2000-01-03T00:00:00":1.81021139,"2000-01-04T00:00:00":-0.03202878,"2000-01-05T00:00:00":1.629455,"2000-01-06T00:00:00":0.33192641,"2000-01-07T00:00:00":1.28163262},"D":{"2000-01-03T00:00:00":-0.17251653,"2000-01-04T00:00:00":-0.17581665,"2000-01-05T00:00:00":-1.31506612,"2000-01-06T00:00:00":-0.27885413,"2000-01-07T00:00:00":0.34703478},"date":{"2000-01-03T00:00:00":"1992-01-06T18:21:32.120000","2000-01-04T00:00:00":"1992-01-06T18:21:32.120000","2000-01-05T00:00:00":"1992-01-06T18:21:32.120000","2000-01-06T00:00:00":"2013-01-01T00:00:00","2000-01-07T00:00:00":"1992-01-06T18:21:32.120000"}} -------------------------------------------------------------------------------- /pandas/io/tests/test_json/data/tsframe_v012.json: -------------------------------------------------------------------------------- 1 | 
{"A":{"946857600000000000":1.56808523,"946944000000000000":-0.2550111,"947030400000000000":1.51493992,"947116800000000000":-0.02765498,"947203200000000000":0.05951614},"B":{"946857600000000000":0.65727391,"946944000000000000":-0.08072427,"947030400000000000":0.11805825,"947116800000000000":0.44679743,"947203200000000000":-2.69652057},"C":{"946857600000000000":1.81021139,"946944000000000000":-0.03202878,"947030400000000000":1.629455,"947116800000000000":0.33192641,"947203200000000000":1.28163262},"D":{"946857600000000000":-0.17251653,"946944000000000000":-0.17581665,"947030400000000000":-1.31506612,"947116800000000000":-0.27885413,"947203200000000000":0.34703478},"date":{"946857600000000000":694722092120000000,"946944000000000000":694722092120000000,"947030400000000000":694722092120000000,"947116800000000000":1356998400000000000,"947203200000000000":694722092120000000},"modified":{"946857600000000000":694722092120000000,"946944000000000000":null,"947030400000000000":694722092120000000,"947116800000000000":1356998400000000000,"947203200000000000":694722092120000000}} -------------------------------------------------------------------------------- /pandas/rpy/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from .common import importr, r, load_data 3 | except ImportError: 4 | pass 5 | -------------------------------------------------------------------------------- /pandas/rpy/base.py: -------------------------------------------------------------------------------- 1 | import pandas.rpy.util as util 2 | 3 | 4 | class lm(object): 5 | """ 6 | Examples 7 | -------- 8 | >>> model = lm('x ~ y + z', data) 9 | >>> model.coef 10 | """ 11 | def __init__(self, formula, data): 12 | pass 13 | -------------------------------------------------------------------------------- /pandas/rpy/mass.py: -------------------------------------------------------------------------------- 1 | class rlm(object): 2 | pass 3 | 
-------------------------------------------------------------------------------- /pandas/rpy/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/rpy/tests/__init__.py -------------------------------------------------------------------------------- /pandas/rpy/vars.py: -------------------------------------------------------------------------------- 1 | import pandas.rpy.util as util 2 | 3 | 4 | class VAR(object): 5 | """ 6 | 7 | Parameters 8 | ---------- 9 | y : 10 | p : 11 | type : {"const", "trend", "both", "none"} 12 | season : 13 | exogen : 14 | lag_max : 15 | ic : {"AIC", "HQ", "SC", "FPE"} 16 | Information criterion to use, if lag_max is not None 17 | """ 18 | def __init__(y, p=1, type="none", season=None, exogen=None, 19 | lag_max=None, ic=None): 20 | pass 21 | -------------------------------------------------------------------------------- /pandas/sandbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/sandbox/__init__.py -------------------------------------------------------------------------------- /pandas/sparse/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/sparse/__init__.py -------------------------------------------------------------------------------- /pandas/sparse/api.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=W0611 2 | 3 | from pandas.sparse.array import SparseArray 4 | from pandas.sparse.list import SparseList 5 | from pandas.sparse.series import SparseSeries, SparseTimeSeries 6 | from pandas.sparse.frame import SparseDataFrame 7 | from pandas.sparse.panel 
import SparsePanel 8 | -------------------------------------------------------------------------------- /pandas/sparse/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/sparse/tests/__init__.py -------------------------------------------------------------------------------- /pandas/src/datetime_helper.h: -------------------------------------------------------------------------------- 1 | #include "datetime.h" 2 | 3 | void mangle_nat(PyObject *val) { 4 | PyDateTime_GET_MONTH(val) = -1; 5 | PyDateTime_GET_DAY(val) = -1; 6 | } 7 | -------------------------------------------------------------------------------- /pandas/src/headers/math.h: -------------------------------------------------------------------------------- 1 | #ifndef _PANDAS_MATH_H_ 2 | #define _PANDAS_MATH_H_ 3 | 4 | #if defined(_MSC_VER) 5 | #include 6 | __inline int signbit(double num) { return _copysign(1.0, num) < 0; } 7 | #else 8 | #include 9 | #endif 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /pandas/src/headers/portable.h: -------------------------------------------------------------------------------- 1 | #ifndef _PANDAS_PORTABLE_H_ 2 | #define _PANDAS_PORTABLE_H_ 3 | 4 | #if defined(_MSC_VER) 5 | #define strcasecmp( s1, s2 ) _stricmp( s1, s2 ) 6 | #endif 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /pandas/src/headers/stdint.h: -------------------------------------------------------------------------------- 1 | #ifndef _PANDAS_STDINT_H_ 2 | #define _PANDAS_STDINT_H_ 3 | 4 | #if defined(_MSC_VER) 5 | #include "ms_stdint.h" 6 | #else 7 | #include 8 | #endif 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /pandas/src/helper.h: 
-------------------------------------------------------------------------------- 1 | #ifndef C_HELPER_H 2 | #define C_HELPER_H 3 | 4 | #ifndef PANDAS_INLINE 5 | #if defined(__GNUC__) 6 | #define PANDAS_INLINE __inline__ 7 | #elif defined(_MSC_VER) 8 | #define PANDAS_INLINE __inline 9 | #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L 10 | #define PANDAS_INLINE inline 11 | #else 12 | #define PANDAS_INLINE 13 | #endif 14 | #endif 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /pandas/src/klib/khash_python.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "khash.h" 4 | 5 | // kludge 6 | 7 | #define kh_float64_hash_func _Py_HashDouble 8 | #define kh_float64_hash_equal kh_int64_hash_equal 9 | 10 | #define KHASH_MAP_INIT_FLOAT64(name, khval_t) \ 11 | KHASH_INIT(name, khfloat64_t, khval_t, 1, kh_float64_hash_func, kh_float64_hash_equal) 12 | 13 | KHASH_MAP_INIT_FLOAT64(float64, size_t) 14 | 15 | 16 | int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) { 17 | int result = PyObject_RichCompareBool(a, b, Py_EQ); 18 | if (result < 0) { 19 | PyErr_Clear(); 20 | return 0; 21 | } 22 | return result; 23 | } 24 | 25 | 26 | #define kh_python_hash_func(key) (PyObject_Hash(key)) 27 | #define kh_python_hash_equal(a, b) (pyobject_cmp(a, b)) 28 | 29 | 30 | // Python object 31 | 32 | typedef PyObject* kh_pyobject_t; 33 | 34 | #define KHASH_MAP_INIT_PYOBJECT(name, khval_t) \ 35 | KHASH_INIT(name, kh_pyobject_t, khval_t, 1, \ 36 | kh_python_hash_func, kh_python_hash_equal) 37 | 38 | KHASH_MAP_INIT_PYOBJECT(pymap, Py_ssize_t) 39 | 40 | #define KHASH_SET_INIT_PYOBJECT(name) \ 41 | KHASH_INIT(name, kh_pyobject_t, char, 0, \ 42 | kh_python_hash_func, kh_python_hash_equal) 43 | 44 | KHASH_SET_INIT_PYOBJECT(pyset) 45 | 46 | #define kh_exist_pymap(h, k) (kh_exist(h, k)) 47 | #define kh_exist_pyset(h, k) (kh_exist(h, k)) 48 | 49 | KHASH_MAP_INIT_STR(strbox, 
kh_pyobject_t) 50 | -------------------------------------------------------------------------------- /pandas/src/klib/ktypes.h: -------------------------------------------------------------------------------- 1 | #ifndef __KTYPES_H 2 | #define __KTYPES_H 3 | 4 | /* compipler specific configuration */ 5 | 6 | #endif /* __KTYPES_H */ 7 | -------------------------------------------------------------------------------- /pandas/src/parser/.gitignore: -------------------------------------------------------------------------------- 1 | !*.c 2 | test* -------------------------------------------------------------------------------- /pandas/src/parser/Makefile: -------------------------------------------------------------------------------- 1 | PYTHONBASE = /Library/Frameworks/EPD64.framework/Versions/Current 2 | NUMPY_INC = /Library/Frameworks/EPD64.framework/Versions/7.1/lib/python2.7/site-packages/numpy/core/include 3 | PYTHON_INC = -I$(PYTHONBASE)/include/python2.7 -I$(NUMPY_INC) 4 | PYTHON_LINK = -L$(PYTHONBASE)/lib -lpython 5 | 6 | SOURCES = conversions.c parser.c str_to.c 7 | 8 | check-syntax: 9 | gcc -g $(PYTHON_INC) -o /dev/null -S ${CHK_SOURCES} 10 | 11 | test: $(SOURCES) 12 | gcc $(PYTHON_INC) -o test $(SOURCES) 13 | ./test -------------------------------------------------------------------------------- /pandas/src/parser/io.h: -------------------------------------------------------------------------------- 1 | #include "Python.h" 2 | #include "tokenizer.h" 3 | 4 | 5 | typedef struct _file_source { 6 | /* The file being read. */ 7 | FILE *fp; 8 | 9 | char *buffer; 10 | /* Size of the file, in bytes. */ 11 | /* off_t size; */ 12 | 13 | /* file position when the file_buffer was created. */ 14 | off_t initial_file_pos; 15 | 16 | /* Offset in the file of the data currently in the buffer. */ 17 | off_t buffer_file_pos; 18 | 19 | /* Actual number of bytes in the current buffer. (Can be less than buffer_size.) 
*/ 20 | off_t last_pos; 21 | 22 | /* Size (in bytes) of the buffer. */ 23 | // off_t buffer_size; 24 | 25 | /* Pointer to the buffer. */ 26 | // char *buffer; 27 | 28 | } file_source; 29 | 30 | #define FS(source) ((file_source *)source) 31 | 32 | #if !defined(_WIN32) 33 | #define HAVE_MMAP 34 | #endif 35 | 36 | typedef struct _memory_map { 37 | 38 | FILE *fp; 39 | 40 | /* Size of the file, in bytes. */ 41 | off_t size; 42 | 43 | /* file position when the file_buffer was created. */ 44 | off_t initial_file_pos; 45 | 46 | int line_number; 47 | 48 | int fileno; 49 | off_t position; 50 | off_t last_pos; 51 | char *memmap; 52 | 53 | } memory_map; 54 | 55 | #define MM(src) ((memory_map*) src) 56 | 57 | void *new_mmap(char *fname); 58 | 59 | int del_mmap(void *src); 60 | 61 | void* buffer_mmap_bytes(void *source, size_t nbytes, 62 | size_t *bytes_read, int *status); 63 | 64 | 65 | typedef struct _rd_source { 66 | PyObject* obj; 67 | PyObject* buffer; 68 | size_t position; 69 | } rd_source; 70 | 71 | #define RDS(source) ((rd_source *)source) 72 | 73 | void *new_file_source(char *fname, size_t buffer_size); 74 | 75 | void *new_rd_source(PyObject *obj); 76 | 77 | int del_file_source(void *src); 78 | int del_rd_source(void *src); 79 | 80 | void* buffer_file_bytes(void *source, size_t nbytes, 81 | size_t *bytes_read, int *status); 82 | 83 | void* buffer_rd_bytes(void *source, size_t nbytes, 84 | size_t *bytes_read, int *status); 85 | 86 | -------------------------------------------------------------------------------- /pandas/src/properties.pyx: -------------------------------------------------------------------------------- 1 | from cpython cimport PyDict_Contains, PyDict_GetItem, PyDict_GetItem 2 | 3 | 4 | cdef class cache_readonly(object): 5 | 6 | cdef readonly: 7 | object func, name, allow_setting 8 | 9 | def __init__(self, func=None, allow_setting=False): 10 | if func is not None: 11 | self.func = func 12 | self.name = func.__name__ 13 | self.allow_setting = allow_setting 
14 | 15 | def __call__(self, func, doc=None): 16 | self.func = func 17 | self.name = func.__name__ 18 | return self 19 | 20 | def __get__(self, obj, typ): 21 | # Get the cache or set a default one if needed 22 | 23 | cache = getattr(obj, '_cache', None) 24 | if cache is None: 25 | try: 26 | cache = obj._cache = {} 27 | except (AttributeError): 28 | return 29 | 30 | if PyDict_Contains(cache, self.name): 31 | # not necessary to Py_INCREF 32 | val = PyDict_GetItem(cache, self.name) 33 | else: 34 | val = self.func(obj) 35 | PyDict_SetItem(cache, self.name, val) 36 | return val 37 | 38 | def __set__(self, obj, value): 39 | 40 | if not self.allow_setting: 41 | raise Exception("cannot set values for [%s]" % self.name) 42 | 43 | # Get the cache or set a default one if needed 44 | cache = getattr(obj, '_cache', None) 45 | if cache is None: 46 | try: 47 | cache = obj._cache = {} 48 | except (AttributeError): 49 | return 50 | 51 | PyDict_SetItem(cache, self.name, value) 52 | 53 | cdef class AxisProperty(object): 54 | cdef: 55 | Py_ssize_t axis 56 | 57 | def __init__(self, axis=0): 58 | self.axis = axis 59 | 60 | def __get__(self, obj, type): 61 | cdef list axes = obj._data.axes 62 | return axes[self.axis] 63 | 64 | def __set__(self, obj, value): 65 | obj._set_axis(self.axis, value) 66 | -------------------------------------------------------------------------------- /pandas/src/skiplist.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "skiplist.h": 2 | ctypedef struct node_t: 3 | double value 4 | int is_nil 5 | int levels 6 | node_t **next 7 | int *width 8 | int ref_count 9 | 10 | ctypedef struct skiplist_t: 11 | node_t *head 12 | int size, maxlevels 13 | node_t **tmp_chain 14 | int *tmp_steps 15 | 16 | inline skiplist_t* skiplist_init(int) 17 | inline void skiplist_destroy(skiplist_t*) 18 | inline double skiplist_get(skiplist_t*, int, int*) 19 | inline int skiplist_insert(skiplist_t*, double) 20 | inline int 
skiplist_remove(skiplist_t*, double) 21 | 22 | -------------------------------------------------------------------------------- /pandas/src/ujson/python/version.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | * Neither the name of the ESN Social Software AB nor the 13 | names of its contributors may be used to endorse or promote products 14 | derived from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Canonical window types, keyed by their integer codes.
_WINDOW_TYPES = {
    0: 'full_sample',
    1: 'rolling',
    2: 'expanding'
}
# Also accept each canonical name itself as a key, e.g. 'rolling' -> 'rolling'.
# (The original iterated items() and ignored the key; values() is the intent.)
_WINDOW_TYPES.update((v, v) for v in list(_WINDOW_TYPES.values()))
_ADDITIONAL_CLUSTER_TYPES = set(("entity", "time"))


def _get_cluster_type(cluster_type):
    """
    Normalize a cluster-type spec to its canonical string form.

    Accepts anything `_get_window_type` accepts, plus 'entity'/'time'
    (case-insensitive, underscores treated as spaces). None passes
    through unchanged (previous behavior).

    Raises
    ------
    ValueError
        If the spec matches neither a window type nor a cluster type.
    """
    # this was previous behavior
    if cluster_type is None:
        return cluster_type
    try:
        return _get_window_type(cluster_type)
    except ValueError:
        final_type = str(cluster_type).lower().replace("_", " ")
        if final_type in _ADDITIONAL_CLUSTER_TYPES:
            return final_type
        raise ValueError('Unrecognized cluster type: %s' % cluster_type)


def _get_window_type(window_type):
    """
    Normalize a window-type spec (e.g. 0, 1, 2, 'rolling', 'full sample')
    to one of 'full_sample', 'rolling', 'expanding'.

    Raises
    ------
    ValueError
        If the spec is not recognized.
    """
    # e.g., 0, 1, 2
    final_type = _WINDOW_TYPES.get(window_type)
    # e.g., 'full_sample' / 'full sample' (case-insensitive)
    final_type = final_type or _WINDOW_TYPES.get(
        str(window_type).lower().replace(" ", "_"))
    if final_type is None:
        raise ValueError('Unrecognized window type: %s' % window_type)
    return final_type


def banner(text, width=80):
    """
    Center *text* in a line of '-' characters totalling *width* chars.

    The original shipped with an empty docstring; behavior is unchanged:
    extra fill (odd remainder) goes on the right.
    """
    to_fill = width - len(text)

    left = to_fill // 2
    right = to_fill - left

    return '%s%s%s' % ('-' * left, text, '-' * right)
    def checkFamaMacBethExtended(self, window_type, x, y, **kwds):
        """
        Run fama_macbeth with a moving window and verify each window's
        column of stats equals an independent static run on that window's
        data slice.
        """
        window = 25

        result = fama_macbeth(y=y, x=x, window_type=window_type, window=window,
                              **kwds)
        self._check_stuff_works(result)

        index = result._index
        time = len(index)

        # One window position per column of result._stats.
        for i in range(time - window + 1):
            if window_type == 'rolling':
                # rolling: window slides forward
                start = index[i]
            else:
                # expanding: window always starts at the beginning
                start = index[0]

            end = index[i + window - 1]

            # Slice every panel item and the response down to this window.
            x2 = {}
            for k, v in compat.iteritems(x):
                x2[k] = v.truncate(start, end)
            y2 = y.truncate(start, end)

            # Static (non-windowed) run on the same slice must agree with
            # the i-th stats column of the windowed run.
            reference = fama_macbeth(y=y2, x=x2, **kwds)
            assert_almost_equal(reference._stats, result._stats[:, i])

            static = fama_macbeth(y=y2, x=x2, **kwds)
            self._check_stuff_works(static)
class TestMath(tm.TestCase):
    """Tests for pandas.stats.math helpers: rank, solve, inv."""

    # Positions in the fixture array that are overwritten with NaN.
    _nan_locs = np.arange(20, 40)
    # No infinities injected by default.
    _inf_locs = np.array([])

    def setUp(self):
        # Random series/frame fixtures over a daily date range, with a
        # contiguous NaN block injected into the series.
        arr = randn(N)
        arr[self._nan_locs] = np.NaN

        self.arr = arr
        self.rng = date_range(datetime(2009, 1, 1), periods=N)

        self.series = Series(arr.copy(), index=self.rng)

        self.frame = DataFrame(randn(N, K), index=self.rng,
                               columns=np.arange(K))

    def test_rank_1d(self):
        """A random series has rank 1; a constant-zero series has rank 0."""
        self.assertEqual(1, pmath.rank(self.series))
        self.assertEqual(0, pmath.rank(Series(0, self.series.index)))

    def test_solve_rect(self):
        """Least-squares solve of a rectangular system matches OLS betas."""
        if not _have_statsmodels:
            raise nose.SkipTest("no statsmodels")

        b = Series(np.random.randn(N), self.frame.index)
        result = pmath.solve(self.frame, b)
        expected = ols(y=b, x=self.frame, intercept=False).beta
        self.assertTrue(np.allclose(result, expected))

    def test_inv_illformed(self):
        """inv of a singular matrix: expected values match the
        Moore-Penrose pseudo-inverse of [[1, 1], [2, 2]], so pmath.inv
        evidently falls back to a generalized inverse here."""
        singular = DataFrame(np.array([[1, 1], [2, 2]]))
        rs = pmath.inv(singular)
        expected = np.array([[0.1, 0.2], [0.1, 0.2]])
        self.assertTrue(np.allclose(rs, expected))
| g3 79 | tp31 80 | Rp32 81 | ((I1 82 | (I3 83 | tp33 84 | g10 85 | I00 86 | (lp34 87 | S'one' 88 | p35 89 | aS'two' 90 | p36 91 | aS'three' 92 | p37 93 | atp38 94 | (S'second' 95 | p39 96 | tp40 97 | tp41 98 | ba(lp42 99 | g0 100 | (cnumpy 101 | ndarray 102 | p43 103 | (I0 104 | tp44 105 | g3 106 | tp45 107 | Rp46 108 | (I1 109 | (I10 110 | tp47 111 | g7 112 | (S'i4' 113 | p48 114 | I0 115 | I1 116 | tp49 117 | Rp50 118 | (I3 119 | S'<' 120 | p51 121 | NNNI-1 122 | I-1 123 | I0 124 | tp52 125 | bI00 126 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00' 127 | p53 128 | tp54 129 | bag0 130 | (g43 131 | (I0 132 | tp55 133 | g3 134 | tp56 135 | Rp57 136 | (I1 137 | (I10 138 | tp58 139 | g50 140 | I00 141 | S'\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00' 142 | p59 143 | tp60 144 | baN(lp61 145 | g27 146 | ag39 147 | atp62 148 | tp63 149 | b. 
def test_unpack_buffer():
    """unpackb should accept any object exposing the buffer interface,
    not just bytes — exercised here with an array.array."""
    from array import array
    buf = array('b')
    data = packb(('foo', 'bar'))
    # FIX: array.fromstring() was deprecated in Python 3 and removed in
    # 3.9; use frombytes() where available (3.2+), fall back for Python 2.
    if hasattr(buf, 'frombytes'):
        buf.frombytes(data)
    else:  # pragma: no cover - Python 2 only
        buf.fromstring(data)
    obj = unpackb(buf, use_list=1)
    assert [b'foo', b'bar'] == obj
def check(src, should, use_list=0):
    """Unpack the raw msgpack bytes *src* and assert the result equals
    *should* (tuples by default; lists when use_list is truthy)."""
    assert unpackb(src, use_list=use_list) == should


def testSimpleValue():
    """nil / false / true single-byte formats."""
    check(b"\x93\xc0\xc2\xc3",
          (None, False, True,))


def testFixnum():
    """positive fixint (0x00-0x7f) and negative fixint (0xe0-0xff)."""
    check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff",
          ((0,64,127,), (-32,-16,-1,),)
          )


def testFixArray():
    """fixarray headers (0x90-0x9f), including nesting."""
    check(b"\x92\x90\x91\x91\xc0",
          ((),((None,),),),
          )


def testFixRaw():
    """fixraw/fixstr headers (0xa0-0xbf) of lengths 0-3."""
    check(b"\x94\xa0\xa1a\xa2bc\xa3def",
          (b"", b"a", b"bc", b"def",),
          )


def testFixMap():
    """fixmap headers (0x80-0x8f) with nested maps as values."""
    check(
        b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80",
        {False: {None: None}, True:{None:{}}},
        )


def testUnsignedInt():
    """uint8 (0xcc), uint16 (0xcd), uint32 (0xce) at min/mid/max."""
    check(
        b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00"
        b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00"
        b"\xce\xff\xff\xff\xff",
        (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295,),
        )


def testSignedInt():
    """int8 (0xd0), int16 (0xd1), int32 (0xd2) at 0/min/-1."""
    check(b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00"
          b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00"
          b"\xd2\xff\xff\xff\xff",
          (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1,))


def testRaw():
    """raw16 (0xda) and raw32 (0xdb) with lengths 0-2."""
    check(b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00"
          b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
          (b"", b"a", b"ab", b"", b"a", b"ab"))


def testArray():
    """array16 (0xdc) and array32 (0xdd) with 0-2 elements."""
    check(b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00"
          b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02"
          b"\xc2\xc3",
          ((), (None,), (False,True), (), (None,), (False,True))
          )


def testMap():
    """map16 (0xde) and map32 (0xdf) with 0-2 entries."""
    check(
        b"\x96"
        b"\xde\x00\x00"
        b"\xde\x00\x01\xc0\xc2"
        b"\xde\x00\x02\xc0\xc2\xc3\xc2"
        b"\xdf\x00\x00\x00\x00"
        b"\xdf\x00\x00\x00\x01\xc0\xc2"
        b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2",
        ({}, {None: False}, {True: False, None: False}, {},
         {None: False}, {True: False, None: False}))
# A 256-byte blob covering every byte value; bytes on Python 3, str on 2.
binarydata = [chr(i) for i in range(256)]
binarydata = "".join(binarydata)
if compat.PY3:
    binarydata = binarydata.encode('utf-8')


def gen_binary_data(idx):
    """Return a deterministic prefix of the blob, length idx % 300."""
    data = binarydata[:idx % 300]
    return data


def test_exceeding_unpacker_read_size():
    """Regression test: feeding more packed strings than fit in the
    Unpacker's read buffer used to corrupt the heap (double free)."""
    dumpf = compat.BytesIO()

    packer = msgpack.Packer()

    NUMBER_OF_STRINGS = 6
    read_size = 16
    # Historical failure thresholds observed while tuning this test:
    # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop):
    # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev)
    # 40 ok for read_size=1024, while 50 introduces errors
    # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev):

    # Pack NUMBER_OF_STRINGS deterministic payloads into one stream.
    for idx in range(NUMBER_OF_STRINGS):
        data = gen_binary_data(idx)
        dumpf.write(packer.pack(data))

    f = compat.BytesIO(dumpf.getvalue())
    dumpf.close()

    unpacker = msgpack.Unpacker(f, read_size=read_size, use_list=1)

    # Stream the objects back; each must round-trip exactly.
    read_count = 0
    for idx, o in enumerate(unpacker):
        assert type(o) == bytes
        assert o == gen_binary_data(idx)
        read_count += 1

    assert read_count == NUMBER_OF_STRINGS
def value_range(df):
    """
    Return the minimum and maximum of a dataframe in a series object

    Parameters
    ----------
    df : DataFrame

    Returns
    -------
    (minimum, maximum) : Series
        Labelled 'Minimum' and 'Maximum' respectively.
        (The original docstring said "(maximum, minimum)", contradicting
        the actual return order below.)
    """
    # df.min()/df.max() reduce per column; the builtin min/max then reduce
    # across columns, giving the global extrema of the frame.
    return Series((min(df.min()), max(df.max())), ('Minimum', 'Maximum'))
def match(needles, haystack):
    """Return the integer positions of *needles* within *haystack*
    (-1 for values not present), via Index.get_indexer."""
    haystack = Index(haystack)
    needles = Index(needles)
    return haystack.get_indexer(needles)


def cartesian_product(X):
    '''
    Numpy version of itertools.product or pandas.compat.product.
    Sometimes faster (for large inputs)...

    Examples
    --------
    >>> cartesian_product([list('ABC'), [1, 2]])
    [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'),
    array([1, 2, 1, 2, 1, 2])]

    '''
    lenX = np.fromiter((len(x) for x in X), dtype=int)
    # FIX: np.cumproduct/np.product are deprecated aliases removed in
    # NumPy 2.0; cumprod/prod are the canonical names in every release.
    cumprodX = np.cumprod(lenX)

    a = np.roll(cumprodX, 1)
    a[0] = 1

    # FIX: floor division — under Python 3, "/" is true division and would
    # hand float repeat-counts to np.repeat, which requires integers.
    b = cumprodX[-1] // cumprodX

    return [np.tile(np.repeat(np.asarray(com._values_from_object(x)), b[i]),
                    np.prod(a[i]))
            for i, x in enumerate(X)]


def _compose2(f, g):
    """Compose 2 callables"""
    return lambda *args, **kwargs: f(g(*args, **kwargs))


def compose(*funcs):
    """Compose 2 or more callables"""
    assert len(funcs) > 1, 'At least 2 callables must be passed to compose'
    return reduce(_compose2, funcs)
class Interval(object):
    """
    Represents an interval of time defined by two timestamps
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end


class PeriodInterval(object):
    """
    Represents an interval of time defined by two Period objects (time ordinals)
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end


class IntervalIndex(Index):
    """
    Placeholder Index of intervals — construction is not implemented yet.
    """

    # FIX: __new__'s first parameter is the class, conventionally named
    # ``cls`` (the original misleadingly named it ``self``).
    def __new__(cls, starts, ends):
        # NOTE(review): stub — currently returns None instead of an
        # Index instance, so instantiation yields None.
        pass

    def dtype(self):
        # NOTE(review): defined as a method although Index.dtype is a
        # property elsewhere; left unchanged pending a real implementation.
        return self.values.dtype
-------------------------------------------------------------------------------- /pandas/tseries/tests/data/daterange_073.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/tseries/tests/data/daterange_073.pickle -------------------------------------------------------------------------------- /pandas/tseries/tests/data/frame.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/tseries/tests/data/frame.pickle -------------------------------------------------------------------------------- /pandas/tseries/tests/data/series.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/tseries/tests/data/series.pickle -------------------------------------------------------------------------------- /pandas/tseries/tests/data/series_daterange0.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/tseries/tests/data/series_daterange0.pickle -------------------------------------------------------------------------------- /pandas/tslib.pxd: -------------------------------------------------------------------------------- 1 | from numpy cimport ndarray, int64_t 2 | 3 | cdef convert_to_tsobject(object, object, object) 4 | cdef convert_to_timedelta64(object, object, object) 5 | -------------------------------------------------------------------------------- /pandas/util/__init__.py: -------------------------------------------------------------------------------- 
""" various miscellaneous utilities """


def is_little_endian():
    """Return True if the running interpreter is little-endian."""
    import sys
    return sys.byteorder == 'little'


def exclusive(*args):
    """Return True if exactly one of *args* is not None."""
    # Generator instead of a throwaway list inside sum().
    count = sum(arg is not None for arg in args)
    return count == 1
def _zip(*args):
    # Build an object-dtype ndarray of the zipped tuples. Assigning through
    # arr[:] keeps each tuple as a single element — np.array(lzip(...))
    # would instead broadcast the tuples into a 2-D array.
    arr = np.empty(N, dtype=object)  # N: module-level row count (100000)
    arr[:] = lzip(*args)
    return arr


def _zip2(*args):
    # Same result as _zip, but via pandas' C helper for comparison.
    return lib.list_to_object_array(lzip(*args))
range(it): 36 | df.reindex(columns=new_cols) 37 | 38 | 39 | def bench_join_index(df, it=10): 40 | left = df.reindex(index=np.arange(0, N, 2), 41 | columns=np.arange(K // 2)) 42 | right = df.reindex(columns=np.arange(K // 2 + 1, K)) 43 | for i in range(it): 44 | joined = left.join(right) 45 | 46 | if __name__ == '__main__': 47 | df = horribly_unconsolidated() 48 | left = df.reindex(index=np.arange(0, N, 2), 49 | columns=np.arange(K // 2)) 50 | right = df.reindex(columns=np.arange(K // 2 + 1, K)) 51 | bench_join_index(df) 52 | -------------------------------------------------------------------------------- /scripts/boxplot_test.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | import random 4 | import pandas.util.testing as tm 5 | tm.N = 1000 6 | df = tm.makeTimeDataFrame() 7 | import string 8 | foo = list(string.letters[:5]) * 200 9 | df['indic'] = list(string.letters[:5]) * 200 10 | random.shuffle(foo) 11 | df['indic2'] = foo 12 | df.boxplot(by=['indic', 'indic2'], fontsize=8, rot=90) 13 | 14 | plt.show() 15 | -------------------------------------------------------------------------------- /scripts/count_code.sh: -------------------------------------------------------------------------------- 1 | cloc pandas --force-lang=Python,pyx --not-match-f="parser.c|lib.c|tslib.c|sandbox.c|hashtable.c|sparse.c|algos.c|index.c" -------------------------------------------------------------------------------- /scripts/faster_xs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import pandas.util.testing as tm 4 | 5 | from pandas.core.internals import _interleaved_dtype 6 | 7 | df = tm.makeDataFrame() 8 | 9 | df['E'] = 'foo' 10 | df['F'] = 'foo' 11 | df['G'] = 2 12 | df['H'] = df['A'] > 0 13 | 14 | blocks = df._data.blocks 15 | items = df.columns 16 | -------------------------------------------------------------------------------- 
/scripts/git_code_churn.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import os 3 | import re 4 | import sys 5 | 6 | import numpy as np 7 | 8 | from pandas import * 9 | 10 | 11 | if __name__ == '__main__': 12 | from vbench.git import GitRepo 13 | repo = GitRepo('/Users/wesm/code/pandas') 14 | churn = repo.get_churn_by_file() 15 | 16 | file_include = [] 17 | for path in churn.major_axis: 18 | if path.endswith('.pyx') or path.endswith('.py'): 19 | file_include.append(path) 20 | commits_include = [sha for sha in churn.minor_axis 21 | if 'LF' not in repo.messages[sha]] 22 | commits_include.remove('dcf3490') 23 | 24 | clean_churn = churn.reindex(major=file_include, minor=commits_include) 25 | 26 | by_commit = clean_churn.sum('major').sum(1) 27 | 28 | by_date = by_commit.groupby(repo.commit_date).sum() 29 | 30 | by_date = by_date.drop([datetime(2011, 6, 10)]) 31 | 32 | # clean out days where I touched Cython 33 | 34 | by_date = by_date[by_date < 5000] 35 | -------------------------------------------------------------------------------- /scripts/groupby_sample.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | import numpy as np 3 | import string 4 | import pandas.compat as compat 5 | 6 | g1 = np.array(list(string.letters))[:-1] 7 | g2 = np.arange(510) 8 | df_small = DataFrame({'group1': ["a", "b", "a", "a", "b", "c", "c", "c", "c", 9 | "c", "a", "a", "a", "b", "b", "b", "b"], 10 | 'group2': [1, 2, 3, 4, 1, 3, 5, 6, 5, 4, 1, 2, 3, 4, 3, 2, 1], 11 | 'value': ["apple", "pear", "orange", "apple", 12 | "banana", "durian", "lemon", "lime", 13 | "raspberry", "durian", "peach", "nectarine", 14 | "banana", "lemon", "guava", "blackberry", 15 | "grape"]}) 16 | value = df_small['value'].values.repeat(3) 17 | df = DataFrame({'group1': g1.repeat(4000 * 5), 18 | 'group2': np.tile(g2, 400 * 5), 19 | 'value': value.repeat(4000 * 5)}) 20 | 21 | 22 | def 
random_sample(): 23 | grouped = df.groupby(['group1', 'group2'])['value'] 24 | from random import choice 25 | choose = lambda group: choice(group.index) 26 | indices = grouped.apply(choose) 27 | return df.reindex(indices) 28 | 29 | 30 | def random_sample_v2(): 31 | grouped = df.groupby(['group1', 'group2'])['value'] 32 | from random import choice 33 | choose = lambda group: choice(group.index) 34 | indices = [choice(v) for k, v in compat.iteritems(grouped.groups)] 35 | return df.reindex(indices) 36 | 37 | 38 | def do_shuffle(arr): 39 | from random import shuffle 40 | result = arr.copy().values 41 | shuffle(result) 42 | return result 43 | 44 | 45 | def shuffle_uri(df, grouped): 46 | perm = np.r_[tuple([np.random.permutation( 47 | idxs) for idxs in compat.itervalues(grouped.groups)])] 48 | df['state_permuted'] = np.asarray(df.ix[perm]['value']) 49 | 50 | df2 = df.copy() 51 | grouped = df2.groupby('group1') 52 | shuffle_uri(df2, grouped) 53 | 54 | df2['state_perm'] = grouped['value'].transform(do_shuffle) 55 | -------------------------------------------------------------------------------- /scripts/groupby_speed.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from pandas import * 3 | 4 | rng = DatetimeIndex('1/3/2011', '11/30/2011', offset=datetools.Minute()) 5 | 6 | df = DataFrame(np.random.randn(len(rng), 5), index=rng, 7 | columns=list('OHLCV')) 8 | 9 | rng5 = DatetimeIndex('1/3/2011', '11/30/2011', offset=datetools.Minute(5)) 10 | gp = rng5.asof 11 | grouped = df.groupby(gp) 12 | 13 | 14 | def get1(dt): 15 | k = gp(dt) 16 | return grouped.get_group(k) 17 | 18 | 19 | def get2(dt): 20 | k = gp(dt) 21 | return df.ix[grouped.groups[k]] 22 | 23 | 24 | def f(): 25 | for i, date in enumerate(df.index): 26 | if i % 10000 == 0: 27 | print(i) 28 | get1(date) 29 | 30 | 31 | def g(): 32 | for i, date in enumerate(df.index): 33 | if i % 10000 == 0: 34 | print(i) 35 | get2(date) 36 | 
-------------------------------------------------------------------------------- /scripts/hdfstore_panel_perf.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | from pandas.util.testing import rands 3 | from pandas.compat import range 4 | 5 | i, j, k = 7, 771, 5532 6 | 7 | panel = Panel(np.random.randn(i, j, k), 8 | items=[rands(10) for _ in range(i)], 9 | major_axis=DatetimeIndex('1/1/2000', periods=j, 10 | offset=datetools.Minute()), 11 | minor_axis=[rands(10) for _ in range(k)]) 12 | 13 | 14 | store = HDFStore('test.h5') 15 | store.put('test_panel', panel, table=True) 16 | 17 | retrieved = store['test_panel'] 18 | -------------------------------------------------------------------------------- /scripts/leak.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | from pandas.compat import range 3 | import numpy as np 4 | import pandas.util.testing as tm 5 | import os 6 | import psutil 7 | 8 | pid = os.getpid() 9 | proc = psutil.Process(pid) 10 | 11 | df = DataFrame(index=np.arange(100)) 12 | for i in range(5000): 13 | df[i] = 5 14 | -------------------------------------------------------------------------------- /scripts/parser_magic.py: -------------------------------------------------------------------------------- 1 | from pandas.util.testing import set_trace 2 | import pandas.util.testing as tm 3 | import pandas.compat as compat 4 | 5 | from pandas import * 6 | import ast 7 | import inspect 8 | import sys 9 | 10 | 11 | def merge(a, b): 12 | f, args, _ = parse_stmt(inspect.currentframe().f_back) 13 | return DataFrame({args[0]: a, 14 | args[1]: b}) 15 | 16 | 17 | def parse_stmt(frame): 18 | info = inspect.getframeinfo(frame) 19 | call = info[-2][0] 20 | mod = ast.parse(call) 21 | body = mod.body[0] 22 | if isinstance(body, (ast.Assign, ast.Expr)): 23 | call = body.value 24 | elif isinstance(body, ast.Call): 25 | call = body 26 | return 
_parse_call(call) 27 | 28 | 29 | def _parse_call(call): 30 | func = _maybe_format_attribute(call.func) 31 | 32 | str_args = [] 33 | for arg in call.args: 34 | if isinstance(arg, ast.Name): 35 | str_args.append(arg.id) 36 | elif isinstance(arg, ast.Call): 37 | formatted = _format_call(arg) 38 | str_args.append(formatted) 39 | 40 | return func, str_args, {} 41 | 42 | 43 | def _format_call(call): 44 | func, args, kwds = _parse_call(call) 45 | content = '' 46 | if args: 47 | content += ', '.join(args) 48 | if kwds: 49 | fmt_kwds = ['%s=%s' % item for item in compat.iteritems(kwds)] 50 | joined_kwds = ', '.join(fmt_kwds) 51 | if args: 52 | content = content + ', ' + joined_kwds 53 | else: 54 | content += joined_kwds 55 | return '%s(%s)' % (func, content) 56 | 57 | 58 | def _maybe_format_attribute(name): 59 | if isinstance(name, ast.Attribute): 60 | return _format_attribute(name) 61 | return name.id 62 | 63 | 64 | def _format_attribute(attr): 65 | obj = attr.value 66 | if isinstance(attr.value, ast.Attribute): 67 | obj = _format_attribute(attr.value) 68 | else: 69 | obj = obj.id 70 | return '.'.join((obj, attr.attr)) 71 | 72 | a = tm.makeTimeSeries() 73 | b = tm.makeTimeSeries() 74 | df = merge(a, b) 75 | -------------------------------------------------------------------------------- /scripts/preepoch_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pandas import * 3 | 4 | 5 | def panda_test(): 6 | 7 | # generate some data 8 | data = np.random.rand(50, 5) 9 | # generate some dates 10 | dates = DatetimeIndex('1/1/1969', periods=50) 11 | # generate column headings 12 | cols = ['A', 'B', 'C', 'D', 'E'] 13 | 14 | df = DataFrame(data, index=dates, columns=cols) 15 | 16 | # save to HDF5Store 17 | store = HDFStore('bugzilla.h5', mode='w') 18 | store['df'] = df # This gives: OverflowError: mktime argument out of range 19 | store.close() 20 | 21 | 22 | if __name__ == '__main__': 23 | panda_test() 24 | 
-------------------------------------------------------------------------------- /scripts/roll_median_leak.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from pandas import * 3 | 4 | import numpy as np 5 | import os 6 | 7 | from vbench.api import Benchmark 8 | from pandas.util.testing import rands 9 | from pandas.compat import range 10 | import pandas.lib as lib 11 | import pandas._sandbox as sbx 12 | import time 13 | 14 | import psutil 15 | 16 | pid = os.getpid() 17 | proc = psutil.Process(pid) 18 | 19 | lst = SparseList() 20 | lst.append([5] * 10000) 21 | lst.append(np.repeat(np.nan, 1000000)) 22 | 23 | for _ in range(10000): 24 | print(proc.get_memory_info()) 25 | sdf = SparseDataFrame({'A': lst.to_array()}) 26 | chunk = sdf[sdf['A'] == 5] 27 | -------------------------------------------------------------------------------- /scripts/runtests.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | print(os.getpid()) 4 | import nose 5 | nose.main('pandas.core') 6 | -------------------------------------------------------------------------------- /scripts/test_py25.bat: -------------------------------------------------------------------------------- 1 | SET PATH=C:\MinGW\bin;C:\Python25;C:\Python25\Scripts;%PATH% 2 | del pandas\_tseries.pyd 3 | del pandas\_sparse.pyd 4 | del pandas\src\tseries.c 5 | del pandas\src\sparse.c 6 | python setup.py clean 7 | python setup.py build_ext -c mingw32 --inplace 8 | nosetests pandas -------------------------------------------------------------------------------- /scripts/test_py26.bat: -------------------------------------------------------------------------------- 1 | SET PATH=C:\MinGW\bin;E:\Python26;E:\Python26\Scripts;%PATH% 2 | del pandas\_tseries.pyd 3 | del pandas\_sparse.pyd 4 | del pandas\src\tseries.c 5 | del pandas\src\sparse.c 6 | python setup.py 
clean 7 | python setup.py build_ext -c mingw32 --inplace 8 | nosetests pandas -------------------------------------------------------------------------------- /scripts/test_py27.bat: -------------------------------------------------------------------------------- 1 | SET PATH=C:\MinGW\bin;C:\Python27;C:\Python27\Scripts;%PATH% 2 | 3 | python setup.py clean 4 | python setup.py build_ext -c mingw32 --inplace 5 | 6 | nosetests pandas -------------------------------------------------------------------------------- /scripts/test_py31.bat: -------------------------------------------------------------------------------- 1 | set BASE=E:\python31 2 | set PYTHON=%BASE%\python.exe 3 | set NOSETESTS=%BASE%\scripts\nosetests-script.py 4 | 5 | %PYTHON% setup.py install 6 | cd bench 7 | %PYTHON% %NOSETESTS% pandas 8 | cd .. -------------------------------------------------------------------------------- /scripts/test_py32.bat: -------------------------------------------------------------------------------- 1 | set BASE=E:\python32 2 | set PYTHON=%BASE%\python.exe 3 | set NOSETESTS=%BASE%\scripts\nosetests-script.py 4 | 5 | %PYTHON% setup.py install 6 | cd bench 7 | %PYTHON% %NOSETESTS% pandas 8 | cd .. -------------------------------------------------------------------------------- /scripts/touchup_gh_issues.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | from collections import OrderedDict 6 | import sys 7 | import re 8 | 9 | """ 10 | Reads in stdin, replace all occurences of '#num' or 'GH #num' with 11 | links to github issue. dumps the issue anchors before the next 12 | section header 13 | """ 14 | 15 | pat = "((?:\s*GH\s*)?)#(\d{3,4})([^_]|$)?" 16 | rep_pat = r"\1GH\2_\3" 17 | anchor_pat = ".. 
_GH{id}: https://github.com/pydata/pandas/issues/{id}" 18 | section_pat = "^pandas\s[\d\.]+\s*$" 19 | 20 | 21 | def main(): 22 | issues = OrderedDict() 23 | while True: 24 | 25 | line = sys.stdin.readline() 26 | if not line: 27 | break 28 | 29 | if re.search(section_pat, line): 30 | for id in issues: 31 | print(anchor_pat.format(id=id).rstrip()) 32 | if issues: 33 | print("\n") 34 | issues = OrderedDict() 35 | 36 | for m in re.finditer(pat, line): 37 | id = m.group(2) 38 | if id not in issues: 39 | issues[id] = True 40 | print(re.sub(pat, rep_pat, line).rstrip()) 41 | pass 42 | 43 | if __name__ == "__main__": 44 | main() 45 | -------------------------------------------------------------------------------- /scripts/winbuild_py25.bat: -------------------------------------------------------------------------------- 1 | SET PATH=C:\MinGW\bin;C:\Python25;C:\Python25\Scripts;%PATH% 2 | python setup.py build -c mingw32 bdist_wininst 3 | -------------------------------------------------------------------------------- /scripts/winbuild_py27.bat: -------------------------------------------------------------------------------- 1 | SET PATH=C:\MinGW\bin;C:\Python27;C:\Python27\Scripts;%PATH% 2 | python setup.py build -c mingw32 bdist_wininst 3 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_26-32.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 26-32" 3 | 4 | 5 | title 26-32 build 6 | echo "building" 7 | cd "c:\users\Jeff Reback\documents\github\pandas" 8 | C:\python26-32\python.exe setup.py build > build.26-32.log 2>&1 9 | 10 | echo "installing" 11 | C:\python26-32\python.exe setup.py bdist --formats=wininst > install.26-32.log 2>&1 12 | 13 | echo "testing" 14 | C:\python26-32\scripts\nosetests -A "not slow" build\lib.win32-2.6\pandas > test.26-32.log 2>&1 15 | 16 | echo "versions" 17 | cd build\lib.win32-2.6 18 | C:\python26-32\python.exe 
../../ci/print_versions.py > ../../versions.26-32.log 2>&1 19 | 20 | 21 | exit 22 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_26-64.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 26-64" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x64 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 26-64 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python26-64\python.exe setup.py build > build.26-64.log 2>&1 13 | 14 | echo "installing" 15 | C:\python26-64\python.exe setup.py bdist --formats=wininst > install.26-64.log 2>&1 16 | 17 | echo "testing" 18 | C:\python26-64\scripts\nosetests -A "not slow" build\lib.win-amd64-2.6\pandas > test.26-64.log 2>&1 19 | 20 | echo "versions" 21 | cd build\lib.win-amd64-2.6 22 | C:\python26-64\python.exe ../../ci/print_versions.py > ../../versions.26-64.log 2>&1 23 | 24 | 25 | exit 26 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_27-32.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 27-32" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x86 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 27-32 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python27-32\python.exe setup.py build > build.27-32.log 2>&1 13 | 14 | title "installing" 15 | C:\python27-32\python.exe setup.py bdist --formats=wininst > install.27-32.log 2>&1 16 | 17 | echo "testing" 18 | C:\python27-32\scripts\nosetests -A "not slow" build\lib.win32-2.7\pandas > test.27-32.log 2>&1 19 | 20 | echo "versions" 21 | cd build\lib.win32-2.7 22 | 
C:\python27-32\python.exe ../../ci/print_versions.py > ../../versions.27-32.log 2>&1 23 | 24 | exit 25 | 26 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_27-64.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 27-64" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x64 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 27-64 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python27-64\python.exe setup.py build > build.27-64.log 2>&1 13 | 14 | echo "installing" 15 | C:\python27-64\python.exe setup.py bdist --formats=wininst > install.27-64.log 2>&1 16 | 17 | echo "testing" 18 | C:\python27-64\scripts\nosetests -A "not slow" build\lib.win-amd64-2.7\pandas > test.27-64.log 2>&1 19 | 20 | echo "versions" 21 | cd build\lib.win-amd64-2.7 22 | C:\python27-64\python.exe ../../ci/print_versions.py > ../../versions.27-64.log 2>&1 23 | 24 | exit 25 | 26 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_33-32.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 33-32" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x86 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 33-32 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python33-32\python.exe setup.py build > build.33-32.log 2>&1 13 | 14 | echo "installing" 15 | C:\python33-32\python.exe setup.py bdist --formats=wininst > install.33-32.log 2>&1 16 | 17 | echo "testing" 18 | C:\python33-32\scripts\nosetests -A "not slow" build\lib.win32-3.3\pandas > test.33-32.log 2>&1 19 | 20 | echo "versions" 21 | cd build\lib.win32-3.3 22 | 
C:\python33-32\python.exe ../../ci/print_versions.py > ../../versions.33-32.log 2>&1 23 | 24 | exit 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_33-64.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 33-64" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 33-64 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python33-64\python.exe setup.py build > build.33-64.log 2>&1 13 | 14 | echo "installing" 15 | C:\python33-64\python.exe setup.py bdist --formats=wininst > install.33-64.log 2>&1 16 | 17 | echo "testing" 18 | C:\python33-64\scripts\nosetests -A "not slow" build\lib.win-amd64-3.3\pandas > test.33-64.log 2>&1 19 | 20 | echo "versions" 21 | cd build\lib.win-amd64-3.3 22 | C:\python33-64\python.exe ../../ci/print_versions.py > ../../versions.33-64.log 2>&1 23 | 24 | exit 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_34-32.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 34-32" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x86 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 34-32 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python34-32\python.exe setup.py build > build.34-32.log 2>&1 13 | 14 | echo "installing" 15 | C:\python34-32\python.exe setup.py bdist --formats=wininst > install.34-32.log 2>&1 16 | 17 | echo "testing" 18 | C:\python34-32\scripts\nosetests -A "not slow" build\lib.win32-3.4\pandas > test.34-32.log 2>&1 19 | 20 | echo "versions" 21 | cd 
build\lib.win32-3.4 22 | C:\python34-32\python.exe ../../ci/print_versions.py > ../../versions.34-32.log 2>&1 23 | 24 | exit 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_34-64.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 34-64" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 34-64 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python34-64\python.exe setup.py build > build.34-64.log 2>&1 13 | 14 | echo "installing" 15 | C:\python34-64\python.exe setup.py bdist --formats=wininst > install.34-64.log 2>&1 16 | 17 | echo "testing" 18 | C:\python34-64\scripts\nosetests -A "not slow" build\lib.win-amd64-3.4\pandas > test.34-64.log 2>&1 19 | 20 | echo "versions" 21 | cd build\lib.win-amd64-3.4 22 | C:\python34-64\python.exe ../../ci/print_versions.py > ../../versions.34-64.log 2>&1 23 | 24 | exit 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/windows_builder/check_and_build.bat: -------------------------------------------------------------------------------- 1 | set PYTHONPATH=c:/python27-64/lib 2 | c:/python27-64/python.exe c:/Builds/check_and_build.py %1 %2 %3 %4 %5 %6 %7 %8 %9 3 | -------------------------------------------------------------------------------- /scripts/windows_builder/readme.txt: -------------------------------------------------------------------------------- 1 | This is a collection of windows batch scripts (and a python script) 2 | to rebuild the binaries, test, and upload the binaries for public distribution 3 | upon a commit on github.
4 | 5 | Obviously requires that these be setup on windows 6 | Requires an install of Windows SDK 3.5 and 4.0 7 | Full python installs for each version with the deps 8 | 9 | Currently supporting 10 | 11 | 26-32,26-64,27-32,27-64,33-32,33-64,34-32,34-64 12 | 13 | Note that 33 and 34 use the 4.0 SDK, while the other suse 3.5 SDK 14 | 15 | I installed these scripts in C:\Builds 16 | 17 | Installed libaries in C:\Installs 18 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | command -v coverage >/dev/null && coverage erase 3 | command -v python-coverage >/dev/null && python-coverage erase 4 | # nosetests pandas/tests/test_index.py --with-coverage --cover-package=pandas.core --pdb-failure --pdb 5 | #nosetests -w pandas --with-coverage --cover-package=pandas --pdb-failure --pdb #--cover-inclusive 6 | #nosetests -A "not slow" -w pandas/tseries --with-coverage --cover-package=pandas.tseries $* #--cover-inclusive 7 | nosetests -w pandas --with-coverage --cover-package=pandas $* 8 | # nosetests -w pandas/io --with-coverage --cover-package=pandas.io --pdb-failure --pdb 9 | # nosetests -w pandas/core --with-coverage --cover-package=pandas.core --pdb-failure --pdb 10 | # nosetests -w pandas/stats --with-coverage --cover-package=pandas.stats 11 | # coverage run runtests.py 12 | -------------------------------------------------------------------------------- /test_fast.sh: -------------------------------------------------------------------------------- 1 | nosetests -A "not slow and not network" pandas --with-id $* 2 | -------------------------------------------------------------------------------- /test_multi.sh: -------------------------------------------------------------------------------- 1 | nosetests -A "not slow and not network" pandas --processes=4 $* 2 | 
-------------------------------------------------------------------------------- /test_perf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | CURDIR=$(pwd) 4 | BASEDIR=$(cd "$(dirname "$0")"; pwd) 5 | python "$BASEDIR"/vb_suite/test_perf.py $@ 6 | -------------------------------------------------------------------------------- /test_rebuild.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | python setup.py clean 4 | python setup.py build_ext --inplace 5 | coverage erase 6 | # nosetests pandas/tests/test_index.py --with-coverage --cover-package=pandas.core --pdb-failure --pdb 7 | #nosetests -w pandas --with-coverage --cover-package=pandas --pdb-failure --pdb #--cover-inclusive 8 | nosetests -w pandas --with-coverage --cover-package=pandas $* #--cover-inclusive 9 | # nosetests -w pandas/io --with-coverage --cover-package=pandas.io --pdb-failure --pdb 10 | # nosetests -w pandas/core --with-coverage --cover-package=pandas.core --pdb-failure --pdb 11 | # nosetests -w pandas/stats --with-coverage --cover-package=pandas.stats 12 | # coverage run runtests.py 13 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Tox (http://tox.testrun.org/) is a tool for running tests 2 | # in multiple virtualenvs. This configuration file will run the 3 | # test suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory. 
5 | 6 | [tox] 7 | envlist = py26, py27, py32, py33, py34 8 | 9 | [testenv] 10 | deps = 11 | cython 12 | nose 13 | pytz>=2011k 14 | python-dateutil 15 | beautifulsoup4 16 | lxml 17 | openpyxl<2.0.0 18 | xlsxwriter 19 | xlrd 20 | six 21 | sqlalchemy 22 | 23 | # cd to anything but the default {toxinidir} which 24 | # contains the pandas subdirectory and confuses 25 | # nose away from the fresh install in site-packages 26 | changedir = {envdir} 27 | 28 | commands = 29 | # TODO: --exe because of GH #761 30 | {envbindir}/nosetests --exe pandas {posargs:-A "not network and not disabled"} 31 | # cleanup the temp. build dir created by the tox build 32 | # /bin/rm -rf {toxinidir}/build 33 | 34 | # quietly rollback the install. 35 | # Note this line will only be reached if the 36 | # previous lines succeed (in particular, the tests), 37 | # but an uninstall is really only required when 38 | # files are removed from the source tree, in which case, 39 | # stale versions of files will will remain in the venv 40 | # until the next time uninstall is run. 41 | # 42 | # tox should provide a preinstall-commands hook. 
43 | pip uninstall pandas -qy 44 | 45 | [testenv:py26] 46 | deps = 47 | numpy==1.6.1 48 | boto 49 | bigquery 50 | {[testenv]deps} 51 | 52 | [testenv:py27] 53 | deps = 54 | numpy==1.8.1 55 | boto 56 | bigquery 57 | {[testenv]deps} 58 | 59 | [testenv:py32] 60 | deps = 61 | numpy==1.7.1 62 | {[testenv]deps} 63 | 64 | [testenv:py33] 65 | deps = 66 | numpy==1.8.0 67 | {[testenv]deps} 68 | 69 | [testenv:py34] 70 | deps = 71 | numpy==1.8.0 72 | {[testenv]deps} 73 | -------------------------------------------------------------------------------- /vb_suite/.gitignore: -------------------------------------------------------------------------------- 1 | benchmarks.db 2 | build/* 3 | source/vbench/* 4 | source/*.rst -------------------------------------------------------------------------------- /vb_suite/attrs_caching.py: -------------------------------------------------------------------------------- 1 | from vbench.benchmark import Benchmark 2 | 3 | common_setup = """from pandas_vb_common import * 4 | """ 5 | 6 | #---------------------------------------------------------------------- 7 | # DataFrame.index / columns property lookup time 8 | 9 | setup = common_setup + """ 10 | df = DataFrame(np.random.randn(10, 6)) 11 | cur_index = df.index 12 | """ 13 | stmt = "foo = df.index" 14 | 15 | getattr_dataframe_index = Benchmark(stmt, setup, 16 | name="getattr_dataframe_index") 17 | 18 | stmt = "df.index = cur_index" 19 | setattr_dataframe_index = Benchmark(stmt, setup, 20 | name="setattr_dataframe_index") 21 | -------------------------------------------------------------------------------- /vb_suite/ctors.py: -------------------------------------------------------------------------------- 1 | from vbench.benchmark import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | """ 6 | 7 | #---------------------------------------------------------------------- 8 | # Series constructors 9 | 10 | setup = common_setup + """ 11 | data = 
np.random.randn(100) 12 | index = Index(np.arange(100)) 13 | """ 14 | 15 | ctor_series_ndarray = \ 16 | Benchmark("Series(data, index=index)", setup=setup, 17 | name='series_constructor_ndarray') 18 | 19 | setup = common_setup + """ 20 | arr = np.random.randn(100, 100) 21 | """ 22 | 23 | ctor_frame_ndarray = \ 24 | Benchmark("DataFrame(arr)", setup=setup, 25 | name='frame_constructor_ndarray') 26 | 27 | setup = common_setup + """ 28 | data = np.array(['foo', 'bar', 'baz'], dtype=object) 29 | """ 30 | 31 | ctor_index_array_string = Benchmark('Index(data)', setup=setup) 32 | 33 | # index constructors 34 | setup = common_setup + """ 35 | s = Series([Timestamp('20110101'),Timestamp('20120101'),Timestamp('20130101')]*1000) 36 | """ 37 | index_from_series_ctor = Benchmark('Index(s)', setup=setup) 38 | 39 | dtindex_from_series_ctor = Benchmark('DatetimeIndex(s)', setup=setup) 40 | -------------------------------------------------------------------------------- /vb_suite/generate_rst_files.py: -------------------------------------------------------------------------------- 1 | from suite import benchmarks, generate_rst_files 2 | generate_rst_files(benchmarks) 3 | -------------------------------------------------------------------------------- /vb_suite/inference.py: -------------------------------------------------------------------------------- 1 | from vbench.api import Benchmark 2 | from datetime import datetime 3 | import sys 4 | 5 | # from GH 7332 6 | 7 | setup = """from pandas_vb_common import * 8 | import pandas as pd 9 | N = 500000 10 | df_int64 = DataFrame(dict(A = np.arange(N,dtype='int64'), B = np.arange(N,dtype='int64'))) 11 | df_int32 = DataFrame(dict(A = np.arange(N,dtype='int32'), B = np.arange(N,dtype='int32'))) 12 | df_uint32 = DataFrame(dict(A = np.arange(N,dtype='uint32'), B = np.arange(N,dtype='uint32'))) 13 | df_float64 = DataFrame(dict(A = np.arange(N,dtype='float64'), B = np.arange(N,dtype='float64'))) 14 | df_float32 = DataFrame(dict(A = 
np.arange(N,dtype='float32'), B = np.arange(N,dtype='float32'))) 15 | df_datetime64 = DataFrame(dict(A = pd.to_datetime(np.arange(N,dtype='int64'),unit='ms'), 16 | B = pd.to_datetime(np.arange(N,dtype='int64'),unit='ms'))) 17 | df_timedelta64 = DataFrame(dict(A = df_datetime64['A']-df_datetime64['B'], 18 | B = df_datetime64['B'])) 19 | """ 20 | 21 | dtype_infer_int64 = Benchmark('df_int64["A"] + df_int64["B"]', setup, 22 | start_date=datetime(2014, 1, 1)) 23 | dtype_infer_int32 = Benchmark('df_int32["A"] + df_int32["B"]', setup, 24 | start_date=datetime(2014, 1, 1)) 25 | dtype_infer_uint32 = Benchmark('df_uint32["A"] + df_uint32["B"]', setup, 26 | start_date=datetime(2014, 1, 1)) 27 | dtype_infer_float64 = Benchmark('df_float64["A"] + df_float64["B"]', setup, 28 | start_date=datetime(2014, 1, 1)) 29 | dtype_infer_float32 = Benchmark('df_float32["A"] + df_float32["B"]', setup, 30 | start_date=datetime(2014, 1, 1)) 31 | dtype_infer_datetime64 = Benchmark('df_datetime64["A"] - df_datetime64["B"]', setup, 32 | start_date=datetime(2014, 1, 1)) 33 | dtype_infer_timedelta64_1 = Benchmark('df_timedelta64["A"] + df_timedelta64["B"]', setup, 34 | start_date=datetime(2014, 1, 1)) 35 | dtype_infer_timedelta64_2 = Benchmark('df_timedelta64["A"] + df_timedelta64["A"]', setup, 36 | start_date=datetime(2014, 1, 1)) 37 | -------------------------------------------------------------------------------- /vb_suite/measure_memory_consumption.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | """Short one-line summary 7 | 8 | long summary 9 | """ 10 | 11 | 12 | def main(): 13 | import shutil 14 | import tempfile 15 | import warnings 16 | 17 | from pandas import Series 18 | 19 | from vbench.api import BenchmarkRunner 20 | from suite import (REPO_PATH, BUILD, DB_PATH, PREPARE, 21 | dependencies, benchmarks) 22 | 23 | from memory_profiler import 
memory_usage 24 | 25 | warnings.filterwarnings('ignore', category=FutureWarning) 26 | 27 | try: 28 | TMP_DIR = tempfile.mkdtemp() 29 | runner = BenchmarkRunner( 30 | benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, 31 | TMP_DIR, PREPARE, always_clean=True, 32 | # run_option='eod', start_date=START_DATE, 33 | module_dependencies=dependencies) 34 | results = {} 35 | for b in runner.benchmarks: 36 | k = b.name 37 | try: 38 | vs = memory_usage((b.run,)) 39 | v = max(vs) 40 | # print(k, v) 41 | results[k] = v 42 | except Exception as e: 43 | print("Exception caught in %s\n" % k) 44 | print(str(e)) 45 | 46 | s = Series(results) 47 | s.sort() 48 | print((s)) 49 | 50 | finally: 51 | shutil.rmtree(TMP_DIR) 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | -------------------------------------------------------------------------------- /vb_suite/miscellaneous.py: -------------------------------------------------------------------------------- 1 | from vbench.benchmark import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | """ 6 | 7 | #---------------------------------------------------------------------- 8 | # cache_readonly 9 | 10 | setup = common_setup + """ 11 | from pandas.util.decorators import cache_readonly 12 | 13 | class Foo: 14 | 15 | @cache_readonly 16 | def prop(self): 17 | return 5 18 | obj = Foo() 19 | """ 20 | misc_cache_readonly = Benchmark("obj.prop", setup, name="misc_cache_readonly", 21 | ncalls=2000000) 22 | 23 | #---------------------------------------------------------------------- 24 | # match 25 | 26 | setup = common_setup + """ 27 | from pandas.util.testing import rands 28 | 29 | uniques = np.array([rands(10) for _ in xrange(1000)], dtype='O') 30 | all = uniques.repeat(10) 31 | """ 32 | 33 | match_strings = Benchmark("match(all, uniques)", setup, 34 | start_date=datetime(2012, 5, 12)) 35 | -------------------------------------------------------------------------------- 
/vb_suite/pandas_vb_common.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | from pandas.util.testing import rands 3 | from datetime import timedelta 4 | from numpy.random import randn 5 | from numpy.random import randint 6 | from numpy.random import permutation 7 | import pandas.util.testing as tm 8 | import random 9 | import numpy as np 10 | 11 | try: 12 | import pandas._tseries as lib 13 | except: 14 | import pandas.lib as lib 15 | 16 | try: 17 | Panel = WidePanel 18 | except Exception: 19 | pass 20 | 21 | # didn't add to namespace until later 22 | try: 23 | from pandas.core.index import MultiIndex 24 | except ImportError: 25 | pass 26 | -------------------------------------------------------------------------------- /vb_suite/panel_methods.py: -------------------------------------------------------------------------------- 1 | from vbench.api import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | """ 6 | 7 | #---------------------------------------------------------------------- 8 | # shift 9 | 10 | setup = common_setup + """ 11 | index = date_range(start="2000", freq="D", periods=1000) 12 | panel = Panel(np.random.randn(100, len(index), 1000)) 13 | """ 14 | 15 | panel_shift = Benchmark('panel.shift(1)', setup, 16 | start_date=datetime(2012, 1, 12)) 17 | 18 | panel_shift_minor = Benchmark('panel.shift(1, axis="minor")', setup, 19 | start_date=datetime(2012, 1, 12)) 20 | 21 | panel_pct_change_major = Benchmark('panel.pct_change(1, axis="major")', setup, 22 | start_date=datetime(2014, 4, 19)) 23 | 24 | panel_pct_change_minor = Benchmark('panel.pct_change(1, axis="minor")', setup, 25 | start_date=datetime(2014, 4, 19)) 26 | 27 | panel_pct_change_items = Benchmark('panel.pct_change(1, axis="items")', setup, 28 | start_date=datetime(2014, 4, 19)) 29 | -------------------------------------------------------------------------------- 
/vb_suite/plotting.py: -------------------------------------------------------------------------------- 1 | from vbench.benchmark import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | 6 | try: 7 | from pandas import date_range 8 | except ImportError: 9 | def date_range(start=None, end=None, periods=None, freq=None): 10 | return DatetimeIndex(start, end, periods=periods, offset=freq) 11 | 12 | """ 13 | 14 | #----------------------------------------------------------------------------- 15 | # Timeseries plotting 16 | 17 | setup = common_setup + """ 18 | N = 2000 19 | M = 5 20 | df = DataFrame(np.random.randn(N,M), index=date_range('1/1/1975', periods=N)) 21 | """ 22 | 23 | plot_timeseries_period = Benchmark("df.plot()", setup=setup, 24 | name='plot_timeseries_period') 25 | 26 | -------------------------------------------------------------------------------- /vb_suite/replace.py: -------------------------------------------------------------------------------- 1 | from vbench.api import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | from datetime import timedelta 6 | 7 | N = 1000000 8 | 9 | try: 10 | rng = date_range('1/1/2000', periods=N, freq='min') 11 | except NameError: 12 | rng = DatetimeIndex('1/1/2000', periods=N, offset=datetools.Minute()) 13 | date_range = DateRange 14 | 15 | ts = Series(np.random.randn(N), index=rng) 16 | """ 17 | 18 | large_dict_setup = """from pandas_vb_common import * 19 | from pandas.compat import range 20 | n = 10 ** 6 21 | start_value = 10 ** 5 22 | to_rep = dict((i, start_value + i) for i in range(n)) 23 | s = Series(np.random.randint(n, size=10 ** 3)) 24 | """ 25 | 26 | replace_fillna = Benchmark('ts.fillna(0., inplace=True)', common_setup, 27 | name='replace_fillna', 28 | start_date=datetime(2012, 4, 4)) 29 | replace_replacena = Benchmark('ts.replace(np.nan, 0., inplace=True)', 30 | common_setup, 31 | 
name='replace_replacena', 32 | start_date=datetime(2012, 5, 15)) 33 | replace_large_dict = Benchmark('s.replace(to_rep, inplace=True)', 34 | large_dict_setup, 35 | name='replace_large_dict', 36 | start_date=datetime(2014, 4, 6)) 37 | -------------------------------------------------------------------------------- /vb_suite/run_suite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from vbench.api import BenchmarkRunner 3 | from suite import * 4 | 5 | 6 | def run_process(): 7 | runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_URL, 8 | BUILD, DB_PATH, TMP_DIR, PREPARE, 9 | always_clean=True, 10 | run_option='eod', start_date=START_DATE, 11 | module_dependencies=dependencies) 12 | runner.run() 13 | 14 | if __name__ == '__main__': 15 | run_process() 16 | -------------------------------------------------------------------------------- /vb_suite/series_methods.py: -------------------------------------------------------------------------------- 1 | from vbench.api import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | """ 6 | 7 | setup = common_setup + """ 8 | s1 = Series(np.random.randn(10000)) 9 | s2 = Series(np.random.randint(1, 10, 10000)) 10 | """ 11 | 12 | series_nlargest1 = Benchmark('s1.nlargest(3, take_last=True);' 13 | 's1.nlargest(3, take_last=False)', 14 | setup, 15 | start_date=datetime(2014, 1, 25)) 16 | series_nlargest2 = Benchmark('s2.nlargest(3, take_last=True);' 17 | 's2.nlargest(3, take_last=False)', 18 | setup, 19 | start_date=datetime(2014, 1, 25)) 20 | 21 | series_nsmallest2 = Benchmark('s1.nsmallest(3, take_last=True);' 22 | 's1.nsmallest(3, take_last=False)', 23 | setup, 24 | start_date=datetime(2014, 1, 25)) 25 | 26 | series_nsmallest2 = Benchmark('s2.nsmallest(3, take_last=True);' 27 | 's2.nsmallest(3, take_last=False)', 28 | setup, 29 | start_date=datetime(2014, 1, 25)) 30 | 
-------------------------------------------------------------------------------- /vb_suite/source/_static/stub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/vb_suite/source/_static/stub -------------------------------------------------------------------------------- /vb_suite/source/themes/agogo/static/bgfooter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/vb_suite/source/themes/agogo/static/bgfooter.png -------------------------------------------------------------------------------- /vb_suite/source/themes/agogo/static/bgtop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/vb_suite/source/themes/agogo/static/bgtop.png -------------------------------------------------------------------------------- /vb_suite/source/themes/agogo/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | stylesheet = agogo.css 4 | pygments_style = tango 5 | 6 | [options] 7 | bodyfont = "Verdana", Arial, sans-serif 8 | headerfont = "Georgia", "Times New Roman", serif 9 | pagewidth = 70em 10 | documentwidth = 50em 11 | sidebarwidth = 20em 12 | bgcolor = #eeeeec 13 | headerbg = url(bgtop.png) top left repeat-x 14 | footerbg = url(bgfooter.png) top left repeat-x 15 | linkcolor = #ce5c00 16 | headercolor1 = #204a87 17 | headercolor2 = #3465a4 18 | headerlinkcolor = #fcaf3e 19 | textalign = justify -------------------------------------------------------------------------------- /vb_suite/sparse.py: -------------------------------------------------------------------------------- 1 | from vbench.benchmark import Benchmark 2 | from datetime import 
datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | """ 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | setup = common_setup + """ 10 | from pandas.core.sparse import SparseSeries, SparseDataFrame 11 | 12 | K = 50 13 | N = 50000 14 | rng = np.asarray(date_range('1/1/2000', periods=N, 15 | freq='T')) 16 | 17 | # rng2 = np.asarray(rng).astype('M8[ns]').astype('i8') 18 | 19 | series = {} 20 | for i in range(1, K + 1): 21 | data = np.random.randn(N)[:-i] 22 | this_rng = rng[:-i] 23 | data[100:] = np.nan 24 | series[i] = SparseSeries(data, index=this_rng) 25 | """ 26 | stmt = "SparseDataFrame(series)" 27 | 28 | bm_sparse1 = Benchmark(stmt, setup, name="sparse_series_to_frame", 29 | start_date=datetime(2011, 6, 1)) 30 | 31 | 32 | setup = common_setup + """ 33 | from pandas.core.sparse import SparseDataFrame 34 | """ 35 | 36 | stmt = "SparseDataFrame(columns=np.arange(100), index=np.arange(1000))" 37 | 38 | sparse_constructor = Benchmark(stmt, setup, name="sparse_frame_constructor", 39 | start_date=datetime(2012, 6, 1)) 40 | -------------------------------------------------------------------------------- /vb_suite/test.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | import matplotlib.pyplot as plt 3 | 4 | import sqlite3 5 | 6 | from vbench.git import GitRepo 7 | 8 | 9 | REPO_PATH = '/home/adam/code/pandas' 10 | repo = GitRepo(REPO_PATH) 11 | 12 | con = sqlite3.connect('vb_suite/benchmarks.db') 13 | 14 | bmk = '36900a889961162138c140ce4ae3c205' 15 | # bmk = '9d7b8c04b532df6c2d55ef497039b0ce' 16 | bmk = '4481aa4efa9926683002a673d2ed3dac' 17 | bmk = '00593cd8c03d769669d7b46585161726' 18 | bmk = '3725ab7cd0a0657d7ae70f171c877cea' 19 | bmk = '3cd376d6d6ef802cdea49ac47a67be21' 20 | bmk2 = '459225186023853494bc345fd180f395' 21 | bmk = 'c22ca82e0cfba8dc42595103113c7da3' 22 | bmk = 'e0e651a8e9fbf0270ab68137f8b9df5f' 23 | bmk = 
'96bda4b9a60e17acf92a243580f2a0c3' 24 | 25 | 26 | def get_results(bmk): 27 | results = con.execute( 28 | "select * from results where checksum='%s'" % bmk).fetchall() 29 | x = Series(dict((t[1], t[3]) for t in results)) 30 | x.index = x.index.map(repo.timestamps.get) 31 | x = x.sort_index() 32 | return x 33 | 34 | x = get_results(bmk) 35 | 36 | 37 | def graph1(): 38 | dm_getitem = get_results('459225186023853494bc345fd180f395') 39 | dm_getvalue = get_results('c22ca82e0cfba8dc42595103113c7da3') 40 | 41 | plt.figure() 42 | ax = plt.gca() 43 | 44 | dm_getitem.plot(label='df[col][idx]', ax=ax) 45 | dm_getvalue.plot(label='df.get_value(idx, col)', ax=ax) 46 | 47 | plt.ylabel('ms') 48 | plt.legend(loc='best') 49 | 50 | 51 | def graph2(): 52 | bm = get_results('96bda4b9a60e17acf92a243580f2a0c3') 53 | plt.figure() 54 | ax = plt.gca() 55 | 56 | bm.plot(ax=ax) 57 | plt.ylabel('ms') 58 | 59 | bm = get_results('36900a889961162138c140ce4ae3c205') 60 | fig = plt.figure() 61 | ax = plt.gca() 62 | bm.plot(ax=ax) 63 | fig.autofmt_xdate() 64 | 65 | plt.xlim([bm.dropna().index[0] - datetools.MonthEnd(), 66 | bm.dropna().index[-1] + datetools.MonthEnd()]) 67 | plt.ylabel('ms') 68 | -------------------------------------------------------------------------------- /vb_suite/timedelta.py: -------------------------------------------------------------------------------- 1 | from vbench.api import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | from pandas import to_timedelta 6 | """ 7 | 8 | #---------------------------------------------------------------------- 9 | # conversion 10 | 11 | setup = common_setup + """ 12 | arr = np.random.randint(0,1000,size=10000) 13 | """ 14 | 15 | stmt = "to_timedelta(arr,unit='s')" 16 | timedelta_convert_int = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1)) 17 | 18 | setup = common_setup + """ 19 | arr = np.random.randint(0,1000,size=10000) 20 | arr = [ '{0} days'.format(i) for i in arr ] 21 | 
""" 22 | 23 | stmt = "to_timedelta(arr)" 24 | timedelta_convert_string = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1)) 25 | 26 | setup = common_setup + """ 27 | arr = np.random.randint(0,60,size=10000) 28 | arr = [ '00:00:{0:02d}'.format(i) for i in arr ] 29 | """ 30 | 31 | stmt = "to_timedelta(arr)" 32 | timedelta_convert_string_seconds = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1)) 33 | --------------------------------------------------------------------------------