├── .coveragerc ├── .gitattributes ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── LICENSES ├── MSGPACK_LICENSE ├── MSGPACK_NUMPY_LICENSE ├── NUMPY_LICENSE ├── OTHER ├── PSF_LICENSE ├── SCIPY_LICENSE ├── SIX └── ULTRAJSON_LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── RELEASE.md ├── bench ├── alignment.py ├── bench_dense_to_sparse.py ├── bench_get_put_value.py ├── bench_groupby.py ├── bench_join_panel.py ├── bench_khash_dict.py ├── bench_merge.R ├── bench_merge.py ├── bench_merge_sqlite.py ├── bench_pivot.R ├── bench_pivot.py ├── bench_sparse.py ├── bench_take_indexing.py ├── bench_unique.py ├── bench_with_subset.R ├── bench_with_subset.py ├── better_unique.py ├── duplicated.R ├── io_roundtrip.py ├── larry.py ├── serialize.py ├── test.py ├── zoo_bench.R └── zoo_bench.py ├── ci ├── README.txt ├── after_script.sh ├── before_install.sh ├── build_docs.sh ├── cron │ └── go_doc.sh ├── install.sh ├── ironcache │ ├── get.py │ └── put.py ├── prep_ccache.sh ├── print_skipped.py ├── print_versions.py ├── requirements-2.6.txt ├── requirements-2.7.txt ├── requirements-2.7_LOCALE.txt ├── requirements-2.7_NUMPY_DEV_1_8_x.txt ├── requirements-2.7_NUMPY_DEV_master.txt ├── requirements-3.2.txt ├── requirements-3.3.txt ├── requirements-3.4.txt ├── script.sh ├── speedpack │ ├── Vagrantfile │ ├── build.sh │ └── nginx │ │ └── nginx.conf.template └── submit_ccache.sh ├── doc ├── README.rst ├── _templates │ └── autosummary │ │ └── class.rst ├── data │ ├── baseball.csv │ ├── fx_prices │ ├── iris.data │ ├── mindex_ex.csv │ ├── test.xls │ └── tips.csv ├── make.py ├── plots │ └── stats │ │ ├── moment_plots.py │ │ ├── moments_ewma.py │ │ ├── moments_ewmvol.py │ │ ├── moments_expw.py │ │ ├── moments_rolling.py │ │ └── moments_rolling_binary.py ├── source │ ├── 10min.rst │ ├── _static │ │ ├── banklist.html │ │ ├── df_repr_truncated.png │ │ ├── eval-perf-small.png │ │ ├── eval-perf.png │ │ ├── legacy_0.10.h5 │ │ ├── query-perf-small.png │ │ ├── query-perf.png │ │ ├── 
stub │ │ ├── trunc_after.png │ │ └── trunc_before.png │ ├── api.rst │ ├── basics.rst │ ├── categorical.rst │ ├── comparison_with_r.rst │ ├── comparison_with_sql.rst │ ├── computation.rst │ ├── conf.py │ ├── contributing.rst │ ├── cookbook.rst │ ├── dsintro.rst │ ├── ecosystem.rst │ ├── enhancingperf.rst │ ├── faq.rst │ ├── gotchas.rst │ ├── groupby.rst │ ├── index.rst.template │ ├── indexing.rst │ ├── install.rst │ ├── io.rst │ ├── merging.rst │ ├── missing_data.rst │ ├── options.rst │ ├── overview.rst │ ├── r_interface.rst │ ├── release.rst │ ├── remote_data.rst │ ├── reshaping.rst │ ├── rplot.rst │ ├── sparse.rst │ ├── themes │ │ └── nature_with_gtoc │ │ │ ├── layout.html │ │ │ ├── static │ │ │ └── nature.css_t │ │ │ └── theme.conf │ ├── timeseries.rst │ ├── tutorials.rst │ ├── v0.10.0.txt │ ├── v0.10.1.txt │ ├── v0.11.0.txt │ ├── v0.12.0.txt │ ├── v0.13.0.txt │ ├── v0.13.1.txt │ ├── v0.14.0.txt │ ├── v0.14.1.txt │ ├── v0.15.0.txt │ ├── v0.4.x.txt │ ├── v0.5.0.txt │ ├── v0.6.0.txt │ ├── v0.6.1.txt │ ├── v0.7.0.txt │ ├── v0.7.1.txt │ ├── v0.7.2.txt │ ├── v0.7.3.txt │ ├── v0.8.0.txt │ ├── v0.8.1.txt │ ├── v0.9.0.txt │ ├── v0.9.1.txt │ ├── visualization.rst │ └── whatsnew.rst └── sphinxext │ ├── README.rst │ ├── ipython_sphinxext │ ├── __init__.py │ ├── ipython_console_highlighting.py │ └── ipython_directive.py │ └── numpydoc │ ├── LICENSE.txt │ ├── README.rst │ ├── __init__.py │ ├── comment_eater.py │ ├── compiler_unparse.py │ ├── docscrape.py │ ├── docscrape_sphinx.py │ ├── linkcode.py │ ├── numpydoc.py │ ├── phantom_import.py │ ├── plot_directive.py │ ├── tests │ ├── test_docscrape.py │ ├── test_linkcode.py │ ├── test_phantom_import.py │ ├── test_plot_directive.py │ └── test_traitsdoc.py │ └── traitsdoc.py ├── examples ├── data │ └── SOURCES ├── finance.py └── regressions.py ├── ez_setup.py ├── fake_pyrex └── Pyrex │ ├── Distutils │ ├── __init__.py │ └── build_ext.py │ └── __init__.py ├── pandas ├── __init__.py ├── algos.pyx ├── compat │ ├── __init__.py │ ├── 
chainmap.py │ ├── chainmap_impl.py │ ├── openpyxl_compat.py │ └── pickle_compat.py ├── computation │ ├── __init__.py │ ├── align.py │ ├── api.py │ ├── common.py │ ├── engines.py │ ├── eval.py │ ├── expr.py │ ├── expressions.py │ ├── ops.py │ ├── pytables.py │ ├── scope.py │ └── tests │ │ ├── __init__.py │ │ └── test_eval.py ├── core │ ├── __init__.py │ ├── algorithms.py │ ├── api.py │ ├── array.py │ ├── base.py │ ├── categorical.py │ ├── common.py │ ├── config.py │ ├── config_init.py │ ├── datetools.py │ ├── format.py │ ├── frame.py │ ├── generic.py │ ├── groupby.py │ ├── index.py │ ├── indexing.py │ ├── internals.py │ ├── matrix.py │ ├── nanops.py │ ├── ops.py │ ├── panel.py │ ├── panel4d.py │ ├── panelnd.py │ ├── reshape.py │ ├── series.py │ ├── sparse.py │ └── strings.py ├── hashtable.pxd ├── hashtable.pyx ├── index.pyx ├── info.py ├── io │ ├── __init__.py │ ├── api.py │ ├── auth.py │ ├── clipboard.py │ ├── common.py │ ├── data.py │ ├── date_converters.py │ ├── excel.py │ ├── ga.py │ ├── gbq.py │ ├── html.py │ ├── json.py │ ├── packers.py │ ├── parsers.py │ ├── pickle.py │ ├── pytables.py │ ├── sql.py │ ├── stata.py │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ ├── banklist.csv │ │ │ ├── banklist.html │ │ │ ├── computer_sales_page.html │ │ │ ├── gbq_fake_job.txt │ │ │ ├── html_encoding │ │ │ │ ├── chinese_utf-16.html │ │ │ │ ├── chinese_utf-32.html │ │ │ │ ├── chinese_utf-8.html │ │ │ │ └── letz_latin1.html │ │ │ ├── iris.csv │ │ │ ├── legacy_hdf │ │ │ │ ├── legacy.h5 │ │ │ │ ├── legacy_0.10.h5 │ │ │ │ ├── legacy_table.h5 │ │ │ │ ├── legacy_table_0.11.h5 │ │ │ │ ├── pytables_native.h5 │ │ │ │ └── pytables_native2.h5 │ │ │ ├── legacy_pickle │ │ │ │ ├── 0.10.1 │ │ │ │ │ ├── AMD64_windows_2.7.3.pickle │ │ │ │ │ └── x86_64_linux_2.7.3.pickle │ │ │ │ ├── 0.11.0 │ │ │ │ │ ├── 0.11.0_x86_64_linux_3.3.0.pickle │ │ │ │ │ ├── x86_64_linux_2.7.3.pickle │ │ │ │ │ └── x86_64_linux_3.3.0.pickle │ │ │ │ ├── 0.12.0 │ │ │ │ │ ├── 0.12.0_AMD64_windows_2.7.3.pickle │ │ │ │ │ 
└── 0.12.0_x86_64_linux_2.7.3.pickle │ │ │ │ ├── 0.13.0 │ │ │ │ │ ├── 0.13.0_AMD64_windows_2.7.3.pickle │ │ │ │ │ ├── 0.13.0_i686_linux_2.6.5.pickle │ │ │ │ │ ├── 0.13.0_i686_linux_2.7.3.pickle │ │ │ │ │ ├── 0.13.0_i686_linux_3.2.3.pickle │ │ │ │ │ ├── 0.13.0_x86_64_darwin_2.7.5.pickle │ │ │ │ │ ├── 0.13.0_x86_64_darwin_2.7.6.pickle │ │ │ │ │ ├── 0.13.0_x86_64_linux_2.7.3.pickle │ │ │ │ │ ├── 0.13.0_x86_64_linux_2.7.8.pickle │ │ │ │ │ └── 0.13.0_x86_64_linux_3.3.0.pickle │ │ │ │ ├── 0.14.0 │ │ │ │ │ ├── 0.14.0_x86_64_darwin_2.7.6.pickle │ │ │ │ │ └── 0.14.0_x86_64_linux_2.7.8.pickle │ │ │ │ └── 0.14.1 │ │ │ │ │ └── 0.14.1_x86_64_linux_2.7.8.pickle │ │ │ ├── macau.html │ │ │ ├── nyse_wsj.html │ │ │ ├── salary.table │ │ │ ├── spam.html │ │ │ ├── stata1_114.dta │ │ │ ├── stata1_117.dta │ │ │ ├── stata1_encoding.dta │ │ │ ├── stata2_113.dta │ │ │ ├── stata2_114.dta │ │ │ ├── stata2_115.dta │ │ │ ├── stata2_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats │ │ │ ├── stata2_117.dta │ │ │ ├── stata3.csv │ │ │ ├── stata3_113.dta │ │ │ ├── stata3_114.dta │ │ │ ├── stata3_115.dta │ │ │ ├── stata3_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats │ │ │ ├── stata3_117.dta │ │ │ ├── stata4_113.dta │ │ │ ├── stata4_114.dta │ │ │ ├── stata4_115.dta │ │ │ ├── stata4_115.dta~1dc157c... 
Added additional data files for testing alternative Stata file formats │ │ │ ├── stata4_117.dta │ │ │ ├── stata5.csv │ │ │ ├── stata5_113.dta │ │ │ ├── stata5_114.dta │ │ │ ├── stata5_115.dta │ │ │ ├── stata5_117.dta │ │ │ ├── stata6.csv │ │ │ ├── stata6_113.dta │ │ │ ├── stata6_114.dta │ │ │ ├── stata6_115.dta │ │ │ ├── stata6_117.dta │ │ │ ├── stata7_115.dta │ │ │ ├── stata7_117.dta │ │ │ ├── test.xls │ │ │ ├── test.xlsm │ │ │ ├── test.xlsx │ │ │ ├── test1.csv │ │ │ ├── test2.csv │ │ │ ├── test2.xls │ │ │ ├── test2.xlsx │ │ │ ├── test3.xls │ │ │ ├── test_types.xls │ │ │ ├── test_types.xlsx │ │ │ ├── times_1900.xls │ │ │ ├── times_1904.xls │ │ │ ├── tips.csv │ │ │ ├── unicode_series.csv │ │ │ ├── utf16_ex.txt │ │ │ ├── valid_markup.html │ │ │ ├── wikipedia_states.html │ │ │ ├── yahoo_options1.html │ │ │ └── yahoo_options2.html │ │ ├── generate_legacy_pickles.py │ │ ├── test_clipboard.py │ │ ├── test_cparser.py │ │ ├── test_data.py │ │ ├── test_date_converters.py │ │ ├── test_excel.py │ │ ├── test_ga.py │ │ ├── test_gbq.py │ │ ├── test_html.py │ │ ├── test_json │ │ │ ├── __init__.py │ │ │ ├── data │ │ │ │ ├── tsframe_iso_v012.json │ │ │ │ └── tsframe_v012.json │ │ │ ├── test_pandas.py │ │ │ └── test_ujson.py │ │ ├── test_json_norm.py │ │ ├── test_packers.py │ │ ├── test_parsers.py │ │ ├── test_pickle.py │ │ ├── test_pytables.py │ │ ├── test_sql.py │ │ ├── test_stata.py │ │ └── test_wb.py │ └── wb.py ├── lib.pyx ├── msgpack.pyx ├── parser.pyx ├── rpy │ ├── __init__.py │ ├── base.py │ ├── common.py │ ├── mass.py │ ├── tests │ │ ├── __init__.py │ │ └── test_common.py │ └── vars.py ├── sandbox │ ├── __init__.py │ └── qtpandas.py ├── sparse │ ├── __init__.py │ ├── api.py │ ├── array.py │ ├── frame.py │ ├── list.py │ ├── panel.py │ ├── series.py │ └── tests │ │ ├── __init__.py │ │ ├── test_array.py │ │ ├── test_libsparse.py │ │ ├── test_list.py │ │ └── test_sparse.py ├── src │ ├── datetime.pxd │ ├── datetime │ │ ├── np_datetime.c │ │ ├── np_datetime.h │ │ ├── 
np_datetime_strings.c │ │ └── np_datetime_strings.h │ ├── datetime_helper.h │ ├── generate_code.py │ ├── generated.pyx │ ├── headers │ │ ├── math.h │ │ ├── ms_inttypes.h │ │ ├── ms_stdint.h │ │ ├── portable.h │ │ └── stdint.h │ ├── helper.h │ ├── inference.pyx │ ├── join.pyx │ ├── khash.pxd │ ├── klib │ │ ├── khash.h │ │ ├── khash_python.h │ │ ├── ktypes.h │ │ └── kvec.h │ ├── msgpack │ │ ├── pack.h │ │ ├── pack_template.h │ │ ├── sysdep.h │ │ ├── unpack.h │ │ ├── unpack_define.h │ │ └── unpack_template.h │ ├── numpy.pxd │ ├── numpy_helper.h │ ├── offsets.pyx │ ├── parse_helper.h │ ├── parser │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── io.c │ │ ├── io.h │ │ ├── tokenizer.c │ │ └── tokenizer.h │ ├── period.c │ ├── period.h │ ├── properties.pyx │ ├── reduce.pyx │ ├── skiplist.h │ ├── skiplist.pxd │ ├── skiplist.pyx │ ├── sparse.pyx │ ├── testing.pyx │ ├── ujson │ │ ├── lib │ │ │ ├── ultrajson.h │ │ │ ├── ultrajsondec.c │ │ │ └── ultrajsonenc.c │ │ └── python │ │ │ ├── JSONtoObj.c │ │ │ ├── objToJSON.c │ │ │ ├── py_defines.h │ │ │ ├── ujson.c │ │ │ └── version.h │ └── util.pxd ├── stats │ ├── __init__.py │ ├── api.py │ ├── common.py │ ├── fama_macbeth.py │ ├── interface.py │ ├── math.py │ ├── misc.py │ ├── moments.py │ ├── ols.py │ ├── plm.py │ ├── tests │ │ ├── __init__.py │ │ ├── common.py │ │ ├── test_fama_macbeth.py │ │ ├── test_math.py │ │ ├── test_moments.py │ │ ├── test_ols.py │ │ └── test_var.py │ └── var.py ├── tests │ ├── __init__.py │ ├── data │ │ ├── iris.csv │ │ ├── mindex_073.pickle │ │ ├── multiindex_v1.pickle │ │ ├── tips.csv │ │ └── unicode_series.csv │ ├── test_algos.py │ ├── test_base.py │ ├── test_categorical.py │ ├── test_common.py │ ├── test_compat.py │ ├── test_config.py │ ├── test_expressions.py │ ├── test_format.py │ ├── test_frame.py │ ├── test_generic.py │ ├── test_graphics.py │ ├── test_groupby.py │ ├── test_index.py │ ├── test_indexing.py │ ├── test_internals.py │ ├── test_msgpack │ │ ├── __init__.py │ │ ├── test_buffer.py │ │ ├── 
test_case.py │ │ ├── test_except.py │ │ ├── test_format.py │ │ ├── test_obj.py │ │ ├── test_pack.py │ │ ├── test_read_size.py │ │ ├── test_seq.py │ │ ├── test_sequnpack.py │ │ ├── test_subtype.py │ │ └── test_unpack_raw.py │ ├── test_multilevel.py │ ├── test_nanops.py │ ├── test_panel.py │ ├── test_panel4d.py │ ├── test_panelnd.py │ ├── test_reshape.py │ ├── test_rplot.py │ ├── test_series.py │ ├── test_stats.py │ ├── test_strings.py │ ├── test_testing.py │ └── test_tseries.py ├── tools │ ├── __init__.py │ ├── describe.py │ ├── merge.py │ ├── pivot.py │ ├── plotting.py │ ├── rplot.py │ ├── tests │ │ ├── __init__.py │ │ ├── cut_data.csv │ │ ├── test_merge.py │ │ ├── test_pivot.py │ │ ├── test_tile.py │ │ ├── test_tools.py │ │ └── test_util.py │ ├── tile.py │ └── util.py ├── tseries │ ├── __init__.py │ ├── api.py │ ├── common.py │ ├── converter.py │ ├── frequencies.py │ ├── holiday.py │ ├── index.py │ ├── interval.py │ ├── offsets.py │ ├── period.py │ ├── plotting.py │ ├── resample.py │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ ├── daterange_073.pickle │ │ │ ├── frame.pickle │ │ │ ├── series.pickle │ │ │ └── series_daterange0.pickle │ │ ├── test_converter.py │ │ ├── test_daterange.py │ │ ├── test_frequencies.py │ │ ├── test_holiday.py │ │ ├── test_offsets.py │ │ ├── test_period.py │ │ ├── test_plotting.py │ │ ├── test_resample.py │ │ ├── test_timedeltas.py │ │ ├── test_timeseries.py │ │ ├── test_timeseries_legacy.py │ │ ├── test_timezones.py │ │ ├── test_tslib.py │ │ └── test_util.py │ ├── timedeltas.py │ ├── tools.py │ └── util.py ├── tslib.pxd ├── tslib.pyx └── util │ ├── __init__.py │ ├── clipboard.py │ ├── decorators.py │ ├── misc.py │ ├── print_versions.py │ ├── terminal.py │ └── testing.py ├── scripts ├── bench_join.R ├── bench_join.py ├── bench_join_multi.py ├── bench_refactor.py ├── boxplot_test.py ├── count_code.sh ├── faster_xs.py ├── file_sizes.py ├── find_commits_touching_func.py ├── find_undoc_args.py ├── gen_release_notes.py ├── git-mrb ├── 
git_code_churn.py ├── groupby_sample.py ├── groupby_speed.py ├── groupby_test.py ├── hdfstore_panel_perf.py ├── json_manip.py ├── leak.py ├── parser_magic.py ├── preepoch_test.py ├── pypistats.py ├── roll_median_leak.py ├── runtests.py ├── test_py25.bat ├── test_py26.bat ├── test_py27.bat ├── test_py31.bat ├── test_py32.bat ├── testmed.py ├── touchup_gh_issues.py ├── use_build_cache.py ├── winbuild_py25.bat ├── winbuild_py27.bat └── windows_builder │ ├── build_26-32.bat │ ├── build_26-64.bat │ ├── build_27-32.bat │ ├── build_27-64.bat │ ├── build_33-32.bat │ ├── build_33-64.bat │ ├── build_34-32.bat │ ├── build_34-64.bat │ ├── check_and_build.bat │ ├── check_and_build.py │ └── readme.txt ├── setup.py ├── test.sh ├── test_fast.sh ├── test_multi.sh ├── test_perf.sh ├── test_rebuild.sh ├── tox.ini └── vb_suite ├── .gitignore ├── attrs_caching.py ├── binary_ops.py ├── ctors.py ├── eval.py ├── frame_ctor.py ├── frame_methods.py ├── generate_rst_files.py ├── groupby.py ├── hdfstore_bench.py ├── index_object.py ├── indexing.py ├── inference.py ├── io_bench.py ├── join_merge.py ├── make.py ├── measure_memory_consumption.py ├── miscellaneous.py ├── packers.py ├── pandas_vb_common.py ├── panel_ctor.py ├── panel_methods.py ├── parser_vb.py ├── perf_HEAD.py ├── plotting.py ├── reindex.py ├── replace.py ├── reshape.py ├── run_suite.py ├── series_methods.py ├── source ├── _static │ └── stub ├── conf.py └── themes │ └── agogo │ ├── layout.html │ ├── static │ ├── agogo.css_t │ ├── bgfooter.png │ └── bgtop.png │ └── theme.conf ├── sparse.py ├── stat_ops.py ├── strings.py ├── suite.py ├── test.py ├── test_perf.py ├── timedelta.py └── timeseries.py /.coveragerc: -------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | [run] 3 | branch = False 4 | 5 | [report] 6 | # Regexes for lines to exclude from consideration 7 | exclude_lines = 8 | # Have to re-enable the standard pragma 9 | pragma: no cover 10 | 11 | # Don't 
complain about missing debug-only code: 12 | def __repr__ 13 | if self\.debug 14 | 15 | # Don't complain if tests don't hit defensive assertion code: 16 | raise AssertionError 17 | raise NotImplementedError 18 | 19 | # Don't complain if non-runnable code isn't run: 20 | if 0: 21 | if __name__ == .__main__.: 22 | 23 | ignore_errors = False 24 | 25 | [html] 26 | directory = coverage_html_report -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | # enforce text on certain files 3 | *.py text 4 | *.pyx text 5 | *.pyd text 6 | *.c text 7 | *.h text 8 | *.html text 9 | *.csv text 10 | *.json text 11 | *.pickle binary 12 | *.h5 binary 13 | *.dta binary 14 | *.xls binary 15 | *.xlsx binary 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ######################################### 2 | # Editor temporary/working/backup files # 3 | .#* 4 | *\#*\# 5 | [#]*# 6 | *~ 7 | *$ 8 | *.bak 9 | *flymake* 10 | *.kdev4 11 | *.log 12 | *.swp 13 | *.pdb 14 | .project 15 | .pydevproject 16 | .settings 17 | .idea 18 | .vagrant 19 | .noseids 20 | 21 | # Compiled source # 22 | ################### 23 | *.a 24 | *.com 25 | *.class 26 | *.dll 27 | *.exe 28 | *.o 29 | *.py[ocd] 30 | *.so 31 | .build_cache_dir 32 | MANIFEST 33 | 34 | # Python files # 35 | ################ 36 | # setup.py working directory 37 | build 38 | # sphinx build directory 39 | doc/_build 40 | # setup.py dist directory 41 | dist 42 | # Egg metadata 43 | *.egg-info 44 | # tox testing tool 45 | .tox 46 | # rope 47 | .ropeproject 48 | # wheel files 49 | *.whl 50 | **/wheelhouse/* 51 | # coverage 52 | .coverage 53 | 54 | # OS generated files # 55 | ###################### 56 | .directory 57 | .gdb_history 58 | .DS_Store? 59 | ehthumbs.db 60 | Icon? 
61 | Thumbs.db 62 | 63 | # Data files # 64 | ############## 65 | *.dta 66 | *.h5 67 | pandas/io/*.dat 68 | pandas/io/*.json 69 | scikits 70 | 71 | # Generated Sources # 72 | ##################### 73 | !skts.c 74 | !np_datetime.c 75 | !np_datetime_strings.c 76 | *.c 77 | *.cpp 78 | 79 | # Things specific to this project # 80 | ################################### 81 | pandas/version.py 82 | 83 | # Documentation generated files # 84 | ################################# 85 | doc/source/generated 86 | doc/source/_static 87 | doc/source/vbench 88 | doc/source/vbench.rst 89 | doc/source/index.rst 90 | doc/build/html/index.html 91 | # Windows specific leftover: 92 | doc/tmp.sv 93 | -------------------------------------------------------------------------------- /LICENSES/MSGPACK_LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2008-2011 INADA Naoki 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /LICENSES/MSGPACK_NUMPY_LICENSE: -------------------------------------------------------------------------------- 1 | .. -*- rst -*- 2 | 3 | License 4 | ======= 5 | 6 | Copyright (c) 2013, Lev Givon. 7 | All rights reserved. 
8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | 13 | * Redistributions of source code must retain the above copyright 14 | notice, this list of conditions and the following disclaimer. 15 | * Redistributions in binary form must reproduce the above 16 | copyright notice, this list of conditions and the following 17 | disclaimer in the documentation and/or other materials provided 18 | with the distribution. 19 | * Neither the name of Lev Givon nor the names of any 20 | contributors may be used to endorse or promote products derived 21 | from this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 29 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 30 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 31 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 32 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 33 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 | -------------------------------------------------------------------------------- /LICENSES/NUMPY_LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2005-2011, NumPy Developers. 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of the NumPy Developers nor the names of any 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /LICENSES/SCIPY_LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2001, 2002 Enthought, Inc. 2 | All rights reserved. 3 | 4 | Copyright (c) 2003-2012 SciPy Developers. 5 | All rights reserved. 
6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | a. Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | b. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | c. Neither the name of Enthought nor the names of the SciPy Developers 16 | may be used to endorse or promote products derived from this software 17 | without specific prior written permission. 18 | 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 30 | DAMAGE. 
31 | 32 | -------------------------------------------------------------------------------- /LICENSES/SIX: -------------------------------------------------------------------------------- 1 | six license (substantial portions used in the python 3 compatibility module) 2 | =========================================================================== 3 | Copyright (c) 2010-2013 Benjamin Peterson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | # 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | # 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LICENSES/ULTRAJSON_LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the ESN Social Software AB nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | 26 | 27 | Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) 28 | http://code.google.com/p/stringencoders/ 29 | Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. 30 | 31 | Numeric decoder derived from from TCL library 32 | http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms 33 | * Copyright (c) 1988-1993 The Regents of the University of California. 
34 | * Copyright (c) 1994 Sun Microsystems, Inc. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include LICENSE 3 | include RELEASE.md 4 | include README.rst 5 | include setup.py 6 | 7 | graft doc 8 | prune doc/build 9 | 10 | graft examples 11 | graft pandas 12 | 13 | global-exclude *.so 14 | global-exclude *.pyd 15 | global-exclude *.pyc 16 | global-exclude *~ 17 | global-exclude \#* 18 | global-exclude .git* 19 | global-exclude .DS_Store 20 | global-exclude *.png 21 | 22 | # include examples/data/* 23 | # recursive-include examples *.py 24 | # recursive-include doc/source * 25 | # recursive-include doc/sphinxext * 26 | # recursive-include LICENSES * 27 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | tseries: pandas/lib.pyx pandas/tslib.pyx pandas/hashtable.pyx 2 | python setup.py build_ext --inplace 3 | 4 | .PHONY : develop build clean clean_pyc tseries doc 5 | 6 | clean: 7 | -python setup.py clean 8 | 9 | clean_pyc: 10 | -find . -name '*.py[co]' -exec rm {} \; 11 | 12 | sparse: pandas/src/sparse.pyx 13 | python setup.py build_ext --inplace 14 | 15 | build: clean_pyc 16 | python setup.py build_ext --inplace 17 | 18 | develop: build 19 | -python setup.py develop 20 | 21 | doc: 22 | -rm -rf doc/build doc/source/generated 23 | cd doc; \ 24 | python make.py clean; \ 25 | python make.py html 26 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | Release Notes 2 | ============= 3 | 4 | The list of changes to pandas between each release can be found 5 | [here](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html). 
For full 6 | details, see the commit logs at http://github.com/pydata/pandas. 7 | -------------------------------------------------------------------------------- /bench/alignment.py: -------------------------------------------------------------------------------- 1 | # Setup 2 | from pandas.compat import range, lrange 3 | import numpy as np 4 | import pandas 5 | import la 6 | N = 1000 7 | K = 50 8 | arr1 = np.random.randn(N, K) 9 | arr2 = np.random.randn(N, K) 10 | idx1 = lrange(N) 11 | idx2 = lrange(K) 12 | 13 | # pandas 14 | dma1 = pandas.DataFrame(arr1, idx1, idx2) 15 | dma2 = pandas.DataFrame(arr2, idx1[::-1], idx2[::-1]) 16 | 17 | # larry 18 | lar1 = la.larry(arr1, [idx1, idx2]) 19 | lar2 = la.larry(arr2, [idx1[::-1], idx2[::-1]]) 20 | 21 | for i in range(100): 22 | result = lar1 + lar2 23 | -------------------------------------------------------------------------------- /bench/bench_dense_to_sparse.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | 3 | K = 100 4 | N = 100000 5 | rng = DatetimeIndex('1/1/2000', periods=N, offset=datetools.Minute()) 6 | 7 | rng2 = np.asarray(rng).astype('M8[us]').astype('i8') 8 | 9 | series = {} 10 | for i in range(1, K + 1): 11 | data = np.random.randn(N)[:-i] 12 | this_rng = rng2[:-i] 13 | data[100:] = np.nan 14 | series[i] = SparseSeries(data, index=this_rng) 15 | -------------------------------------------------------------------------------- /bench/bench_get_put_value.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | from pandas.util.testing import rands 3 | from pandas.compat import range 4 | 5 | N = 1000 6 | K = 50 7 | 8 | 9 | def _random_index(howmany): 10 | return Index([rands(10) for _ in range(howmany)]) 11 | 12 | df = DataFrame(np.random.randn(N, K), index=_random_index(N), 13 | columns=_random_index(K)) 14 | 15 | 16 | def get1(): 17 | for col in df.columns: 18 | for row in df.index: 19 | _ 
= df[col][row] 20 | 21 | 22 | def get2(): 23 | for col in df.columns: 24 | for row in df.index: 25 | _ = df.get_value(row, col) 26 | 27 | 28 | def put1(): 29 | for col in df.columns: 30 | for row in df.index: 31 | df[col][row] = 0 32 | 33 | 34 | def put2(): 35 | for col in df.columns: 36 | for row in df.index: 37 | df.set_value(row, col, 0) 38 | 39 | 40 | def resize1(): 41 | buf = DataFrame() 42 | for col in df.columns: 43 | for row in df.index: 44 | buf = buf.set_value(row, col, 5.) 45 | return buf 46 | 47 | 48 | def resize2(): 49 | from collections import defaultdict 50 | 51 | buf = defaultdict(dict) 52 | for col in df.columns: 53 | for row in df.index: 54 | buf[col][row] = 5. 55 | 56 | return DataFrame(buf) 57 | -------------------------------------------------------------------------------- /bench/bench_groupby.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | from pandas.util.testing import rands 3 | from pandas.compat import range 4 | 5 | import string 6 | import random 7 | 8 | k = 20000 9 | n = 10 10 | 11 | foo = np.tile(np.array([rands(10) for _ in range(k)], dtype='O'), n) 12 | foo2 = list(foo) 13 | random.shuffle(foo) 14 | random.shuffle(foo2) 15 | 16 | df = DataFrame({'A': foo, 17 | 'B': foo2, 18 | 'C': np.random.randn(n * k)}) 19 | 20 | import pandas._sandbox as sbx 21 | 22 | 23 | def f(): 24 | table = sbx.StringHashTable(len(df)) 25 | ret = table.factorize(df['A']) 26 | return ret 27 | 28 | 29 | def g(): 30 | table = sbx.PyObjectHashTable(len(df)) 31 | ret = table.factorize(df['A']) 32 | return ret 33 | 34 | ret = f() 35 | 36 | """ 37 | import pandas._tseries as lib 38 | 39 | f = np.std 40 | 41 | 42 | grouped = df.groupby(['A', 'B']) 43 | 44 | label_list = [ping.labels for ping in grouped.groupings] 45 | shape = [len(ping.ids) for ping in grouped.groupings] 46 | 47 | from pandas.core.groupby import get_group_index 48 | 49 | 50 | group_index = get_group_index(label_list, shape).astype('i4') 
51 | 52 | ngroups = np.prod(shape) 53 | 54 | indexer = lib.groupsort_indexer(group_index, ngroups) 55 | 56 | values = df['C'].values.take(indexer) 57 | group_index = group_index.take(indexer) 58 | 59 | f = lambda x: x.std(ddof=1) 60 | 61 | grouper = lib.Grouper(df['C'], np.ndarray.std, group_index, ngroups) 62 | result = grouper.get_result() 63 | 64 | expected = grouped.std() 65 | """ 66 | -------------------------------------------------------------------------------- /bench/bench_khash_dict.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some comparisons of khash.h to Python dict 3 | """ 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | import os 8 | 9 | from vbench.api import Benchmark 10 | from pandas.util.testing import rands 11 | from pandas.compat import range 12 | import pandas._tseries as lib 13 | import pandas._sandbox as sbx 14 | import time 15 | 16 | import psutil 17 | 18 | pid = os.getpid() 19 | proc = psutil.Process(pid) 20 | 21 | 22 | def object_test_data(n): 23 | pass 24 | 25 | 26 | def string_test_data(n): 27 | return np.array([rands(10) for _ in range(n)], dtype='O') 28 | 29 | 30 | def int_test_data(n): 31 | return np.arange(n, dtype='i8') 32 | 33 | N = 1000000 34 | 35 | #---------------------------------------------------------------------- 36 | # Benchmark 1: map_locations 37 | 38 | 39 | def map_locations_python_object(): 40 | arr = string_test_data(N) 41 | return _timeit(lambda: lib.map_indices_object(arr)) 42 | 43 | 44 | def map_locations_khash_object(): 45 | arr = string_test_data(N) 46 | 47 | def f(): 48 | table = sbx.PyObjectHashTable(len(arr)) 49 | table.map_locations(arr) 50 | return _timeit(f) 51 | 52 | 53 | def _timeit(f, iterations=10): 54 | start = time.time() 55 | for _ in range(iterations): 56 | foo = f() 57 | elapsed = time.time() - start 58 | return elapsed 59 | 60 | #---------------------------------------------------------------------- 61 | # Benchmark 2: 
lookup_locations 62 | 63 | 64 | def lookup_python(values): 65 | table = lib.map_indices_object(values) 66 | return _timeit(lambda: lib.merge_indexer_object(values, table)) 67 | 68 | 69 | def lookup_khash(values): 70 | table = sbx.PyObjectHashTable(len(values)) 71 | table.map_locations(values) 72 | locs = table.lookup_locations(values) 73 | # elapsed = _timeit(lambda: table.lookup_locations2(values)) 74 | return table 75 | 76 | 77 | def leak(values): 78 | for _ in range(100): 79 | print(proc.get_memory_info()) 80 | table = lookup_khash(values) 81 | # table.destroy() 82 | 83 | arr = string_test_data(N) 84 | 85 | #---------------------------------------------------------------------- 86 | # Benchmark 3: unique 87 | 88 | #---------------------------------------------------------------------- 89 | # Benchmark 4: factorize 90 | -------------------------------------------------------------------------------- /bench/bench_pivot.R: -------------------------------------------------------------------------------- 1 | library(reshape2) 2 | 3 | 4 | n <- 100000 5 | a.size <- 5 6 | b.size <- 5 7 | 8 | data <- data.frame(a=sample(letters[1:a.size], n, replace=T), 9 | b=sample(letters[1:b.size], n, replace=T), 10 | c=rnorm(n), 11 | d=rnorm(n)) 12 | 13 | timings <- numeric() 14 | 15 | # acast(melt(data, id=c("a", "b")), a ~ b, mean) 16 | # acast(melt(data, id=c("a", "b")), a + b ~ variable, mean) 17 | 18 | for (i in 1:10) { 19 | gc() 20 | tim <- system.time(acast(melt(data, id=c("a", "b")), a ~ b, mean, 21 | subset=.(variable=="c"))) 22 | timings[i] = tim[3] 23 | } 24 | 25 | mean(timings) 26 | 27 | acast(melt(data, id=c("a", "b")), a ~ b, mean, subset=.(variable="c")) 28 | -------------------------------------------------------------------------------- /bench/bench_pivot.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | import string 3 | 4 | 5 | n = 100000 6 | asize = 5 7 | bsize = 5 8 | 9 | letters = 
np.asarray(list(string.letters), dtype=object) 10 | 11 | data = DataFrame(dict(foo=letters[:asize][np.random.randint(0, asize, n)], 12 | bar=letters[:bsize][np.random.randint(0, bsize, n)], 13 | baz=np.random.randn(n), 14 | qux=np.random.randn(n))) 15 | 16 | table = pivot_table(data, xby=['foo', 'bar']) 17 | -------------------------------------------------------------------------------- /bench/bench_sparse.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | from pandas import * 5 | import pandas.core.sparse as spm 6 | import pandas.compat as compat 7 | reload(spm) 8 | from pandas.core.sparse import * 9 | 10 | N = 10000. 11 | 12 | arr1 = np.arange(N) 13 | index = Index(np.arange(N)) 14 | 15 | off = N // 10 16 | arr1[off: 2 * off] = np.NaN 17 | arr1[4 * off: 5 * off] = np.NaN 18 | arr1[8 * off: 9 * off] = np.NaN 19 | 20 | arr2 = np.arange(N) 21 | arr2[3 * off // 2: 2 * off + off // 2] = np.NaN 22 | arr2[8 * off + off // 2: 9 * off + off // 2] = np.NaN 23 | 24 | s1 = SparseSeries(arr1, index=index) 25 | s2 = SparseSeries(arr2, index=index) 26 | 27 | is1 = SparseSeries(arr1, kind='integer', index=index) 28 | is2 = SparseSeries(arr2, kind='integer', index=index) 29 | 30 | s1_dense = s1.to_dense() 31 | s2_dense = s2.to_dense() 32 | 33 | if 'linux' in sys.platform: 34 | pth = '/home/wesm/code/pandas/example' 35 | else: 36 | pth = '/Users/wesm/code/pandas/example' 37 | 38 | dm = DataFrame.load(pth) 39 | 40 | sdf = dm.to_sparse() 41 | 42 | 43 | def new_data_like(sdf): 44 | new_data = {} 45 | for col, series in compat.iteritems(sdf): 46 | new_data[col] = SparseSeries(np.random.randn(len(series.sp_values)), 47 | index=sdf.index, 48 | sparse_index=series.sp_index, 49 | fill_value=series.fill_value) 50 | 51 | return SparseDataFrame(new_data) 52 | 53 | # data = {} 54 | # for col, ser in dm.iteritems(): 55 | # data[col] = SparseSeries(ser) 56 | 57 | dwp = Panel.fromDict({'foo': dm}) 58 | # sdf = 
SparseDataFrame(data) 59 | 60 | 61 | lp = stack_sparse_frame(sdf) 62 | 63 | 64 | swp = SparsePanel({'A': sdf}) 65 | swp = SparsePanel({'A': sdf, 66 | 'B': sdf, 67 | 'C': sdf, 68 | 'D': sdf}) 69 | 70 | y = sdf 71 | x = SparsePanel({'x1': sdf + new_data_like(sdf) / 10, 72 | 'x2': sdf + new_data_like(sdf) / 10}) 73 | 74 | dense_y = sdf 75 | dense_x = x.to_dense() 76 | 77 | # import hotshot, hotshot.stats 78 | # prof = hotshot.Profile('test.prof') 79 | 80 | # benchtime, stones = prof.runcall(ols, y=y, x=x) 81 | 82 | # prof.close() 83 | 84 | # stats = hotshot.stats.load('test.prof') 85 | 86 | dense_model = ols(y=dense_y, x=dense_x) 87 | 88 | import pandas.stats.plm as plm 89 | import pandas.stats.interface as face 90 | reload(plm) 91 | reload(face) 92 | 93 | # model = face.ols(y=y, x=x) 94 | -------------------------------------------------------------------------------- /bench/bench_take_indexing.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | 4 | from pandas import * 5 | import pandas._tseries as lib 6 | 7 | from pandas import DataFrame 8 | import timeit 9 | from pandas.compat import zip 10 | 11 | setup = """ 12 | from pandas import Series 13 | import pandas._tseries as lib 14 | import random 15 | import numpy as np 16 | 17 | import random 18 | n = %d 19 | k = %d 20 | arr = np.random.randn(n, k) 21 | indexer = np.arange(n, dtype=np.int32) 22 | indexer = indexer[::-1] 23 | """ 24 | 25 | sizes = [100, 1000, 10000, 100000] 26 | iters = [1000, 1000, 100, 1] 27 | 28 | fancy_2d = [] 29 | take_2d = [] 30 | cython_2d = [] 31 | 32 | n = 1000 33 | 34 | 35 | def _timeit(stmt, size, k=5, iters=1000): 36 | timer = timeit.Timer(stmt=stmt, setup=setup % (sz, k)) 37 | return timer.timeit(n) / n 38 | 39 | for sz, its in zip(sizes, iters): 40 | print(sz) 41 | fancy_2d.append(_timeit('arr[indexer]', sz, iters=its)) 42 | take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, 
iters=its)) 43 | cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its)) 44 | 45 | df = DataFrame({'fancy': fancy_2d, 46 | 'take': take_2d, 47 | 'cython': cython_2d}) 48 | 49 | print(df) 50 | 51 | from pandas.rpy.common import r 52 | r('mat <- matrix(rnorm(50000), nrow=10000, ncol=5)') 53 | r('set.seed(12345)') 54 | r('indexer <- sample(1:10000)') 55 | r('mat[indexer,]') 56 | -------------------------------------------------------------------------------- /bench/bench_with_subset.R: -------------------------------------------------------------------------------- 1 | library(microbenchmark) 2 | library(data.table) 3 | 4 | 5 | data.frame.subset.bench <- function (n=1e7, times=30) { 6 | df <- data.frame(a=rnorm(n), b=rnorm(n), c=rnorm(n)) 7 | print(microbenchmark(subset(df, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c), 8 | times=times)) 9 | } 10 | 11 | 12 | # data.table allows something very similar to query with an expression 13 | # but we have chained comparisons AND we're faster BOO YAH!
14 | data.table.subset.expression.bench <- function (n=1e7, times=30) { 15 | dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) 16 | print(microbenchmark(dt[, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c], 17 | times=times)) 18 | } 19 | 20 | 21 | # compare against subset with data.table for good measure 22 | data.table.subset.bench <- function (n=1e7, times=30) { 23 | dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) 24 | print(microbenchmark(subset(dt, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c), 25 | times=times)) 26 | } 27 | 28 | 29 | data.frame.with.bench <- function (n=1e7, times=30) { 30 | df <- data.frame(a=rnorm(n), b=rnorm(n), c=rnorm(n)) 31 | 32 | print(microbenchmark(with(df, a + b * (c ^ 2 + b ^ 2 - a) / (a * c) ^ 3), 33 | times=times)) 34 | } 35 | 36 | 37 | data.table.with.bench <- function (n=1e7, times=30) { 38 | dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) 39 | print(microbenchmark(with(dt, a + b * (c ^ 2 + b ^ 2 - a) / (a * c) ^ 3), 40 | times=times)) 41 | } 42 | 43 | 44 | bench <- function () { 45 | data.frame.subset.bench() 46 | data.table.subset.expression.bench() 47 | data.table.subset.bench() 48 | data.frame.with.bench() 49 | data.table.with.bench() 50 | } 51 | 52 | 53 | bench() 54 | -------------------------------------------------------------------------------- /bench/duplicated.R: -------------------------------------------------------------------------------- 1 | N <- 100000 2 | 3 | k1 = rep(NA, N) 4 | k2 = rep(NA, N) 5 | for (i in 1:N){ 6 | k1[i] <- paste(sample(letters, 1), collapse="") 7 | k2[i] <- paste(sample(letters, 1), collapse="") 8 | } 9 | df <- data.frame(a=k1, b=k2, c=rep(1:100, N / 100)) 10 | df2 <- data.frame(a=k1, b=k2) 11 | 12 | timings <- numeric() 13 | timings2 <- numeric() 14 | for (i in 1:50) { 15 | gc() 16 | timings[i] = system.time(deduped <- df[!duplicated(df),])[3] 17 | gc() 18 | timings2[i] = system.time(deduped <- df[!duplicated(df[,c("a", "b")]),])[3] 19 | } 20 | 21 | mean(timings) 22 | mean(timings2) 23 
| -------------------------------------------------------------------------------- /bench/larry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/bench/larry.py -------------------------------------------------------------------------------- /bench/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import itertools 3 | import collections 4 | import scipy.ndimage as ndi 5 | from pandas.compat import zip, range 6 | 7 | N = 10000 8 | 9 | lat = np.random.randint(0, 360, N) 10 | lon = np.random.randint(0, 360, N) 11 | data = np.random.randn(N) 12 | 13 | 14 | def groupby1(lat, lon, data): 15 | indexer = np.lexsort((lon, lat)) 16 | lat = lat.take(indexer) 17 | lon = lon.take(indexer) 18 | sorted_data = data.take(indexer) 19 | 20 | keys = 1000. * lat + lon 21 | unique_keys = np.unique(keys) 22 | bounds = keys.searchsorted(unique_keys) 23 | 24 | result = group_agg(sorted_data, bounds, lambda x: x.mean()) 25 | 26 | decoder = keys.searchsorted(unique_keys) 27 | 28 | return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result)) 29 | 30 | 31 | def group_mean(lat, lon, data): 32 | indexer = np.lexsort((lon, lat)) 33 | lat = lat.take(indexer) 34 | lon = lon.take(indexer) 35 | sorted_data = data.take(indexer) 36 | 37 | keys = 1000 * lat + lon 38 | unique_keys = np.unique(keys) 39 | 40 | result = ndi.mean(sorted_data, labels=keys, index=unique_keys) 41 | decoder = keys.searchsorted(unique_keys) 42 | 43 | return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result)) 44 | 45 | 46 | def group_mean_naive(lat, lon, data): 47 | grouped = collections.defaultdict(list) 48 | for lt, ln, da in zip(lat, lon, data): 49 | grouped[(lt, ln)].append(da) 50 | 51 | averaged = dict((ltln, np.mean(da)) for ltln, da in grouped.items()) 52 | 53 | return averaged 54 | 55 | 56 | def group_agg(values, 
bounds, f): 57 | N = len(values) 58 | result = np.empty(len(bounds), dtype=float) 59 | for i, left_bound in enumerate(bounds): 60 | if i == len(bounds) - 1: 61 | right_bound = N 62 | else: 63 | right_bound = bounds[i + 1] 64 | 65 | result[i] = f(values[left_bound: right_bound]) 66 | 67 | return result 68 | 69 | # for i in range(10): 70 | # groupby1(lat, lon, data) 71 | -------------------------------------------------------------------------------- /bench/zoo_bench.R: -------------------------------------------------------------------------------- 1 | library(zoo) 2 | library(xts) 3 | library(fts) 4 | library(tseries) 5 | library(its) 6 | library(xtable) 7 | 8 | ## indices = rep(NA, 100000) 9 | ## for (i in 1:100000) 10 | ## indices[i] <- paste(sample(letters, 10), collapse="") 11 | 12 | 13 | 14 | ## x <- zoo(rnorm(100000), indices) 15 | ## y <- zoo(rnorm(90000), indices[sample(1:100000, 90000)]) 16 | 17 | ## indices <- as.POSIXct(1:100000) 18 | 19 | indices <- as.POSIXct(Sys.Date()) + seq(1, 100000000, 100) 20 | 21 | sz <- 500000 22 | 23 | ## x <- xts(rnorm(sz), sample(indices, sz)) 24 | ## y <- xts(rnorm(sz), sample(indices, sz)) 25 | 26 | zoo.bench <- function(){ 27 | x <- zoo(rnorm(sz), sample(indices, sz)) 28 | y <- zoo(rnorm(sz), sample(indices, sz)) 29 | timeit(function() {x + y}) 30 | } 31 | 32 | xts.bench <- function(){ 33 | x <- xts(rnorm(sz), sample(indices, sz)) 34 | y <- xts(rnorm(sz), sample(indices, sz)) 35 | timeit(function() {x + y}) 36 | } 37 | 38 | fts.bench <- function(){ 39 | x <- fts(rnorm(sz), sort(sample(indices, sz))) 40 | y <- fts(rnorm(sz), sort(sample(indices, sz))) 41 | timeit(function() {x + y}) 42 | } 43 | 44 | its.bench <- function(){ 45 | x <- its(rnorm(sz), sort(sample(indices, sz))) 46 | y <- its(rnorm(sz), sort(sample(indices, sz))) 47 | timeit(function() {x + y}) 48 | } 49 | 50 | irts.bench <- function(){ 51 | x <- irts(sort(sample(indices, sz)), rnorm(sz)) 52 | y <- irts(sort(sample(indices, sz)), rnorm(sz)) 53 |
timeit(function() {x + y}) 54 | } 55 | 56 | timeit <- function(f){ 57 | timings <- numeric() 58 | for (i in 1:10) { 59 | gc() 60 | timings[i] = system.time(f())[3] 61 | } 62 | mean(timings) 63 | } 64 | 65 | bench <- function(){ 66 | results <- c(xts.bench(), fts.bench(), its.bench(), zoo.bench()) 67 | names <- c("xts", "fts", "its", "zoo") 68 | data.frame(results, names) 69 | } 70 | 71 | result <- bench() 72 | -------------------------------------------------------------------------------- /bench/zoo_bench.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | from pandas.util.testing import rands 3 | 4 | n = 1000000 5 | # indices = Index([rands(10) for _ in xrange(n)]) 6 | 7 | 8 | def sample(values, k): 9 | sampler = np.random.permutation(len(values)) 10 | return values.take(sampler[:k]) 11 | sz = 500000 12 | rng = np.arange(0, 10000000000000, 10000000) 13 | stamps = np.datetime64(datetime.now()).view('i8') + rng 14 | idx1 = np.sort(sample(stamps, sz)) 15 | idx2 = np.sort(sample(stamps, sz)) 16 | ts1 = Series(np.random.randn(sz), idx1) 17 | ts2 = Series(np.random.randn(sz), idx2) 18 | 19 | 20 | # subsample_size = 90000 21 | 22 | # x = Series(np.random.randn(100000), indices) 23 | # y = Series(np.random.randn(subsample_size), 24 | # index=sample(indices, subsample_size)) 25 | 26 | 27 | # lx = larry(np.random.randn(100000), [list(indices)]) 28 | # ly = larry(np.random.randn(subsample_size), [list(y.index)]) 29 | 30 | # Benchmark 1: Two 1-million length time series (int64-based index) with 31 | # randomly chosen timestamps 32 | 33 | # Benchmark 2: Join two 5-variate time series DataFrames (outer and inner join) 34 | 35 | # df1 = DataFrame(np.random.randn(1000000, 5), idx1, columns=range(5)) 36 | # df2 = DataFrame(np.random.randn(1000000, 5), idx2, columns=range(5, 10)) 37 | -------------------------------------------------------------------------------- /ci/README.txt: 
-------------------------------------------------------------------------------- 1 | Travis is a ci service that's well-integrated with GitHub. 2 | The following types of breakage should be detected 3 | by Travis builds: 4 | 5 | 1) Failing tests on any supported version of Python. 6 | 2) Pandas should install and the tests should run if no optional deps are installed. 7 | That also means tests which rely on optional deps need to raise SkipTest() 8 | if the dep is missing. 9 | 3) unicode related fails when running under exotic locales. 10 | 11 | We tried running the vbench suite for a while, but with varying load 12 | on Travis machines, that wasn't useful. 13 | 14 | Travis currently (4/2013) has a 5-job concurrency limit. Exceeding it 15 | basically doubles the total runtime for a commit through travis, and 16 | since dep+pandas installation is already quite long, this should become 17 | a hard limit on concurrent travis runs. 18 | -------------------------------------------------------------------------------- /ci/after_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #wget https://raw.github.com/y-p/ScatterCI-CLI/master/scatter_cli.py 4 | #chmod u+x scatter_cli.py 5 | 6 | pip install -I requests==2.1.0 7 | echo "${TRAVIS_PYTHON_VERSION:0:4}" 8 | if [ x"${TRAVIS_PYTHON_VERSION:0:4}" == x"2.6" ]; then 9 | pip install simplejson; 10 | fi 11 | 12 | # ScatterCI accepts a build log, but currently does nothing with it. 
13 | echo '' > /tmp/build.log 14 | 15 | # not exposed in the build logs 16 | #export SCATTERCI_ACCESS_KEY= 17 | #export SCATTERCI_HOST= 18 | 19 | # Generate a json file describing system and dep versions 20 | ci/print_versions.py -j /tmp/env.json 21 | 22 | # nose ran using "--with-xunit --xunit-file nosetest.xml" and generated /tmp/nosetest.xml 23 | # Will timeout if server not available, and should not fail the build 24 | #python scatter_cli.py --xunit-file /tmp/nosetests.xml --log-file /tmp/build.log --env-file /tmp/env.json --build-name "$JOB_NAME" --succeed 25 | 26 | true # never fail because bad things happened here 27 | -------------------------------------------------------------------------------- /ci/before_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If envars.sh determined we're running in an authorized fork 4 | # and the user opted in to the network cache, and that cached versions 5 | # are available on the cache server, download and deploy the cached 6 | # files to the local filesystem 7 | 8 | echo "inside $0" 9 | 10 | # overview 11 | sudo apt-get update $APT_ARGS # run apt-get update for all versions 12 | 13 | true # never fail because bad things happened here 14 | -------------------------------------------------------------------------------- /ci/build_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | cd "$TRAVIS_BUILD_DIR" 5 | 6 | git show --pretty="format:" --name-only HEAD~5.. --first-parent | grep -P "rst|txt|doc" 7 | 8 | if [ "$?" 
!= "0" ]; then 9 | echo "Skipping doc build, none were modified" 10 | # nope, skip docs build 11 | exit 0 12 | fi 13 | 14 | 15 | if [ x"$DOC_BUILD" != x"" ]; then 16 | # we're running network tests, let's build the docs in the meantim 17 | echo "Will build docs" 18 | pip install sphinx==1.1.3 ipython==1.1.0 19 | 20 | mv "$TRAVIS_BUILD_DIR"/doc /tmp 21 | cd /tmp/doc 22 | 23 | rm /tmp/doc/source/api.rst # no R 24 | rm /tmp/doc/source/r_interface.rst # no R 25 | 26 | echo ############################### > /tmp/doc.log 27 | echo # Log file for the doc build # > /tmp/doc.log 28 | echo ############################### > /tmp/doc.log 29 | echo "" > /tmp/doc.log 30 | echo -e "y\n" | ./make.py --no-api 2>&1 31 | 32 | cd /tmp/doc/build/html 33 | git config --global user.email "pandas-docs-bot@localhost.foo" 34 | git config --global user.name "pandas-docs-bot" 35 | 36 | git init 37 | touch README 38 | git add README 39 | git commit -m "Initial commit" --allow-empty 40 | git branch gh-pages 41 | git checkout gh-pages 42 | touch .nojekyll 43 | git add --all . 44 | git commit -m "Version" --allow-empty 45 | git remote add origin https://$GH_TOKEN@github.com/pandas-docs/pandas-docs-travis 46 | git push origin gh-pages -f 47 | fi 48 | 49 | exit 0 50 | -------------------------------------------------------------------------------- /ci/ironcache/get.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | import re 6 | import os 7 | import time 8 | import json 9 | import base64 10 | from hashlib import sha1 11 | from iron_cache import * 12 | import traceback as tb 13 | 14 | key='KEY.%s.%s' %(os.environ.get('TRAVIS_REPO_SLUG','unk'), 15 | os.environ.get('JOB_NAME','unk')) 16 | print(key) 17 | 18 | if sys.version_info[0] > 2: 19 | key = bytes(key,encoding='utf8') 20 | 21 | key = sha1(key).hexdigest()[:8]+'.' 
22 | 23 | b = b'' 24 | cache = IronCache() 25 | for i in range(20): 26 | print("getting %s" % key+str(i)) 27 | try: 28 | item = cache.get(cache="travis", key=key+str(i)) 29 | v = item.value 30 | if sys.version_info[0] > 2: 31 | v = bytes(v,encoding='utf8') 32 | b += bytes(base64.b64decode(v)) 33 | except Exception as e: 34 | try: 35 | print(tb.format_exc(e)) 36 | except: 37 | print("exception during exception, oh my") 38 | break 39 | 40 | with open(os.path.join(os.environ.get('HOME',''),"ccache.7z"),'wb') as f: 41 | f.write(b) 42 | -------------------------------------------------------------------------------- /ci/ironcache/put.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | import re 6 | import os 7 | import time 8 | import json 9 | import base64 10 | from hashlib import sha1 11 | from iron_cache import * 12 | 13 | key='KEY.%s.%s' %(os.environ.get('TRAVIS_REPO_SLUG','unk'), 14 | os.environ.get('JOB_NAME','unk')) 15 | 16 | key='KEY.%s.%s' %(os.environ.get('TRAVIS_REPO_SLUG','unk'), 17 | os.environ.get('JOB_NAME','unk')) 18 | print(key) 19 | 20 | if sys.version_info[0] > 2: 21 | key = bytes(key,encoding='utf8') 22 | 23 | key = sha1(key).hexdigest()[:8]+'.' 
24 | 25 | os.chdir(os.environ.get('HOME')) 26 | 27 | cache = IronCache() 28 | 29 | i=0 30 | 31 | for i, fname in enumerate(sorted([x for x in os.listdir('.') if re.match("ccache.\d+$",x)])): 32 | print("Putting %s" % key+str(i)) 33 | with open(fname,"rb") as f: 34 | s= f.read() 35 | value=base64.b64encode(s) 36 | if isinstance(value, bytes): 37 | value = value.decode('ascii') 38 | item = cache.put(cache="travis", key=key+str(i), value=value,options=dict(expires_in=24*60*60)) 39 | 40 | # print("foo") 41 | for i in range(i+1,20): 42 | 43 | try: 44 | item = cache.delete(key+str(i),cache='travis') 45 | print("Deleted %s" % key+str(i)) 46 | except: 47 | break 48 | pass 49 | -------------------------------------------------------------------------------- /ci/prep_ccache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$IRON_TOKEN" ]; then 4 | 5 | home_dir=$(pwd) 6 | 7 | # install the compiler cache 8 | sudo apt-get $APT_ARGS install ccache p7zip-full 9 | # iron_cache, pending py3 fixes upstream 10 | pip install -I --allow-external --allow-insecure git+https://github.com/iron-io/iron_cache_python.git@8a451c7d7e4d16e0c3bedffd0f280d5d9bd4fe59#egg=iron_cache 11 | 12 | python ci/ironcache/get.py 13 | ccache -C 14 | 15 | clear_cache=0 16 | if [ -f ~/ccache.7z ]; then 17 | echo "Cache retrieved" 18 | clear_cache=1 19 | cd $HOME 20 | 7za e $HOME/ccache.7z 21 | # ls -l $HOME 22 | cd / 23 | tar xvf $HOME/ccache 24 | rm -rf $HOME/ccache.7z 25 | rm -rf $HOME/ccache 26 | 27 | fi 28 | 29 | # did the last commit change cython files? 
30 | cd $home_dir 31 | 32 | retval=$(git diff HEAD~3 --numstat | grep -P "pyx|pxd"|wc -l) 33 | echo "number of cython files changed: $retval" 34 | 35 | if [ $clear_cache -eq 1 ] && [ $retval -eq 0 ] 36 | then 37 | # nope, reuse cython files 38 | echo "Will reuse cached cython file" 39 | touch "$TRAVIS_BUILD_DIR"/pandas/*.c 40 | touch "$TRAVIS_BUILD_DIR"/pandas/src/*.c 41 | touch "$TRAVIS_BUILD_DIR"/pandas/*.cpp 42 | else 43 | echo "Rebuilding cythonized files" 44 | fi 45 | fi 46 | 47 | exit 0 48 | -------------------------------------------------------------------------------- /ci/print_skipped.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import math 5 | import xml.etree.ElementTree as et 6 | 7 | 8 | def parse_results(filename): 9 | tree = et.parse(filename) 10 | root = tree.getroot() 11 | skipped = [] 12 | 13 | current_class = old_class = '' 14 | i = 1 15 | assert i - 1 == len(skipped) 16 | for el in root.findall('testcase'): 17 | cn = el.attrib['classname'] 18 | for sk in el.findall('skipped'): 19 | old_class = current_class 20 | current_class = cn 21 | name = '{classname}.{name}'.format(classname=current_class, 22 | name=el.attrib['name']) 23 | msg = sk.attrib['message'] 24 | out = '' 25 | if old_class != current_class: 26 | ndigits = int(math.log(i, 10) + 1) 27 | out += ('-' * (len(name + msg) + 4 + ndigits) + '\n') # 4 for : + space + # + space 28 | out += '#{i} {name}: {msg}'.format(i=i, name=name, msg=msg) 29 | skipped.append(out) 30 | i += 1 31 | assert i - 1 == len(skipped) 32 | assert i - 1 == len(skipped) 33 | assert len(skipped) == int(root.attrib['skip']) 34 | return '\n'.join(skipped) 35 | 36 | 37 | def main(args): 38 | print('SKIPPED TESTS:') 39 | print(parse_results(args.filename)) 40 | return 0 41 | 42 | 43 | def parse_args(): 44 | import argparse 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument('filename', help='XUnit file to parse') 47 | 
return parser.parse_args() 48 | 49 | 50 | if __name__ == '__main__': 51 | sys.exit(main(parse_args())) 52 | -------------------------------------------------------------------------------- /ci/print_versions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def show_versions(as_json=False): 5 | import imp 6 | import os 7 | fn = __file__ 8 | this_dir = os.path.dirname(fn) 9 | pandas_dir = os.path.abspath(os.path.join(this_dir, "..")) 10 | sv_path = os.path.join(pandas_dir, 'pandas', 'util') 11 | mod = imp.load_module( 12 | 'pvmod', *imp.find_module('print_versions', [sv_path])) 13 | return mod.show_versions(as_json) 14 | 15 | 16 | if __name__ == '__main__': 17 | # optparse is 2.6-safe 18 | from optparse import OptionParser 19 | parser = OptionParser() 20 | parser.add_option("-j", "--json", metavar="FILE", nargs=1, 21 | help="Save output as JSON into file, pass in '-' to output to stdout") 22 | 23 | (options, args) = parser.parse_args() 24 | 25 | if options.json == "-": 26 | options.json = True 27 | 28 | show_versions(as_json=options.json) 29 | -------------------------------------------------------------------------------- /ci/requirements-2.6.txt: -------------------------------------------------------------------------------- 1 | numpy==1.6.1 2 | cython==0.19.1 3 | python-dateutil==1.5 4 | pytz==2013b 5 | http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz 6 | html5lib==1.0b2 7 | numexpr==1.4.2 8 | sqlalchemy==0.7.1 9 | pymysql==0.6.0 10 | psycopg2==2.5 11 | scipy==0.11.0 12 | statsmodels==0.4.3 13 | xlwt==0.7.5 14 | openpyxl==2.0.3 15 | xlsxwriter==0.4.6 16 | xlrd==0.9.2 17 | -------------------------------------------------------------------------------- /ci/requirements-2.7.txt: -------------------------------------------------------------------------------- 1 | python-dateutil==2.1 2 | pytz==2013b 3 | xlwt==0.7.5 4 | numpy==1.8.1 5 | cython==0.19.1 6 
| bottleneck==0.6.0 7 | numexpr==2.2.2 8 | tables==2.3.1 9 | matplotlib==1.3.1 10 | openpyxl==1.6.2 11 | xlsxwriter==0.4.6 12 | xlrd==0.9.2 13 | patsy==0.1.0 14 | sqlalchemy==0.9.6 15 | pymysql==0.6.1 16 | psycopg2==2.5.2 17 | html5lib==1.0b2 18 | lxml==3.2.1 19 | scipy==0.13.3 20 | beautifulsoup4==4.2.1 21 | statsmodels==0.5.0 22 | boto==2.26.1 23 | httplib2==0.8 24 | python-gflags==2.0 25 | google-api-python-client==1.2 26 | -------------------------------------------------------------------------------- /ci/requirements-2.7_LOCALE.txt: -------------------------------------------------------------------------------- 1 | python-dateutil 2 | pytz==2013b 3 | xlwt==0.7.5 4 | openpyxl==1.6.2 5 | xlsxwriter==0.4.6 6 | xlrd==0.9.2 7 | numpy==1.6.1 8 | cython==0.19.1 9 | bottleneck==0.6.0 10 | matplotlib==1.3.0 11 | patsy==0.1.0 12 | sqlalchemy==0.8.1 13 | html5lib==1.0b2 14 | lxml==3.2.1 15 | scipy==0.10.0 16 | beautifulsoup4==4.2.1 17 | statsmodels==0.4.3 18 | bigquery==2.0.17 19 | -------------------------------------------------------------------------------- /ci/requirements-2.7_NUMPY_DEV_1_8_x.txt: -------------------------------------------------------------------------------- 1 | python-dateutil 2 | pytz==2013b 3 | cython==0.19.1 4 | -------------------------------------------------------------------------------- /ci/requirements-2.7_NUMPY_DEV_master.txt: -------------------------------------------------------------------------------- 1 | python-dateutil 2 | pytz 3 | cython==0.19.1 4 | -------------------------------------------------------------------------------- /ci/requirements-3.2.txt: -------------------------------------------------------------------------------- 1 | python-dateutil==2.1 2 | pytz==2013b 3 | xlsxwriter==0.4.6 4 | xlrd==0.9.2 5 | numpy==1.7.1 6 | cython==0.19.1 7 | numexpr==2.1 8 | tables==3.0.0 9 | matplotlib==1.2.1 10 | patsy==0.1.0 11 | lxml==3.2.1 12 | scipy==0.12.0 13 | beautifulsoup4==4.2.1 14 | statsmodels==0.5.0 15 | 
-------------------------------------------------------------------------------- /ci/requirements-3.3.txt: -------------------------------------------------------------------------------- 1 | python-dateutil==2.2 2 | pytz==2013b 3 | openpyxl==1.6.2 4 | xlsxwriter==0.4.6 5 | xlrd==0.9.2 6 | html5lib==1.0b2 7 | numpy==1.8.0 8 | cython==0.19.1 9 | numexpr==2.3 10 | tables==3.1.0 11 | bottleneck==0.8.0 12 | matplotlib==1.2.1 13 | patsy==0.1.0 14 | lxml==3.2.1 15 | scipy==0.13.3 16 | beautifulsoup4==4.2.1 17 | statsmodels==0.5.0 18 | -------------------------------------------------------------------------------- /ci/requirements-3.4.txt: -------------------------------------------------------------------------------- 1 | python-dateutil 2 | pytz 3 | openpyxl 4 | xlsxwriter 5 | xlrd 6 | html5lib 7 | numpy==1.8.0 8 | cython==0.20.2 9 | scipy==0.13.3 10 | numexpr==2.4 11 | tables==3.1.0 12 | bottleneck==0.8.0 13 | matplotlib==1.3.1 14 | patsy 15 | lxml==3.3.5 16 | sqlalchemy==0.9.6 17 | pymysql==0.6.1 18 | psycopg2==2.5.2 19 | beautifulsoup4 20 | -------------------------------------------------------------------------------- /ci/script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "inside $0" 4 | 5 | if [ -n "$LOCALE_OVERRIDE" ]; then 6 | export LC_ALL="$LOCALE_OVERRIDE"; 7 | echo "Setting LC_ALL to $LOCALE_OVERRIDE" 8 | curdir="$(pwd)" 9 | cd /tmp 10 | pycmd='import pandas; print("pandas detected console encoding: %s" % pandas.get_option("display.encoding"))' 11 | python -c "$pycmd" 12 | cd "$curdir" 13 | fi 14 | 15 | # conditionally build and upload docs to GH/pandas-docs/pandas-docs/travis 16 | "$TRAVIS_BUILD_DIR"/ci/build_docs.sh 2>&1 > /tmp/doc.log & 17 | # doc build log will be shown after tests 18 | 19 | echo nosetests --exe -w /tmp -A "$NOSE_ARGS" pandas --with-xunit --xunit-file=/tmp/nosetests.xml 20 | nosetests --exe -w /tmp -A "$NOSE_ARGS" pandas --with-xunit --xunit-file=/tmp/nosetests.xml 21 
| 22 | RET="$?" 23 | 24 | # wait until subprocesses finish (build_docs.sh) 25 | wait 26 | 27 | exit "$RET" 28 | -------------------------------------------------------------------------------- /ci/speedpack/Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | Vagrant.configure("2") do |config| 4 | config.vm.box = "precise64" 5 | config.vm.box_url = "http://files.vagrantup.com/precise64.box" 6 | 7 | # config.vbguest.auto_update = true 8 | # config.vbguest.no_remote = true 9 | 10 | config.vm.synced_folder File.expand_path("..", Dir.pwd), "/reqf" 11 | config.vm.synced_folder "wheelhouse", "/wheelhouse" 12 | 13 | config.vm.provider :virtualbox do |vb| 14 | vb.customize ["modifyvm", :id, "--cpus", "4"] 15 | vb.customize ["modifyvm", :id, "--memory", "2048"] 16 | vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"] 17 | vb.customize ["modifyvm", :id, "--natdnsproxy1", "on"] 18 | end 19 | 20 | config.vm.provision :shell, :path => "build.sh" 21 | 22 | end 23 | -------------------------------------------------------------------------------- /ci/speedpack/nginx/nginx.conf.template: -------------------------------------------------------------------------------- 1 | #user nobody; 2 | worker_processes 1; 3 | 4 | #error_log logs/error.log; 5 | #error_log logs/error.log notice; 6 | #error_log logs/error.log info; 7 | 8 | #pid logs/nginx.pid; 9 | 10 | 11 | events { 12 | worker_connections 1024; 13 | } 14 | 15 | 16 | http { 17 | include mime.types; 18 | default_type application/octet-stream; 19 | 20 | #log_format main '$remote_addr - $remote_user [$time_local] "$request" ' 21 | # '$status $body_bytes_sent "$http_referer" ' 22 | # '"$http_user_agent" "$http_x_forwarded_for"'; 23 | 24 | #access_log logs/access.log on; 25 | 26 | sendfile on; 27 | #tcp_nopush on; 28 | 29 | #keepalive_timeout 0; 30 | keepalive_timeout 65; 31 | 32 | #gzip on; 33 | 34 | server { 35 | listen 
$OPENSHIFT_IP:$OPENSHIFT_PORT; 36 | 37 | access_log access.log ; 38 | sendfile on; 39 | 40 | location / { 41 | root ../../app-root/data/store/; 42 | autoindex on; 43 | } 44 | 45 | 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /ci/submit_ccache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | home_dir=$(pwd) 4 | ccache -s 5 | 6 | MISSES=$(ccache -s | grep "cache miss" | grep -Po "\d+") 7 | echo "MISSES: $MISSES" 8 | 9 | if [ x"$MISSES" == x"0" ]; then 10 | echo "No cache misses detected, skipping upload" 11 | exit 0 12 | fi 13 | 14 | if [ "$IRON_TOKEN" ]; then 15 | 16 | rm -rf $HOME/ccache.7z 17 | 18 | tar cf - $HOME/.ccache \ 19 | "$TRAVIS_BUILD_DIR"/pandas/{index,algos,lib,tslib,parser,hashtable}.c \ 20 | "$TRAVIS_BUILD_DIR"/pandas/src/{sparse,testing}.c \ 21 | "$TRAVIS_BUILD_DIR"/pandas/msgpack.cpp \ 22 | | 7za a -si $HOME/ccache.7z 23 | 24 | split -b 500000 -d $HOME/ccache.7z $HOME/ccache. 25 | 26 | python ci/ironcache/put.py 27 | fi; 28 | 29 | exit 0 30 | -------------------------------------------------------------------------------- /doc/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | {% extends "!autosummary/class.rst" %} 2 | 3 | {% block methods %} 4 | {% if methods %} 5 | 6 | .. 7 | HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages. 8 | .. autosummary:: 9 | :toctree: 10 | {% for item in all_methods %} 11 | {%- if not item.startswith('_') or item in ['__call__'] %} 12 | {{ name }}.{{ item }} 13 | {%- endif -%} 14 | {%- endfor %} 15 | 16 | {% endif %} 17 | {% endblock %} 18 | 19 | {% block attributes %} 20 | {% if attributes %} 21 | 22 | .. 23 | HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages. 24 | .. 
autosummary:: 25 | :toctree: 26 | {% for item in all_attributes %} 27 | {%- if not item.startswith('_') %} 28 | {{ name }}.{{ item }} 29 | {%- endif -%} 30 | {%- endfor %} 31 | 32 | {% endif %} 33 | {% endblock %} 34 | -------------------------------------------------------------------------------- /doc/data/fx_prices: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/data/fx_prices -------------------------------------------------------------------------------- /doc/data/mindex_ex.csv: -------------------------------------------------------------------------------- 1 | year,indiv,zit,xit 2 | 1977,"A",1.2,.6 3 | 1977,"B",1.5,.5 4 | 1977,"C",1.7,.8 5 | 1978,"A",.2,.06 6 | 1978,"B",.7,.2 7 | 1978,"C",.8,.3 8 | 1978,"D",.9,.5 9 | 1978,"E",1.4,.9 10 | 1979,"C",.2,.15 11 | 1979,"D",.14,.05 12 | 1979,"E",.5,.15 13 | 1979,"F",1.2,.5 14 | 1979,"G",3.4,1.9 15 | 1979,"H",5.4,2.7 16 | 1979,"I",6.4,1.2 17 | -------------------------------------------------------------------------------- /doc/data/test.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/data/test.xls -------------------------------------------------------------------------------- /doc/plots/stats/moment_plots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import matplotlib.pyplot as plt 4 | import pandas.util.testing as t 5 | import pandas.stats.moments as m 6 | 7 | 8 | def test_series(n=1000): 9 | t.N = n 10 | s = t.makeTimeSeries() 11 | return s 12 | 13 | 14 | def plot_timeseries(*args, **kwds): 15 | n = len(args) 16 | 17 | fig, axes = plt.subplots(n, 1, figsize=kwds.get('size', (10, 5)), 18 | sharex=True) 19 | titles = kwds.get('titles', None) 20 | 21 | for k in range(1, n + 1): 22 | ax = 
axes[k - 1] 23 | ts = args[k - 1] 24 | ax.plot(ts.index, ts.values) 25 | 26 | if titles: 27 | ax.set_title(titles[k - 1]) 28 | 29 | fig.autofmt_xdate() 30 | fig.subplots_adjust(bottom=0.10, top=0.95) 31 | -------------------------------------------------------------------------------- /doc/plots/stats/moments_ewma.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas.util.testing as t 3 | import pandas.stats.moments as m 4 | 5 | t.N = 200 6 | s = t.makeTimeSeries().cumsum() 7 | 8 | plt.figure(figsize=(10, 5)) 9 | plt.plot(s.index, s.values) 10 | plt.plot(s.index, m.ewma(s, 20, min_periods=1).values) 11 | f = plt.gcf() 12 | f.autofmt_xdate() 13 | 14 | plt.show() 15 | plt.close('all') 16 | -------------------------------------------------------------------------------- /doc/plots/stats/moments_ewmvol.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas.util.testing as t 3 | import pandas.stats.moments as m 4 | 5 | t.N = 500 6 | ts = t.makeTimeSeries() 7 | ts[::100] = 20 8 | 9 | s = ts.cumsum() 10 | 11 | 12 | plt.figure(figsize=(10, 5)) 13 | plt.plot(s.index, m.ewmvol(s, span=50, min_periods=1).values, color='b') 14 | plt.plot(s.index, m.rolling_std(s, 50, min_periods=1).values, color='r') 15 | 16 | plt.title('Exp-weighted std with shocks') 17 | plt.legend(('Exp-weighted', 'Equal-weighted')) 18 | 19 | f = plt.gcf() 20 | f.autofmt_xdate() 21 | 22 | plt.show() 23 | plt.close('all') 24 | -------------------------------------------------------------------------------- /doc/plots/stats/moments_expw.py: -------------------------------------------------------------------------------- 1 | from moment_plots import * 2 | 3 | np.random.seed(1) 4 | 5 | ts = test_series(500) * 10 6 | 7 | # ts[::100] = 20 8 | 9 | s = ts.cumsum() 10 | 11 | fig, axes = plt.subplots(3, 1, figsize=(8, 10), sharex=True) 12 | 13 | ax0, 
ax1, ax2 = axes 14 | 15 | ax0.plot(s.index, s.values) 16 | ax0.set_title('time series') 17 | 18 | ax1.plot(s.index, m.ewma(s, span=50, min_periods=1).values, color='b') 19 | ax1.plot(s.index, m.rolling_mean(s, 50, min_periods=1).values, color='r') 20 | ax1.set_title('rolling_mean vs. ewma') 21 | 22 | line1 = ax2.plot( 23 | s.index, m.ewmstd(s, span=50, min_periods=1).values, color='b') 24 | line2 = ax2.plot( 25 | s.index, m.rolling_std(s, 50, min_periods=1).values, color='r') 26 | ax2.set_title('rolling_std vs. ewmstd') 27 | 28 | fig.legend((line1, line2), 29 | ('Exp-weighted', 'Equal-weighted'), 30 | loc='upper right') 31 | fig.autofmt_xdate() 32 | fig.subplots_adjust(bottom=0.10, top=0.95) 33 | 34 | plt.show() 35 | plt.close('all') 36 | -------------------------------------------------------------------------------- /doc/plots/stats/moments_rolling.py: -------------------------------------------------------------------------------- 1 | from moment_plots import * 2 | 3 | ts = test_series() 4 | s = ts.cumsum() 5 | 6 | s[20:50] = np.NaN 7 | s[120:150] = np.NaN 8 | plot_timeseries(s, 9 | m.rolling_count(s, 50), 10 | m.rolling_sum(s, 50, min_periods=10), 11 | m.rolling_mean(s, 50, min_periods=10), 12 | m.rolling_std(s, 50, min_periods=10), 13 | m.rolling_skew(s, 50, min_periods=10), 14 | m.rolling_kurt(s, 50, min_periods=10), 15 | size=(10, 12), 16 | titles=('time series', 17 | 'rolling_count', 18 | 'rolling_sum', 19 | 'rolling_mean', 20 | 'rolling_std', 21 | 'rolling_skew', 22 | 'rolling_kurt')) 23 | plt.show() 24 | plt.close('all') 25 | -------------------------------------------------------------------------------- /doc/plots/stats/moments_rolling_binary.py: -------------------------------------------------------------------------------- 1 | from moment_plots import * 2 | 3 | np.random.seed(1) 4 | 5 | ts = test_series() 6 | s = ts.cumsum() 7 | ts2 = test_series() 8 | s2 = ts2.cumsum() 9 | 10 | s[20:50] = np.NaN 11 | s[120:150] = np.NaN 12 | fig, axes = 
plt.subplots(3, 1, figsize=(8, 10), sharex=True) 13 | 14 | ax0, ax1, ax2 = axes 15 | 16 | ax0.plot(s.index, s.values) 17 | ax0.plot(s2.index, s2.values) 18 | ax0.set_title('time series') 19 | 20 | ax1.plot(s.index, m.rolling_corr(s, s2, 50, min_periods=1).values) 21 | ax1.set_title('rolling_corr') 22 | 23 | ax2.plot(s.index, m.rolling_cov(s, s2, 50, min_periods=1).values) 24 | ax2.set_title('rolling_cov') 25 | 26 | fig.autofmt_xdate() 27 | fig.subplots_adjust(bottom=0.10, top=0.95) 28 | 29 | plt.show() 30 | plt.close('all') 31 | -------------------------------------------------------------------------------- /doc/source/_static/df_repr_truncated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/df_repr_truncated.png -------------------------------------------------------------------------------- /doc/source/_static/eval-perf-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/eval-perf-small.png -------------------------------------------------------------------------------- /doc/source/_static/eval-perf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/eval-perf.png -------------------------------------------------------------------------------- /doc/source/_static/legacy_0.10.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/legacy_0.10.h5 -------------------------------------------------------------------------------- /doc/source/_static/query-perf-small.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/query-perf-small.png -------------------------------------------------------------------------------- /doc/source/_static/query-perf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/query-perf.png -------------------------------------------------------------------------------- /doc/source/_static/stub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/stub -------------------------------------------------------------------------------- /doc/source/_static/trunc_after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/trunc_after.png -------------------------------------------------------------------------------- /doc/source/_static/trunc_before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/source/_static/trunc_before.png -------------------------------------------------------------------------------- /doc/source/contributing.rst: -------------------------------------------------------------------------------- 1 | .. _contributing: 2 | 3 | ********************** 4 | Contributing to pandas 5 | ********************** 6 | 7 | See the following links: 8 | 9 | - `The developer pages on the website 10 | `_ 11 | - `Guidelines on bug reports and pull requests 12 | `_ 13 | - `Some extra tips on using git 14 | `_ 15 | 16 | .. 
include:: ../README.rst 17 | -------------------------------------------------------------------------------- /doc/source/themes/nature_with_gtoc/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | stylesheet = nature.css 4 | pygments_style = tango 5 | -------------------------------------------------------------------------------- /doc/source/v0.7.1.txt: -------------------------------------------------------------------------------- 1 | .. _whatsnew_0701: 2 | 3 | v.0.7.1 (February 29, 2012) 4 | --------------------------- 5 | 6 | This release includes a few new features and addresses over a dozen bugs in 7 | 0.7.0. 8 | 9 | New features 10 | ~~~~~~~~~~~~ 11 | 12 | - Add ``to_clipboard`` function to pandas namespace for writing objects to 13 | the system clipboard (:issue:`774`) 14 | - Add ``itertuples`` method to DataFrame for iterating through the rows of a 15 | dataframe as tuples (:issue:`818`) 16 | - Add ability to pass fill_value and method to DataFrame and Series align 17 | method (:issue:`806`, :issue:`807`) 18 | - Add fill_value option to reindex, align methods (:issue:`784`) 19 | - Enable concat to produce DataFrame from Series (:issue:`787`) 20 | - Add ``between`` method to Series (:issue:`802`) 21 | - Add HTML representation hook to DataFrame for the IPython HTML notebook 22 | (:issue:`773`) 23 | - Support for reading Excel 2007 XML documents using openpyxl 24 | 25 | Performance improvements 26 | ~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | - Improve performance and memory usage of fillna on DataFrame 29 | - Can concatenate a list of Series along axis=1 to obtain a DataFrame (:issue:`787`) 30 | 31 | -------------------------------------------------------------------------------- /doc/source/v0.7.2.txt: -------------------------------------------------------------------------------- 1 | .. 
_whatsnew_0702: 2 | 3 | v.0.7.2 (March 16, 2012) 4 | --------------------------- 5 | 6 | This release targets bugs in 0.7.1, and adds a few minor features. 7 | 8 | New features 9 | ~~~~~~~~~~~~ 10 | 11 | - Add additional tie-breaking methods in DataFrame.rank (:issue:`874`) 12 | - Add ascending parameter to rank in Series, DataFrame (:issue:`875`) 13 | - Add coerce_float option to DataFrame.from_records (:issue:`893`) 14 | - Add sort_columns parameter to allow unsorted plots (:issue:`918`) 15 | - Enable column access via attributes on GroupBy (:issue:`882`) 16 | - Can pass dict of values to DataFrame.fillna (:issue:`661`) 17 | - Can select multiple hierarchical groups by passing list of values in .ix 18 | (:issue:`134`) 19 | - Add ``axis`` option to DataFrame.fillna (:issue:`174`) 20 | - Add level keyword to ``drop`` for dropping values from a level (:issue:`159`) 21 | 22 | Performance improvements 23 | ~~~~~~~~~~~~~~~~~~~~~~~~ 24 | 25 | - Use khash for Series.value_counts, add raw function to algorithms.py (:issue:`861`) 26 | - Intercept __builtin__.sum in groupby (:issue:`885`) 27 | 28 | -------------------------------------------------------------------------------- /doc/source/v0.8.1.txt: -------------------------------------------------------------------------------- 1 | .. _whatsnew_0801: 2 | 3 | v0.8.1 (July 22, 2012) 4 | ---------------------- 5 | 6 | This release includes a few new features, performance enhancements, and over 30 7 | bug fixes from 0.8.0. New features include notably NA friendly string 8 | processing functionality and a series of new plot types and options. 
9 | 10 | New features 11 | ~~~~~~~~~~~~ 12 | 13 | - Add :ref:`vectorized string processing methods ` 14 | accessible via Series.str (:issue:`620`) 15 | - Add option to disable adjustment in EWMA (:issue:`1584`) 16 | - :ref:`Radviz plot ` (:issue:`1566`) 17 | - :ref:`Parallel coordinates plot ` 18 | - :ref:`Bootstrap plot ` 19 | - Per column styles and secondary y-axis plotting (:issue:`1559`) 20 | - New datetime converters millisecond plotting (:issue:`1599`) 21 | - Add option to disable "sparse" display of hierarchical indexes (:issue:`1538`) 22 | - Series/DataFrame's ``set_index`` method can :ref:`append levels 23 | ` to an existing Index/MultiIndex (:issue:`1569`, :issue:`1577`) 24 | 25 | Performance improvements 26 | ~~~~~~~~~~~~~~~~~~~~~~~~ 27 | 28 | - Improved implementation of rolling min and max (thanks to `Bottleneck 29 | `__ !) 30 | - Add accelerated ``'median'`` GroupBy option (:issue:`1358`) 31 | - Significantly improve the performance of parsing ISO8601-format date 32 | strings with ``DatetimeIndex`` or ``to_datetime`` (:issue:`1571`) 33 | - Improve the performance of GroupBy on single-key aggregations and use with 34 | Categorical types 35 | - Significant datetime parsing performance improvments 36 | 37 | -------------------------------------------------------------------------------- /doc/source/whatsnew.rst: -------------------------------------------------------------------------------- 1 | .. _whatsnew: 2 | 3 | .. currentmodule:: pandas 4 | 5 | .. ipython:: python 6 | :suppress: 7 | 8 | import numpy as np 9 | from pandas import * 10 | randn = np.random.randn 11 | np.set_printoptions(precision=4, suppress=True) 12 | options.display.max_rows = 15 13 | 14 | ********** 15 | What's New 16 | ********** 17 | 18 | These are new features and improvements of note in each release. 19 | 20 | .. include:: v0.15.0.txt 21 | 22 | .. include:: v0.14.1.txt 23 | 24 | .. include:: v0.14.0.txt 25 | 26 | .. include:: v0.13.1.txt 27 | 28 | .. 
include:: v0.13.0.txt 29 | 30 | .. include:: v0.12.0.txt 31 | 32 | .. include:: v0.11.0.txt 33 | 34 | .. include:: v0.10.1.txt 35 | 36 | .. include:: v0.10.0.txt 37 | 38 | .. include:: v0.9.1.txt 39 | 40 | .. include:: v0.9.0.txt 41 | 42 | .. include:: v0.8.1.txt 43 | 44 | .. include:: v0.8.0.txt 45 | 46 | .. include:: v0.7.3.txt 47 | 48 | .. include:: v0.7.2.txt 49 | 50 | .. include:: v0.7.1.txt 51 | 52 | .. include:: v0.7.0.txt 53 | 54 | .. include:: v0.6.1.txt 55 | 56 | .. include:: v0.6.0.txt 57 | 58 | .. include:: v0.5.0.txt 59 | 60 | .. include:: v0.4.x.txt 61 | -------------------------------------------------------------------------------- /doc/sphinxext/README.rst: -------------------------------------------------------------------------------- 1 | sphinxext 2 | ========= 3 | 4 | This directory contains copies of different sphinx extensions in use in the 5 | pandas documentation. These copies originate from other projects: 6 | 7 | - ``numpydoc`` - Numpy's Sphinx extensions: this can be found at its own 8 | repository: https://github.com/numpy/numpydoc 9 | - ``ipython_directive`` and ``ipython_console_highlighting`` in the folder 10 | `ipython_sphinxext` - Sphinx extensions from IPython: these are included 11 | in IPython: https://github.com/ipython/ipython/tree/master/IPython/sphinxext 12 | 13 | .. note:: 14 | 15 | These copies are maintained at the respective projects, so fixes should, 16 | to the extent possible, be pushed upstream instead of only adapting our 17 | local copy to avoid divergence between the the local and upstream version. 
18 | -------------------------------------------------------------------------------- /doc/sphinxext/ipython_sphinxext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/doc/sphinxext/ipython_sphinxext/__init__.py -------------------------------------------------------------------------------- /doc/sphinxext/numpydoc/README.rst: -------------------------------------------------------------------------------- 1 | ===================================== 2 | numpydoc -- Numpy's Sphinx extensions 3 | ===================================== 4 | 5 | Numpy's documentation uses several custom extensions to Sphinx. These 6 | are shipped in this ``numpydoc`` package, in case you want to make use 7 | of them in third-party projects. 8 | 9 | The following extensions are available: 10 | 11 | - ``numpydoc``: support for the Numpy docstring format in Sphinx, and add 12 | the code description directives ``np:function``, ``np-c:function``, etc. 13 | that support the Numpy docstring syntax. 14 | 15 | - ``numpydoc.traitsdoc``: For gathering documentation about Traits attributes. 16 | 17 | - ``numpydoc.plot_directive``: Adaptation of Matplotlib's ``plot::`` 18 | directive. Note that this implementation may still undergo severe 19 | changes or eventually be deprecated. 20 | 21 | 22 | numpydoc 23 | ======== 24 | 25 | Numpydoc inserts a hook into Sphinx's autodoc that converts docstrings 26 | following the Numpy/Scipy format to a form palatable to Sphinx. 27 | 28 | Options 29 | ------- 30 | 31 | The following options can be set in conf.py: 32 | 33 | - numpydoc_use_plots: bool 34 | 35 | Whether to produce ``plot::`` directives for Examples sections that 36 | contain ``import matplotlib``. 37 | 38 | - numpydoc_show_class_members: bool 39 | 40 | Whether to show all members of a class in the Methods and Attributes 41 | sections automatically. 
42 | 43 | - numpydoc_class_members_toctree: bool 44 | 45 | Whether to create a Sphinx table of contents for the lists of class 46 | methods and attributes. If a table of contents is made, Sphinx expects 47 | each entry to have a separate page. 48 | 49 | - numpydoc_edit_link: bool (DEPRECATED -- edit your HTML template instead) 50 | 51 | Whether to insert an edit link after docstrings. 52 | -------------------------------------------------------------------------------- /doc/sphinxext/numpydoc/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | 3 | from .numpydoc import setup 4 | -------------------------------------------------------------------------------- /doc/sphinxext/numpydoc/tests/test_linkcode.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | 3 | import numpydoc.linkcode 4 | 5 | # No tests at the moment... 6 | -------------------------------------------------------------------------------- /doc/sphinxext/numpydoc/tests/test_phantom_import.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | 3 | import numpydoc.phantom_import 4 | 5 | # No tests at the moment... 6 | -------------------------------------------------------------------------------- /doc/sphinxext/numpydoc/tests/test_plot_directive.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | 3 | import numpydoc.plot_directive 4 | 5 | # No tests at the moment... 
6 | -------------------------------------------------------------------------------- /doc/sphinxext/numpydoc/tests/test_traitsdoc.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | 3 | import numpydoc.traitsdoc 4 | 5 | # No tests at the moment... 6 | -------------------------------------------------------------------------------- /examples/data/SOURCES: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/examples/data/SOURCES -------------------------------------------------------------------------------- /examples/finance.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some examples playing around with yahoo finance data 3 | """ 4 | 5 | from datetime import datetime 6 | from pandas.compat import zip 7 | 8 | import matplotlib.finance as fin 9 | import numpy as np 10 | from pylab import show 11 | 12 | 13 | from pandas import Index, DataFrame 14 | from pandas.core.datetools import BMonthEnd 15 | from pandas import ols 16 | 17 | startDate = datetime(2008, 1, 1) 18 | endDate = datetime(2009, 9, 1) 19 | 20 | 21 | def getQuotes(symbol, start, end): 22 | quotes = fin.quotes_historical_yahoo(symbol, start, end) 23 | dates, open, close, high, low, volume = zip(*quotes) 24 | 25 | data = { 26 | 'open': open, 27 | 'close': close, 28 | 'high': high, 29 | 'low': low, 30 | 'volume': volume 31 | } 32 | 33 | dates = Index([datetime.fromordinal(int(d)) for d in dates]) 34 | return DataFrame(data, index=dates) 35 | 36 | msft = getQuotes('MSFT', startDate, endDate) 37 | aapl = getQuotes('AAPL', startDate, endDate) 38 | goog = getQuotes('GOOG', startDate, endDate) 39 | ibm = getQuotes('IBM', startDate, endDate) 40 | 41 | px = DataFrame({'MSFT': msft['close'], 42 | 'IBM': ibm['close'], 43 | 'GOOG': goog['close'], 44 | 
'AAPL': aapl['close']}) 45 | returns = px / px.shift(1) - 1 46 | 47 | # Select dates 48 | 49 | subIndex = ibm.index[(ibm['close'] > 95) & (ibm['close'] < 100)] 50 | msftOnSameDates = msft.reindex(subIndex) 51 | 52 | # Insert columns 53 | 54 | msft['hi-lo spread'] = msft['high'] - msft['low'] 55 | ibm['hi-lo spread'] = ibm['high'] - ibm['low'] 56 | 57 | # Aggregate monthly 58 | 59 | 60 | def toMonthly(frame, how): 61 | offset = BMonthEnd() 62 | 63 | return frame.groupby(offset.rollforward).aggregate(how) 64 | 65 | msftMonthly = toMonthly(msft, np.mean) 66 | ibmMonthly = toMonthly(ibm, np.mean) 67 | 68 | # Statistics 69 | 70 | stdev = DataFrame({ 71 | 'MSFT': msft.std(), 72 | 'IBM': ibm.std() 73 | }) 74 | 75 | # Arithmetic 76 | 77 | ratios = ibm / msft 78 | 79 | # Works with different indices 80 | 81 | ratio = ibm / ibmMonthly 82 | monthlyRatio = ratio.reindex(ibmMonthly.index) 83 | 84 | # Ratio relative to past month average 85 | 86 | filledRatio = ibm / ibmMonthly.reindex(ibm.index, method='pad') 87 | -------------------------------------------------------------------------------- /examples/regressions.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import string 3 | 4 | import numpy as np 5 | 6 | from pandas.core.api import Series, DataFrame, DatetimeIndex 7 | from pandas.stats.api import ols 8 | 9 | N = 100 10 | 11 | start = datetime(2009, 9, 2) 12 | dateRange = DatetimeIndex(start, periods=N) 13 | 14 | 15 | def makeDataFrame(): 16 | data = DataFrame(np.random.randn(N, 7), 17 | columns=list(string.ascii_uppercase[:7]), 18 | index=dateRange) 19 | 20 | return data 21 | 22 | 23 | def makeSeries(): 24 | return Series(np.random.randn(N), index=dateRange) 25 | 26 | #------------------------------------------------------------------------------- 27 | # Standard rolling linear regression 28 | 29 | X = makeDataFrame() 30 | Y = makeSeries() 31 | 32 | model = ols(y=Y, x=X) 33 | 34 | print(model) 35 | 
36 | #------------------------------------------------------------------------------- 37 | # Panel regression 38 | 39 | data = { 40 | 'A': makeDataFrame(), 41 | 'B': makeDataFrame(), 42 | 'C': makeDataFrame() 43 | } 44 | 45 | Y = makeDataFrame() 46 | 47 | panelModel = ols(y=Y, x=data, window=50) 48 | 49 | model = ols(y=Y, x=data) 50 | 51 | print(panelModel) 52 | -------------------------------------------------------------------------------- /fake_pyrex/Pyrex/Distutils/__init__.py: -------------------------------------------------------------------------------- 1 | # work around broken setuptools monkey patching 2 | -------------------------------------------------------------------------------- /fake_pyrex/Pyrex/Distutils/build_ext.py: -------------------------------------------------------------------------------- 1 | build_ext = "yes, it's there!" 2 | -------------------------------------------------------------------------------- /fake_pyrex/Pyrex/__init__.py: -------------------------------------------------------------------------------- 1 | # work around broken setuptools monkey patching 2 | -------------------------------------------------------------------------------- /pandas/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable-msg=W0614,W0401,W0611,W0622 2 | 3 | __docformat__ = 'restructuredtext' 4 | 5 | try: 6 | from . 
import hashtable, tslib, lib 7 | except Exception: # pragma: no cover 8 | import sys 9 | e = sys.exc_info()[1] # Py25 and Py3 current exception syntax conflict 10 | print(e) 11 | if 'No module named lib' in str(e): 12 | raise ImportError('C extensions not built: if you installed already ' 13 | 'verify that you are not importing from the source ' 14 | 'directory') 15 | else: 16 | raise 17 | 18 | from datetime import datetime 19 | import numpy as np 20 | 21 | # XXX: HACK for NumPy 1.5.1 to suppress warnings 22 | try: 23 | np.seterr(all='ignore') 24 | except Exception: # pragma: no cover 25 | pass 26 | 27 | # numpy versioning 28 | from distutils.version import LooseVersion 29 | _np_version = np.version.short_version 30 | _np_version_under1p6 = LooseVersion(_np_version) < '1.6' 31 | _np_version_under1p7 = LooseVersion(_np_version) < '1.7' 32 | _np_version_under1p8 = LooseVersion(_np_version) < '1.8' 33 | _np_version_under1p9 = LooseVersion(_np_version) < '1.9' 34 | 35 | from pandas.version import version as __version__ 36 | from pandas.info import __doc__ 37 | 38 | # let init-time option registration happen 39 | import pandas.core.config_init 40 | 41 | from pandas.core.api import * 42 | from pandas.sparse.api import * 43 | from pandas.stats.api import * 44 | from pandas.tseries.api import * 45 | from pandas.io.api import * 46 | from pandas.computation.api import * 47 | 48 | from pandas.tools.describe import value_range 49 | from pandas.tools.merge import merge, concat, ordered_merge 50 | from pandas.tools.pivot import pivot_table, crosstab 51 | from pandas.tools.plotting import scatter_matrix, plot_params 52 | from pandas.tools.tile import cut, qcut 53 | from pandas.core.reshape import melt 54 | from pandas.util.print_versions import show_versions 55 | import pandas.util.testing 56 | -------------------------------------------------------------------------------- /pandas/compat/chainmap.py: 
class DeepChainMap(ChainMap):
    """ChainMap variant for writing/deleting *in place*.

    Unlike ``ChainMap``, assignment and deletion target the first mapping
    in the chain that already contains the key, rather than always the
    front mapping.
    """

    def __setitem__(self, key, value):
        # Update the key where it already lives; if it is new, insert it
        # into the front map, matching ChainMap's behaviour.
        target = next((m for m in self.maps if key in m), None)
        if target is not None:
            target[key] = value
        else:
            self.maps[0][key] = value

    def __delitem__(self, key):
        # Remove the first occurrence of the key across the stacked maps.
        for layer in self.maps:
            if key in layer:
                del layer[key]
                return
        raise KeyError(key)

    # override because the m parameter is introduced in Python 3.4
    def new_child(self, m=None):
        child = {} if m is None else m
        return self.__class__(child, *self.maps)
21 | """ 22 | import openpyxl 23 | ver = LooseVersion(openpyxl.__version__) 24 | return LooseVersion(start_ver) < ver <= LooseVersion(stop_ver) 25 | -------------------------------------------------------------------------------- /pandas/computation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/computation/__init__.py -------------------------------------------------------------------------------- /pandas/computation/api.py: -------------------------------------------------------------------------------- 1 | from pandas.computation.eval import eval 2 | from pandas.computation.expr import Expr 3 | -------------------------------------------------------------------------------- /pandas/computation/common.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pandas.compat import reduce 4 | 5 | 6 | def _ensure_decoded(s): 7 | """ if we have bytes, decode them to unicode """ 8 | if isinstance(s, (np.bytes_, bytes)): 9 | s = s.decode(pd.get_option('display.encoding')) 10 | return s 11 | 12 | 13 | def _result_type_many(*arrays_and_dtypes): 14 | """ wrapper around numpy.result_type which overcomes the NPY_MAXARGS (32) 15 | argument limit """ 16 | try: 17 | return np.result_type(*arrays_and_dtypes) 18 | except ValueError: 19 | # we have > NPY_MAXARGS terms in our expression 20 | return reduce(np.result_type, arrays_and_dtypes) 21 | 22 | 23 | class NameResolutionError(NameError): 24 | pass 25 | -------------------------------------------------------------------------------- /pandas/computation/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/computation/tests/__init__.py 
-------------------------------------------------------------------------------- /pandas/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/core/__init__.py -------------------------------------------------------------------------------- /pandas/core/api.py: -------------------------------------------------------------------------------- 1 | 2 | # pylint: disable=W0614,W0401,W0611 3 | 4 | import numpy as np 5 | 6 | from pandas.core.algorithms import factorize, match, unique, value_counts 7 | from pandas.core.common import isnull, notnull 8 | from pandas.core.categorical import Categorical 9 | from pandas.core.groupby import Grouper 10 | from pandas.core.format import set_eng_float_format 11 | from pandas.core.index import Index, Int64Index, Float64Index, MultiIndex 12 | 13 | from pandas.core.series import Series, TimeSeries 14 | from pandas.core.frame import DataFrame 15 | from pandas.core.panel import Panel 16 | from pandas.core.panel4d import Panel4D 17 | from pandas.core.groupby import groupby 18 | from pandas.core.reshape import (pivot_simple as pivot, get_dummies, 19 | lreshape, wide_to_long) 20 | 21 | WidePanel = Panel 22 | 23 | from pandas.core.indexing import IndexSlice 24 | from pandas.tseries.offsets import DateOffset 25 | from pandas.tseries.tools import to_datetime 26 | from pandas.tseries.index import (DatetimeIndex, Timestamp, 27 | date_range, bdate_range) 28 | from pandas.tseries.period import Period, PeriodIndex 29 | 30 | # legacy 31 | from pandas.core.common import save, load # deprecated, remove in 0.13 32 | import pandas.core.datetools as datetools 33 | 34 | from pandas.core.config import (get_option, set_option, reset_option, 35 | describe_option, option_context, options) 36 | -------------------------------------------------------------------------------- /pandas/core/array.py: 
"""
Isolate pandas's exposure to NumPy
"""

import numpy as np

# Canonical array type and boolean scalar type re-exported under local names.
Array = np.ndarray

bool = np.bool_

# Sized numeric scalar types to lift into this module's namespace.
_dtypes = {
    'int': [8, 16, 32, 64],
    'uint': [8, 16, 32, 64],
    'float': [16, 32, 64]
}

# Lift int8 ... float64 from numpy into this module.
_lift_types = [_k + str(_i) for _k, _v in _dtypes.items() for _i in _v]
for _t in _lift_types:
    globals()[_t] = getattr(np, _t)

# Re-export a handful of array constructors/utilities under the same names.
_lift_function = ['empty', 'arange', 'array', 'putmask', 'where']
for _f in _lift_function:
    globals()[_f] = getattr(np, _f)

# Common random generators, lifted from numpy.random.
_lift_random = ['randn', 'rand']
for _f in _lift_random:
    globals()[_f] = getattr(np.random, _f)

# Missing-value sentinel.
NA = np.nan
thisMonthEnd = MonthEnd(0) 34 | thisBMonthEnd = BMonthEnd(0) 35 | thisYearEnd = YearEnd(0) 36 | thisYearBegin = YearBegin(0) 37 | thisBQuarterEnd = BQuarterEnd(0) 38 | thisQuarterEnd = QuarterEnd(0) 39 | 40 | # Functions to check where a date lies 41 | isBusinessDay = BDay().onOffset 42 | isMonthEnd = MonthEnd().onOffset 43 | isBMonthEnd = BMonthEnd().onOffset 44 | 45 | 46 | def _resolve_offset(freq, kwds): 47 | if 'timeRule' in kwds or 'offset' in kwds: 48 | offset = kwds.get('offset', None) 49 | offset = kwds.get('timeRule', offset) 50 | if isinstance(offset, compat.string_types): 51 | offset = getOffset(offset) 52 | warn = True 53 | else: 54 | offset = freq 55 | warn = False 56 | 57 | if warn: 58 | import warnings 59 | warnings.warn("'timeRule' and 'offset' parameters are deprecated," 60 | " please use 'freq' instead", 61 | FutureWarning) 62 | 63 | return offset 64 | -------------------------------------------------------------------------------- /pandas/core/matrix.py: -------------------------------------------------------------------------------- 1 | from pandas.core.frame import DataFrame as DataMatrix 2 | -------------------------------------------------------------------------------- /pandas/core/panel4d.py: -------------------------------------------------------------------------------- 1 | """ Panel4D: a 4-d dict like collection of panels """ 2 | 3 | from pandas.core.panelnd import create_nd_panel_factory 4 | from pandas.core.panel import Panel 5 | 6 | Panel4D = create_nd_panel_factory( 7 | klass_name='Panel4D', 8 | orders=['labels', 'items', 'major_axis', 'minor_axis'], 9 | slices={'labels': 'labels', 'items': 'items', 'major_axis': 'major_axis', 10 | 'minor_axis': 'minor_axis'}, 11 | slicer=Panel, 12 | aliases={'major': 'major_axis', 'minor': 'minor_axis'}, 13 | stat_axis=2, 14 | ns=dict(__doc__=""" 15 | Represents a 4 dimensional structured 16 | 17 | Parameters 18 | ---------- 19 | data : ndarray (labels x items x major x minor), or dict of Panels 20 
def panel4d_init(self, data=None, labels=None, items=None, major_axis=None,
                 minor_axis=None, copy=False, dtype=None):
    """Initializer bound onto Panel4D.

    Forwards the four axis arguments plus copy/dtype straight through to
    the generic ``_init_data`` machinery.
    """
    axis_kwargs = dict(data=data, labels=labels, items=items,
                       major_axis=major_axis, minor_axis=minor_axis,
                       copy=copy, dtype=dtype)
    self._init_data(**axis_kwargs)
get_item(self, object val) 24 | cpdef set_item(self, object key, Py_ssize_t val) 25 | -------------------------------------------------------------------------------- /pandas/info.py: -------------------------------------------------------------------------------- 1 | """ 2 | pandas - a powerful data analysis and manipulation library for Python 3 | ===================================================================== 4 | 5 | See http://pandas.sourceforge.net for full documentation. Otherwise, see the 6 | docstrings of the various objects in the pandas namespace: 7 | 8 | Series 9 | DataFrame 10 | Panel 11 | Index 12 | DatetimeIndex 13 | HDFStore 14 | bdate_range 15 | date_range 16 | read_csv 17 | read_fwf 18 | read_table 19 | ols 20 | """ 21 | -------------------------------------------------------------------------------- /pandas/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/__init__.py -------------------------------------------------------------------------------- /pandas/io/api.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data IO api 3 | """ 4 | 5 | from pandas.io.parsers import read_csv, read_table, read_fwf 6 | from pandas.io.clipboard import read_clipboard 7 | from pandas.io.excel import ExcelFile, ExcelWriter, read_excel 8 | from pandas.io.pytables import HDFStore, Term, get_store, read_hdf 9 | from pandas.io.json import read_json 10 | from pandas.io.html import read_html 11 | from pandas.io.sql import read_sql, read_sql_table, read_sql_query 12 | from pandas.io.stata import read_stata 13 | from pandas.io.pickle import read_pickle, to_pickle 14 | from pandas.io.packers import read_msgpack, to_msgpack 15 | from pandas.io.gbq import read_gbq 16 | -------------------------------------------------------------------------------- /pandas/io/date_converters.py: 
-------------------------------------------------------------------------------- 1 | """This module is designed for community supported date conversion functions""" 2 | from pandas.compat import range, map 3 | import numpy as np 4 | import pandas.lib as lib 5 | 6 | 7 | def parse_date_time(date_col, time_col): 8 | date_col = _maybe_cast(date_col) 9 | time_col = _maybe_cast(time_col) 10 | return lib.try_parse_date_and_time(date_col, time_col) 11 | 12 | 13 | def parse_date_fields(year_col, month_col, day_col): 14 | year_col = _maybe_cast(year_col) 15 | month_col = _maybe_cast(month_col) 16 | day_col = _maybe_cast(day_col) 17 | return lib.try_parse_year_month_day(year_col, month_col, day_col) 18 | 19 | 20 | def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, 21 | second_col): 22 | year_col = _maybe_cast(year_col) 23 | month_col = _maybe_cast(month_col) 24 | day_col = _maybe_cast(day_col) 25 | hour_col = _maybe_cast(hour_col) 26 | minute_col = _maybe_cast(minute_col) 27 | second_col = _maybe_cast(second_col) 28 | return lib.try_parse_datetime_components(year_col, month_col, day_col, 29 | hour_col, minute_col, second_col) 30 | 31 | 32 | def generic_parser(parse_func, *cols): 33 | N = _check_columns(cols) 34 | results = np.empty(N, dtype=object) 35 | 36 | for i in range(N): 37 | args = [c[i] for c in cols] 38 | results[i] = parse_func(*args) 39 | 40 | return results 41 | 42 | 43 | def _maybe_cast(arr): 44 | if not arr.dtype.type == np.object_: 45 | arr = np.array(arr, dtype=object) 46 | return arr 47 | 48 | 49 | def _check_columns(cols): 50 | if not len(cols): 51 | raise AssertionError("There must be at least 1 column") 52 | 53 | head, tail = cols[0], cols[1:] 54 | 55 | N = len(head) 56 | 57 | for i, n in enumerate(map(len, tail)): 58 | if n != N: 59 | raise AssertionError('All columns must have the same length: {0}; ' 60 | 'column {1} has length {2}'.format(N, i, n)) 61 | 62 | return N 63 | 
def read_pickle(path):
    """
    Load pickled pandas object (or any other pickled object) from the specified
    file path

    Warning: Loading pickled data received from untrusted sources can be
    unsafe. See: http://docs.python.org/2.7/library/pickle.html

    Parameters
    ----------
    path : string
        File path

    Returns
    -------
    unpickled : type of object stored in file
    """

    def try_read(path, encoding=None):
        # Try plain (c)pickle first; if that fails (e.g. classes were
        # renamed/moved between pandas versions) fall back to the compat
        # pickle, first without and then with class-remapping enabled.
        # Encoding is only passed when not None since py2 doesn't accept
        # the param.

        # cpickle
        # GH 6899
        # NOTE: narrowed from bare `except:` — a bare except also swallows
        # KeyboardInterrupt/SystemExit; Exception is the intended scope here.
        try:
            with open(path, 'rb') as fh:
                return pkl.load(fh)
        except Exception:
            # reg/patched pickle
            try:
                with open(path, 'rb') as fh:
                    return pc.load(fh, encoding=encoding, compat=False)
            # compat pickle
            except Exception:
                with open(path, 'rb') as fh:
                    return pc.load(fh, encoding=encoding, compat=True)

    try:
        return try_read(path)
    except Exception:
        # On py3, retry with latin1 to decode py2-written pickles that
        # contain encoded bytes/datetime payloads.
        if PY3:
            return try_read(path, encoding='latin1')
        raise
socket 4 | socket.setdefaulttimeout(5) 5 | -------------------------------------------------------------------------------- /pandas/io/tests/data/gbq_fake_job.txt: -------------------------------------------------------------------------------- 1 | {u'status': {u'state': u'DONE'}, u'kind': u'bigquery#job', u'statistics': {u'query': {u'cacheHit': True, u'totalBytesProcessed': u'0'}, u'endTime': u'1377668744674', u'totalBytesProcessed': u'0', u'startTime': u'1377668744466'}, u'jobReference': {u'projectId': u'57288129629', u'jobId': u'bqjob_r5f956972f0190bdf_00000140c374bf42_2'}, u'etag': u'"4PTsVxg68bQkQs1RJ1Ndewqkgg4/oO4VmgFrAku4N6FWci9s7iFIftc"', u'configuration': {u'query': {u'createDisposition': u'CREATE_IF_NEEDED', u'query': u'SELECT * FROM [publicdata:samples.shakespeare]', u'writeDisposition': u'WRITE_TRUNCATE', u'destinationTable': {u'projectId': u'57288129629', u'tableId': u'anonb5ec450da88eeeb78a27784ea482ee75a146d442', u'datasetId': u'_d0b4f5f0d50dc68a3eb0fa6cba66a9a8687d9253'}}}, u'id': u'57288129629:bqjob_r5f956972f0190bdf_00000140c374bf42_2', u'selfLink': u'https://www.googleapis.com/bigquery/v2/projects/57288129629/jobs/bqjob_r5f956972f0190bdf_00000140c374bf42_2'} -------------------------------------------------------------------------------- /pandas/io/tests/data/html_encoding/chinese_utf-16.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/html_encoding/chinese_utf-16.html -------------------------------------------------------------------------------- /pandas/io/tests/data/html_encoding/chinese_utf-32.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/html_encoding/chinese_utf-32.html 
-------------------------------------------------------------------------------- /pandas/io/tests/data/html_encoding/chinese_utf-8.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
01
0 漊煻獌 漊煻獌
1 袟袘觕 袟袘觕
2 埱娵徖 埱娵徖
-------------------------------------------------------------------------------- /pandas/io/tests/data/html_encoding/letz_latin1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/html_encoding/letz_latin1.html -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_hdf/legacy.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_hdf/legacy.h5 -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_hdf/legacy_0.10.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_hdf/legacy_0.10.h5 -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_hdf/legacy_table.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_hdf/legacy_table.h5 -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5 -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_hdf/pytables_native.h5: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_hdf/pytables_native.h5 -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_hdf/pytables_native2.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_hdf/pytables_native2.h5 -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle -------------------------------------------------------------------------------- 
/pandas/io/tests/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle -------------------------------------------------------------------------------- /pandas/io/tests/data/salary.table: -------------------------------------------------------------------------------- 1 | S X E M 2 | 13876 1 1 1 3 | 11608 1 3 0 4 | 18701 1 3 1 5 | 11283 1 2 0 6 | 11767 1 3 0 7 | 20872 2 2 1 8 | 11772 2 2 0 9 | 10535 2 1 0 10 | 12195 2 3 0 11 | 12313 3 2 0 12 | 14975 3 1 1 13 | 21371 3 2 1 14 | 19800 3 3 1 15 | 11417 4 1 0 16 | 20263 4 3 1 17 | 13231 4 3 0 18 | 12884 4 2 0 19 | 13245 5 2 0 20 | 13677 5 3 0 21 | 15965 5 1 1 22 | 12336 6 1 0 23 | 21352 6 3 1 24 | 13839 6 2 0 25 | 22884 6 2 1 26 | 16978 7 1 1 27 | 14803 8 2 0 28 | 17404 8 1 1 29 | 22184 8 3 1 30 | 13548 8 1 0 31 | 14467 10 1 0 32 | 15942 10 2 0 33 | 23174 10 3 1 34 | 23780 10 2 1 35 | 25410 11 2 1 36 | 14861 11 1 0 37 | 16882 12 2 0 38 | 24170 12 3 1 39 | 15990 13 1 0 40 | 26330 13 2 1 41 | 17949 14 2 0 42 | 25685 15 3 1 43 | 27837 16 2 1 44 | 18838 16 2 0 45 | 17483 16 1 0 46 | 19207 17 2 0 47 | 19346 20 1 0 48 | -------------------------------------------------------------------------------- /pandas/io/tests/data/stata1_114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata1_114.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata1_117.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata1_117.dta 
-------------------------------------------------------------------------------- /pandas/io/tests/data/stata1_encoding.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata1_encoding.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata2_113.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata2_113.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata2_114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata2_114.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata2_115.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata2_115.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata2_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata2_115.dta~1dc157c... 
Added additional data files for testing alternative Stata file formats -------------------------------------------------------------------------------- /pandas/io/tests/data/stata2_117.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata2_117.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata3_113.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata3_113.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata3_114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata3_114.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata3_115.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata3_115.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata3_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata3_115.dta~1dc157c... 
Added additional data files for testing alternative Stata file formats -------------------------------------------------------------------------------- /pandas/io/tests/data/stata3_117.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata3_117.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata4_113.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata4_113.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata4_114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata4_114.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata4_115.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata4_115.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata4_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata4_115.dta~1dc157c... 
Added additional data files for testing alternative Stata file formats -------------------------------------------------------------------------------- /pandas/io/tests/data/stata4_117.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata4_117.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata5.csv: -------------------------------------------------------------------------------- 1 | byte_,int_,long_,float_,double_,date_td,string_,string_1 2 | 0,0,0,0,0,,"a","a" 3 | 1,1,1,1,1,,"ab","b" 4 | -1,-1,-1,-1,-1,,"abc","c" 5 | 100,32740,-2147483647,-1.70100000027769e+38,-2.0000000000000e+307,1970-01-01,"abcdefghijklmnop","d" 6 | -127,-32767,2147483620,1.70100000027769e+38,8.0000000000000e+307,1970-01-02,"abcdefghijklmnopqrstuvwxyz","e" 7 | ,0,,,,2014-01-01,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","f" 8 | 0,,,,,2114-01-01,"1234567890","1" 9 | ,,0,,,2014-12-31,"This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted","2" 10 | .a,.a,.a,.a,.a,2012-02-29,"!","A" 11 | 100,32740,-2.15e+09,-1.70e+38,-2.0e+307,01jan1970,"abcdefghijklmnop","d" 12 | -127,-32767,2.15e+09,1.70e+38,8.0e+307,02jan1970,"abcdefghijklmnopqrstuvwxyz","e" 13 | ,0,,,,01jan2014,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","f" 14 | 0,,,,,01jan2114,"1234567890","1" 15 | ,,0,,,31dec2014,"This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted","2" 16 | .a,.a,.a,.a,.a,29feb2012,"!","A" 17 | .z,.z,.z,.z,.z,,"&","Z" 18 | ,,,0,,,"1.23","!" 19 | ,,,,0,,"10jan1970","." 
20 | -------------------------------------------------------------------------------- /pandas/io/tests/data/stata5_113.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata5_113.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata5_114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata5_114.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata5_115.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata5_115.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata5_117.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata5_117.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata6.csv: -------------------------------------------------------------------------------- 1 | byte_,int_,long_,float_,double_,date_td,string_,string_1 2 | 0,0,0,0,0,1960-01-01,"a","a" 3 | 1,1,1,1,1,3014-12-31,"ab","b" 4 | -1,-1,-1,-1,-1,2014-12-31,"abc","c" 5 | 100,32740,-2147483647,-1.7010000002777e+38,-2.000000000000e+307,1970-01-01,"This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted by Stata. 
This string has 244 characters, so that ir is the maximum length permitted","d" 6 | -127,-32767,2147483620,1.7010000002777e+38,8.000000000000e+307,1970-01-02,"abcdefghijklmnopqrstuvwxyz","e" 7 | -------------------------------------------------------------------------------- /pandas/io/tests/data/stata6_113.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata6_113.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata6_114.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata6_114.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata6_115.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata6_115.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata6_117.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata6_117.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata7_115.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata7_115.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/stata7_117.dta: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/stata7_117.dta -------------------------------------------------------------------------------- /pandas/io/tests/data/test.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test.xls -------------------------------------------------------------------------------- /pandas/io/tests/data/test.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test.xlsm -------------------------------------------------------------------------------- /pandas/io/tests/data/test.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test.xlsx -------------------------------------------------------------------------------- /pandas/io/tests/data/test1.csv: -------------------------------------------------------------------------------- 1 | index,A,B,C,D 2 | 2000-01-03 00:00:00,0.980268513777,3.68573087906,-0.364216805298,-1.15973806169 3 | 2000-01-04 00:00:00,1.04791624281,-0.0412318367011,-0.16181208307,0.212549316967 4 | 2000-01-05 00:00:00,0.498580885705,0.731167677815,-0.537677223318,1.34627041952 5 | 2000-01-06 00:00:00,1.12020151869,1.56762092543,0.00364077397681,0.67525259227 6 | 2000-01-07 00:00:00,-0.487094399463,0.571454623474,-1.6116394093,0.103468562917 7 | 2000-01-10 00:00:00,0.836648671666,0.246461918642,0.588542635376,1.0627820613 8 | 2000-01-11 00:00:00,-0.157160753327,1.34030689438,1.19577795622,-1.09700699751 
-------------------------------------------------------------------------------- /pandas/io/tests/data/test2.csv: -------------------------------------------------------------------------------- 1 | A,B,C,D,E 2 | 2000-01-03 00:00:00,0.980268513777,3.68573087906,-0.364216805298,-1.15973806169,foo 3 | 2000-01-04 00:00:00,1.04791624281,-0.0412318367011,-0.16181208307,0.212549316967,bar 4 | 2000-01-05 00:00:00,0.498580885705,0.731167677815,-0.537677223318,1.34627041952,baz 5 | 2000-01-06 00:00:00,1.12020151869,1.56762092543,0.00364077397681,0.67525259227,qux 6 | 2000-01-07 00:00:00,-0.487094399463,0.571454623474,-1.6116394093,0.103468562917,foo2 7 | -------------------------------------------------------------------------------- /pandas/io/tests/data/test2.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test2.xls -------------------------------------------------------------------------------- /pandas/io/tests/data/test2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test2.xlsx -------------------------------------------------------------------------------- /pandas/io/tests/data/test3.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test3.xls -------------------------------------------------------------------------------- /pandas/io/tests/data/test_types.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test_types.xls 
-------------------------------------------------------------------------------- /pandas/io/tests/data/test_types.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/test_types.xlsx -------------------------------------------------------------------------------- /pandas/io/tests/data/times_1900.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/times_1900.xls -------------------------------------------------------------------------------- /pandas/io/tests/data/times_1904.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/times_1904.xls -------------------------------------------------------------------------------- /pandas/io/tests/data/unicode_series.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/unicode_series.csv -------------------------------------------------------------------------------- /pandas/io/tests/data/utf16_ex.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/data/utf16_ex.txt -------------------------------------------------------------------------------- /pandas/io/tests/data/valid_markup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 
| 37 | 38 | 39 |
ab
067
140
294
370
40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 |
ab
067
140
61 | 62 | 63 | -------------------------------------------------------------------------------- /pandas/io/tests/test_json/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/io/tests/test_json/__init__.py -------------------------------------------------------------------------------- /pandas/io/tests/test_json/data/tsframe_iso_v012.json: -------------------------------------------------------------------------------- 1 | {"A":{"2000-01-03T00:00:00":1.56808523,"2000-01-04T00:00:00":-0.2550111,"2000-01-05T00:00:00":1.51493992,"2000-01-06T00:00:00":-0.02765498,"2000-01-07T00:00:00":0.05951614},"B":{"2000-01-03T00:00:00":0.65727391,"2000-01-04T00:00:00":-0.08072427,"2000-01-05T00:00:00":0.11805825,"2000-01-06T00:00:00":0.44679743,"2000-01-07T00:00:00":-2.69652057},"C":{"2000-01-03T00:00:00":1.81021139,"2000-01-04T00:00:00":-0.03202878,"2000-01-05T00:00:00":1.629455,"2000-01-06T00:00:00":0.33192641,"2000-01-07T00:00:00":1.28163262},"D":{"2000-01-03T00:00:00":-0.17251653,"2000-01-04T00:00:00":-0.17581665,"2000-01-05T00:00:00":-1.31506612,"2000-01-06T00:00:00":-0.27885413,"2000-01-07T00:00:00":0.34703478},"date":{"2000-01-03T00:00:00":"1992-01-06T18:21:32.120000","2000-01-04T00:00:00":"1992-01-06T18:21:32.120000","2000-01-05T00:00:00":"1992-01-06T18:21:32.120000","2000-01-06T00:00:00":"2013-01-01T00:00:00","2000-01-07T00:00:00":"1992-01-06T18:21:32.120000"}} -------------------------------------------------------------------------------- /pandas/io/tests/test_json/data/tsframe_v012.json: -------------------------------------------------------------------------------- 1 | 
{"A":{"946857600000000000":1.56808523,"946944000000000000":-0.2550111,"947030400000000000":1.51493992,"947116800000000000":-0.02765498,"947203200000000000":0.05951614},"B":{"946857600000000000":0.65727391,"946944000000000000":-0.08072427,"947030400000000000":0.11805825,"947116800000000000":0.44679743,"947203200000000000":-2.69652057},"C":{"946857600000000000":1.81021139,"946944000000000000":-0.03202878,"947030400000000000":1.629455,"947116800000000000":0.33192641,"947203200000000000":1.28163262},"D":{"946857600000000000":-0.17251653,"946944000000000000":-0.17581665,"947030400000000000":-1.31506612,"947116800000000000":-0.27885413,"947203200000000000":0.34703478},"date":{"946857600000000000":694722092120000000,"946944000000000000":694722092120000000,"947030400000000000":694722092120000000,"947116800000000000":1356998400000000000,"947203200000000000":694722092120000000},"modified":{"946857600000000000":694722092120000000,"946944000000000000":null,"947030400000000000":694722092120000000,"947116800000000000":1356998400000000000,"947203200000000000":694722092120000000}} -------------------------------------------------------------------------------- /pandas/rpy/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from .common import importr, r, load_data 3 | except ImportError: 4 | pass 5 | -------------------------------------------------------------------------------- /pandas/rpy/base.py: -------------------------------------------------------------------------------- 1 | import pandas.rpy.util as util 2 | 3 | 4 | class lm(object): 5 | """ 6 | Examples 7 | -------- 8 | >>> model = lm('x ~ y + z', data) 9 | >>> model.coef 10 | """ 11 | def __init__(self, formula, data): 12 | pass 13 | -------------------------------------------------------------------------------- /pandas/rpy/mass.py: -------------------------------------------------------------------------------- 1 | class rlm(object): 2 | pass 3 | 
-------------------------------------------------------------------------------- /pandas/rpy/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/rpy/tests/__init__.py -------------------------------------------------------------------------------- /pandas/rpy/vars.py: -------------------------------------------------------------------------------- 1 | import pandas.rpy.util as util 2 | 3 | 4 | class VAR(object): 5 | """ 6 | 7 | Parameters 8 | ---------- 9 | y : 10 | p : 11 | type : {"const", "trend", "both", "none"} 12 | season : 13 | exogen : 14 | lag_max : 15 | ic : {"AIC", "HQ", "SC", "FPE"} 16 | Information criterion to use, if lag_max is not None 17 | """ 18 | def __init__(y, p=1, type="none", season=None, exogen=None, 19 | lag_max=None, ic=None): 20 | pass 21 | -------------------------------------------------------------------------------- /pandas/sandbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/sandbox/__init__.py -------------------------------------------------------------------------------- /pandas/sparse/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/sparse/__init__.py -------------------------------------------------------------------------------- /pandas/sparse/api.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=W0611 2 | 3 | from pandas.sparse.array import SparseArray 4 | from pandas.sparse.list import SparseList 5 | from pandas.sparse.series import SparseSeries, SparseTimeSeries 6 | from pandas.sparse.frame import SparseDataFrame 7 | from pandas.sparse.panel 
import SparsePanel 8 | -------------------------------------------------------------------------------- /pandas/sparse/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/sparse/tests/__init__.py -------------------------------------------------------------------------------- /pandas/src/datetime_helper.h: -------------------------------------------------------------------------------- 1 | #include "datetime.h" 2 | 3 | void mangle_nat(PyObject *val) { 4 | PyDateTime_GET_MONTH(val) = -1; 5 | PyDateTime_GET_DAY(val) = -1; 6 | } 7 | -------------------------------------------------------------------------------- /pandas/src/headers/math.h: -------------------------------------------------------------------------------- 1 | #ifndef _PANDAS_MATH_H_ 2 | #define _PANDAS_MATH_H_ 3 | 4 | #if defined(_MSC_VER) 5 | #include 6 | __inline int signbit(double num) { return _copysign(1.0, num) < 0; } 7 | #else 8 | #include 9 | #endif 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /pandas/src/headers/portable.h: -------------------------------------------------------------------------------- 1 | #ifndef _PANDAS_PORTABLE_H_ 2 | #define _PANDAS_PORTABLE_H_ 3 | 4 | #if defined(_MSC_VER) 5 | #define strcasecmp( s1, s2 ) _stricmp( s1, s2 ) 6 | #endif 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /pandas/src/headers/stdint.h: -------------------------------------------------------------------------------- 1 | #ifndef _PANDAS_STDINT_H_ 2 | #define _PANDAS_STDINT_H_ 3 | 4 | #if defined(_MSC_VER) 5 | #include "ms_stdint.h" 6 | #else 7 | #include 8 | #endif 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /pandas/src/helper.h: 
-------------------------------------------------------------------------------- 1 | #ifndef C_HELPER_H 2 | #define C_HELPER_H 3 | 4 | #ifndef PANDAS_INLINE 5 | #if defined(__GNUC__) 6 | #define PANDAS_INLINE __inline__ 7 | #elif defined(_MSC_VER) 8 | #define PANDAS_INLINE __inline 9 | #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L 10 | #define PANDAS_INLINE inline 11 | #else 12 | #define PANDAS_INLINE 13 | #endif 14 | #endif 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /pandas/src/klib/khash_python.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "khash.h" 4 | 5 | // kludge 6 | 7 | #define kh_float64_hash_func _Py_HashDouble 8 | #define kh_float64_hash_equal kh_int64_hash_equal 9 | 10 | #define KHASH_MAP_INIT_FLOAT64(name, khval_t) \ 11 | KHASH_INIT(name, khfloat64_t, khval_t, 1, kh_float64_hash_func, kh_float64_hash_equal) 12 | 13 | KHASH_MAP_INIT_FLOAT64(float64, size_t) 14 | 15 | 16 | int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) { 17 | int result = PyObject_RichCompareBool(a, b, Py_EQ); 18 | if (result < 0) { 19 | PyErr_Clear(); 20 | return 0; 21 | } 22 | return result; 23 | } 24 | 25 | 26 | #define kh_python_hash_func(key) (PyObject_Hash(key)) 27 | #define kh_python_hash_equal(a, b) (pyobject_cmp(a, b)) 28 | 29 | 30 | // Python object 31 | 32 | typedef PyObject* kh_pyobject_t; 33 | 34 | #define KHASH_MAP_INIT_PYOBJECT(name, khval_t) \ 35 | KHASH_INIT(name, kh_pyobject_t, khval_t, 1, \ 36 | kh_python_hash_func, kh_python_hash_equal) 37 | 38 | KHASH_MAP_INIT_PYOBJECT(pymap, Py_ssize_t) 39 | 40 | #define KHASH_SET_INIT_PYOBJECT(name) \ 41 | KHASH_INIT(name, kh_pyobject_t, char, 0, \ 42 | kh_python_hash_func, kh_python_hash_equal) 43 | 44 | KHASH_SET_INIT_PYOBJECT(pyset) 45 | 46 | #define kh_exist_pymap(h, k) (kh_exist(h, k)) 47 | #define kh_exist_pyset(h, k) (kh_exist(h, k)) 48 | 49 | KHASH_MAP_INIT_STR(strbox, 
kh_pyobject_t) 50 | -------------------------------------------------------------------------------- /pandas/src/klib/ktypes.h: -------------------------------------------------------------------------------- 1 | #ifndef __KTYPES_H 2 | #define __KTYPES_H 3 | 4 | /* compipler specific configuration */ 5 | 6 | #endif /* __KTYPES_H */ 7 | -------------------------------------------------------------------------------- /pandas/src/parser/.gitignore: -------------------------------------------------------------------------------- 1 | !*.c 2 | test* -------------------------------------------------------------------------------- /pandas/src/parser/Makefile: -------------------------------------------------------------------------------- 1 | PYTHONBASE = /Library/Frameworks/EPD64.framework/Versions/Current 2 | NUMPY_INC = /Library/Frameworks/EPD64.framework/Versions/7.1/lib/python2.7/site-packages/numpy/core/include 3 | PYTHON_INC = -I$(PYTHONBASE)/include/python2.7 -I$(NUMPY_INC) 4 | PYTHON_LINK = -L$(PYTHONBASE)/lib -lpython 5 | 6 | SOURCES = conversions.c parser.c str_to.c 7 | 8 | check-syntax: 9 | gcc -g $(PYTHON_INC) -o /dev/null -S ${CHK_SOURCES} 10 | 11 | test: $(SOURCES) 12 | gcc $(PYTHON_INC) -o test $(SOURCES) 13 | ./test -------------------------------------------------------------------------------- /pandas/src/parser/io.h: -------------------------------------------------------------------------------- 1 | #include "Python.h" 2 | #include "tokenizer.h" 3 | 4 | 5 | typedef struct _file_source { 6 | /* The file being read. */ 7 | FILE *fp; 8 | 9 | char *buffer; 10 | /* Size of the file, in bytes. */ 11 | /* off_t size; */ 12 | 13 | /* file position when the file_buffer was created. */ 14 | off_t initial_file_pos; 15 | 16 | /* Offset in the file of the data currently in the buffer. */ 17 | off_t buffer_file_pos; 18 | 19 | /* Actual number of bytes in the current buffer. (Can be less than buffer_size.) 
*/ 20 | off_t last_pos; 21 | 22 | /* Size (in bytes) of the buffer. */ 23 | // off_t buffer_size; 24 | 25 | /* Pointer to the buffer. */ 26 | // char *buffer; 27 | 28 | } file_source; 29 | 30 | #define FS(source) ((file_source *)source) 31 | 32 | #if !defined(_WIN32) 33 | #define HAVE_MMAP 34 | #endif 35 | 36 | typedef struct _memory_map { 37 | 38 | FILE *fp; 39 | 40 | /* Size of the file, in bytes. */ 41 | off_t size; 42 | 43 | /* file position when the file_buffer was created. */ 44 | off_t initial_file_pos; 45 | 46 | int line_number; 47 | 48 | int fileno; 49 | off_t position; 50 | off_t last_pos; 51 | char *memmap; 52 | 53 | } memory_map; 54 | 55 | #define MM(src) ((memory_map*) src) 56 | 57 | void *new_mmap(char *fname); 58 | 59 | int del_mmap(void *src); 60 | 61 | void* buffer_mmap_bytes(void *source, size_t nbytes, 62 | size_t *bytes_read, int *status); 63 | 64 | 65 | typedef struct _rd_source { 66 | PyObject* obj; 67 | PyObject* buffer; 68 | size_t position; 69 | } rd_source; 70 | 71 | #define RDS(source) ((rd_source *)source) 72 | 73 | void *new_file_source(char *fname, size_t buffer_size); 74 | 75 | void *new_rd_source(PyObject *obj); 76 | 77 | int del_file_source(void *src); 78 | int del_rd_source(void *src); 79 | 80 | void* buffer_file_bytes(void *source, size_t nbytes, 81 | size_t *bytes_read, int *status); 82 | 83 | void* buffer_rd_bytes(void *source, size_t nbytes, 84 | size_t *bytes_read, int *status); 85 | 86 | -------------------------------------------------------------------------------- /pandas/src/properties.pyx: -------------------------------------------------------------------------------- 1 | from cpython cimport PyDict_Contains, PyDict_GetItem, PyDict_GetItem 2 | 3 | 4 | cdef class cache_readonly(object): 5 | 6 | cdef readonly: 7 | object func, name, allow_setting 8 | 9 | def __init__(self, func=None, allow_setting=False): 10 | if func is not None: 11 | self.func = func 12 | self.name = func.__name__ 13 | self.allow_setting = allow_setting 
14 | 15 | def __call__(self, func, doc=None): 16 | self.func = func 17 | self.name = func.__name__ 18 | return self 19 | 20 | def __get__(self, obj, typ): 21 | # Get the cache or set a default one if needed 22 | 23 | cache = getattr(obj, '_cache', None) 24 | if cache is None: 25 | try: 26 | cache = obj._cache = {} 27 | except (AttributeError): 28 | return 29 | 30 | if PyDict_Contains(cache, self.name): 31 | # not necessary to Py_INCREF 32 | val = PyDict_GetItem(cache, self.name) 33 | else: 34 | val = self.func(obj) 35 | PyDict_SetItem(cache, self.name, val) 36 | return val 37 | 38 | def __set__(self, obj, value): 39 | 40 | if not self.allow_setting: 41 | raise Exception("cannot set values for [%s]" % self.name) 42 | 43 | # Get the cache or set a default one if needed 44 | cache = getattr(obj, '_cache', None) 45 | if cache is None: 46 | try: 47 | cache = obj._cache = {} 48 | except (AttributeError): 49 | return 50 | 51 | PyDict_SetItem(cache, self.name, value) 52 | 53 | cdef class AxisProperty(object): 54 | cdef: 55 | Py_ssize_t axis 56 | 57 | def __init__(self, axis=0): 58 | self.axis = axis 59 | 60 | def __get__(self, obj, type): 61 | cdef list axes = obj._data.axes 62 | return axes[self.axis] 63 | 64 | def __set__(self, obj, value): 65 | obj._set_axis(self.axis, value) 66 | -------------------------------------------------------------------------------- /pandas/src/skiplist.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "skiplist.h": 2 | ctypedef struct node_t: 3 | double value 4 | int is_nil 5 | int levels 6 | node_t **next 7 | int *width 8 | int ref_count 9 | 10 | ctypedef struct skiplist_t: 11 | node_t *head 12 | int size, maxlevels 13 | node_t **tmp_chain 14 | int *tmp_steps 15 | 16 | inline skiplist_t* skiplist_init(int) 17 | inline void skiplist_destroy(skiplist_t*) 18 | inline double skiplist_get(skiplist_t*, int, int*) 19 | inline int skiplist_insert(skiplist_t*, double) 20 | inline int 
skiplist_remove(skiplist_t*, double) 21 | 22 | -------------------------------------------------------------------------------- /pandas/src/ujson/python/version.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | * Neither the name of the ESN Social Software AB nor the 13 | names of its contributors may be used to endorse or promote products 14 | derived from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Canonical window types, keyed by their integer codes.
_WINDOW_TYPES = {
    0: 'full_sample',
    1: 'rolling',
    2: 'expanding'
}
# Also accept each canonical name itself as a key, e.g. 'rolling' -> 'rolling'.
# (The original iterated items() and ignored the key; values() is the intent.)
_WINDOW_TYPES.update((v, v) for v in list(_WINDOW_TYPES.values()))
_ADDITIONAL_CLUSTER_TYPES = set(("entity", "time"))


def _get_cluster_type(cluster_type):
    """
    Normalize a cluster-type spec to its canonical string form.

    Accepts anything `_get_window_type` accepts, plus 'entity'/'time'
    (case-insensitive, underscores treated as spaces). None passes
    through unchanged (previous behavior).

    Raises
    ------
    ValueError
        If the spec matches neither a window type nor a cluster type.
    """
    # this was previous behavior
    if cluster_type is None:
        return cluster_type
    try:
        return _get_window_type(cluster_type)
    except ValueError:
        final_type = str(cluster_type).lower().replace("_", " ")
        if final_type in _ADDITIONAL_CLUSTER_TYPES:
            return final_type
        raise ValueError('Unrecognized cluster type: %s' % cluster_type)


def _get_window_type(window_type):
    """
    Normalize a window-type spec (e.g. 0, 1, 2, 'rolling', 'full sample')
    to one of 'full_sample', 'rolling', 'expanding'.

    Raises
    ------
    ValueError
        If the spec is not recognized.
    """
    # e.g., 0, 1, 2
    final_type = _WINDOW_TYPES.get(window_type)
    # e.g., 'full_sample' / 'full sample' (case-insensitive)
    final_type = final_type or _WINDOW_TYPES.get(
        str(window_type).lower().replace(" ", "_"))
    if final_type is None:
        raise ValueError('Unrecognized window type: %s' % window_type)
    return final_type


def banner(text, width=80):
    """
    Center *text* in a line of '-' characters totalling *width* chars.

    The original shipped with an empty docstring; behavior is unchanged:
    extra fill (odd remainder) goes on the right.
    """
    to_fill = width - len(text)

    left = to_fill // 2
    right = to_fill - left

    return '%s%s%s' % ('-' * left, text, '-' * right)
    def checkFamaMacBethExtended(self, window_type, x, y, **kwds):
        """
        Run fama_macbeth with a moving window and verify each window's
        column of stats equals an independent static run on that window's
        data slice.
        """
        window = 25

        result = fama_macbeth(y=y, x=x, window_type=window_type, window=window,
                              **kwds)
        self._check_stuff_works(result)

        index = result._index
        time = len(index)

        # One window position per column of result._stats.
        for i in range(time - window + 1):
            if window_type == 'rolling':
                # rolling: window slides forward
                start = index[i]
            else:
                # expanding: window always starts at the beginning
                start = index[0]

            end = index[i + window - 1]

            # Slice every panel item and the response down to this window.
            x2 = {}
            for k, v in compat.iteritems(x):
                x2[k] = v.truncate(start, end)
            y2 = y.truncate(start, end)

            # Static (non-windowed) run on the same slice must agree with
            # the i-th stats column of the windowed run.
            reference = fama_macbeth(y=y2, x=x2, **kwds)
            assert_almost_equal(reference._stats, result._stats[:, i])

            static = fama_macbeth(y=y2, x=x2, **kwds)
            self._check_stuff_works(static)
class TestMath(tm.TestCase):
    """Tests for pandas.stats.math helpers: rank, solve, inv."""

    # Positions in the fixture array that are overwritten with NaN.
    _nan_locs = np.arange(20, 40)
    # No infinities injected by default.
    _inf_locs = np.array([])

    def setUp(self):
        # Random series/frame fixtures over a daily date range, with a
        # contiguous NaN block injected into the series.
        arr = randn(N)
        arr[self._nan_locs] = np.NaN

        self.arr = arr
        self.rng = date_range(datetime(2009, 1, 1), periods=N)

        self.series = Series(arr.copy(), index=self.rng)

        self.frame = DataFrame(randn(N, K), index=self.rng,
                               columns=np.arange(K))

    def test_rank_1d(self):
        """A random series has rank 1; a constant-zero series has rank 0."""
        self.assertEqual(1, pmath.rank(self.series))
        self.assertEqual(0, pmath.rank(Series(0, self.series.index)))

    def test_solve_rect(self):
        """Least-squares solve of a rectangular system matches OLS betas."""
        if not _have_statsmodels:
            raise nose.SkipTest("no statsmodels")

        b = Series(np.random.randn(N), self.frame.index)
        result = pmath.solve(self.frame, b)
        expected = ols(y=b, x=self.frame, intercept=False).beta
        self.assertTrue(np.allclose(result, expected))

    def test_inv_illformed(self):
        """inv of a singular matrix: expected values match the
        Moore-Penrose pseudo-inverse of [[1, 1], [2, 2]], so pmath.inv
        evidently falls back to a generalized inverse here."""
        singular = DataFrame(np.array([[1, 1], [2, 2]]))
        rs = pmath.inv(singular)
        expected = np.array([[0.1, 0.2], [0.1, 0.2]])
        self.assertTrue(np.allclose(rs, expected))
| g3 79 | tp31 80 | Rp32 81 | ((I1 82 | (I3 83 | tp33 84 | g10 85 | I00 86 | (lp34 87 | S'one' 88 | p35 89 | aS'two' 90 | p36 91 | aS'three' 92 | p37 93 | atp38 94 | (S'second' 95 | p39 96 | tp40 97 | tp41 98 | ba(lp42 99 | g0 100 | (cnumpy 101 | ndarray 102 | p43 103 | (I0 104 | tp44 105 | g3 106 | tp45 107 | Rp46 108 | (I1 109 | (I10 110 | tp47 111 | g7 112 | (S'i4' 113 | p48 114 | I0 115 | I1 116 | tp49 117 | Rp50 118 | (I3 119 | S'<' 120 | p51 121 | NNNI-1 122 | I-1 123 | I0 124 | tp52 125 | bI00 126 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00' 127 | p53 128 | tp54 129 | bag0 130 | (g43 131 | (I0 132 | tp55 133 | g3 134 | tp56 135 | Rp57 136 | (I1 137 | (I10 138 | tp58 139 | g50 140 | I00 141 | S'\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00' 142 | p59 143 | tp60 144 | baN(lp61 145 | g27 146 | ag39 147 | atp62 148 | tp63 149 | b. 
def test_unpack_buffer():
    """unpackb should accept any object exposing the buffer interface,
    not just bytes — exercised here with an array.array."""
    from array import array
    buf = array('b')
    data = packb(('foo', 'bar'))
    # FIX: array.fromstring() was deprecated in Python 3 and removed in
    # 3.9; use frombytes() where available (3.2+), fall back for Python 2.
    if hasattr(buf, 'frombytes'):
        buf.frombytes(data)
    else:  # pragma: no cover - Python 2 only
        buf.fromstring(data)
    obj = unpackb(buf, use_list=1)
    assert [b'foo', b'bar'] == obj
def check(src, should, use_list=0):
    """Unpack the raw msgpack bytes *src* and assert the result equals
    *should* (tuples by default; lists when use_list is truthy)."""
    assert unpackb(src, use_list=use_list) == should


def testSimpleValue():
    """nil / false / true single-byte formats."""
    check(b"\x93\xc0\xc2\xc3",
          (None, False, True,))


def testFixnum():
    """positive fixint (0x00-0x7f) and negative fixint (0xe0-0xff)."""
    check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff",
          ((0,64,127,), (-32,-16,-1,),)
          )


def testFixArray():
    """fixarray headers (0x90-0x9f), including nesting."""
    check(b"\x92\x90\x91\x91\xc0",
          ((),((None,),),),
          )


def testFixRaw():
    """fixraw/fixstr headers (0xa0-0xbf) of lengths 0-3."""
    check(b"\x94\xa0\xa1a\xa2bc\xa3def",
          (b"", b"a", b"bc", b"def",),
          )


def testFixMap():
    """fixmap headers (0x80-0x8f) with nested maps as values."""
    check(
        b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80",
        {False: {None: None}, True:{None:{}}},
        )


def testUnsignedInt():
    """uint8 (0xcc), uint16 (0xcd), uint32 (0xce) at min/mid/max."""
    check(
        b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00"
        b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00"
        b"\xce\xff\xff\xff\xff",
        (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295,),
        )


def testSignedInt():
    """int8 (0xd0), int16 (0xd1), int32 (0xd2) at 0/min/-1."""
    check(b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00"
          b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00"
          b"\xd2\xff\xff\xff\xff",
          (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1,))


def testRaw():
    """raw16 (0xda) and raw32 (0xdb) with lengths 0-2."""
    check(b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00"
          b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
          (b"", b"a", b"ab", b"", b"a", b"ab"))


def testArray():
    """array16 (0xdc) and array32 (0xdd) with 0-2 elements."""
    check(b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00"
          b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02"
          b"\xc2\xc3",
          ((), (None,), (False,True), (), (None,), (False,True))
          )


def testMap():
    """map16 (0xde) and map32 (0xdf) with 0-2 entries."""
    check(
        b"\x96"
        b"\xde\x00\x00"
        b"\xde\x00\x01\xc0\xc2"
        b"\xde\x00\x02\xc0\xc2\xc3\xc2"
        b"\xdf\x00\x00\x00\x00"
        b"\xdf\x00\x00\x00\x01\xc0\xc2"
        b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2",
        ({}, {None: False}, {True: False, None: False}, {},
         {None: False}, {True: False, None: False}))
# A 256-byte blob covering every byte value; bytes on Python 3, str on 2.
binarydata = [chr(i) for i in range(256)]
binarydata = "".join(binarydata)
if compat.PY3:
    binarydata = binarydata.encode('utf-8')


def gen_binary_data(idx):
    """Return a deterministic prefix of the blob, length idx % 300."""
    data = binarydata[:idx % 300]
    return data


def test_exceeding_unpacker_read_size():
    """Regression test: feeding more packed strings than fit in the
    Unpacker's read buffer used to corrupt the heap (double free)."""
    dumpf = compat.BytesIO()

    packer = msgpack.Packer()

    NUMBER_OF_STRINGS = 6
    read_size = 16
    # Historical failure thresholds observed while tuning this test:
    # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop):
    # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev)
    # 40 ok for read_size=1024, while 50 introduces errors
    # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev):

    # Pack NUMBER_OF_STRINGS deterministic payloads into one stream.
    for idx in range(NUMBER_OF_STRINGS):
        data = gen_binary_data(idx)
        dumpf.write(packer.pack(data))

    f = compat.BytesIO(dumpf.getvalue())
    dumpf.close()

    unpacker = msgpack.Unpacker(f, read_size=read_size, use_list=1)

    # Stream the objects back; each must round-trip exactly.
    read_count = 0
    for idx, o in enumerate(unpacker):
        assert type(o) == bytes
        assert o == gen_binary_data(idx)
        read_count += 1

    assert read_count == NUMBER_OF_STRINGS
def value_range(df):
    """
    Return the minimum and maximum of a dataframe in a series object

    Parameters
    ----------
    df : DataFrame

    Returns
    -------
    (minimum, maximum) : Series
        Labelled 'Minimum' and 'Maximum' respectively.
        (The original docstring said "(maximum, minimum)", contradicting
        the actual return order below.)
    """
    # df.min()/df.max() reduce per column; the builtin min/max then reduce
    # across columns, giving the global extrema of the frame.
    return Series((min(df.min()), max(df.max())), ('Minimum', 'Maximum'))
def match(needles, haystack):
    """Return the integer positions of *needles* within *haystack*
    (-1 for values not present), via Index.get_indexer."""
    haystack = Index(haystack)
    needles = Index(needles)
    return haystack.get_indexer(needles)


def cartesian_product(X):
    '''
    Numpy version of itertools.product or pandas.compat.product.
    Sometimes faster (for large inputs)...

    Examples
    --------
    >>> cartesian_product([list('ABC'), [1, 2]])
    [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'),
    array([1, 2, 1, 2, 1, 2])]

    '''
    lenX = np.fromiter((len(x) for x in X), dtype=int)
    # FIX: np.cumproduct/np.product are deprecated aliases removed in
    # NumPy 2.0; cumprod/prod are the canonical names in every release.
    cumprodX = np.cumprod(lenX)

    a = np.roll(cumprodX, 1)
    a[0] = 1

    # FIX: floor division — under Python 3, "/" is true division and would
    # hand float repeat-counts to np.repeat, which requires integers.
    b = cumprodX[-1] // cumprodX

    return [np.tile(np.repeat(np.asarray(com._values_from_object(x)), b[i]),
                    np.prod(a[i]))
            for i, x in enumerate(X)]


def _compose2(f, g):
    """Compose 2 callables"""
    return lambda *args, **kwargs: f(g(*args, **kwargs))


def compose(*funcs):
    """Compose 2 or more callables"""
    assert len(funcs) > 1, 'At least 2 callables must be passed to compose'
    return reduce(_compose2, funcs)
class Interval(object):
    """
    Represents an interval of time defined by two timestamps
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end


class PeriodInterval(object):
    """
    Represents an interval of time defined by two Period objects (time ordinals)
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end


class IntervalIndex(Index):
    """
    Placeholder Index of intervals — construction is not implemented yet.
    """

    # FIX: __new__'s first parameter is the class, conventionally named
    # ``cls`` (the original misleadingly named it ``self``).
    def __new__(cls, starts, ends):
        # NOTE(review): stub — currently returns None instead of an
        # Index instance, so instantiation yields None.
        pass

    def dtype(self):
        # NOTE(review): defined as a method although Index.dtype is a
        # property elsewhere; left unchanged pending a real implementation.
        return self.values.dtype
-------------------------------------------------------------------------------- /pandas/tseries/tests/data/daterange_073.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/tseries/tests/data/daterange_073.pickle -------------------------------------------------------------------------------- /pandas/tseries/tests/data/frame.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/tseries/tests/data/frame.pickle -------------------------------------------------------------------------------- /pandas/tseries/tests/data/series.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/tseries/tests/data/series.pickle -------------------------------------------------------------------------------- /pandas/tseries/tests/data/series_daterange0.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/pandas/tseries/tests/data/series_daterange0.pickle -------------------------------------------------------------------------------- /pandas/tslib.pxd: -------------------------------------------------------------------------------- 1 | from numpy cimport ndarray, int64_t 2 | 3 | cdef convert_to_tsobject(object, object, object) 4 | cdef convert_to_timedelta64(object, object, object) 5 | -------------------------------------------------------------------------------- /pandas/util/__init__.py: -------------------------------------------------------------------------------- 
""" various miscellaneous utilities """


def is_little_endian():
    """Return True if the running interpreter is little-endian."""
    import sys
    return sys.byteorder == 'little'


def exclusive(*args):
    """Return True if exactly one of *args* is not None."""
    # Generator instead of a throwaway list inside sum().
    count = sum(arg is not None for arg in args)
    return count == 1
def _zip(*args):
    # Build an object-dtype ndarray of the zipped tuples. Assigning through
    # arr[:] keeps each tuple as a single element — np.array(lzip(...))
    # would instead broadcast the tuples into a 2-D array.
    arr = np.empty(N, dtype=object)  # N: module-level row count (100000)
    arr[:] = lzip(*args)
    return arr


def _zip2(*args):
    # Same result as _zip, but via pandas' C helper for comparison.
    return lib.list_to_object_array(lzip(*args))
range(it): 36 | df.reindex(columns=new_cols) 37 | 38 | 39 | def bench_join_index(df, it=10): 40 | left = df.reindex(index=np.arange(0, N, 2), 41 | columns=np.arange(K // 2)) 42 | right = df.reindex(columns=np.arange(K // 2 + 1, K)) 43 | for i in range(it): 44 | joined = left.join(right) 45 | 46 | if __name__ == '__main__': 47 | df = horribly_unconsolidated() 48 | left = df.reindex(index=np.arange(0, N, 2), 49 | columns=np.arange(K // 2)) 50 | right = df.reindex(columns=np.arange(K // 2 + 1, K)) 51 | bench_join_index(df) 52 | -------------------------------------------------------------------------------- /scripts/boxplot_test.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | import random 4 | import pandas.util.testing as tm 5 | tm.N = 1000 6 | df = tm.makeTimeDataFrame() 7 | import string 8 | foo = list(string.letters[:5]) * 200 9 | df['indic'] = list(string.letters[:5]) * 200 10 | random.shuffle(foo) 11 | df['indic2'] = foo 12 | df.boxplot(by=['indic', 'indic2'], fontsize=8, rot=90) 13 | 14 | plt.show() 15 | -------------------------------------------------------------------------------- /scripts/count_code.sh: -------------------------------------------------------------------------------- 1 | cloc pandas --force-lang=Python,pyx --not-match-f="parser.c|lib.c|tslib.c|sandbox.c|hashtable.c|sparse.c|algos.c|index.c" -------------------------------------------------------------------------------- /scripts/faster_xs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import pandas.util.testing as tm 4 | 5 | from pandas.core.internals import _interleaved_dtype 6 | 7 | df = tm.makeDataFrame() 8 | 9 | df['E'] = 'foo' 10 | df['F'] = 'foo' 11 | df['G'] = 2 12 | df['H'] = df['A'] > 0 13 | 14 | blocks = df._data.blocks 15 | items = df.columns 16 | -------------------------------------------------------------------------------- 
/scripts/git_code_churn.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import os 3 | import re 4 | import sys 5 | 6 | import numpy as np 7 | 8 | from pandas import * 9 | 10 | 11 | if __name__ == '__main__': 12 | from vbench.git import GitRepo 13 | repo = GitRepo('/Users/wesm/code/pandas') 14 | churn = repo.get_churn_by_file() 15 | 16 | file_include = [] 17 | for path in churn.major_axis: 18 | if path.endswith('.pyx') or path.endswith('.py'): 19 | file_include.append(path) 20 | commits_include = [sha for sha in churn.minor_axis 21 | if 'LF' not in repo.messages[sha]] 22 | commits_include.remove('dcf3490') 23 | 24 | clean_churn = churn.reindex(major=file_include, minor=commits_include) 25 | 26 | by_commit = clean_churn.sum('major').sum(1) 27 | 28 | by_date = by_commit.groupby(repo.commit_date).sum() 29 | 30 | by_date = by_date.drop([datetime(2011, 6, 10)]) 31 | 32 | # clean out days where I touched Cython 33 | 34 | by_date = by_date[by_date < 5000] 35 | -------------------------------------------------------------------------------- /scripts/groupby_sample.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | import numpy as np 3 | import string 4 | import pandas.compat as compat 5 | 6 | g1 = np.array(list(string.letters))[:-1] 7 | g2 = np.arange(510) 8 | df_small = DataFrame({'group1': ["a", "b", "a", "a", "b", "c", "c", "c", "c", 9 | "c", "a", "a", "a", "b", "b", "b", "b"], 10 | 'group2': [1, 2, 3, 4, 1, 3, 5, 6, 5, 4, 1, 2, 3, 4, 3, 2, 1], 11 | 'value': ["apple", "pear", "orange", "apple", 12 | "banana", "durian", "lemon", "lime", 13 | "raspberry", "durian", "peach", "nectarine", 14 | "banana", "lemon", "guava", "blackberry", 15 | "grape"]}) 16 | value = df_small['value'].values.repeat(3) 17 | df = DataFrame({'group1': g1.repeat(4000 * 5), 18 | 'group2': np.tile(g2, 400 * 5), 19 | 'value': value.repeat(4000 * 5)}) 20 | 21 | 22 | def 
random_sample(): 23 | grouped = df.groupby(['group1', 'group2'])['value'] 24 | from random import choice 25 | choose = lambda group: choice(group.index) 26 | indices = grouped.apply(choose) 27 | return df.reindex(indices) 28 | 29 | 30 | def random_sample_v2(): 31 | grouped = df.groupby(['group1', 'group2'])['value'] 32 | from random import choice 33 | choose = lambda group: choice(group.index) 34 | indices = [choice(v) for k, v in compat.iteritems(grouped.groups)] 35 | return df.reindex(indices) 36 | 37 | 38 | def do_shuffle(arr): 39 | from random import shuffle 40 | result = arr.copy().values 41 | shuffle(result) 42 | return result 43 | 44 | 45 | def shuffle_uri(df, grouped): 46 | perm = np.r_[tuple([np.random.permutation( 47 | idxs) for idxs in compat.itervalues(grouped.groups)])] 48 | df['state_permuted'] = np.asarray(df.ix[perm]['value']) 49 | 50 | df2 = df.copy() 51 | grouped = df2.groupby('group1') 52 | shuffle_uri(df2, grouped) 53 | 54 | df2['state_perm'] = grouped['value'].transform(do_shuffle) 55 | -------------------------------------------------------------------------------- /scripts/groupby_speed.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from pandas import * 3 | 4 | rng = DatetimeIndex('1/3/2011', '11/30/2011', offset=datetools.Minute()) 5 | 6 | df = DataFrame(np.random.randn(len(rng), 5), index=rng, 7 | columns=list('OHLCV')) 8 | 9 | rng5 = DatetimeIndex('1/3/2011', '11/30/2011', offset=datetools.Minute(5)) 10 | gp = rng5.asof 11 | grouped = df.groupby(gp) 12 | 13 | 14 | def get1(dt): 15 | k = gp(dt) 16 | return grouped.get_group(k) 17 | 18 | 19 | def get2(dt): 20 | k = gp(dt) 21 | return df.ix[grouped.groups[k]] 22 | 23 | 24 | def f(): 25 | for i, date in enumerate(df.index): 26 | if i % 10000 == 0: 27 | print(i) 28 | get1(date) 29 | 30 | 31 | def g(): 32 | for i, date in enumerate(df.index): 33 | if i % 10000 == 0: 34 | print(i) 35 | get2(date) 36 | 
-------------------------------------------------------------------------------- /scripts/hdfstore_panel_perf.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | from pandas.util.testing import rands 3 | from pandas.compat import range 4 | 5 | i, j, k = 7, 771, 5532 6 | 7 | panel = Panel(np.random.randn(i, j, k), 8 | items=[rands(10) for _ in range(i)], 9 | major_axis=DatetimeIndex('1/1/2000', periods=j, 10 | offset=datetools.Minute()), 11 | minor_axis=[rands(10) for _ in range(k)]) 12 | 13 | 14 | store = HDFStore('test.h5') 15 | store.put('test_panel', panel, table=True) 16 | 17 | retrieved = store['test_panel'] 18 | -------------------------------------------------------------------------------- /scripts/leak.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | from pandas.compat import range 3 | import numpy as np 4 | import pandas.util.testing as tm 5 | import os 6 | import psutil 7 | 8 | pid = os.getpid() 9 | proc = psutil.Process(pid) 10 | 11 | df = DataFrame(index=np.arange(100)) 12 | for i in range(5000): 13 | df[i] = 5 14 | -------------------------------------------------------------------------------- /scripts/parser_magic.py: -------------------------------------------------------------------------------- 1 | from pandas.util.testing import set_trace 2 | import pandas.util.testing as tm 3 | import pandas.compat as compat 4 | 5 | from pandas import * 6 | import ast 7 | import inspect 8 | import sys 9 | 10 | 11 | def merge(a, b): 12 | f, args, _ = parse_stmt(inspect.currentframe().f_back) 13 | return DataFrame({args[0]: a, 14 | args[1]: b}) 15 | 16 | 17 | def parse_stmt(frame): 18 | info = inspect.getframeinfo(frame) 19 | call = info[-2][0] 20 | mod = ast.parse(call) 21 | body = mod.body[0] 22 | if isinstance(body, (ast.Assign, ast.Expr)): 23 | call = body.value 24 | elif isinstance(body, ast.Call): 25 | call = body 26 | return 
_parse_call(call) 27 | 28 | 29 | def _parse_call(call): 30 | func = _maybe_format_attribute(call.func) 31 | 32 | str_args = [] 33 | for arg in call.args: 34 | if isinstance(arg, ast.Name): 35 | str_args.append(arg.id) 36 | elif isinstance(arg, ast.Call): 37 | formatted = _format_call(arg) 38 | str_args.append(formatted) 39 | 40 | return func, str_args, {} 41 | 42 | 43 | def _format_call(call): 44 | func, args, kwds = _parse_call(call) 45 | content = '' 46 | if args: 47 | content += ', '.join(args) 48 | if kwds: 49 | fmt_kwds = ['%s=%s' % item for item in compat.iteritems(kwds)] 50 | joined_kwds = ', '.join(fmt_kwds) 51 | if args: 52 | content = content + ', ' + joined_kwds 53 | else: 54 | content += joined_kwds 55 | return '%s(%s)' % (func, content) 56 | 57 | 58 | def _maybe_format_attribute(name): 59 | if isinstance(name, ast.Attribute): 60 | return _format_attribute(name) 61 | return name.id 62 | 63 | 64 | def _format_attribute(attr): 65 | obj = attr.value 66 | if isinstance(attr.value, ast.Attribute): 67 | obj = _format_attribute(attr.value) 68 | else: 69 | obj = obj.id 70 | return '.'.join((obj, attr.attr)) 71 | 72 | a = tm.makeTimeSeries() 73 | b = tm.makeTimeSeries() 74 | df = merge(a, b) 75 | -------------------------------------------------------------------------------- /scripts/preepoch_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pandas import * 3 | 4 | 5 | def panda_test(): 6 | 7 | # generate some data 8 | data = np.random.rand(50, 5) 9 | # generate some dates 10 | dates = DatetimeIndex('1/1/1969', periods=50) 11 | # generate column headings 12 | cols = ['A', 'B', 'C', 'D', 'E'] 13 | 14 | df = DataFrame(data, index=dates, columns=cols) 15 | 16 | # save to HDF5Store 17 | store = HDFStore('bugzilla.h5', mode='w') 18 | store['df'] = df # This gives: OverflowError: mktime argument out of range 19 | store.close() 20 | 21 | 22 | if __name__ == '__main__': 23 | panda_test() 24 | 
-------------------------------------------------------------------------------- /scripts/roll_median_leak.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from pandas import * 3 | 4 | import numpy as np 5 | import os 6 | 7 | from vbench.api import Benchmark 8 | from pandas.util.testing import rands 9 | from pandas.compat import range 10 | import pandas.lib as lib 11 | import pandas._sandbox as sbx 12 | import time 13 | 14 | import psutil 15 | 16 | pid = os.getpid() 17 | proc = psutil.Process(pid) 18 | 19 | lst = SparseList() 20 | lst.append([5] * 10000) 21 | lst.append(np.repeat(np.nan, 1000000)) 22 | 23 | for _ in range(10000): 24 | print(proc.get_memory_info()) 25 | sdf = SparseDataFrame({'A': lst.to_array()}) 26 | chunk = sdf[sdf['A'] == 5] 27 | -------------------------------------------------------------------------------- /scripts/runtests.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | print(os.getpid()) 4 | import nose 5 | nose.main('pandas.core') 6 | -------------------------------------------------------------------------------- /scripts/test_py25.bat: -------------------------------------------------------------------------------- 1 | SET PATH=C:\MinGW\bin;C:\Python25;C:\Python25\Scripts;%PATH% 2 | del pandas\_tseries.pyd 3 | del pandas\_sparse.pyd 4 | del pandas\src\tseries.c 5 | del pandas\src\sparse.c 6 | python setup.py clean 7 | python setup.py build_ext -c mingw32 --inplace 8 | nosetests pandas -------------------------------------------------------------------------------- /scripts/test_py26.bat: -------------------------------------------------------------------------------- 1 | SET PATH=C:\MinGW\bin;E:\Python26;E:\Python26\Scripts;%PATH% 2 | del pandas\_tseries.pyd 3 | del pandas\_sparse.pyd 4 | del pandas\src\tseries.c 5 | del pandas\src\sparse.c 6 | python setup.py 
clean 7 | python setup.py build_ext -c mingw32 --inplace 8 | nosetests pandas -------------------------------------------------------------------------------- /scripts/test_py27.bat: -------------------------------------------------------------------------------- 1 | SET PATH=C:\MinGW\bin;C:\Python27;C:\Python27\Scripts;%PATH% 2 | 3 | python setup.py clean 4 | python setup.py build_ext -c mingw32 --inplace 5 | 6 | nosetests pandas -------------------------------------------------------------------------------- /scripts/test_py31.bat: -------------------------------------------------------------------------------- 1 | set BASE=E:\python31 2 | set PYTHON=%BASE%\python.exe 3 | set NOSETESTS=%BASE%\scripts\nosetests-script.py 4 | 5 | %PYTHON% setup.py install 6 | cd bench 7 | %PYTHON% %NOSETESTS% pandas 8 | cd .. -------------------------------------------------------------------------------- /scripts/test_py32.bat: -------------------------------------------------------------------------------- 1 | set BASE=E:\python32 2 | set PYTHON=%BASE%\python.exe 3 | set NOSETESTS=%BASE%\scripts\nosetests-script.py 4 | 5 | %PYTHON% setup.py install 6 | cd bench 7 | %PYTHON% %NOSETESTS% pandas 8 | cd .. -------------------------------------------------------------------------------- /scripts/touchup_gh_issues.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | from collections import OrderedDict 6 | import sys 7 | import re 8 | 9 | """ 10 | Reads in stdin, replace all occurences of '#num' or 'GH #num' with 11 | links to github issue. dumps the issue anchors before the next 12 | section header 13 | """ 14 | 15 | pat = "((?:\s*GH\s*)?)#(\d{3,4})([^_]|$)?" 16 | rep_pat = r"\1GH\2_\3" 17 | anchor_pat = ".. 
_GH{id}: https://github.com/pydata/pandas/issues/{id}" 18 | section_pat = "^pandas\s[\d\.]+\s*$" 19 | 20 | 21 | def main(): 22 | issues = OrderedDict() 23 | while True: 24 | 25 | line = sys.stdin.readline() 26 | if not line: 27 | break 28 | 29 | if re.search(section_pat, line): 30 | for id in issues: 31 | print(anchor_pat.format(id=id).rstrip()) 32 | if issues: 33 | print("\n") 34 | issues = OrderedDict() 35 | 36 | for m in re.finditer(pat, line): 37 | id = m.group(2) 38 | if id not in issues: 39 | issues[id] = True 40 | print(re.sub(pat, rep_pat, line).rstrip()) 41 | pass 42 | 43 | if __name__ == "__main__": 44 | main() 45 | -------------------------------------------------------------------------------- /scripts/winbuild_py25.bat: -------------------------------------------------------------------------------- 1 | SET PATH=C:\MinGW\bin;C:\Python25;C:\Python25\Scripts;%PATH% 2 | python setup.py build -c mingw32 bdist_wininst 3 | -------------------------------------------------------------------------------- /scripts/winbuild_py27.bat: -------------------------------------------------------------------------------- 1 | SET PATH=C:\MinGW\bin;C:\Python27;C:\Python27\Scripts;%PATH% 2 | python setup.py build -c mingw32 bdist_wininst 3 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_26-32.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 26-32" 3 | 4 | 5 | title 26-32 build 6 | echo "building" 7 | cd "c:\users\Jeff Reback\documents\github\pandas" 8 | C:\python26-32\python.exe setup.py build > build.26-32.log 2>&1 9 | 10 | echo "installing" 11 | C:\python26-32\python.exe setup.py bdist --formats=wininst > install.26-32.log 2>&1 12 | 13 | echo "testing" 14 | C:\python26-32\scripts\nosetests -A "not slow" build\lib.win32-2.6\pandas > test.26-32.log 2>&1 15 | 16 | echo "versions" 17 | cd build\lib.win32-2.6 18 | C:\python26-32\python.exe 
../../ci/print_versions.py > ../../versions.26-32.log 2>&1 19 | 20 | 21 | exit 22 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_26-64.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 26-64" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x64 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 26-64 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python26-64\python.exe setup.py build > build.26-64.log 2>&1 13 | 14 | echo "installing" 15 | C:\python26-64\python.exe setup.py bdist --formats=wininst > install.26-64.log 2>&1 16 | 17 | echo "testing" 18 | C:\python26-64\scripts\nosetests -A "not slow" build\lib.win-amd64-2.6\pandas > test.26-64.log 2>&1 19 | 20 | echo "versions" 21 | cd build\lib.win-amd64-2.6 22 | C:\python26-64\python.exe ../../ci/print_versions.py > ../../versions.26-64.log 2>&1 23 | 24 | 25 | exit 26 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_27-32.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 27-32" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x86 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 27-32 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python27-32\python.exe setup.py build > build.27-32.log 2>&1 13 | 14 | title "installing" 15 | C:\python27-32\python.exe setup.py bdist --formats=wininst > install.27-32.log 2>&1 16 | 17 | echo "testing" 18 | C:\python27-32\scripts\nosetests -A "not slow" build\lib.win32-2.7\pandas > test.27-32.log 2>&1 19 | 20 | echo "versions" 21 | cd build\lib.win32-2.7 22 | 
C:\python27-32\python.exe ../../ci/print_versions.py > ../../versions.27-32.log 2>&1 23 | 24 | exit 25 | 26 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_27-64.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 27-64" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x64 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 27-64 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python27-64\python.exe setup.py build > build.27-64.log 2>&1 13 | 14 | echo "installing" 15 | C:\python27-64\python.exe setup.py bdist --formats=wininst > install.27-64.log 2>&1 16 | 17 | echo "testing" 18 | C:\python27-64\scripts\nosetests -A "not slow" build\lib.win-amd64-2.7\pandas > test.27-64.log 2>&1 19 | 20 | echo "versions" 21 | cd build\lib.win-amd64-2.7 22 | C:\python27-64\python.exe ../../ci/print_versions.py > ../../versions.27-64.log 2>&1 23 | 24 | exit 25 | 26 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_33-32.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 33-32" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x86 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 33-32 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python33-32\python.exe setup.py build > build.33-32.log 2>&1 13 | 14 | echo "installing" 15 | C:\python33-32\python.exe setup.py bdist --formats=wininst > install.33-32.log 2>&1 16 | 17 | echo "testing" 18 | C:\python33-32\scripts\nosetests -A "not slow" build\lib.win32-3.3\pandas > test.33-32.log 2>&1 19 | 20 | echo "versions" 21 | cd build\lib.win32-3.3 22 | 
C:\python33-32\python.exe ../../ci/print_versions.py > ../../versions.33-32.log 2>&1 23 | 24 | exit 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_33-64.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 33-64" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 33-64 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python33-64\python.exe setup.py build > build.33-64.log 2>&1 13 | 14 | echo "installing" 15 | C:\python33-64\python.exe setup.py bdist --formats=wininst > install.33-64.log 2>&1 16 | 17 | echo "testing" 18 | C:\python33-64\scripts\nosetests -A "not slow" build\lib.win-amd64-3.3\pandas > test.33-64.log 2>&1 19 | 20 | echo "versions" 21 | cd build\lib.win-amd64-3.3 22 | C:\python33-64\python.exe ../../ci/print_versions.py > ../../versions.33-64.log 2>&1 23 | 24 | exit 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_34-32.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 34-32" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x86 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 34-32 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python34-32\python.exe setup.py build > build.34-32.log 2>&1 13 | 14 | echo "installing" 15 | C:\python34-32\python.exe setup.py bdist --formats=wininst > install.34-32.log 2>&1 16 | 17 | echo "testing" 18 | C:\python34-32\scripts\nosetests -A "not slow" build\lib.win32-3.4\pandas > test.34-32.log 2>&1 19 | 20 | echo "versions" 21 | cd 
build\lib.win32-3.4 22 | C:\python34-32\python.exe ../../ci/print_versions.py > ../../versions.34-32.log 2>&1 23 | 24 | exit 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/windows_builder/build_34-64.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo "starting 34-64" 3 | 4 | setlocal EnableDelayedExpansion 5 | set MSSdk=1 6 | CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release 7 | set DISTUTILS_USE_SDK=1 8 | 9 | title 34-64 build 10 | echo "building" 11 | cd "c:\users\Jeff Reback\documents\github\pandas" 12 | C:\python34-64\python.exe setup.py build > build.34-64.log 2>&1 13 | 14 | echo "installing" 15 | C:\python34-64\python.exe setup.py bdist --formats=wininst > install.34-64.log 2>&1 16 | 17 | echo "testing" 18 | C:\python34-64\scripts\nosetests -A "not slow" build\lib.win-amd64-3.4\pandas > test.34-64.log 2>&1 19 | 20 | echo "versions" 21 | cd build\lib.win-amd64-3.4 22 | C:\python34-64\python.exe ../../ci/print_versions.py > ../../versions.34-64.log 2>&1 23 | 24 | exit 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/windows_builder/check_and_build.bat: -------------------------------------------------------------------------------- 1 | set PYTHONPATH=c:/python27-64/lib 2 | c:/python27-64/python.exe c:/Builds/check_and_build.py %1 %2 %3 %4 %5 %6 %7 %8 %9 3 | -------------------------------------------------------------------------------- /scripts/windows_builder/readme.txt: -------------------------------------------------------------------------------- 1 | This is a collection of windows batch scripts (and a python script) 2 | to rebuild the binaries, test, and upload the binaries for public distribution 3 | upon a commit on github.
4 | 5 | Obviously requires that these be setup on windows 6 | Requires an install of Windows SDK 3.5 and 4.0 7 | Full python installs for each version with the deps 8 | 9 | Currently supporting 10 | 11 | 26-32,26-64,27-32,27-64,33-32,33-64,34-32,34-64 12 | 13 | Note that 33 and 34 use the 4.0 SDK, while the other suse 3.5 SDK 14 | 15 | I installed these scripts in C:\Builds 16 | 17 | Installed libaries in C:\Installs 18 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | command -v coverage >/dev/null && coverage erase 3 | command -v python-coverage >/dev/null && python-coverage erase 4 | # nosetests pandas/tests/test_index.py --with-coverage --cover-package=pandas.core --pdb-failure --pdb 5 | #nosetests -w pandas --with-coverage --cover-package=pandas --pdb-failure --pdb #--cover-inclusive 6 | #nosetests -A "not slow" -w pandas/tseries --with-coverage --cover-package=pandas.tseries $* #--cover-inclusive 7 | nosetests -w pandas --with-coverage --cover-package=pandas $* 8 | # nosetests -w pandas/io --with-coverage --cover-package=pandas.io --pdb-failure --pdb 9 | # nosetests -w pandas/core --with-coverage --cover-package=pandas.core --pdb-failure --pdb 10 | # nosetests -w pandas/stats --with-coverage --cover-package=pandas.stats 11 | # coverage run runtests.py 12 | -------------------------------------------------------------------------------- /test_fast.sh: -------------------------------------------------------------------------------- 1 | nosetests -A "not slow and not network" pandas --with-id $* 2 | -------------------------------------------------------------------------------- /test_multi.sh: -------------------------------------------------------------------------------- 1 | nosetests -A "not slow and not network" pandas --processes=4 $* 2 | 
-------------------------------------------------------------------------------- /test_perf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | CURDIR=$(pwd) 4 | BASEDIR=$(cd "$(dirname "$0")"; pwd) 5 | python "$BASEDIR"/vb_suite/test_perf.py $@ 6 | -------------------------------------------------------------------------------- /test_rebuild.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | python setup.py clean 4 | python setup.py build_ext --inplace 5 | coverage erase 6 | # nosetests pandas/tests/test_index.py --with-coverage --cover-package=pandas.core --pdb-failure --pdb 7 | #nosetests -w pandas --with-coverage --cover-package=pandas --pdb-failure --pdb #--cover-inclusive 8 | nosetests -w pandas --with-coverage --cover-package=pandas $* #--cover-inclusive 9 | # nosetests -w pandas/io --with-coverage --cover-package=pandas.io --pdb-failure --pdb 10 | # nosetests -w pandas/core --with-coverage --cover-package=pandas.core --pdb-failure --pdb 11 | # nosetests -w pandas/stats --with-coverage --cover-package=pandas.stats 12 | # coverage run runtests.py 13 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Tox (http://tox.testrun.org/) is a tool for running tests 2 | # in multiple virtualenvs. This configuration file will run the 3 | # test suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory. 
5 | 6 | [tox] 7 | envlist = py26, py27, py32, py33, py34 8 | 9 | [testenv] 10 | deps = 11 | cython 12 | nose 13 | pytz>=2011k 14 | python-dateutil 15 | beautifulsoup4 16 | lxml 17 | openpyxl<2.0.0 18 | xlsxwriter 19 | xlrd 20 | six 21 | sqlalchemy 22 | 23 | # cd to anything but the default {toxinidir} which 24 | # contains the pandas subdirectory and confuses 25 | # nose away from the fresh install in site-packages 26 | changedir = {envdir} 27 | 28 | commands = 29 | # TODO: --exe because of GH #761 30 | {envbindir}/nosetests --exe pandas {posargs:-A "not network and not disabled"} 31 | # cleanup the temp. build dir created by the tox build 32 | # /bin/rm -rf {toxinidir}/build 33 | 34 | # quietly rollback the install. 35 | # Note this line will only be reached if the 36 | # previous lines succeed (in particular, the tests), 37 | # but an uninstall is really only required when 38 | # files are removed from the source tree, in which case, 39 | # stale versions of files will will remain in the venv 40 | # until the next time uninstall is run. 41 | # 42 | # tox should provide a preinstall-commands hook. 
43 | pip uninstall pandas -qy 44 | 45 | [testenv:py26] 46 | deps = 47 | numpy==1.6.1 48 | boto 49 | bigquery 50 | {[testenv]deps} 51 | 52 | [testenv:py27] 53 | deps = 54 | numpy==1.8.1 55 | boto 56 | bigquery 57 | {[testenv]deps} 58 | 59 | [testenv:py32] 60 | deps = 61 | numpy==1.7.1 62 | {[testenv]deps} 63 | 64 | [testenv:py33] 65 | deps = 66 | numpy==1.8.0 67 | {[testenv]deps} 68 | 69 | [testenv:py34] 70 | deps = 71 | numpy==1.8.0 72 | {[testenv]deps} 73 | -------------------------------------------------------------------------------- /vb_suite/.gitignore: -------------------------------------------------------------------------------- 1 | benchmarks.db 2 | build/* 3 | source/vbench/* 4 | source/*.rst -------------------------------------------------------------------------------- /vb_suite/attrs_caching.py: -------------------------------------------------------------------------------- 1 | from vbench.benchmark import Benchmark 2 | 3 | common_setup = """from pandas_vb_common import * 4 | """ 5 | 6 | #---------------------------------------------------------------------- 7 | # DataFrame.index / columns property lookup time 8 | 9 | setup = common_setup + """ 10 | df = DataFrame(np.random.randn(10, 6)) 11 | cur_index = df.index 12 | """ 13 | stmt = "foo = df.index" 14 | 15 | getattr_dataframe_index = Benchmark(stmt, setup, 16 | name="getattr_dataframe_index") 17 | 18 | stmt = "df.index = cur_index" 19 | setattr_dataframe_index = Benchmark(stmt, setup, 20 | name="setattr_dataframe_index") 21 | -------------------------------------------------------------------------------- /vb_suite/ctors.py: -------------------------------------------------------------------------------- 1 | from vbench.benchmark import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | """ 6 | 7 | #---------------------------------------------------------------------- 8 | # Series constructors 9 | 10 | setup = common_setup + """ 11 | data = 
np.random.randn(100) 12 | index = Index(np.arange(100)) 13 | """ 14 | 15 | ctor_series_ndarray = \ 16 | Benchmark("Series(data, index=index)", setup=setup, 17 | name='series_constructor_ndarray') 18 | 19 | setup = common_setup + """ 20 | arr = np.random.randn(100, 100) 21 | """ 22 | 23 | ctor_frame_ndarray = \ 24 | Benchmark("DataFrame(arr)", setup=setup, 25 | name='frame_constructor_ndarray') 26 | 27 | setup = common_setup + """ 28 | data = np.array(['foo', 'bar', 'baz'], dtype=object) 29 | """ 30 | 31 | ctor_index_array_string = Benchmark('Index(data)', setup=setup) 32 | 33 | # index constructors 34 | setup = common_setup + """ 35 | s = Series([Timestamp('20110101'),Timestamp('20120101'),Timestamp('20130101')]*1000) 36 | """ 37 | index_from_series_ctor = Benchmark('Index(s)', setup=setup) 38 | 39 | dtindex_from_series_ctor = Benchmark('DatetimeIndex(s)', setup=setup) 40 | -------------------------------------------------------------------------------- /vb_suite/generate_rst_files.py: -------------------------------------------------------------------------------- 1 | from suite import benchmarks, generate_rst_files 2 | generate_rst_files(benchmarks) 3 | -------------------------------------------------------------------------------- /vb_suite/inference.py: -------------------------------------------------------------------------------- 1 | from vbench.api import Benchmark 2 | from datetime import datetime 3 | import sys 4 | 5 | # from GH 7332 6 | 7 | setup = """from pandas_vb_common import * 8 | import pandas as pd 9 | N = 500000 10 | df_int64 = DataFrame(dict(A = np.arange(N,dtype='int64'), B = np.arange(N,dtype='int64'))) 11 | df_int32 = DataFrame(dict(A = np.arange(N,dtype='int32'), B = np.arange(N,dtype='int32'))) 12 | df_uint32 = DataFrame(dict(A = np.arange(N,dtype='uint32'), B = np.arange(N,dtype='uint32'))) 13 | df_float64 = DataFrame(dict(A = np.arange(N,dtype='float64'), B = np.arange(N,dtype='float64'))) 14 | df_float32 = DataFrame(dict(A = 
np.arange(N,dtype='float32'), B = np.arange(N,dtype='float32'))) 15 | df_datetime64 = DataFrame(dict(A = pd.to_datetime(np.arange(N,dtype='int64'),unit='ms'), 16 | B = pd.to_datetime(np.arange(N,dtype='int64'),unit='ms'))) 17 | df_timedelta64 = DataFrame(dict(A = df_datetime64['A']-df_datetime64['B'], 18 | B = df_datetime64['B'])) 19 | """ 20 | 21 | dtype_infer_int64 = Benchmark('df_int64["A"] + df_int64["B"]', setup, 22 | start_date=datetime(2014, 1, 1)) 23 | dtype_infer_int32 = Benchmark('df_int32["A"] + df_int32["B"]', setup, 24 | start_date=datetime(2014, 1, 1)) 25 | dtype_infer_uint32 = Benchmark('df_uint32["A"] + df_uint32["B"]', setup, 26 | start_date=datetime(2014, 1, 1)) 27 | dtype_infer_float64 = Benchmark('df_float64["A"] + df_float64["B"]', setup, 28 | start_date=datetime(2014, 1, 1)) 29 | dtype_infer_float32 = Benchmark('df_float32["A"] + df_float32["B"]', setup, 30 | start_date=datetime(2014, 1, 1)) 31 | dtype_infer_datetime64 = Benchmark('df_datetime64["A"] - df_datetime64["B"]', setup, 32 | start_date=datetime(2014, 1, 1)) 33 | dtype_infer_timedelta64_1 = Benchmark('df_timedelta64["A"] + df_timedelta64["B"]', setup, 34 | start_date=datetime(2014, 1, 1)) 35 | dtype_infer_timedelta64_2 = Benchmark('df_timedelta64["A"] + df_timedelta64["A"]', setup, 36 | start_date=datetime(2014, 1, 1)) 37 | -------------------------------------------------------------------------------- /vb_suite/measure_memory_consumption.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | """Short one-line summary 7 | 8 | long summary 9 | """ 10 | 11 | 12 | def main(): 13 | import shutil 14 | import tempfile 15 | import warnings 16 | 17 | from pandas import Series 18 | 19 | from vbench.api import BenchmarkRunner 20 | from suite import (REPO_PATH, BUILD, DB_PATH, PREPARE, 21 | dependencies, benchmarks) 22 | 23 | from memory_profiler import 
memory_usage 24 | 25 | warnings.filterwarnings('ignore', category=FutureWarning) 26 | 27 | try: 28 | TMP_DIR = tempfile.mkdtemp() 29 | runner = BenchmarkRunner( 30 | benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, 31 | TMP_DIR, PREPARE, always_clean=True, 32 | # run_option='eod', start_date=START_DATE, 33 | module_dependencies=dependencies) 34 | results = {} 35 | for b in runner.benchmarks: 36 | k = b.name 37 | try: 38 | vs = memory_usage((b.run,)) 39 | v = max(vs) 40 | # print(k, v) 41 | results[k] = v 42 | except Exception as e: 43 | print("Exception caught in %s\n" % k) 44 | print(str(e)) 45 | 46 | s = Series(results) 47 | s.sort() 48 | print((s)) 49 | 50 | finally: 51 | shutil.rmtree(TMP_DIR) 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | -------------------------------------------------------------------------------- /vb_suite/miscellaneous.py: -------------------------------------------------------------------------------- 1 | from vbench.benchmark import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | """ 6 | 7 | #---------------------------------------------------------------------- 8 | # cache_readonly 9 | 10 | setup = common_setup + """ 11 | from pandas.util.decorators import cache_readonly 12 | 13 | class Foo: 14 | 15 | @cache_readonly 16 | def prop(self): 17 | return 5 18 | obj = Foo() 19 | """ 20 | misc_cache_readonly = Benchmark("obj.prop", setup, name="misc_cache_readonly", 21 | ncalls=2000000) 22 | 23 | #---------------------------------------------------------------------- 24 | # match 25 | 26 | setup = common_setup + """ 27 | from pandas.util.testing import rands 28 | 29 | uniques = np.array([rands(10) for _ in xrange(1000)], dtype='O') 30 | all = uniques.repeat(10) 31 | """ 32 | 33 | match_strings = Benchmark("match(all, uniques)", setup, 34 | start_date=datetime(2012, 5, 12)) 35 | -------------------------------------------------------------------------------- 
/vb_suite/pandas_vb_common.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | from pandas.util.testing import rands 3 | from datetime import timedelta 4 | from numpy.random import randn 5 | from numpy.random import randint 6 | from numpy.random import permutation 7 | import pandas.util.testing as tm 8 | import random 9 | import numpy as np 10 | 11 | try: 12 | import pandas._tseries as lib 13 | except: 14 | import pandas.lib as lib 15 | 16 | try: 17 | Panel = WidePanel 18 | except Exception: 19 | pass 20 | 21 | # didn't add to namespace until later 22 | try: 23 | from pandas.core.index import MultiIndex 24 | except ImportError: 25 | pass 26 | -------------------------------------------------------------------------------- /vb_suite/panel_methods.py: -------------------------------------------------------------------------------- 1 | from vbench.api import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | """ 6 | 7 | #---------------------------------------------------------------------- 8 | # shift 9 | 10 | setup = common_setup + """ 11 | index = date_range(start="2000", freq="D", periods=1000) 12 | panel = Panel(np.random.randn(100, len(index), 1000)) 13 | """ 14 | 15 | panel_shift = Benchmark('panel.shift(1)', setup, 16 | start_date=datetime(2012, 1, 12)) 17 | 18 | panel_shift_minor = Benchmark('panel.shift(1, axis="minor")', setup, 19 | start_date=datetime(2012, 1, 12)) 20 | 21 | panel_pct_change_major = Benchmark('panel.pct_change(1, axis="major")', setup, 22 | start_date=datetime(2014, 4, 19)) 23 | 24 | panel_pct_change_minor = Benchmark('panel.pct_change(1, axis="minor")', setup, 25 | start_date=datetime(2014, 4, 19)) 26 | 27 | panel_pct_change_items = Benchmark('panel.pct_change(1, axis="items")', setup, 28 | start_date=datetime(2014, 4, 19)) 29 | -------------------------------------------------------------------------------- 
/vb_suite/plotting.py: -------------------------------------------------------------------------------- 1 | from vbench.benchmark import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | 6 | try: 7 | from pandas import date_range 8 | except ImportError: 9 | def date_range(start=None, end=None, periods=None, freq=None): 10 | return DatetimeIndex(start, end, periods=periods, offset=freq) 11 | 12 | """ 13 | 14 | #----------------------------------------------------------------------------- 15 | # Timeseries plotting 16 | 17 | setup = common_setup + """ 18 | N = 2000 19 | M = 5 20 | df = DataFrame(np.random.randn(N,M), index=date_range('1/1/1975', periods=N)) 21 | """ 22 | 23 | plot_timeseries_period = Benchmark("df.plot()", setup=setup, 24 | name='plot_timeseries_period') 25 | 26 | -------------------------------------------------------------------------------- /vb_suite/replace.py: -------------------------------------------------------------------------------- 1 | from vbench.api import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | from datetime import timedelta 6 | 7 | N = 1000000 8 | 9 | try: 10 | rng = date_range('1/1/2000', periods=N, freq='min') 11 | except NameError: 12 | rng = DatetimeIndex('1/1/2000', periods=N, offset=datetools.Minute()) 13 | date_range = DateRange 14 | 15 | ts = Series(np.random.randn(N), index=rng) 16 | """ 17 | 18 | large_dict_setup = """from pandas_vb_common import * 19 | from pandas.compat import range 20 | n = 10 ** 6 21 | start_value = 10 ** 5 22 | to_rep = dict((i, start_value + i) for i in range(n)) 23 | s = Series(np.random.randint(n, size=10 ** 3)) 24 | """ 25 | 26 | replace_fillna = Benchmark('ts.fillna(0., inplace=True)', common_setup, 27 | name='replace_fillna', 28 | start_date=datetime(2012, 4, 4)) 29 | replace_replacena = Benchmark('ts.replace(np.nan, 0., inplace=True)', 30 | common_setup, 31 | 
name='replace_replacena', 32 | start_date=datetime(2012, 5, 15)) 33 | replace_large_dict = Benchmark('s.replace(to_rep, inplace=True)', 34 | large_dict_setup, 35 | name='replace_large_dict', 36 | start_date=datetime(2014, 4, 6)) 37 | -------------------------------------------------------------------------------- /vb_suite/run_suite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from vbench.api import BenchmarkRunner 3 | from suite import * 4 | 5 | 6 | def run_process(): 7 | runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_URL, 8 | BUILD, DB_PATH, TMP_DIR, PREPARE, 9 | always_clean=True, 10 | run_option='eod', start_date=START_DATE, 11 | module_dependencies=dependencies) 12 | runner.run() 13 | 14 | if __name__ == '__main__': 15 | run_process() 16 | -------------------------------------------------------------------------------- /vb_suite/series_methods.py: -------------------------------------------------------------------------------- 1 | from vbench.api import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | """ 6 | 7 | setup = common_setup + """ 8 | s1 = Series(np.random.randn(10000)) 9 | s2 = Series(np.random.randint(1, 10, 10000)) 10 | """ 11 | 12 | series_nlargest1 = Benchmark('s1.nlargest(3, take_last=True);' 13 | 's1.nlargest(3, take_last=False)', 14 | setup, 15 | start_date=datetime(2014, 1, 25)) 16 | series_nlargest2 = Benchmark('s2.nlargest(3, take_last=True);' 17 | 's2.nlargest(3, take_last=False)', 18 | setup, 19 | start_date=datetime(2014, 1, 25)) 20 | 21 | series_nsmallest2 = Benchmark('s1.nsmallest(3, take_last=True);' 22 | 's1.nsmallest(3, take_last=False)', 23 | setup, 24 | start_date=datetime(2014, 1, 25)) 25 | 26 | series_nsmallest2 = Benchmark('s2.nsmallest(3, take_last=True);' 27 | 's2.nsmallest(3, take_last=False)', 28 | setup, 29 | start_date=datetime(2014, 1, 25)) 30 | 
-------------------------------------------------------------------------------- /vb_suite/source/_static/stub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/vb_suite/source/_static/stub -------------------------------------------------------------------------------- /vb_suite/source/themes/agogo/static/bgfooter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/vb_suite/source/themes/agogo/static/bgfooter.png -------------------------------------------------------------------------------- /vb_suite/source/themes/agogo/static/bgtop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/pandas/db8533f90200c689c84e9620492eed0f3d6da016/vb_suite/source/themes/agogo/static/bgtop.png -------------------------------------------------------------------------------- /vb_suite/source/themes/agogo/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | stylesheet = agogo.css 4 | pygments_style = tango 5 | 6 | [options] 7 | bodyfont = "Verdana", Arial, sans-serif 8 | headerfont = "Georgia", "Times New Roman", serif 9 | pagewidth = 70em 10 | documentwidth = 50em 11 | sidebarwidth = 20em 12 | bgcolor = #eeeeec 13 | headerbg = url(bgtop.png) top left repeat-x 14 | footerbg = url(bgfooter.png) top left repeat-x 15 | linkcolor = #ce5c00 16 | headercolor1 = #204a87 17 | headercolor2 = #3465a4 18 | headerlinkcolor = #fcaf3e 19 | textalign = justify -------------------------------------------------------------------------------- /vb_suite/sparse.py: -------------------------------------------------------------------------------- 1 | from vbench.benchmark import Benchmark 2 | from datetime import 
datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | """ 6 | 7 | #---------------------------------------------------------------------- 8 | 9 | setup = common_setup + """ 10 | from pandas.core.sparse import SparseSeries, SparseDataFrame 11 | 12 | K = 50 13 | N = 50000 14 | rng = np.asarray(date_range('1/1/2000', periods=N, 15 | freq='T')) 16 | 17 | # rng2 = np.asarray(rng).astype('M8[ns]').astype('i8') 18 | 19 | series = {} 20 | for i in range(1, K + 1): 21 | data = np.random.randn(N)[:-i] 22 | this_rng = rng[:-i] 23 | data[100:] = np.nan 24 | series[i] = SparseSeries(data, index=this_rng) 25 | """ 26 | stmt = "SparseDataFrame(series)" 27 | 28 | bm_sparse1 = Benchmark(stmt, setup, name="sparse_series_to_frame", 29 | start_date=datetime(2011, 6, 1)) 30 | 31 | 32 | setup = common_setup + """ 33 | from pandas.core.sparse import SparseDataFrame 34 | """ 35 | 36 | stmt = "SparseDataFrame(columns=np.arange(100), index=np.arange(1000))" 37 | 38 | sparse_constructor = Benchmark(stmt, setup, name="sparse_frame_constructor", 39 | start_date=datetime(2012, 6, 1)) 40 | -------------------------------------------------------------------------------- /vb_suite/test.py: -------------------------------------------------------------------------------- 1 | from pandas import * 2 | import matplotlib.pyplot as plt 3 | 4 | import sqlite3 5 | 6 | from vbench.git import GitRepo 7 | 8 | 9 | REPO_PATH = '/home/adam/code/pandas' 10 | repo = GitRepo(REPO_PATH) 11 | 12 | con = sqlite3.connect('vb_suite/benchmarks.db') 13 | 14 | bmk = '36900a889961162138c140ce4ae3c205' 15 | # bmk = '9d7b8c04b532df6c2d55ef497039b0ce' 16 | bmk = '4481aa4efa9926683002a673d2ed3dac' 17 | bmk = '00593cd8c03d769669d7b46585161726' 18 | bmk = '3725ab7cd0a0657d7ae70f171c877cea' 19 | bmk = '3cd376d6d6ef802cdea49ac47a67be21' 20 | bmk2 = '459225186023853494bc345fd180f395' 21 | bmk = 'c22ca82e0cfba8dc42595103113c7da3' 22 | bmk = 'e0e651a8e9fbf0270ab68137f8b9df5f' 23 | bmk = 
'96bda4b9a60e17acf92a243580f2a0c3' 24 | 25 | 26 | def get_results(bmk): 27 | results = con.execute( 28 | "select * from results where checksum='%s'" % bmk).fetchall() 29 | x = Series(dict((t[1], t[3]) for t in results)) 30 | x.index = x.index.map(repo.timestamps.get) 31 | x = x.sort_index() 32 | return x 33 | 34 | x = get_results(bmk) 35 | 36 | 37 | def graph1(): 38 | dm_getitem = get_results('459225186023853494bc345fd180f395') 39 | dm_getvalue = get_results('c22ca82e0cfba8dc42595103113c7da3') 40 | 41 | plt.figure() 42 | ax = plt.gca() 43 | 44 | dm_getitem.plot(label='df[col][idx]', ax=ax) 45 | dm_getvalue.plot(label='df.get_value(idx, col)', ax=ax) 46 | 47 | plt.ylabel('ms') 48 | plt.legend(loc='best') 49 | 50 | 51 | def graph2(): 52 | bm = get_results('96bda4b9a60e17acf92a243580f2a0c3') 53 | plt.figure() 54 | ax = plt.gca() 55 | 56 | bm.plot(ax=ax) 57 | plt.ylabel('ms') 58 | 59 | bm = get_results('36900a889961162138c140ce4ae3c205') 60 | fig = plt.figure() 61 | ax = plt.gca() 62 | bm.plot(ax=ax) 63 | fig.autofmt_xdate() 64 | 65 | plt.xlim([bm.dropna().index[0] - datetools.MonthEnd(), 66 | bm.dropna().index[-1] + datetools.MonthEnd()]) 67 | plt.ylabel('ms') 68 | -------------------------------------------------------------------------------- /vb_suite/timedelta.py: -------------------------------------------------------------------------------- 1 | from vbench.api import Benchmark 2 | from datetime import datetime 3 | 4 | common_setup = """from pandas_vb_common import * 5 | from pandas import to_timedelta 6 | """ 7 | 8 | #---------------------------------------------------------------------- 9 | # conversion 10 | 11 | setup = common_setup + """ 12 | arr = np.random.randint(0,1000,size=10000) 13 | """ 14 | 15 | stmt = "to_timedelta(arr,unit='s')" 16 | timedelta_convert_int = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1)) 17 | 18 | setup = common_setup + """ 19 | arr = np.random.randint(0,1000,size=10000) 20 | arr = [ '{0} days'.format(i) for i in arr ] 21 | 
""" 22 | 23 | stmt = "to_timedelta(arr)" 24 | timedelta_convert_string = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1)) 25 | 26 | setup = common_setup + """ 27 | arr = np.random.randint(0,60,size=10000) 28 | arr = [ '00:00:{0:02d}'.format(i) for i in arr ] 29 | """ 30 | 31 | stmt = "to_timedelta(arr)" 32 | timedelta_convert_string_seconds = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1)) 33 | --------------------------------------------------------------------------------