├── .gitignore ├── README.md ├── example ├── readme.txt └── tokyo_gwr.py └── pygwr ├── __init__.py ├── gaussian.py ├── gwr.py └── gwstatsmodels ├── __init__.py ├── api.py ├── base ├── __init__.py ├── data.py ├── model.py ├── tests │ ├── __init__.py │ ├── test_data.py │ └── test_shrink_pickle.py └── wrapper.py ├── compatnp ├── __init__.py ├── collections.py ├── iter_compat.py ├── ordereddict.py ├── py3k.py └── tests │ └── test_itercompat.py ├── datasets ├── COPYING ├── README.txt ├── __init__.py ├── anes96 │ ├── __init__.py │ ├── anes96.csv │ ├── data.py │ └── src │ │ └── anes96.csv ├── ccard │ ├── R_wls.s │ ├── __init__.py │ ├── ccard.csv │ ├── data.py │ └── src │ │ ├── ccard.csv │ │ └── names.txt ├── committee │ ├── R_committee.s │ ├── __init__.py │ ├── committee.csv │ ├── data.py │ └── src │ │ └── committee.dat ├── copper │ ├── __init__.py │ ├── copper.csv │ ├── data.py │ └── src │ │ └── copper.dat ├── cpunish │ ├── R_cpunish.s │ ├── __init__.py │ ├── cpunish.csv │ ├── data.py │ └── src │ │ └── cpunish.dat ├── elnino │ ├── __init__.py │ ├── data.py │ ├── elnino.csv │ └── src │ │ └── elnino.dat ├── grunfeld │ ├── __init__.py │ ├── data.py │ ├── grunfeld.csv │ └── src │ │ └── grunfeld.csv ├── longley │ ├── R_gls.s │ ├── R_lm.s │ ├── __init__.py │ ├── data.py │ └── longley.csv ├── macrodata │ ├── __init__.py │ ├── data.py │ ├── macrodata.csv │ ├── macrodata.dta │ └── src │ │ ├── macrodata.xls │ │ └── macrodata.xls │ │ └── unemp.csv ├── nile │ ├── __init__.py │ ├── data.py │ └── nile.csv ├── randhie │ ├── __init__.py │ ├── data.py │ ├── randhie.csv │ └── src │ │ ├── randdesc.txt │ │ └── randhie.csv ├── scotland │ ├── R_scotvote.s │ ├── __init__.py │ ├── data.py │ ├── scotvote.csv │ └── src │ │ ├── scotland.readme │ │ ├── scotland_births.html │ │ ├── scotland_changes.html │ │ ├── scotland_devolution.html │ │ ├── scotland_econ_summary.html │ │ ├── scotland_economics.html │ │ ├── scotland_education.html │ │ ├── scotland_housing.html │ │ ├── scotland_population.csv │ │ 
├── scotland_population.html │ │ ├── scotvote.csv │ │ └── scotvote.dat ├── spector │ ├── __init__.py │ ├── data.py │ └── spector.csv ├── stackloss │ ├── R_stackloss.s │ ├── __init__.py │ ├── data.py │ └── stackloss.csv ├── star98 │ ├── __init__.py │ ├── data.py │ ├── r_glm.s │ ├── src │ │ ├── star.bi.dat │ │ ├── star98.dat │ │ └── star98.names │ └── star98.csv ├── strikes │ ├── __init__.py │ ├── data.py │ └── strikes.csv ├── sunspots │ ├── R_sunspots.s │ ├── __init__.py │ ├── arima_mod.R │ ├── data.py │ ├── src │ │ ├── sunspots_monthly.dat │ │ └── sunspots_yearly.dat │ └── sunspots.csv └── template_data.py ├── discrete ├── __init__.py ├── discrete_model.py └── tests │ ├── __init__.py │ ├── results │ ├── __init__.py │ ├── nbinom_resids.csv │ ├── phat_mnlogit.csv │ ├── results_discrete.py │ ├── yhat_mnlogit.csv │ └── yhat_poisson.csv │ └── test_discrete.py ├── distributions ├── __init__.py ├── empirical_distribution.py └── tests │ ├── __init__.py │ └── test_ecdf.py ├── genmod ├── __init__.py ├── families │ ├── __init__.py │ ├── family.py │ ├── links.py │ └── varfuncs.py ├── generalized_linear_model.py └── tests │ ├── __init__.py │ ├── results │ ├── __init__.py │ ├── glm_test_resids.py │ ├── igaussident_resids.csv │ ├── inv_gaussian.csv │ ├── iris.csv │ ├── medparlogresids.csv │ ├── results_glm.py │ ├── stata_cancer_glm.csv │ ├── stata_lbw_glm.csv │ └── stata_medpar1_glm.csv │ └── test_glm.py ├── graphics ├── __init__.py ├── api.py ├── boxplots.py ├── correlation.py ├── functional.py ├── gofplots.py ├── plot_grids.py ├── regressionplots.py ├── tests │ ├── __init__.py │ ├── test_boxplots.py │ ├── test_functional.py │ ├── test_gofplots.py │ ├── test_regressionplots.py │ └── test_tsaplots.py ├── tsaplots.py ├── tukeyplot.py └── utils.py ├── info.py ├── interface └── __init__.py ├── iolib ├── __init__.py ├── foreign.py ├── smpickle.py ├── stata_summary_examples.py ├── summary.py ├── table.py ├── tableformatting.py └── tests │ ├── __init__.py │ ├── results │ ├── 
__init__.py │ ├── macrodata.npy_ │ └── macrodata.py │ ├── test_data.csv │ ├── test_foreign.py │ ├── test_pickle.py │ ├── test_summary.py │ ├── test_summary_old.py │ ├── test_table.py │ └── test_table_econpy.py ├── miscmodels ├── __init__.py ├── count.py ├── nonlinls.py ├── tests │ ├── __init__.py │ └── test_poisson.py ├── tmodel.py └── try_mlecov.py ├── nonparametric ├── __init__.py ├── bandwidths.py ├── kde.py ├── kdetools.py ├── lowess.py ├── setup.py └── tests │ ├── Xi_test_data.csv │ ├── __init__.py │ ├── results │ ├── __init__.py │ ├── results_kde.csv │ ├── results_kde_fft.csv │ └── results_kde_weights.csv │ ├── test_kde.py │ └── test_lowess.py ├── regression ├── __init__.py ├── feasible_gls.py ├── linear_model.py └── tests │ ├── __init__.py │ ├── results │ ├── __init__.py │ ├── leverage_influence_ols_nostars.txt │ └── results_regression.py │ ├── test_cov.py │ ├── test_glsar_gretl.py │ └── test_regression.py ├── resampling └── __init__.py ├── robust ├── __init__.py ├── norms.py ├── robust_linear_model.py ├── scale.py └── tests │ ├── __init__.py │ ├── results │ ├── __init__.py │ └── results_rlm.py │ ├── test_rlm.py │ └── test_scale.py ├── sandbox ├── __init__.py ├── archive │ ├── __init__.py │ ├── linalg_covmat.py │ ├── linalg_decomp_1.py │ └── tsa.py ├── bspline.py ├── contrast_old.py ├── cox.py ├── datarich │ ├── __init__.py │ └── factormodels.py ├── descstats.py ├── distributions │ ├── __init__.py │ ├── copula.py │ ├── estimators.py │ ├── examples │ │ ├── __init__.py │ │ ├── ex_fitfr.py │ │ ├── ex_gof.py │ │ ├── ex_mvelliptical.py │ │ ├── ex_transf2.py │ │ └── matchdist.py │ ├── extras.py │ ├── genpareto.py │ ├── gof_new.py │ ├── mixture_rvs.py │ ├── multivariate.py │ ├── mv_measures.py │ ├── mv_normal.py │ ├── otherdist.py │ ├── quantize.py │ ├── sppatch.py │ ├── tests │ │ ├── __init__.py │ │ ├── _est_fit.py │ │ ├── check_moments.py │ │ ├── distparams.py │ │ ├── test_extras.py │ │ ├── test_multivariate.py │ │ └── testtransf.py │ ├── transform_functions.py │ 
├── transformed.py │ ├── try_max.py │ └── try_pot.py ├── formula.py ├── gam.py ├── infotheo.py ├── km_class.py ├── mcevaluate │ ├── __init__.py │ └── arma.py ├── mle.py ├── nonparametric │ ├── __init__.py │ ├── densityorthopoly.py │ ├── kde2.py │ ├── kdecovclass.py │ ├── kernels.py │ ├── smoothers.py │ ├── testdata.py │ └── tests │ │ ├── ex_gam_am_new.py │ │ ├── ex_gam_new.py │ │ ├── ex_smoothers.py │ │ └── test_smoothers.py ├── panel │ ├── __init__.py │ ├── correlation_structures.py │ ├── ex_sandwich.py │ ├── ex_sandwich2.py │ ├── ex_sandwich3.py │ ├── mixed.py │ ├── panel_short.py │ ├── panelmod.py │ ├── random_panel.py │ ├── sandwich_covariance.py │ ├── sandwich_covariance_generic.py │ ├── test_data.txt │ └── test_sandwich.py ├── pca.py ├── regression │ ├── __init__.py │ ├── anova_nistcertified.py │ ├── ar_panel.py │ ├── example_kernridge.py │ ├── gmm.py │ ├── kernridgeregress_class.py │ ├── numdiff.py │ ├── ols_anova_original.py │ ├── onewaygls.py │ ├── penalized.py │ ├── predstd.py │ ├── runmnl.py │ ├── sympy_diff.py │ ├── test_numdiff.py │ ├── tools.py │ ├── treewalkerclass.py │ ├── try_catdata.py │ ├── try_ols_anova.py │ └── try_treewalker.py ├── rls.py ├── stats │ ├── __init__.py │ ├── contrast_tools.py │ ├── diagnostic.py │ ├── ex_newtests.py │ ├── multicomp.py │ ├── runs.py │ ├── stats_dhuard.py │ ├── stats_mstats_short.py │ └── tests │ │ └── __init__.py ├── survival.py ├── survival2.py ├── sysreg.py ├── tests │ ├── GreeneEx15_1.s │ ├── __init__.py │ ├── datamlw.py │ ├── macrodata.s │ ├── maketests_mlabwrap.py │ ├── model_results.py │ ├── savervs.py │ ├── sysreg.s │ ├── test_bspline.py.txt │ ├── test_formula.py │ ├── test_gam.py │ └── test_pca.py ├── tools │ ├── __init__.py │ ├── cross_val.py │ ├── mctools.py │ ├── tools_pca.py │ └── try_mctools.py ├── tsa │ ├── __init__.py │ ├── diffusion.py │ ├── diffusion2.py │ ├── example_arma.py │ ├── fftarma.py │ ├── garch.py │ ├── movstat.py │ ├── try_arma_more.py │ ├── try_fi.py │ ├── try_var_convolve.py │ └── 
varma.py └── utils_old.py ├── setup.py ├── stats ├── __init__.py ├── adnorm.py ├── api.py ├── contrast.py ├── descriptivestats.py ├── diagnostic.py ├── gof.py ├── libqsturng │ ├── CH.r │ ├── LICENSE.txt │ ├── __init__.py │ ├── make_tbls.py │ ├── qsturng.py │ └── tests │ │ ├── __init__.py │ │ ├── bootleg.dat │ │ └── test_qsturng.py ├── lilliefors.py ├── moment_helpers.py ├── multicomp.py ├── multitest.py ├── outliers_influence.py ├── stattools.py ├── tabledist.py ├── tests │ ├── __init__.py │ ├── results │ │ ├── influence_lsdiag_R.json │ │ ├── influence_measures_R.csv │ │ └── influence_measures_bool_R.csv │ ├── test_contrast.py │ ├── test_diagnostic.py │ ├── test_moment_helpers.py │ ├── test_multi.py │ ├── test_pairwise.py │ ├── test_qsturng.py │ ├── test_statstools.py │ └── test_weightstats.py └── weightstats.py ├── tests ├── R_ig.s ├── R_lbw.s ├── __init__.py ├── check_for_rpy.py ├── coverage_sm.py ├── results │ ├── __init__.py │ ├── cancer_resids.csv │ ├── cancerdata.csv │ ├── cancerident_resids.csv │ ├── gaussinvlink_resids.csv.xxx │ ├── gaussinvlinkdata.csv.xxx │ └── glm_gaussian_log_resid.csv.xxx └── rmodelwrap.py ├── tools ├── __init__.py ├── catadd.py ├── compatibility.py ├── data.py ├── datautils.py ├── decorators.py ├── dump2module.py ├── eval_measures.py ├── grouputils.py ├── linalg.py ├── parallel.py ├── sm_exceptions.py ├── tests │ ├── __init__.py │ ├── test_catadd.py │ ├── test_data.py │ ├── test_eval_measures.py │ ├── test_parallel.py │ └── test_tools.py ├── tools.py └── wrappers.py ├── tsa ├── __init__.py ├── adfvalues.py ├── api.py ├── ar_model.py ├── arima_model.py ├── arima_process.py ├── arma_mle.py ├── base │ ├── __init__.py │ ├── datetools.py │ ├── tests │ │ ├── __init__.py │ │ └── test_datetools.py │ └── tsa_model.py ├── descriptivestats.py ├── filters │ ├── __init__.py │ ├── bk_filter.py │ ├── cf_filter.py │ ├── filtertools.py │ ├── hp_filter.py │ └── tests │ │ ├── __init__.py │ │ └── test_filters.py ├── interp │ ├── __init__.py │ ├── 
denton.py │ └── tests │ │ └── test_denton.py ├── kalmanf │ ├── __init__.py │ ├── kalmanfilter.py │ └── setup.py ├── mlemodel.py ├── setup.py ├── stattools.py ├── tests │ ├── __init__.py │ ├── results │ │ ├── ARMLEConstantPredict.csv │ │ ├── AROLSConstantPredict.csv │ │ ├── AROLSNoConstantPredict.csv │ │ ├── __init__.py │ │ ├── datamlw_tls.py │ │ ├── make_arma.py │ │ ├── resids_css_c.csv │ │ ├── resids_css_nc.csv │ │ ├── resids_exact_c.csv │ │ ├── resids_exact_nc.csv │ │ ├── results_ar.py │ │ ├── results_arma.py │ │ ├── results_arma_forecasts.csv │ │ ├── results_corrgram.csv │ │ ├── results_process.py │ │ ├── savedrvs.py │ │ ├── y_arma_data.csv │ │ ├── yhat_css_c.csv │ │ ├── yhat_css_nc.csv │ │ ├── yhat_exact_c.csv │ │ └── yhat_exact_nc.csv │ ├── test_ar.py │ ├── test_arima_process.py │ ├── test_arma.py │ ├── test_stattools.py │ └── test_tsa_tools.py ├── tsatools.py ├── varma_process.py └── vector_ar │ ├── __init__.py │ ├── api.py │ ├── data │ ├── e1.dat │ ├── e2.dat │ ├── e3.dat │ ├── e4.dat │ ├── e5.dat │ └── e6.dat │ ├── dynamic.py │ ├── irf.py │ ├── output.py │ ├── plotting.py │ ├── svar_model.py │ ├── tests │ ├── __init__.py │ ├── example_svar.py │ ├── results │ │ ├── __init__.py │ │ ├── results_svar.py │ │ ├── results_var.py │ │ ├── results_var_data.py │ │ └── vars_results.npz │ ├── test_svar.py │ └── test_var.py │ ├── util.py │ └── var_model.py └── version.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | *.DS_Store 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Packages 9 | *.egg 10 | *.egg-info 11 | dist 12 | build 13 | eggs 14 | parts 15 | bin 16 | var 17 | sdist 18 | develop-eggs 19 | .installed.cfg 20 | lib 21 | lib64 22 | 23 | # Installer logs 24 | pip-log.txt 25 | 26 | # Unit test / coverage reports 27 | .coverage 28 | .tox 29 | nosetests.xml 30 | 31 | # Translations 32 | *.mo 33 | 34 | # Mr Developer 35 | .mr.developer.cfg 36 | .project 37 | .pydevproject 38 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pygwr: a simple GWR in Python 2 | ============================= 3 | 4 | Python library for Geographically Weighted Regression. Both Gaussian and Poisson GWR are supported. 5 | 6 | pygwr builds on top of the statsmodels Python package (http://statsmodels.sourceforge.net). statsmodels provides all the statistical algorithms underlying GWR. pygwr uses a slightly modified version of statsmodels to support geographically weighted Poisson regression. pygwr implements all the weighting schemes of GWR. 7 | 8 | An example of how to use pygwr can be found in the example folder, in the tokyo_gwr.py script. The readme.txt file in the example folder explains where to find the required dataset. 9 | 10 | 11 | Copyright 12 | --------- 13 | 14 | Copyright 2013 Maryam Kordi 15 | 16 | pygwr is distributed under an open-source licence, more specifically the GNU General Public Licence (GPL) version 3 or later. 17 | 18 | The modified statsmodels package within pygwr (gwstatsmodels) is under its initial licence, which is a modified BSD licence. See the statsmodels website at http://statsmodels.sourceforge.net for more information. 19 | 20 | 21 | Licence (GPL) 22 | ------------- 23 | 24 | This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 25 | 26 | This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 27 | 28 | You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/. 
29 | 30 | 31 | -------------------------------------------------------------------------------- /example/readme.txt: -------------------------------------------------------------------------------- 1 | Tokyo Mortality example for pygwr 2 | ================================= 3 | 4 | This example shows how to use pygwr software. 5 | 6 | The example uses the Tokyo mortality dataset of Nakaya et al. 2005, and is available from this website: http://www.st-andrews.ac.uk/geoinformatics/gwr/gwr-downloads/ 7 | Only the file Tokyomortality.txt is needed. 8 | 9 | Once the file is downloaded and placed in the example folder, it is possible to run the GWR by running the tokyo_gwr.py script. 10 | 11 | The example runs a model similar to the one provided with GWR4, but with some simplification, because pygwr does not provide the possibility to include global variables. The provided example calculates a Poisson GWR with db2564 as dependent variable, and OCC_TEC and UNEMP as independent variables. A fixed Gaussian kernel is used, and for bandwidth selection, a simple interval search between 5000 and 20000 with steps of 1000 is performed. 12 | 13 | -------------------------------------------------------------------------------- /example/tokyo_gwr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os, sys 4 | import numpy as np 5 | 6 | # Import pygwr. Try first import of installed version. 7 | # If it fails, we assume we try to run the script from inside the example folder. 8 | # If this assumption is correct, we can simply add the parent folder to the 9 | # Python path, and load it again. 10 | try: 11 | import pygwr 12 | except: 13 | sys.path.append(os.path.abspath('../')) 14 | import pygwr 15 | 16 | 17 | # In Tokyomortality.txt file from 18 | # http://www.st-andrews.ac.uk/geoinformatics/gwr/gwr-downloads/ 19 | # column are separated by several spaces. 
We need to fix this as it is not 20 | # very handy for reading as simple CSV file. 21 | # So we first convert this file into a standard Tab-separated file. 22 | fin = open('Tokyomortality.txt') 23 | fout = open('tokyomortality.csv', 'w') 24 | for line in fin: 25 | fout.write('\t'.join(line.strip().split()) + '\n') 26 | fin.close() 27 | fout.close() 28 | 29 | print "Starting..." 30 | 31 | # Read now the data using the read_csv function in pygwr 32 | h,data = pygwr.read_csv('tokyomortality.csv', header=True, sep="\t") 33 | 34 | # Convert data into a Numpy array, make sure that the data are floats 35 | data = np.array(data, dtype=np.float64) 36 | 37 | # Separate data in dependent, independent, and location variables 38 | y = data[:, h.index('db2564')] # db2564 is the dependent variable 39 | x = data[:, [h.index('OCC_TEC'), h.index('UNEMP')]] # independent variables 40 | g = data[:, [h.index('X_CENTROID'), h.index('Y_CENTROID')]] # locations 41 | ids = data[:, h.index('IDnum0')] # list of IDs 42 | 43 | # Create our GWR model 44 | model = pygwr.GWR(targets=y, samples=x, locations=g, family='poisson') 45 | 46 | # Make the global model first 47 | print "Estimating global model..." 48 | globalfit = model.global_regression() 49 | print "Result for global model:" 50 | print globalfit.summary() 51 | 52 | # Make the bandwidth selection using simple interval search 53 | # We use AICc as selection criterion 54 | print "Estimating optimal bandwidth..." 55 | bwmin, bwmax, bwstep = 5000, 20000, 1000 56 | opt_bw, opt_aicc = None, np.inf # initial values (AICc = infinity) 57 | for bw in range(bwmin, bwmax+bwstep, bwstep): 58 | aicc = model.aicc(bw) # calculate AICc (and AIC, BIC, deviance and K) 59 | print " Bandwidth: %i -- AICc: %f" % (bw, aicc['aicc']) 60 | if aicc['aicc'] < opt_aicc: opt_bw, opt_aicc = bw, aicc['aicc'] 61 | print " Optimal bandwidth is: %i" % opt_bw 62 | 63 | # Estimate the GWR model at all data points 64 | print "Estimating GWR model at all data points..." 
class Gaussian(object):
    """
    A Gaussian kernel.

    Instances are callable: ``Gaussian(sigma)(X, Z)`` is equivalent to
    ``Gaussian.kernel(X, Z, sigma)``.
    """

    def __init__(self, sigma=0.5):
        # Width (standard deviation) of the kernel, in the same units as
        # the coordinates passed to ``__call__``.
        self.sigma = sigma

    def __call__(self, X, Z):
        return Gaussian.kernel(X, Z, self.sigma)

    @classmethod
    def kernel(cls, X, Z, sigma):
        """
        Computes the Gaussian kernel for the matrices X and Z.

        Parameters
        ----------
        X : array_like, shape (n, k)
            One point per row. A 1-D sequence is treated as a single row.
        Z : array_like, shape (m, k)
            One point per row. A 1-D sequence is treated as a single row.
        sigma : float
            Width of the kernel.

        Returns
        -------
        ndarray, shape (m, n)
            Entry ``[j, i]`` is ``exp(-||X[i] - Z[j]||**2 / (2 * sigma**2))``.
        """
        # Work in double precision: the expansion ||x||^2 + ||z||^2 - 2 x.z
        # subtracts numbers of the size of the squared coordinates, which
        # wipes out the result in float32 for large (e.g. projected) values.
        X = np.atleast_2d(np.asarray(X, dtype=np.float64))
        Z = np.atleast_2d(np.asarray(Z, dtype=np.float64))
        sq_x = (X * X).sum(axis=1)[:, np.newaxis]    # (n, 1)
        sq_z = (Z * Z).sum(axis=1)[np.newaxis, :]    # (1, m)
        d = sq_x + sq_z - 2.0 * np.dot(X, Z.T)       # (n, m) squared distances
        # Round-off can leave tiny negative values; a squared distance is
        # never negative, so clamp to keep the kernel within (0, 1].
        np.maximum(d, 0.0, out=d)
        return np.exp(-d.T / (2. * sigma * sigma))
"""
Python 3 compatibility tools.

Provides a common set of names (``bytes``, ``unicode``, ``asbytes``,
``asstr``, ``asunicode``, ``isfileobj``, ``open_latin1``, ``strchar``,
``StringIO``, ``BytesIO``) whose definitions depend on the major version of
the running interpreter.
"""

__all__ = ['bytes', 'asbytes', 'isfileobj', 'getexception', 'strchar',
           'unicode', 'asunicode', 'asbytes_nested', 'asunicode_nested',
           'asstr', 'open_latin1']

import sys

if sys.version_info[0] >= 3:
    import io
    bytes = bytes
    unicode = str

    def asunicode(s):
        """Return `s` as text; bytes are decoded as latin1."""
        # Plain ``str(b'abc')`` would give the repr "b'abc'", not 'abc'.
        if isinstance(s, bytes):
            return s.decode('latin1')
        return str(s)

    def asbytes(s):
        """Return `s` as bytes; text is encoded as latin1."""
        if isinstance(s, bytes):
            return s
        return s.encode('latin1')

    def asstr(s):
        """Return `s` as the native ``str`` type; bytes are decoded as latin1."""
        if isinstance(s, str):
            return s
        return s.decode('latin1')

    def isfileobj(f):
        """Return True if `f` is a raw file or a buffer wrapping a raw file."""
        if isinstance(f, io.FileIO):
            return True
        # ``open(path, 'rb')`` returns a buffered wrapper, not a FileIO;
        # accept it only when the underlying raw stream is a real file.
        if isinstance(f, (io.BufferedReader, io.BufferedWriter,
                          io.BufferedRandom)):
            try:
                return isinstance(f.raw, io.FileIO)
            except ValueError:
                # ``raw`` is unavailable once the buffer has been detached
                return False
        return False

    def open_latin1(filename, mode='r'):
        """Open `filename` with the iso-8859-1 (latin1) encoding."""
        return open(filename, mode=mode, encoding='iso-8859-1')

    strchar = 'U'
    from io import BytesIO, StringIO #gwstatsmodels
else:
    bytes = str
    unicode = unicode
    asbytes = str
    asstr = str
    strchar = 'S'

    def isfileobj(f):
        """Return True if `f` is a built-in ``file`` object."""
        return isinstance(f, file)

    def asunicode(s):
        """Return `s` as ``unicode``; byte strings are decoded as ascii."""
        if isinstance(s, unicode):
            return s
        return s.decode('ascii')

    def open_latin1(filename, mode='r'):
        """Open `filename`; no decoding is applied on Python 2."""
        return open(filename, mode=mode)

    from StringIO import StringIO
    BytesIO = StringIO


def getexception():
    """Return the exception instance currently being handled."""
    return sys.exc_info()[1]


def asbytes_nested(x):
    """Apply ``asbytes`` to every leaf of an arbitrarily nested iterable."""
    if hasattr(x, '__iter__') and not isinstance(x, (bytes, unicode)):
        return [asbytes_nested(y) for y in x]
    else:
        return asbytes(x)


def asunicode_nested(x):
    """Apply ``asunicode`` to every leaf of an arbitrarily nested iterable."""
    if hasattr(x, '__iter__') and not isinstance(x, (bytes, unicode)):
        return [asunicode_nested(y) for y in x]
    else:
        return asunicode(x)
/pygwr/gwstatsmodels/datasets/COPYING: -------------------------------------------------------------------------------- 1 | Last Change: Tue Jul 17 05:00 PM 2007 J 2 | 3 | The code and descriptive text is copyrighted and offered under the terms of 4 | the BSD License from the authors; see below. However, the actual dataset may 5 | have a different origin and intellectual property status. See the SOURCE and 6 | COPYRIGHT variables for this information. 7 | 8 | Copyright (c) 2007 David Cournapeau 9 | All rights reserved. 10 | 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are 13 | met: 14 | 15 | * Redistributions of source code must retain the above copyright 16 | notice, this list of conditions and the following disclaimer. 17 | * Redistributions in binary form must reproduce the above copyright 18 | notice, this list of conditions and the following disclaimer in 19 | the documentation and/or other materials provided with the 20 | distribution. 21 | * Neither the author nor the names of any contributors may be used 22 | to endorse or promote products derived from this software without 23 | specific prior written permission. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 | PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 29 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 30 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 31 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 32 | OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 33 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 34 | OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 35 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/README.txt: -------------------------------------------------------------------------------- 1 | This README was copied from 2 | http://projects.scipy.org/scikits/browser/trunk/learn/scikits/learn/datasets/ 3 | ----------------------------------------------------------------------------- 4 | 5 | Last Change: Tue Jul 17 04:00 PM 2007 J 6 | 7 | This package, datasets, defines a set of packages which contain datasets useful 8 | for demo, examples, etc... This can be seen as an equivalent of the R dataset 9 | package, but for python. 10 | 11 | Each subdir is a python package, and should define the function load, returning 12 | the corresponding data. For example, to access the dataset data1, you should be able to do: 13 | 14 | >> from datasets.data1 import load 15 | >> d = load() # -> d contains the data of the dataset data1 16 | 17 | load can do whatever it wants: fetching data from a file (python script, csv 18 | file, etc...), from the internet, etc... Some special variables must be defined 19 | for each package, containing a python string: 20 | - COPYRIGHT: copyright information 21 | - SOURCE: where the data are coming from 22 | - DESCRSHORT: short description 23 | - DESCRLONG: long description 24 | - NOTE: some notes on the datasets. 
25 | 26 | For the datasets to be useful in the learn scikits, which is the project which initiated this datasets package, the data returned by load has to be a dict with the following conventions: 27 | - 'data': this value should be a record array containing the actual data. 28 | - 'label': this value should be a rank 1 array of integers, contains the 29 | label index for each sample, that is label[i] should be the label index 30 | of data[i]. 31 | - 'class': a record array such as class[i] is the class name. In other 32 | words, this makes the correspondance label index <> label name. 33 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Datasets module 3 | """ 4 | #__all__ = filter(lambda s:not s.startswith('_'),dir()) 5 | import anes96, committee, ccard, copper, cpunish, elnino, grunfeld, longley, \ 6 | macrodata, nile, randhie, scotland, spector, stackloss, star98, \ 7 | strikes, sunspots 8 | 9 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/anes96/__init__.py: -------------------------------------------------------------------------------- 1 | from data import * 2 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/ccard/R_wls.s: -------------------------------------------------------------------------------- 1 | d <- read.csv('./ccard.csv') 2 | attach(d) 3 | 4 | 5 | m1 <- lm(AVGEXP ~ AGE + INCOME + INCOMESQ + OWNRENT, weights=1/INCOMESQ) 6 | results <- summary(m1) 7 | 8 | m2 <- lm(AVGEXP ~ AGE + INCOME + INCOMESQ + OWNRENT - 1, weights=1/INCOMESQ) 9 | results2 <- summary(m2) 10 | 11 | print('m1 has a constant, which theoretically should be INCOME') 12 | print('m2 include -1 for no constant') 13 | print('See ccard/R_wls.s') 14 | 
-------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/ccard/__init__.py: -------------------------------------------------------------------------------- 1 | from data import * 2 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/ccard/ccard.csv: -------------------------------------------------------------------------------- 1 | "AVGEXP","AGE","INCOME","INCOMESQ","OWNRENT" 2 | 124.98,38,4.52,20.4304,1 3 | 9.85,33,2.42,5.8564,0 4 | 15,34,4.5,20.25,1 5 | 137.87,31,2.54,6.4516,0 6 | 546.5,32,9.79,95.8441,1 7 | 92,23,2.5,6.25,0 8 | 40.83,28,3.96,15.6816,0 9 | 150.79,29,2.37,5.6169,1 10 | 777.82,37,3.8,14.44,1 11 | 52.58,28,3.2,10.24,0 12 | 256.66,31,3.95,15.6025,1 13 | 78.87,29,2.45,6.0025,1 14 | 42.62,35,1.91,3.6481,1 15 | 335.43,41,3.2,10.24,1 16 | 248.72,40,4,16,1 17 | 548.03,40,10,100,1 18 | 43.34,35,2.35,5.5225,1 19 | 218.52,34,2,4,1 20 | 170.64,36,4,16,0 21 | 37.58,43,5.14,26.4196,1 22 | 502.2,30,4.51,20.3401,0 23 | 73.18,22,1.5,2.25,0 24 | 1532.77,40,5.5,30.25,1 25 | 42.69,22,2.03,4.1209,0 26 | 417.83,29,3.2,10.24,0 27 | 552.72,21,2.47,6.1009,1 28 | 222.54,24,3,9,0 29 | 541.3,43,3.54,12.5316,1 30 | 568.77,37,5.7,32.49,1 31 | 344.47,27,3.5,12.25,0 32 | 405.35,28,4.6,21.16,1 33 | 310.94,26,3,9,1 34 | 53.65,23,2.59,6.7081,0 35 | 63.92,30,1.51,2.2801,0 36 | 165.85,30,1.85,3.4225,0 37 | 9.58,38,2.6,6.76,0 38 | 319.49,36,2,4,0 39 | 83.08,26,2.35,5.5225,0 40 | 644.83,28,7,49,1 41 | 93.2,24,2,4,0 42 | 105.04,21,1.7,2.89,0 43 | 34.13,24,2.8,7.84,0 44 | 41.19,26,2.4,5.76,0 45 | 169.89,33,3,9,0 46 | 1898.03,34,4.8,23.04,0 47 | 810.39,33,3.18,10.1124,0 48 | 32.78,21,1.5,2.25,0 49 | 95.8,25,3,9,0 50 | 27.78,27,2.28,5.1984,0 51 | 215.07,26,2.8,7.84,0 52 | 79.51,22,2.7,7.29,0 53 | 306.03,41,6,36,0 54 | 104.54,42,3.9,15.21,0 55 | 642.47,25,3.07,9.4249,0 56 | 308.05,31,2.46,6.0516,1 57 | 186.35,27,2,4,0 58 | 56.15,33,3.25,10.5625,0 59 | 129.37,37,2.72,7.3984,0 
"""Bill Greene's credit scoring data."""

__docformat__ = 'restructuredtext'

COPYRIGHT = """Used with express permission of the original author, who
retains all rights."""
TITLE = __doc__
SOURCE = """
William Greene's `Econometric Analysis`

More information can be found at the web site of the text:
http://pages.stern.nyu.edu/~wgreene/Text/econometricanalysis.htm
"""

DESCRSHORT = """William Greene's credit scoring data"""

DESCRLONG = """More information on this data can be found on the
homepage for Greene's `Econometric Analysis`. See source.
"""

NOTE = """
Number of observations - 72
Number of variables - 5
Variable name definitions - See Source for more information on the variables.
"""

from numpy import recfromtxt, column_stack, array
import gwstatsmodels.tools.datautils as du
from os.path import dirname, abspath


def load():
    """Load the credit card data and return a Dataset class.

    The record array read from ``ccard.csv`` is handed to
    ``du.process_recarray`` with ``endog_idx=0`` (the first CSV column,
    ``AVGEXP``) and ``dtype=float``.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    data = _get_data()
    return du.process_recarray(data, endog_idx=0, dtype=float)


def load_pandas():
    """Load the credit card data and return a Dataset class.

    Same data as `load`, but processed by ``du.process_recarray_pandas``
    (again with ``endog_idx=0``).

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    data = _get_data()
    return du.process_recarray_pandas(data, endog_idx=0)


def _get_data():
    """Read ``ccard.csv`` from this module's directory into a record array.

    Returns
    -------
    data : record array
        One float field per CSV column; field names come from the header
        row (``names=True``).
    """
    filepath = dirname(abspath(__file__))
    # Use a context manager so the handle is closed as soon as parsing is
    # done instead of being left for the garbage collector.
    with open(filepath + '/ccard.csv', 'rb') as fh:
        data = recfromtxt(fh, delimiter=",", names=True, dtype=float)
    return data
0,1,21,1.7,105.04,0,0 55 | 0,1,24,2.8,34.13,0,0 56 | 0,1,26,2.4,41.19,0,0 57 | 1,1,33,3,169.89,0,0 58 | 0,1,34,4.8,1898.03,0,0 59 | 0,1,33,3.18,810.39,0,0 60 | 0,0,45,1.8,0,0,0 61 | 0,1,21,1.5,32.78,0,0 62 | 2,1,25,3,95.8,0,0 63 | 0,1,27,2.28,27.78,0,0 64 | 0,1,26,2.8,215.07,0,0 65 | 0,1,22,2.7,79.51,0,0 66 | 3,0,27,4.9,0,1,0 67 | 0,0,26,2.5,0,0,1 68 | 0,1,41,6,306.03,0,1 69 | 0,1,42,3.9,104.54,0,0 70 | 0,0,22,5.1,0,0,0 71 | 0,1,25,3.07,642.47,0,0 72 | 0,1,31,2.46,308.05,1,0 73 | 0,1,27,2,186.35,0,0 74 | 0,1,33,3.25,56.15,0,0 75 | 0,1,37,2.72,129.37,0,0 76 | 0,1,27,2.2,93.11,0,0 77 | 1,0,24,4.1,0,0,0 78 | 0,1,24,3.75,292.66,0,0 79 | 0,1,25,2.88,98.46,0,0 80 | 0,1,36,3.05,258.55,0,0 81 | 0,1,33,2.55,101.68,0,0 82 | 0,0,33,4,0,0,0 83 | 1,1,55,2.64,65.25,1,0 84 | 0,1,20,1.65,108.61,0,0 85 | 0,1,29,2.4,49.56,0,0 86 | 3,0,40,3.71,0,0,0 87 | 0,1,41,7.24,235.57,1,0 88 | 0,0,41,4.39,0,1,0 89 | 0,0,35,3.3,0,1,0 90 | 0,0,24,2.3,0,0,0 91 | 1,0,54,4.18,0,0,0 92 | 2,0,34,2.49,0,0,0 93 | 0,0,45,2.81,0,1,0 94 | 0,1,43,2.4,68.38,0,0 95 | 4,0,35,1.5,0,0,0 96 | 2,0,36,8.4,0,0,0 97 | 0,1,22,1.56,0,0,0 98 | 1,1,33,6,474.15,1,0 99 | 1,1,25,3.6,234.05,0,0 100 | 0,1,26,5,451.2,1,0 101 | 0,1,46,5.5,251.52,1,0 102 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/ccard/src/names.txt: -------------------------------------------------------------------------------- 1 | MDR = Number of derogator reports 2 | 3 | Acc = Credit card application accpeted (1=yes) 4 | 5 | Age = Age in years + 12ths of a year 6 | 7 | Income = Income divided by 10,000 8 | 9 | Avgexp = Avg. 
### Reference fit for the committee data: a GLM of BILLS104 with a
### negative binomial family, kept as a summary object in `results`.
### SETUP ###
# Read the CSV from the current working directory; attach() makes its
# columns (STAFF, SUBS, BILLS104, ...) visible as plain variable names.
d <- read.table("./committee.csv",sep=",", header=T)
attach(d)

# Derived regressors: log of staff size and its product with SUBS.
LNSTAFF <- log(STAFF)
SUBS.LNSTAFF <- SUBS*LNSTAFF
# NOTE(review): MASS is presumably loaded for negative.binomial() / glm.nb().
library(MASS)
# Alternative fit via glm.nb, left disabled by the original author.
#m1 <- glm.nb(BILLS104 ~ SIZE + SUBS + LNSTAFF + PRESTIGE + BILLS103 + SUBS.LNSTAFF)
m1 <- glm(BILLS104 ~ SIZE + SUBS + LNSTAFF + PRESTIGE + BILLS103 + SUBS.LNSTAFF, family=negative.binomial(1)) # Disp should be 1 by default

# Summary of the fitted model.
results <- summary.glm(m1)
"""First 100 days of the US House of Representatives 1995"""

__docformat__ = 'restructuredtext'

COPYRIGHT = """Used with express permission from the original author,
who retains all rights."""
TITLE = __doc__
SOURCE = """
Jeff Gill's `Generalized Linear Models: A Unified Approach`

http://jgill.wustl.edu/research/books.html
"""

DESCRSHORT = """Number of bill assignments in the 104th House in 1995"""

DESCRLONG = """The example in Gill, seeks to explain the number of bill
assignments in the first 100 days of the US' 104th House of Representatives.
The response variable is the number of bill assignments in the first 100 days
over 20 Committees. The explanatory variables in the example are the number of
assignments in the first 100 days of the 103rd House, the number of members on
the committee, the number of subcommittees, the log of the number of staff
assigned to the committee, a dummy variable indicating whether
the committee is a high prestige committee, and an interaction term between
the number of subcommittees and the log of the staff size.

The data returned by load are not cleaned to represent the above example.
"""

NOTE = """Number of Observations - 20

Number of Variables - 6

Variable name definitions::

    BILLS104 - Number of bill assignments in the first 100 days of the 104th
               House of Representatives.
    SIZE     - Number of members on the committee.
    SUBS     - Number of subcommittees.
    STAFF    - Number of staff members assigned to the committee.
    PRESTIGE - PRESTIGE == 1 is a high prestige committee.
    BILLS103 - Number of bill assignments in the first 100 days of the 103rd
               House of Representatives.

Committee names are included as a variable in the data file though not
returned by load.
"""

from numpy import recfromtxt, column_stack, array
import gwstatsmodels.tools.datautils as du
from os.path import dirname, abspath


def load():
    """Load the committee data and return a data class.

    The record array from `_get_data` is passed to ``du.process_recarray``
    with ``endog_idx=0`` (``BILLS104``, the first column kept) and
    ``dtype=float``.

    Returns
    --------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    data = _get_data()
    return du.process_recarray(data, endog_idx=0, dtype=float)


def load_pandas():
    """Load the committee data via ``du.process_recarray_pandas``.

    Uses the same record array and arguments as `load`
    (``endog_idx=0``, ``dtype=float``).

    Returns
    --------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    data = _get_data()
    return du.process_recarray_pandas(data, endog_idx=0, dtype=float)


def _get_data():
    """Read ``committee.csv`` from this module's directory.

    Column 0 (the committee name) is skipped via ``usecols``; the remaining
    six columns are read as floats, with field names from the header row.
    """
    filepath = dirname(abspath(__file__))
    # Context manager: close the file handle once parsing has finished.
    with open(filepath + '/committee.csv', 'rb') as fh:
        data = recfromtxt(fh, delimiter=",", names=True, dtype=float,
                          usecols=(1, 2, 3, 4, 5, 6))
    return data
"""World Copper Prices 1951-1975 dataset."""

__docformat__ = 'restructuredtext'

COPYRIGHT = """Used with express permission from the original author,
who retains all rights."""
TITLE = "World Copper Market 1951-1975 Dataset"
SOURCE = """
Jeff Gill's `Generalized Linear Models: A Unified Approach`

http://jgill.wustl.edu/research/books.html
"""

DESCRSHORT = """World Copper Market 1951-1975"""

DESCRLONG = """This data describes the world copper market from 1951 through 1975. In an
example, in Gill, the outcome variable (of a 2 stage estimation) is the world
consumption of copper for the 25 years. The explanatory variables are the
world consumption of copper in 1000 metric tons, the constant dollar adjusted
price of copper, the price of a substitute, aluminum, an index of real per
capita income base 1970, an annual measure of manufacturer inventory change,
and a time trend.
"""

NOTE = """
Number of Observations - 25

Number of Variables - 6

Variable name definitions::

    WORLDCONSUMPTION - World consumption of copper (in 1000 metric tons)
    COPPERPRICE - Constant dollar adjusted price of copper
    INCOMEINDEX - An index of real per capita income (base 1970)
    ALUMPRICE - The price of aluminum
    INVENTORYINDEX - A measure of annual manufacturer inventory trend
    TIME - A time trend

Years are included in the data file though not returned by load.
"""

from numpy import recfromtxt, column_stack, array
import gwstatsmodels.tools.datautils as du
from os.path import dirname, abspath


def load():
    """
    Load the copper data and return a Dataset class.

    The record array from `_get_data` is passed to ``du.process_recarray``
    with ``endog_idx=0`` (``WORLDCONSUMPTION``, the first column kept) and
    ``dtype=float``.

    Returns
    --------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    data = _get_data()
    return du.process_recarray(data, endog_idx=0, dtype=float)


def _get_data():
    """Read ``copper.csv`` from this module's directory.

    Column 0 (``YEAR``) is skipped via ``usecols``; the remaining six
    columns are read as floats, with field names from the header row.
    """
    filepath = dirname(abspath(__file__))
    # Context manager: close the file handle once parsing has finished.
    with open(filepath + '/copper.csv', 'rb') as fh:
        data = recfromtxt(fh, delimiter=",", names=True, dtype=float,
                          usecols=(1, 2, 3, 4, 5, 6))
    return data


def load_pandas():
    """
    Load the copper data and return a Dataset class.

    Same record array and arguments as `load`, processed by
    ``du.process_recarray_pandas``.

    Returns
    --------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    data = _get_data()
    return du.process_recarray_pandas(data, endog_idx=0, dtype=float)
### Reference fit for the cpunish data: Poisson GLM of EXECUTIONS.
### SETUP ###
# Read the CSV from the current working directory; attach() makes its
# columns visible as plain variable names.
d <- read.table("./cpunish.csv",sep=",", header=T)
attach(d)
# The model uses the log of VC100k96 rather than the raw column.
LN_VC100k96 = log(VC100k96)
### MODEL ###
m1 <- glm(EXECUTIONS ~ INCOME + PERPOVERTY + PERBLACK + LN_VC100k96 + SOUTH + DEGREE,
    family=poisson)
results <- summary.glm(m1)
# Print the full summary, then only its coefficient table.
results
results['coefficients']
-------------------------------------------------------------------------------- 1 | STATE EXECUTIONS INCOME PERPOVERTY PERBLACK VC100k96 SOUTH <9thGRADE 9thTO12th HSOREQUIV SOMECOLL AADEGREE BACHELORS GRAD/PROF 2 | Texas 37 34453 16.7 12.2 644 1 1492112 1924831 3153187 2777973 598956 530849 673250 3 | Virginia 9 41534 12.5 20.0 351 1 461475 669851 1297714 969191 244488 676710 363602 4 | Missouri 6 35802 10.6 11.2 591 0 391097 578440 1251550 785555 170146 420521 204294 5 | Arkansas 4 26954 18.4 16.1 524 1 234071 328690 571252 323016 62246 143038 67144 6 | Alabama 3 31468 14.8 25.9 565 1 362434 597455 875703 575123 146228 281466 142177 7 | Arizona 2 32552 18.8 3.5 632 0 224662 368279 708340 724228 173801 325575 161560 8 | Illinois 2 40873 11.6 15.3 886 0 786815 1203134 2531465 1817238 490791 1101193 552145 9 | South_Carolina 2 34861 13.1 30.1 997 1 303694 479916 776053 466145 152671 267365 118811 10 | Colorado 1 42562 9.4 4.3 405 0 124477 270560 654510 630445 161331 402917 190168 11 | Florida 1 31900 14.3 15.4 1051 1 883820 1706839 3045682 2054574 682005 1133053 567453 12 | Indiana 1 37421 8.2 8.2 537 0 310403 673362 1530741 775605 212379 360087 224057 13 | Kentucky 1 33305 16.4 7.2 321 0 456107 467956 881795 476362 108409 209055 129994 14 | Louisiana 1 32108 18.4 32.1 929 1 391630 534570 951832 586477 94409 288154 143624 15 | Maryland 1 45844 9.3 27.4 931 0 257518 514788 1044976 744604 182465 532883 342012 16 | Nebraska 1 34743 10.0 4.0 435 0 81690 124792 388540 272981 80956 141231 59008 17 | Oklahoma 1 29709 15.2 7.7 597 0 201228 375155 706003 539511 113434 253635 119774 18 | Oregon 1 36777 11.7 1.8 463 0 122513 283409 613983 561176 139269 267161 130403 19 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/elnino/__init__.py: -------------------------------------------------------------------------------- 1 | from data import * 2 | 
"""El Nino dataset, 1950 - 2010"""

__docformat__ = 'restructuredtext'

COPYRIGHT = """This data is in the public domain."""

TITLE = """El Nino - Sea Surface Temperatures"""

SOURCE = """
National Oceanic and Atmospheric Administration's National Weather Service

ERSST.V3B dataset, Nino 1+2
http://www.cpc.ncep.noaa.gov/data/indices/
"""

DESCRSHORT = """Averaged monthly sea surface temperature - Pacific Ocean."""

DESCRLONG = """This data contains the averaged monthly sea surface
temperature in degrees Celsius of the Pacific Ocean, between 0-10 degrees South
and 90-80 degrees West, from 1950 to 2010. This dataset was obtained from
NOAA.
"""

NOTE = """
Number of Observations - 61 x 12

Number of Variables - 1

Variable name definitions::

    TEMPERATURE - average sea surface temperature in degrees Celsius
                  (12 columns, one per month).

"""


from numpy import recfromtxt, column_stack, array
from pandas import DataFrame

from gwstatsmodels.tools import Dataset
from os.path import dirname, abspath


def load():
    """
    Load the El Nino data and return a Dataset class.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.

    Notes
    -----
    The elnino Dataset instance does not contain endog and exog attributes.
    """
    data = _get_data()
    names = data.dtype.names
    dataset = Dataset(data=data, names=names)
    return dataset


def load_pandas():
    """
    Load the El Nino data with ``data`` converted to a pandas DataFrame.

    Calls `load` and replaces the ``data`` attribute of the returned
    Dataset with ``DataFrame(dataset.data)``.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    dataset = load()
    dataset.data = DataFrame(dataset.data)
    return dataset


def _get_data():
    """Read ``elnino.csv`` from this module's directory into a record array.

    All columns are read as floats; field names come from the header row.
    """
    filepath = dirname(abspath(__file__))
    # Context manager: close the file handle once parsing has finished.
    with open(filepath + '/elnino.csv', 'rb') as fh:
        data = recfromtxt(fh, delimiter=",", names=True, dtype=float)
    return data
"""Longley dataset"""

__docformat__ = 'restructuredtext'

COPYRIGHT = """This is public domain."""
TITLE = __doc__
SOURCE = """
The classic 1967 Longley Data

http://www.itl.nist.gov/div898/strd/lls/data/Longley.shtml

::

    Longley, J.W. (1967) "An Appraisal of Least Squares Programs for the
        Electronic Computer from the Point of View of the User." Journal of
        the American Statistical Association. 62.319, 819-41.
"""

DESCRSHORT = """"""

DESCRLONG = """The Longley dataset contains various US macroeconomic
variables that are known to be highly collinear. It has been used to appraise
the accuracy of least squares routines."""

NOTE = """
Number of Observations - 16

Number of Variables - 6

Variable name definitions::

    TOTEMP - Total Employment
    GNPDEFL - GNP deflator
    GNP - GNP
    UNEMP - Number of unemployed
    ARMED - Size of armed forces
    POP - Population
    YEAR - Year (1947 - 1962)
"""

from numpy import recfromtxt, array, column_stack
import gwstatsmodels.tools.datautils as du
from os.path import dirname, abspath


def load():
    """
    Load the Longley data and return a Dataset class.

    The record array from `_get_data` is passed to ``du.process_recarray``
    with ``endog_idx=0`` (``TOTEMP``, the first column kept) and
    ``dtype=float``.

    Returns
    -------
    Dataset instance
        See DATASET_PROPOSAL.txt for more information.
    """
    data = _get_data()
    return du.process_recarray(data, endog_idx=0, dtype=float)


def load_pandas():
    """
    Load the Longley data and return a Dataset class.

    Same record array as `load`, processed by
    ``du.process_recarray_pandas`` with ``endog_idx=0``.

    Returns
    -------
    Dataset instance
        See DATASET_PROPOSAL.txt for more information.
    """
    data = _get_data()
    return du.process_recarray_pandas(data, endog_idx=0)


def _get_data():
    """Read ``longley.csv`` from this module's directory.

    Column 0 (``Obs``) is skipped via ``usecols``; the remaining seven
    columns are read as floats, with field names from the header row.
    """
    filepath = dirname(abspath(__file__))
    # Context manager: close the file handle once parsing has finished.
    with open(filepath+'/longley.csv', "rb") as fh:
        data = recfromtxt(fh, delimiter=",", names=True, dtype=float,
                          usecols=(1, 2, 3, 4, 5, 6, 7))
    return data
"""Nile River flows at Aswan, 1871-1970."""

__docformat__ = 'restructuredtext'

# NOTE(review): COPYRIGHT is still the dataset-template placeholder; the
# licensing status of this data needs to be confirmed before it is reworded.
COPYRIGHT = """E.g., This is public domain."""
TITLE = """Nile River flows at Aswan 1871-1970"""
SOURCE = """
This data is first analyzed in:

    Cobb, G. W. 1978. "The Problem of the Nile: Conditional Solution to a
        Changepoint Problem." Biometrika. 65.2, 243-51.
"""

DESCRSHORT = """Annual flow of the Nile as measured at Aswan for the 100
years from 1871 to 1970."""

DESCRLONG = DESCRSHORT + " There is an apparent changepoint near 1898."

NOTE = """
Number of observations: 100
Number of variables: 2
Variable name definitions:

    year - the year of the observation
    volume - the discharge at Aswan in 10^8 m^3
"""

from numpy import recfromtxt, column_stack, array
from pandas import Series, DataFrame

from gwstatsmodels.tools import Dataset
from os.path import dirname, abspath


def load():
    """
    Load the Nile data and return a Dataset class instance.

    ``endog`` is the ``volume`` column as a float array; ``data`` is the
    full record array (``year`` and ``volume``).

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    data = _get_data()
    endog_name = 'volume'
    endog = array(data[endog_name], dtype=float)
    dataset = Dataset(data=data, names=[endog_name], endog=endog,
                      endog_name=endog_name)
    return dataset


def load_pandas():
    """
    Load the Nile data with pandas containers.

    ``data`` is a DataFrame of the CSV; ``endog`` is the ``volume`` column
    as a Series indexed by the integer year.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    data = DataFrame(_get_data())
    # TODO: time series
    # Pass the raw values, not the Series: handing pandas a Series together
    # with a new index re-aligns on labels (0..n-1 vs. the years), which
    # yields an all-NaN endog.
    endog = Series(data['volume'].values, index=data['year'].astype(int),
                   name='volume')
    dataset = Dataset(data=data, names=list(data.columns),
                      endog=endog, endog_name='volume')
    return dataset


def _get_data():
    """Read ``nile.csv`` from this module's directory into a record array.

    Both columns are read as floats; field names come from the header row.
    """
    filepath = dirname(abspath(__file__))
    # Context manager: close the file handle once parsing has finished.
    with open(filepath + '/nile.csv', 'rb') as fh:
        data = recfromtxt(fh, delimiter=",", names=True, dtype=float)
    return data
| 1944,742 76 | 1945,801 77 | 1946,1040 78 | 1947,860 79 | 1948,874 80 | 1949,848 81 | 1950,890 82 | 1951,744 83 | 1952,749 84 | 1953,838 85 | 1954,1050 86 | 1955,918 87 | 1956,986 88 | 1957,797 89 | 1958,923 90 | 1959,975 91 | 1960,815 92 | 1961,1020 93 | 1962,906 94 | 1963,901 95 | 1964,1170 96 | 1965,912 97 | 1966,746 98 | 1967,919 99 | 1968,718 100 | 1969,714 101 | 1970,740 102 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/randhie/__init__.py: -------------------------------------------------------------------------------- 1 | from data import * 2 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/scotland/R_scotvote.s: -------------------------------------------------------------------------------- 1 | ### SETUP ### 2 | d <- read.table("./scotvote.csv",sep=",", header=T) 3 | attach(d) 4 | 5 | ### MODEL ### 6 | m1 <- glm(YES ~ COUTAX * UNEMPF + MOR + ACT + GDP + AGE, 7 | family=Gamma) 8 | results <- summary.glm(m1) 9 | results 10 | results['coefficients'] 11 | logLik(m1) 12 | scale <- results$disp 13 | Y <- YES 14 | mu <- m1$fitted 15 | llf <- -1/scale * sum(Y/mu+log(mu)+(scale-1)*log(Y)+log(scale)+scale*lgamma(1/scale)) 16 | print(llf) 17 | print("This is the llf calculated with the formula") 18 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/scotland/__init__.py: -------------------------------------------------------------------------------- 1 | from data import * 2 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/scotland/scotvote.csv: -------------------------------------------------------------------------------- 1 | "COUNCILDIST","YES","COUTAX","UNEMPF","MOR","ACT","GDP","AGE","COUTAX_FEMALEUNEMP" 2 | "Aberdeen_City",60.3,712,21,105,82.4,13566,12.3,14952 3 | 
"Aberdeenshire",52.3,643,26.5,97,80.2,13566,15.3,17039.5 4 | "Angus",53.4,679,28.3,113,86.3,9611,13.9,19215.7 5 | "Argyll_and_Bute",57,801,27.1,109,80.4,9483,13.6,21707.1 6 | "Clackmannanshire",68.7,753,22,115,64.7,9265,14.6,16566 7 | "Dumfries_and_Galloway",48.8,714,24.3,107,79,9555,13.8,17350.2 8 | "Dundee_City",65.5,920,21.2,118,72.2,9611,13.3,19504 9 | "East_Ayrshire",70.5,779,20.5,114,75.2,9483,14.5,15969.5 10 | "East_Dunbartonshire",59.1,771,23.2,102,81.1,9483,14.2,17887.2 11 | "East_Lothian",62.7,724,20.5,112,80.3,12656,13.7,14842 12 | "East_Renfrewshire",51.6,682,23.8,96,83,9483,14.6,16231.6 13 | "Edinburgh_City",62,837,22.1,111,74.5,12656,11.6,18497.7 14 | "Eilean_Siar_(Western_Isles)",68.4,599,19.9,117,83.8,8298,15.1,11920.1 15 | "Falkirk",69.2,680,21.5,121,77.6,9265,13.7,14620 16 | "Fife",64.7,747,22.5,109,77.9,8314,14.4,16807.5 17 | "Glasgow_City",75,982,19.4,137,65.3,9483,13.3,19050.8 18 | "Highland",62.1,719,25.9,109,80.9,8298,14.9,18622.1 19 | "Inverclyde",67.2,831,18.5,138,80.2,9483,14.6,15373.5 20 | "Midlothian",67.7,858,19.4,119,84.8,12656,14.3,16645.2 21 | "Moray",52.7,652,27.2,108,86.4,13566,14.6,17734.4 22 | "North_Ayrshire",65.7,718,23.7,115,73.5,9483,15,17016.6 23 | "North_Lanarkshire",72.2,787,20.8,126,74.7,9483,14.9,16369.6 24 | "Orkney_Islands",47.4,515,26.8,106,87.8,8298,15.3,13802 25 | "Perth_and_Kinross",51.3,732,23,103,86.6,9611,13.8,16836 26 | "Renfrewshire",63.6,783,20.5,125,78.5,9483,14.1,16051.5 27 | "Scottish_Borders_The",50.7,612,23.7,100,80.6,9033,13.3,14504.4 28 | "Shetland_Islands",51.6,486,23.2,117,84.8,8298,15.9,11275.2 29 | "South_Ayrshire",56.2,765,23.6,105,79.2,9483,13.7,18054 30 | "South_Lanarkshire",67.6,793,21.7,125,78.4,9483,14.5,17208.1 31 | "Stirling",58.9,776,23,110,77.2,9265,13.6,17848 32 | "West_Dunbartonshire",74.7,978,19.3,130,71.5,9483,15.3,18875.4 33 | "West_Lothian",67.3,792,21.2,126,82.2,12656,15.1,16790.4 34 | -------------------------------------------------------------------------------- 
/pygwr/gwstatsmodels/datasets/scotland/src/scotland.readme: -------------------------------------------------------------------------------- 1 | ######################################################################################################### 2 | # # 3 | # This archive is part of the free distribution of data and statistical software code for # 4 | # "Generalized Linear Models: A Unified Approach", Jeff Gill, Sage QASS Series. You are # 5 | # free to use, modify, distribute, publish, etc. provided attribution. Please forward # 6 | # bugs, complaints, comments, and useful changes to: jgill@latte.harvard.edu. # 7 | # # 8 | ######################################################################################################### 9 | 10 | Electoral Politics in Scotland. These data are from the 1997 vote that established a Scottish 11 | Parliament with taxing powers. The data are culled from several different official UK documents 12 | provided by the Office for National Statistics, the General Register Office for Scotland, the 13 | Scottish Office: Education and Industry Department, the Scottish Department for Education 14 | and Employment, the Scottish Office: Development Department, and David Boothroyd (thank you). 15 | The files in this zip archive are: 16 | 17 | scotland.readme this file 18 | scotvote.dat the data file with a header indicating 19 | 20 | scotland_births.html 21 | scotland_changes.html 22 | scotland_devolution.html 23 | scotland_econ_summary.html 24 | scotland_economics.html 25 | scotland_education.html 26 | scotland_housing.html 27 | scotland_population.html these are html files with various details on the variables included. 
28 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/scotland/src/scotland_econ_summary.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/datasets/scotland/src/scotland_econ_summary.html -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/scotland/src/scotland_economics.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/datasets/scotland/src/scotland_economics.html -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/scotland/src/scotland_housing.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/datasets/scotland/src/scotland_housing.html -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/spector/__init__.py: -------------------------------------------------------------------------------- 1 | from data import * 2 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/spector/data.py: -------------------------------------------------------------------------------- 1 | """Spector and Mazzeo (1980) - Program Effectiveness Data""" 2 | 3 | __docformat__ = 'restructuredtext' 4 | 5 | COPYRIGHT = """Used with express permission of the original author, who 6 | retains all rights. 
""" 7 | TITLE = __doc__ 8 | SOURCE = """ 9 | http://pages.stern.nyu.edu/~wgreene/Text/econometricanalysis.htm 10 | 11 | The raw data was downloaded from Bill Greene's Econometric Analysis web site, 12 | though permission was obtained from the original researcher, Dr. Lee Spector, 13 | Professor of Economics, Ball State University.""" 14 | 15 | DESCRSHORT = """Experimental data on the effectiveness of the personalized 16 | system of instruction (PSI) program""" 17 | 18 | DESCRLONG = DESCRSHORT 19 | 20 | NOTE = """ 21 | Number of Observations - 32 22 | 23 | Number of Variables - 4 24 | 25 | Variable name definitions:: 26 | 27 | Grade - binary variable indicating whether or not a student's grade 28 | improved. 1 indicates an improvement. 29 | TUCE - Test score on economics test 30 | PSI - participation in program 31 | GPA - Student's grade point average 32 | """ 33 | 34 | import numpy as np 35 | import gwstatsmodels.tools.datautils as du 36 | from os.path import dirname, abspath 37 | 38 | def load(): 39 | """ 40 | Load the Spector dataset and returns a Dataset class instance. 41 | 42 | Returns 43 | ------- 44 | Dataset instance: 45 | See DATASET_PROPOSAL.txt for more information. 46 | """ 47 | data = _get_data() 48 | return du.process_recarray(data, endog_idx=3, dtype=float) 49 | 50 | def load_pandas(): 51 | """ 52 | Load the Spector dataset and returns a Dataset class instance. 53 | 54 | Returns 55 | ------- 56 | Dataset instance: 57 | See DATASET_PROPOSAL.txt for more information. 
def _get_data():
    """
    Read ``spector.csv`` from the directory containing this module.

    Returns
    -------
    data : record array
        Output of ``np.recfromtxt`` for columns 1-4 of the space-delimited
        file, parsed as float, with field names taken from the header row.
    """
    filepath = dirname(abspath(__file__))
    # usecols starts at 1 so that column 0 of the file is not loaded.
    # The handle is closed explicitly: recfromtxt only closes files it opened
    # itself, so passing a bare open() result would leak it.
    with open(filepath + '/spector.csv', "rb") as csv_file:
        data = np.recfromtxt(csv_file, delimiter=" ",
                             names=True, dtype=float, usecols=(1,2,3,4))
    return data
WATERTEMP + ACIDCONC, scale.est="Huber") # psi.huber default 20 | 21 | m5 <- rlm(STACKLOSS ~ AIRFLOW + WATERTEMP + ACIDCONC, scale.est="Huber", psi = psi.hampel, init = "lts") 22 | 23 | m6 <- rlm(STACKLOSS ~ AIRFLOW + WATERTEMP + ACIDCONC, scale.est="Huber", psi = psi.bisquare) 24 | 25 | results4 <- summary(m4) 26 | 27 | results5 <- summary(m5) 28 | 29 | results6 <- summary(m6) 30 | 31 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/stackloss/__init__.py: -------------------------------------------------------------------------------- 1 | from data import * 2 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/stackloss/data.py: -------------------------------------------------------------------------------- 1 | """Stack loss data""" 2 | 3 | __docformat__ = 'restructuredtext' 4 | 5 | COPYRIGHT = """This is public domain. """ 6 | TITLE = __doc__ 7 | SOURCE = """ 8 | Brownlee, K. A. (1965), "Statistical Theory and Methodology in 9 | Science and Engineering", 2nd edition, New York:Wiley. 10 | """ 11 | 12 | DESCRSHORT = """Stack loss plant data of Brownlee (1965)""" 13 | 14 | DESCRLONG = """The stack loss plant data of Brownlee (1965) contains 15 | 21 days of measurements from a plant's oxidation of ammonia to nitric acid. 16 | The nitric oxide pollutants are captured in an absorption tower.""" 17 | 18 | NOTE = """ 19 | Number of Observations - 21 20 | 21 | Number of Variables - 4 22 | 23 | Variable name definitions:: 24 | 25 | STACKLOSS - 10 times the percentage of ammonia going into the plant that 26 | escapes from the absoroption column 27 | AIRFLOW - Rate of operation of the plant 28 | WATERTEMP - Cooling water temperature in the absorption tower 29 | ACIDCONC - Acid concentration of circulating acid minus 50 times 10. 
def _get_data():
    """
    Read ``stackloss.csv`` from the directory containing this module.

    Returns
    -------
    data : record array
        Output of ``recfromtxt``: comma-delimited, field names from the
        header row, every column parsed as float.
    """
    filepath = dirname(abspath(__file__))
    # Close the handle deterministically; recfromtxt does not close file
    # objects that were opened by the caller.
    with open(filepath + '/stackloss.csv', "rb") as csv_file:
        data = recfromtxt(csv_file, delimiter=",", names=True, dtype=float)
    return data
-------------------------------------------------------------------------------- 1 | ### SETUP 2 | #star.data <- as.matrix(read.csv("./star98.csv",header=T)) 3 | #star.factors3 <- data.frame( LOWINC=star.data[,3], PERASIAN=star.data[,4], PERBLACK=star.data[,5], 4 | # PERHISP=star.data[,6], PERMINTE=star.data[,7], AVYRSEXP=star.data[,8], AVSAL=star.data[,9], 5 | # PERSPEN=star.data[,10], PTRATIO=star.data[,11], PCTAF=star.data[,12], PCTCHRT=star.data[,13], 6 | # PCTYRRND=star.data[,14], PERMINTE.AVYRSEXP=star.data[,15], PERMINTE.AVSAL=star.data[,16], 7 | # AVYRSEXP.AVSAL=star.data[,17], PERSPEN.PTRATIO=star.data[,18], PERSPEN.PCTAF=star.data[,19], 8 | # PTRATIO.PCTAF=star.data[,20], PERMINTE.AVYRSEXP.AVSAL=star.data[,21], 9 | # PERSPEN.PTRATIO.PCTAF=star.data[,22], MATHTOT=star.data[,1], PR50M=star.data[,2] ) 10 | d <- read.table("./star98.csv", sep=",", header=T) 11 | attach(d) 12 | #attach(star.factors3) 13 | 14 | 15 | ### MATH MODEL 16 | m1 <- glm(cbind(PR50M,MATHTOT-PR50M) ~ LOWINC + PERASIAN + PERBLACK + PERHISP + 17 | PERMINTE + AVYRSEXP + AVSALK + PERSPENK + PTRATIO + PCTAF + PCTCHRT + PCTYRRND + 18 | PERMINTE_AVYRSEXP + PERMINTE_AVSAL + AVYRSEXP_AVSAL + PERSPEN_PTRATIO + PERSPEN_PCTAF + 19 | PTRATIO_PCTAF + PERMINTE_AVYRSEXP_AVSAL + PERSPEN_PTRATIO_PCTAF, 20 | family=binomial) 21 | #as.numeric(m1$coef) 22 | #as.numeric(sqrt(diag(vcov(m1)))) 23 | results <- summary.glm(m1) 24 | 25 | #star.logit.fit3 <- glm(cbind(PR50M,MATHTOT-PR50M) ~ LOWINC + PERASIAN + PERBLACK + PERHISP + 26 | # PERMINTE + AVYRSEXP + AVSAL + PERSPEN + PTRATIO + PCTAF + PCTCHRT + PCTYRRND + 27 | # PERMINTE.AVYRSEXP + PERMINTE.AVSAL + AVYRSEXP.AVSAL + PERSPEN.PTRATIO + PERSPEN.PCTAF + 28 | # PTRATIO.PCTAF + PERMINTE.AVYRSEXP.AVSAL + PERSPEN.PTRATIO.PCTAF, 29 | # family = binomial(), data=star.factors3) 30 | #results <- summary.glm(star.logit.fit3) 31 | # WITH R STYLE INTERACTIONS 32 | #star.logit.fit4 <- glm(cbind(PR50M,MATHTOT-PR50M) ~ LOWINC + PERASIAN + PERBLACK + PERHISP + 33 | # 
PERMINTE + AVYRSEXP + AVSAL + PERSPEN + PTRATIO + PCTAF + PCTCHRT + PCTYRRND + 34 | # PERMINTE*AVYRSEXP*AVSAL + PERSPEN*PTRATIO*PCTAF, 35 | # family = binomial(), data=star.factors3) 36 | 37 | 38 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/strikes/__init__.py: -------------------------------------------------------------------------------- 1 | from data import * 2 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/strikes/data.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """U.S. Strike Duration Data""" 3 | 4 | __docformat__ = 'restructuredtext' 5 | 6 | COPYRIGHT = """This is public domain.""" 7 | TITLE = __doc__ 8 | SOURCE = """ 9 | This is a subset of the data used in Kennan (1985). It was originally 10 | published by the Bureau of Labor Statistics. 11 | 12 | :: 13 | 14 | Kennan, J. 1985. "The duration of contract strikes in US manufacturing. 15 | `Journal of Econometrics` 28.1, 5-28. 16 | """ 17 | 18 | DESCRSHORT = """Contains data on the length of strikes in US manufacturing and 19 | unanticipated industrial production.""" 20 | 21 | DESCRLONG = """Contains data on the length of strikes in US manufacturing and 22 | unanticipated industrial production. The data is a subset of the data originally 23 | used by Kennan. 
def _get_data():
    """
    Read ``strikes.csv`` from the directory containing this module.

    Returns
    -------
    data : record array
        Output of ``recfromtxt``: comma-delimited, field names from the
        header row, every column parsed as float.
    """
    filepath = dirname(abspath(__file__))
    # Close the handle deterministically; recfromtxt does not close file
    # objects that were opened by the caller.
    with open(filepath + '/strikes.csv', 'rb') as csv_file:
        data = recfromtxt(csv_file, delimiter=",", names=True, dtype=float)
    return data
.00535 31 | 25, .00535 32 | 85, .00535 33 | 3, .07427 34 | 10, .07427 35 | 1, .06450 36 | 2, .06450 37 | 2, .06450 38 | 3, .06450 39 | 3, .06450 40 | 4, .06450 41 | 8, .06450 42 | 11, .06450 43 | 22, .06450 44 | 23, .06450 45 | 27, .06450 46 | 32, .06450 47 | 33, .06450 48 | 35, .06450 49 | 43, .06450 50 | 43, .06450 51 | 44, .06450 52 | 100, .06450 53 | 5, -.10443 54 | 49, -.10443 55 | 2, -.00700 56 | 12, -.00700 57 | 12, -.00700 58 | 21, -.00700 59 | 21, -.00700 60 | 27, -.00700 61 | 38, -.00700 62 | 42, -.00700 63 | 117, -.00700 64 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/sunspots/R_sunspots.s: -------------------------------------------------------------------------------- 1 | d <- read.table('./sunspots.csv', sep=',', header=T) 2 | attach(d) 3 | 4 | mod_ols <- ar(SUNACTIVITY, aic=FALSE, order.max=9, method="ols", intercept=FALSE) 5 | mod_yw <- ar(SUNACTIVITY, aic=FALSE, order.max=9, method="yw") 6 | mod_burg <- ar(SUNACTIVITY, aic=FALSE, order.max=9, method="burg") 7 | mod_mle <- ar(SUNACTIVITY, aic=FALSE, order.max=9, method="mle") 8 | 9 | select_ols <- ar(SUNACTIVITY, aic=TRUE, method="ols") 10 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/sunspots/__init__.py: -------------------------------------------------------------------------------- 1 | from data import * 2 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/sunspots/arima_mod.R: -------------------------------------------------------------------------------- 1 | dta <- read.csv('./sunspots.csv') 2 | attach(dta) 3 | arma_mod <- arima(SUNACTIVITY, order=c(9,0,0), xreg=rep(1,309), include.mean=FALSE) 4 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/datasets/sunspots/data.py: 
-------------------------------------------------------------------------------- 1 | """Yearly sunspots data 1700-2008""" 2 | 3 | __docformat__ = 'restructuredtext' 4 | 5 | COPYRIGHT = """This data is public domain.""" 6 | TITLE = __doc__ 7 | SOURCE = """ 8 | http://www.ngdc.noaa.gov/stp/SOLAR/ftpsunspotnumber.html 9 | 10 | The original dataset contains monthly data on sunspot activity in the file 11 | ./src/sunspots_yearly.dat. There is also sunspots_monthly.dat. 12 | """ 13 | 14 | DESCRSHORT = """Yearly (1700-2008) data on sunspots from the National 15 | Geophysical Data Center.""" 16 | 17 | DESCRLONG = DESCRSHORT 18 | 19 | NOTE = """ 20 | Number of Observations - 309 (Annual 1700 - 2008) 21 | Number of Variables - 1 22 | Variable name definitions:: 23 | 24 | SUNACTIVITY - Number of sunspots for each year 25 | 26 | The data file contains a 'YEAR' variable that is not returned by load. 27 | """ 28 | 29 | from numpy import recfromtxt, column_stack, array 30 | from pandas import Series, DataFrame 31 | 32 | from gwstatsmodels.tools import Dataset 33 | from os.path import dirname, abspath 34 | 35 | def load(): 36 | """ 37 | Load the yearly sunspot data and returns a data class. 38 | 39 | Returns 40 | -------- 41 | Dataset instance: 42 | See DATASET_PROPOSAL.txt for more information. 43 | 44 | Notes 45 | ----- 46 | This dataset only contains data for one variable, so the attributes 47 | data, raw_data, and endog are all the same variable. There is no exog 48 | attribute defined. 
def load_pandas():
    """
    Load the yearly sunspot data and return a Dataset of pandas objects.

    Returns
    -------
    Dataset instance:
        ``data`` is a DataFrame built from the whole CSV, ``names`` lists its
        columns, and ``endog`` is the SUNACTIVITY column as a Series indexed
        by the integer YEAR.
    """
    data = DataFrame(_get_data())
    # TODO: time series
    # Pass the raw values: handing a Series plus a new index to the Series
    # constructor aligns on the old 0..n-1 labels instead of relabelling
    # positionally, which would not line the observations up with the years.
    endog = Series(data['SUNACTIVITY'].values,
                   index=data['YEAR'].astype(int))
    # endog_name must name the endogenous column; it previously said 'volume'.
    dataset = Dataset(data=data, names=list(data.columns),
                      endog=endog, endog_name='SUNACTIVITY')
    return dataset
def _get_data():
    """
    Read the dataset's CSV file from the directory containing this module.

    Returns
    -------
    data : record array
        Output of ``np.recfromtxt``: comma-delimited, field names from the
        header row, every column parsed as float.
    """
    filepath = dirname(abspath(__file__))
    ##### EDIT THE FOLLOWING TO POINT TO DatasetName.csv #####
    # Keep the ``with`` block when adapting this template: recfromtxt does
    # not close file objects that were opened by the caller.
    with open(filepath + '/DatasetName.csv', 'rb') as csv_file:
        data = np.recfromtxt(csv_file, delimiter=",", names=True,
                             dtype=float)
    return data
-.2341522,-.2554235,-216.8307,-.758866,-7.183702 3 | 1.024237,.7987758,483.7363,2.503515,22.51353 4 | -.2850617,-.3177966,-70411.51,-2.379918,-141.7456 5 | .2099025,.1967877,2247.514,.9519455,21.77242 6 | -.4034835,-.4757415,-19563.36,-2.635026,-88.94614 7 | -.1644134,-.1744011,-17331.03,-1.162355,-53.42135 8 | -.4296077,-.5134667,-5300.37,-2.244962,-47.82603 9 | .323713,.2941846,4110.794,1.486844,36.55984 10 | .1503672,.1434294,7285.321,.8855429,33.1355 11 | .4212886,.373428,1373.157,1.521332,24.15702 12 | .4506587,.3965867,1701.469,1.661779,27.80326 13 | .2435375,.226174,3184.023,1.136562,27.90734 14 | 1.051829,.8162054,6001.352,3.890797,79.71313 15 | -.5544503,-.712749,-2094.852,-2.454965,-34.21899 16 | -.6057506,-.8064111,-274.7382,-1.907744,-13.05105 17 | -.3412157,-.3902446,-631.138,-1.270229,-14.76001 18 | .2218985,.2073287,691.1358,.8168764,12.43929 19 | .2455925,.2266392,.1992506,.2579483,.2747237 20 | -.7589526,-1.153008,-256.739,-2.407166,-14.14742 21 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | from empirical_distribution import ECDF, monotone_fn_inverter, StepFunction 2 | 3 | from gwstatsmodels import NoseWrapper as Tester 4 | test = Tester().test 5 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/distributions/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/distributions/tests/__init__.py -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/distributions/tests/test_ecdf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.testing as npt 3 | from 
class TestDistributions(npt.TestCase):
    """Tests for StepFunction and monotone_fn_inverter."""

    def test_StepFunction(self):
        """Default (left) side: a query equal to a knot takes the previous step."""
        x = np.arange(20)
        y = np.arange(20)
        f = StepFunction(x, y)
        npt.assert_almost_equal(f( np.array([[3.2,4.5],[24,-3.1],[3.0, 4.0]])),
                                [[ 3, 4], [19, 0], [2, 3]])

    def test_StepFunctionBadShape(self):
        """Mismatched lengths and non-1d inputs must raise ValueError."""
        x = np.arange(20)
        y = np.arange(21)
        self.assertRaises(ValueError, StepFunction, x, y)
        x = np.zeros((2, 2))
        y = np.zeros((2, 2))
        self.assertRaises(ValueError, StepFunction, x, y)

    def test_StepFunctionValueSideRight(self):
        """side='right': a query equal to a knot takes that knot's own step."""
        x = np.arange(20)
        y = np.arange(20)
        f = StepFunction(x, y, side='right')
        npt.assert_almost_equal(f( np.array([[3.2,4.5],[24,-3.1],[3.0, 4.0]])),
                                [[ 3, 4], [19, 0], [3, 4]])

    def test_StepFunctionRepeatedValues(self):
        """Repeated abscissae are handled for both sides."""
        x = [1, 1, 2, 2, 2, 3, 3, 3, 4, 5]
        y = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
        f = StepFunction(x, y)
        npt.assert_almost_equal(f([1, 2, 3, 4, 5]), [0, 7, 10, 13, 14])
        f2 = StepFunction(x, y, side='right')
        npt.assert_almost_equal(f2([1, 2, 3, 4, 5]), [7, 10, 13, 14, 15])

    def test_monotone_fn_inverter(self):
        """The inverter of a decreasing function stores its points reversed."""
        x = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
        fn = lambda x : 1./x
        # y was previously never defined, so the last assertion could only
        # raise NameError; compute the function values explicitly.
        y = fn(np.array(x))
        f = monotone_fn_inverter(fn, x)
        # fn is decreasing on x. Assuming the inverter orders its points by
        # increasing fn(x) (TODO confirm against monotone_fn_inverter), both
        # stored arrays are the reversed inputs; the original x[::1] was an
        # un-reversed copy.
        npt.assert_array_equal(f.y, x[::-1])
        npt.assert_array_equal(f.x, y[::-1])
families used 3 | for fitting GLMs and GAMs. 4 | 5 | These families are described in 6 | 7 | P. McCullagh and J. A. Nelder. "Generalized linear models." 8 | Monographs on Statistics and Applied Probability. 9 | Chapman & Hall, London, 1983. 10 | 11 | ''' 12 | 13 | #from gwstatsmodels.family.family import Gaussian, Family, Poisson, Gamma, \ 14 | # InverseGaussian, Binomial, NegativeBinomial 15 | from family import Gaussian, Family, Poisson, Gamma, \ 16 | InverseGaussian, Binomial, NegativeBinomial 17 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/genmod/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/genmod/tests/__init__.py -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/genmod/tests/results/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/genmod/tests/results/__init__.py -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/genmod/tests/results/stata_cancer_glm.csv: -------------------------------------------------------------------------------- 1 | studytime,age,drug 2 | 1,61,1 3 | 1,65,1 4 | 2,59,1 5 | 3,52,1 6 | 4,56,1 7 | 4,67,1 8 | 5,63,1 9 | 5,58,1 10 | 8,56,1 11 | 8,58,1 12 | 8,52,1 13 | 8,49,1 14 | 11,50,1 15 | 11,55,1 16 | 12,49,1 17 | 12,62,1 18 | 15,51,1 19 | 17,49,1 20 | 22,57,1 21 | 23,52,1 22 | 6,67,2 23 | 6,65,2 24 | 7,58,2 25 | 9,56,2 26 | 10,49,2 27 | 11,61,2 28 | 13,62,2 29 | 15,50,2 30 | 16,67,2 31 | 19,50,2 32 | 20,55,2 33 | 22,58,2 34 | 23,47,2 35 | 32,52,2 36 | 6,55,3 37 | 10,54,3 38 | 17,60,3 39 | 19,49,3 40 | 24,58,3 41 | 25,50,3 42 | 25,55,3 43 | 28,57,3 44 | 28,48,3 45 | 
@dec.skipif(not have_matplotlib)
def test_violinplot_beanplot():
    """Test violinplot and beanplot with the same dataset.

    Smoke test only: both plot functions are called with identical data and
    options, and each figure is closed afterwards.
    """
    data = anes96.load_pandas()
    party_ID = np.arange(7)
    labels = ["Strong Democrat", "Weak Democrat", "Independent-Democrat",
              "Independent-Independent", "Independent-Republican",
              "Weak Republican", "Strong Republican"]

    # `pid` instead of `id`: in Python 2 the comprehension variable leaks
    # into the function scope and would shadow the builtin `id`.
    age = [data.exog['age'][data.endog == pid] for pid in party_ID]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    violinplot(age, ax=ax, labels=labels,
               plot_opts={'cutoff_val':5, 'cutoff_type':'abs',
                          'label_fontsize':'small',
                          'label_rotation':30})

    plt.close(fig)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    beanplot(age, ax=ax, labels=labels,
             plot_opts={'cutoff_val':5, 'cutoff_type':'abs',
                        'label_fontsize':'small',
                        'label_rotation':30})

    plt.close(fig)
class TestPlot(object):
    """Checks for the regression plot helpers on a small simulated OLS fit."""

    def __init__(self):
        self.setup() #temp: for testing without nose

    def setup(self):
        """Simulate data and store the fitted OLS results on ``self.res``."""
        nobs = 100
        noise_scale = 0.5
        x1 = np.linspace(0, 20, nobs)
        x2 = 5 + 3* np.random.randn(nobs)
        # the true model has nonlinear terms that the fitted model omits
        design_true = np.c_[x1, x2, np.sin(0.5*x1), (x2-5)**2, np.ones(nobs)]
        coefs = [0.5, 0.5, 1, -0.04, 5.]
        endog = np.dot(design_true, coefs) \
                + noise_scale * np.random.normal(size=nobs)
        exog = sm.add_constant(np.c_[x1, x2], prepend=False)

        self.res = sm.OLS(endog, exog).fit()

    def test_plot_fit(self):
        """The first two lines drawn by plot_fit hold observed and fitted data."""
        res = self.res

        fig = plot_fit(res, 0, y_true=None)

        regressor = res.model.exog[:, 0]
        # line 0: observed endog, line 1: fitted values, both against exog[:, 0]
        expected = [(regressor, res.model.endog),
                    (regressor, res.fittedvalues)]

        drawn = fig.axes[0].get_lines()
        for pos, (want_x, want_y) in enumerate(expected):
            got_x, got_y = drawn[pos].get_data()
            np.testing.assert_equal(want_x, got_x)
            np.testing.assert_equal(want_y, got_y)

        plt.close(fig)

    def test_plot_oth(self):
        """Smoke test: the remaining plot functions only have to run."""
        #just test that they run
        res = self.res
        model = res.model

        plot_fit(res, 0, y_true=None)
        plot_partregress(model.endog, model.exog, exog_idx=[0,1])
        plot_regress_exog(res, exog_idx=0)
        plot_ccpr(res, exog_idx=[0])
        plot_ccpr(res, exog_idx=[0,1])

        plt.close('all')
#copied/moved from sandbox/tsa/example_arma.py
def plotacf(corr, ax=None, lags=None, use_vlines=True, **kwargs):
    """Plot auto- or cross-correlation values against their lags.

    Lags go on the horizontal axis, correlations on the vertical axis.

    Parameters
    ----------
    corr : array_like
        Correlation values (vertical axis).
    ax : Matplotlib AxesSubplot instance, optional
        Axis to draw into. If omitted, a new figure with one axis is created.
    lags : array_like, optional
        Lag values (horizontal axis). Must have the same length as `corr`.
        Defaults to ``np.arange(len(corr))``.
    use_vlines : bool, optional
        If True, a vertical line per lag and a horizontal line (``axhline``)
        are drawn in addition to the markers. If False, only markers are
        drawn. The default marker is 'o'; override with a ``marker`` kwarg.
    **kwargs : kwargs, optional
        Passed unchanged to ``ax.vlines`` and ``ax.axhline`` (when
        `use_vlines` is True) and to ``ax.plot``.

    Returns
    -------
    fig : Matplotlib figure instance
        The new figure if `ax` was None, otherwise the figure `ax` belongs to.

    Raises
    ------
    ValueError
        If `lags` is given and its length differs from that of `corr`.

    See Also
    --------
    matplotlib.pyplot.xcorr
    matplotlib.pyplot.acorr
    mpl_examples/pylab_examples/xcorr_demo.py

    Notes
    -----
    Adapted from matplotlib's `xcorr`.

    Data are plotted as ``plot(lags, corr, **kwargs)``

    """
    fig, ax = utils.create_mpl_ax(ax)

    corr = np.asarray(corr)
    ncorr = len(corr)
    if lags is None:
        lags = np.arange(ncorr)
    elif len(lags) != ncorr:
        raise ValueError('lags and corr must be of equal length')

    if use_vlines:
        ax.vlines(lags, [0], corr, **kwargs)
        ax.axhline(**kwargs)

    # caller-supplied marker/linestyle win over the marker-only defaults
    marker_kwargs = {'marker': 'o', 'linestyle': 'None'}
    marker_kwargs.update(kwargs)
    ax.plot(lags, corr, **marker_kwargs)

    return fig
"""Helper functions for graphics with Matplotlib."""


__all__ = ['create_mpl_ax', 'create_mpl_fig']


def _import_mpl():
    """Import and return ``matplotlib.pyplot``.

    This function is not needed outside this utils module.

    Returns
    -------
    plt : module
        The ``matplotlib.pyplot`` module.

    Raises
    ------
    ImportError
        If Matplotlib cannot be imported.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        # Only a failed import means "not found". A bare ``except`` here
        # also swallowed KeyboardInterrupt and hid unrelated errors.
        raise ImportError("Matplotlib is not found.")

    return plt


def create_mpl_ax(ax=None):
    """Helper function for when a single plot axis is needed.

    Parameters
    ----------
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.
    ax : Matplotlib AxesSubplot instance
        The created axis if `ax` is None, otherwise the axis that was passed
        in.

    Raises
    ------
    ImportError
        If `ax` is None and Matplotlib cannot be imported.

    Notes
    -----
    This function imports `matplotlib.pyplot`, which should only be done to
    create (a) figure(s) with ``plt.figure``.  All other functionality exposed
    by the pyplot module can and should be imported directly from its
    Matplotlib module.

    See Also
    --------
    create_mpl_fig

    Examples
    --------
    A plotting function has a keyword ``ax=None``.  Then calls:

    >>> from gwstatsmodels.graphics import utils
    >>> fig, ax = utils.create_mpl_ax(ax)

    """
    if ax is None:
        # pyplot is imported only when a figure really has to be created
        plt = _import_mpl()
        fig = plt.figure()
        ax = fig.add_subplot(111)
    else:
        fig = ax.figure

    return fig, ax


def create_mpl_fig(fig=None):
    """Helper function for when multiple plot axes are needed.

    Those axes should be created in the functions they are used in, with
    ``fig.add_subplot()``.

    Parameters
    ----------
    fig : Matplotlib figure instance, optional
        If given, this figure is simply returned.  Otherwise a new figure is
        created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `fig` is None, the created figure.  Otherwise the input `fig` is
        returned.

    Raises
    ------
    ImportError
        If `fig` is None and Matplotlib cannot be imported.

    See Also
    --------
    create_mpl_ax

    """
    if fig is None:
        plt = _import_mpl()
        fig = plt.figure()

    return fig
'''Helper files for pickling'''

def _get_file_obj(fname, mode):
    """
    Light wrapper to handle strings and let files (anything else) pass through

    Parameters
    ----------
    fname : str or file-like
        Path to open, or an already open file-like object.
    mode : str
        Mode passed to ``open`` when `fname` is a path.

    Returns
    -------
    fh : file-like
        A newly opened file if `fname` could be opened as a path, otherwise
        `fname` itself.

    Raises
    ------
    IOError
        If `fname` is a path that cannot be opened.
    """
    try:
        fh = open(fname, mode)
    except TypeError:
        # Not a path: assume it is an already open file-like object.
        # IOError is deliberately not caught. Swallowing it handed the plain
        # path string to pickle and hid the real cause (missing directory,
        # permissions, ...) behind an unrelated error.
        fh = fname
    return fh

def save_pickle(obj, fname):
    """
    Save the object to file via pickling.

    Parameters
    ----------
    obj : object
        Object to pickle.
    fname : str or file-like
        Filename to pickle to, or an open binary file-like object. A file
        opened here is closed again; a passed-in object is left open.
    """
    try:
        import cPickle as pickle
    except ImportError:  # no cPickle module (Python 3)
        import pickle
    fout = _get_file_obj(fname, 'wb')
    try:
        pickle.dump(obj, fout, protocol=-1)
    finally:
        # close (and thereby flush) only what we opened ourselves
        if fout is not fname:
            fout.close()


def load_pickle(fname):
    """
    Load a previously saved object from file

    Parameters
    ----------
    fname : str or file-like
        Filename to unpickle, or an open binary file-like object. A file
        opened here is closed again; a passed-in object is left open.

    Returns
    -------
    obj : object
        The unpickled object.

    Notes
    -----
    This method can be used to load *both* models and results.

    Unpickling executes code embedded in the pickle; only load files from
    trusted sources.
    """
    try:
        import cPickle as pickle
    except ImportError:  # no cPickle module (Python 3)
        import pickle
    fin = _get_file_obj(fname, 'rb')
    try:
        return pickle.load(fin)
    finally:
        if fin is not fname:
            fin.close()
https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/iolib/tests/results/macrodata.npy_ -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/iolib/tests/test_data.csv: -------------------------------------------------------------------------------- 1 | "year","quarter","realgdp","realcons","realinv","realgovt","realdpi","cpi","m1","tbilrate","unemp","pop","infl","realint" 2 | 1959,1,2710.349,1707.4,286.898,470.045,1886.9,28.980,139.7,2.82,5.8,177.146,0,0 3 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/iolib/tests/test_foreign.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for iolib/foreign.py 3 | """ 4 | 5 | from numpy.testing import * 6 | import numpy as np 7 | import gwstatsmodels.api as sm 8 | import os 9 | 10 | # Test precisions 11 | DECIMAL_4 = 4 12 | DECIMAL_3 = 3 13 | 14 | 15 | def test_genfromdta(): 16 | """ 17 | Test genfromdta vs. results/macrodta.npy created with genfromtxt. 18 | """ 19 | #NOTE: Stata handles data very oddly. 
def test_pickle():
    """Round-trip an object through save_pickle/load_pickle.

    Covers both a filename and an already open file handle.
    """
    import os
    import tempfile
    from numpy.testing import assert_equal
    tmpdir = tempfile.mkdtemp(prefix='pickle')
    a = range(10)
    save_pickle(a, tmpdir+'/res.pkl')
    b = load_pickle(tmpdir+'/res.pkl')
    assert_equal(a, b)

    #cleanup, tested on Windows
    try:
        os.remove(tmpdir+'/res.pkl')
        os.rmdir(tmpdir)
    except (OSError, IOError):
        pass
    assert not os.path.exists(tmpdir)

    #test with file handle
    from gwstatsmodels.compatnp.py3k import BytesIO
    fh = BytesIO()
    save_pickle(a, fh)
    fh.seek(0,0)
    c = load_pickle(fh)
    fh.close()
    # compare the object loaded from the file handle (`c`); the old check
    # re-tested `b` and so never verified the file-handle path
    assert_equal(a, c)
TestOLS 9 | 10 | #def mytest(): 11 | aregression = TestOLS() 12 | TestOLS.setupClass() 13 | results = aregression.res1 14 | r_summary = str(results.summary_old()) 15 | print r_summary 16 | olsres = results 17 | 18 | print '\n\n' 19 | 20 | r_summary = str(results.summary()) 21 | print r_summary 22 | print '\n\n' 23 | 24 | 25 | from gwstatsmodels.discrete.tests.test_discrete import TestProbitNewton 26 | 27 | aregression = TestProbitNewton() 28 | TestProbitNewton.setupClass() 29 | results = aregression.res1 30 | r_summary = str(results.summary()) 31 | print r_summary 32 | print '\n\n' 33 | 34 | probres = results 35 | 36 | from gwstatsmodels.robust.tests.test_rlm import TestHampel 37 | 38 | aregression = TestHampel() 39 | #TestHampel.setupClass() 40 | results = aregression.res1 41 | r_summary = str(results.summary()) 42 | print r_summary 43 | rlmres = results 44 | 45 | print '\n\n' 46 | 47 | from gwstatsmodels.genmod.tests.test_glm import TestGlmBinomial 48 | 49 | aregression = TestGlmBinomial() 50 | #TestGlmBinomial.setupClass() 51 | results = aregression.res1 52 | r_summary = str(results.summary()) 53 | print r_summary 54 | 55 | #print results.summary2(return_fmt='latex') 56 | #print results.summary2(return_fmt='csv') 57 | 58 | smry = olsres.summary() 59 | print smry.as_csv() 60 | 61 | # import matplotlib.pyplot as plt 62 | # plt.plot(rlmres.model.endog,'o') 63 | # plt.plot(rlmres.fittedvalues,'-') 64 | # 65 | # plt.show() -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/miscmodels/__init__.py: -------------------------------------------------------------------------------- 1 | from tmodel import TLinearModel 2 | from count import * #remove this after debugging/refactoring 3 | 4 | from gwstatsmodels import NoseWrapper as Tester 5 | test = Tester().test 6 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/miscmodels/tests/__init__.py: 
#### Convenience Functions to be moved to kerneltools ####
import numpy as np

def forrt(X,m=None):
    """
    RFFT with order like Munro (1976) FORTT routine.

    Parameters
    ----------
    X : array_like
        Real input sequence.
    m : int, optional
        Transform length passed to ``np.fft.rfft``; defaults to ``len(X)``.

    Returns
    -------
    ndarray
        The real parts of the scaled transform followed by the imaginary
        parts of its interior coefficients (first and last omitted).
    """
    if m is None:
        m = len(X)
    y = np.fft.rfft(X,m)/m
    return np.r_[y.real,y[1:-1].imag]

def revrt(X,m=None):
    """
    Inverse of forrt. Equivalent to Munro (1976) REVRT routine.

    Parameters
    ----------
    X : array_like
        Coefficients in the layout produced by `forrt`.
    m : int, optional
        Length of the original sequence; defaults to ``len(X)``.

    Returns
    -------
    ndarray
        The reconstructed real sequence.
    """
    if m is None:
        m = len(X)
    # floor division: a float index (true division) is not a valid slice bound
    half = m//2 + 1
    y = X[:half] + np.r_[0,X[half:],0]*1j
    return np.fft.irfft(y)*m

def silverman_transform(bw, M, RANGE):
    """
    FFT of Gaussian kernel following to Silverman AS 176.

    Parameters
    ----------
    bw : float
        Bandwidth.
    M : int
        Number of grid points.
    RANGE : float
        Length of the interval covered by the grid.

    Returns
    -------
    kern_est : ndarray
        The ``M//2 + 1`` damping factors followed by the interior factors
        again (first and last omitted).

    Notes
    -----
    Underflow is intentional as a dampener.
    """
    # floor division keeps the number of frequencies an integer for odd M
    J = np.arange(M//2+1)
    FAC1 = 2*(np.pi*bw/RANGE)**2
    JFAC = J**2*FAC1
    BC = 1 - 1./3 * (J*1./M*np.pi)**2
    FAC = np.exp(-JFAC)/BC
    kern_est = np.r_[FAC,FAC[1:-1]]
    return kern_est

def linbin(X,a,b,M, trunc=1):
    """
    Linear Binning as described in Fan and Marron (1994)

    Each observation is split between its two neighbouring grid points in
    proportion to its distance from them.

    Parameters
    ----------
    X : iterable of float
        Observations.
    a, b : float
        Lower and upper end of the grid.
    M : int
        Number of equally spaced grid points on ``[a, b]`` (needs ``M > 1``).
    trunc : int, optional
        If 0, observations outside the grid are added with full weight to
        the nearest end point. Otherwise (default) they are dropped.

    Returns
    -------
    gcnts : ndarray
        Array of length `M` with the (fractional) grid counts.

    Notes
    -----
    An observation whose left neighbour is the last grid point (for example
    ``x == b``) is treated as lying beyond the grid, i.e. it is subject to
    `trunc`.
    """
    gcnts = np.zeros(M)
    # float() guards against integer division for integer a, b
    delta = float(b-a)/(M-1)
    last = M - 1

    for x in X:
        lxi = (x - a)/delta
        # floor, not int(): truncation toward zero would put points just
        # below `a` into bin 0 with a negative remainder
        li = int(np.floor(lxi))
        rem = lxi - li
        if 0 <= li < last:
            gcnts[li] = gcnts[li] + 1-rem
            gcnts[li+1] = gcnts[li+1] + rem
        elif trunc == 0:
            # outside the grid: all weight goes to the nearest end point
            if li < 0:
                gcnts[0] = gcnts[0] + 1
            else:
                gcnts[last] = gcnts[last] + 1

    return gcnts

def counts(x,v):
    """
    Counts the number of elements of x that fall within the grid points v

    Parameters
    ----------
    x : array_like
        Observations.
    v : array_like
        Monotonic grid points (bin edges for ``np.digitize``).

    Returns
    -------
    ndarray
        Occurrence count of each ``np.digitize`` bin index, with at least
        ``len(v)`` entries.

    Notes
    -----
    Using np.digitize and np.bincount
    """
    idx = np.digitize(x,v)
    try: # numpy 1.6
        return np.bincount(idx, minlength=len(v))
    except TypeError:
        # older numpy: bincount has no `minlength` keyword, pad by hand
        bc = np.bincount(idx)
        return np.r_[bc,np.zeros(len(v)-len(bc))]

def kdesum(x,axis=0):
    """
    For every element ``x[i]`` return the sum of ``x[i] - x`` along `axis`.
    """
    return np.asarray([np.sum(x[i] - x, axis) for i in range(len(x))])
def configuration(parent_package='', top_path=None):
    """Build the numpy.distutils configuration of the 'nonparametric' package.

    Registers the test data directories and, when ``has_c_compiler()``
    reports a compiler, cythonizes ``fast_linbin.pyx`` and adds the
    ``fast_linbin`` extension.

    Parameters
    ----------
    parent_package : str, optional
        Passed through to ``Configuration``.
    top_path : str or None, optional
        Passed through to ``Configuration``.

    Returns
    -------
    config : numpy.distutils.misc_util.Configuration
        The populated configuration object.
    """
    from numpy.distutils.misc_util import (Configuration,
                                           get_numpy_include_dirs)
    config = Configuration('nonparametric', parent_package, top_path)

    # tests are shipped as data directories rather than as subpackages
    #config.add_subpackage('tests')
    #config.add_subpackage('tests/results')
    config.add_data_dir('tests')
    config.add_data_dir('tests/results')
    config.add_data_files('tests/results/*.csv')
    #config.add_data_files('tests/Xi_test_data.csv')
    #config.add_data_files('tests/results/results_kde.csv')
    #config.add_data_files('tests/results/results_kde_fft.csv')
    #config.add_data_files('tests/results/results_kde_weights.csv')
    # NOTE(review): indentation was lost in this dump; the extension is
    # assumed to be registered only when a C compiler is found -- confirm.
    if has_c_compiler():
        cython(['fast_linbin.pyx'], working_path=cur_dir)

        config.add_extension('fast_linbin',
                             sources=['fast_linbin.c'],
                             include_dirs=[get_numpy_include_dirs()])


    return config
https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/nonparametric/tests/results/__init__.py -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/nonparametric/tests/results/results_kde_weights.csv: -------------------------------------------------------------------------------- 1 | gau_weights 2 | 0.00004091924899 3 | 0.00016959787885 4 | 0.00059934531213 5 | 0.00183319840015 6 | 0.00492202524768 7 | 0.01172550177328 8 | 0.02491718854874 9 | 0.04721194132906 10 | 0.07936862862330 11 | 0.11757973423560 12 | 0.15264633009913 13 | 0.17351807973589 14 | 0.17421758795524 15 | 0.15862831317676 16 | 0.13828143957016 17 | 0.12476810003411 18 | 0.12323362271923 19 | 0.13112340382949 20 | 0.14111905861875 21 | 0.14552831190887 22 | 0.14067623449047 23 | 0.13004541826287 24 | 0.12420085708049 25 | 0.13643388604719 26 | 0.17552814107200 27 | 0.23962814737430 28 | 0.31618787008368 29 | 0.38931148563352 30 | 0.44837216117094 31 | 0.48994687984284 32 | 0.51358114145063 33 | 0.51899512954218 34 | 0.50744342593895 35 | 0.48250310247562 36 | 0.44764331200409 37 | 0.40372099611583 38 | 0.34970522430843 39 | 0.28632534089734 40 | 0.21918724562177 41 | 0.15740327542860 42 | 0.10819442279597 43 | 0.07298952186968 44 | 0.04864752003907 45 | 0.03130415046243 46 | 0.01868831677998 47 | 0.00995437424043 48 | 0.00458727286872 49 | 0.00178822852106 50 | 0.00058037855879 51 | 0.00015511706223 52 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/regression/__init__.py: -------------------------------------------------------------------------------- 1 | from linear_model import yule_walker 2 | 3 | from gwstatsmodels import NoseWrapper as Tester 4 | test = Tester().test 5 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/regression/tests/__init__.py: 
def test_HC_use():
    """HC0 covariance passed as ``cov_p`` must drive cov_params, t_test, f_test."""
    np.random.seed(0)
    nsample = 100
    grid = np.linspace(0,10, 100)
    exog = sm.add_constant(np.column_stack((grid, grid**2)), prepend=False)
    true_params = np.array([1, 0.1, 10])
    endog = np.dot(exog, true_params) + np.random.normal(size=nsample)

    results = sm.OLS(endog, exog).fit()

    #need to call HC0_se to have cov_HC0 available
    results.HC0_se
    cov_hc0 = results.cov_HC0

    #test cov_params
    sel = np.array([1,2])
    sub_cov = results.cov_params(column=[1,2], cov_p=cov_hc0)
    assert_almost_equal(sub_cov, cov_hc0[sel[:,None], sel], decimal=15)

    #test t_test
    expected_t = results.params/results.HC0_se
    ttest = results.t_test(np.eye(3), cov_p=cov_hc0)
    assert_almost_equal(ttest.tvalue, expected_t, decimal=14)
    assert_almost_equal(ttest.sd, results.HC0_se, decimal=14)

    #test f_test
    # joint test of the two slope coefficients (the constant comes last)
    ftest = results.f_test(np.eye(3)[:-1], cov_p=cov_hc0)
    slopes = results.params[:-1]
    sel = np.array([0,1])
    slope_cov = cov_hc0[sel[:,None], sel]
    wald = np.dot(slopes, np.dot(np.linalg.inv(slope_cov), slopes))
    assert_almost_equal(ftest.fvalue, wald/len(sel), decimal=12)
def acovf_fft(x, demean=True):
    '''autocovariance function with call to fftconvolve, biased

    Parameters
    ----------
    x : array_like
        timeseries, signal (1-dimensional)
    demean : boolean
        If true, then demean time series

    Returns
    -------
    acovf : array
        autocovariance for data, same length as x; element ``k`` is the
        lag-``k`` autocovariance with the biased normalisation ``1/nobs``

    Notes
    -----
    The full convolution of ``x`` with its reverse has length
    ``2*nobs - 1``; the non-negative lags start at index ``nobs - 1``.

    '''
    # local imports: the module itself does not import numpy
    import numpy as np
    from scipy import signal

    x = np.asarray(x)

    if demean:
        x = x - x.mean()

    nobs = x.shape[0]
    # keep lags 0 .. nobs-1 and return them (the original computed the
    # expression for only 11 lags and discarded the result)
    return signal.fftconvolve(x, x[::-1])[nobs - 1:] / nobs
-------------------------------------------------------------------------------- /pygwr/gwstatsmodels/sandbox/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | '''temporary location for enhancements to scipy.stats 2 | 3 | includes 4 | ^^^^^^^^ 5 | 6 | * Per Brodtkorb's estimation enhancements to scipy.stats.distributions 7 | - distributions_per.py is copy of scipy.stats.distributions.py with changes 8 | - distributions_profile.py partially extracted classes and functions to 9 | separate code into more manageable pieces 10 | * josef's extra distribution and helper functions 11 | - moment helpers 12 | - goodness of fit test 13 | - fitting distributions with some fixed parameters 14 | - find best distribution that fits data: working script 15 | * example and test folders to keep all together 16 | 17 | status 18 | ^^^^^^ 19 | 20 | mixed status : from not-working to well-tested 21 | 22 | 23 | ''' 24 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/sandbox/distributions/examples/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/sandbox/distributions/examples/ex_fitfr.py: -------------------------------------------------------------------------------- 1 | '''Example for estimating distribution parameters when some are fixed. 2 | 3 | This currently uses a patched version of the distributions; two methods are 4 | added to the continuous distributions. This has no side effects. 5 | It also adds bounds to vonmises, which changes the behavior of it for some 6 | methods.
class MaxDist(stats.rv_continuous):
    """Distribution of the maximum of n i.i.d. draws from a base distribution.

    If the base distribution has cdf F and pdf f, the maximum of n
    independent draws has cdf F(x)**n, pdf n * f(x) * F(x)**(n-1) and
    quantile function ppf(q) = F^{-1}(q**(1/n)).

    Parameters
    ----------
    dist : distribution object
        Base distribution; its ``pdf``, ``cdf`` and ``ppf`` methods and its
        support bounds ``a`` and ``b`` are used.
    n : int
        Number of i.i.d. draws the maximum is taken over.

    Examples
    --------
    >>> maxnormal10 = MaxDist(stats.norm, 10)
    >>> sample = maxnormal10.rvs(size=1000)
    """

    def __init__(self, dist, n):
        self.dist = dist
        self.n = n
        # The ``extradoc`` keyword is intentionally not passed: it is purely
        # cosmetic and is rejected by newer SciPy releases, which would make
        # the class impossible to instantiate.
        super(MaxDist, self).__init__(name='maxdist', a=dist.a, b=dist.b,
                                      longname='A maximumdistribution')

    def _pdf(self, x, *args, **kw):
        # d/dx F(x)**n = n * f(x) * F(x)**(n-1)
        return self.n * self.dist.pdf(x, *args, **kw) \
            * self.dist.cdf(x, *args, **kw)**(self.n - 1)

    def _cdf(self, x, *args, **kw):
        # P(max <= x) = P(all n draws <= x) = F(x)**n
        return self.dist.cdf(x, *args, **kw)**self.n

    def _ppf(self, q, *args, **kw):
        # y = F(x) ^ n  <=>  x = F-1( y ^ 1/n)
        return self.dist.ppf(q**(1. / self.n), *args, **kw)
def mean_residual_life(x, frac=None, alpha=0.05):
    '''empirical mean of the observations above a threshold (expected shortfall)

    Parameters
    ----------
    x : array_like, 1d
        Sample of observations; it is sorted internally.
    frac : None, float or sequence of floats
        If None, every sorted observation is used as a threshold.
        Otherwise each value is a fraction in [0, 1) and the threshold is
        the sorted observation at index ``floor(nobs * frac)``.
    alpha : float
        Currently unused; the critical value is fixed at 1.96.

    Returns
    -------
    res : ndarray, shape (k, 6)
        One row per threshold that has at least one observation strictly
        above it, with columns: number of observations less than or equal to
        the threshold, threshold, mean of the observations above the
        threshold, standard error of that mean, lower and upper bound of
        the interval ``threshold -/+ 1.96 * standard error``.
        An array of shape (0, 6) is returned if no threshold qualifies.

    Notes
    -----
    todo: check formula for std of mean
          std is zero when only one observation lies above the threshold
          vectorize loop using cumsum

    '''

    axis = 0  # searchsorted is 1d only
    x = np.asarray(x)
    nobs = x.shape[axis]
    xsorted = np.sort(x, axis=axis)
    if frac is None:
        xthreshold = xsorted
    else:
        # accept scalars and sequences alike; converting to an array also
        # avoids ``int * list`` silently repeating the list
        frac = np.atleast_1d(np.asarray(frac, dtype=float))
        xthreshold = xsorted[np.floor(nobs * frac).astype(int)]
    # use searchsorted instead of simple index in case of ties
    xlargerindex = np.searchsorted(xsorted, xthreshold, side='right')

    result = []
    for k_ind, threshold in zip(xlargerindex, xthreshold):
        if k_ind >= nobs:
            # no observation above this threshold, the tail mean is undefined
            continue
        # k_ind indexes the *sorted* sample, so the tail has to be taken
        # from xsorted (taking it from x is only right for sorted input)
        tail = xsorted[k_ind:]
        rmean = tail.mean()
        rstd = tail.std()
        rmstd = rstd / np.sqrt(nobs - k_ind)  # std error of mean, check formula
        result.append((k_ind, threshold, rmean, rmstd))

    if not result:
        return np.empty((0, 6))

    res = np.array(result)
    crit = 1.96  # todo: without loading stats, crit = -stats.t.ppf(0.05)
    # NOTE(review): the interval is centred on the threshold column, as in the
    # original code; centring on the tail mean may have been intended - confirm.
    confint = res[:, 1:2] + crit * res[:, -1:] * np.array([[-1, 1]])
    return np.column_stack((res, confint))
def R(v):
    """Rayleigh quotient of ``v`` for the matrix pair (A, B).

    Reads the module-level matrices ``A`` and ``B`` and appends the norm of
    the scaled residual to the module-level list ``data`` on every call.
    """
    Bv = B*v
    rq = dot(v.T, A*v)/dot(v.T, Bv)
    residual = (A*v - rq*B*v)/linalg.norm(Bv)
    data.append(linalg.norm(residual))
    return rq


def Rp(v):
    """ Gradient of the Rayleigh quotient at ``v`` (calls ``R`` once). """
    numerator = A*v - R(v)*B*v
    #print "Rp: ", result
    return 2*numerator/dot(v.T, B*v)


def Rpp(v):
    """ Hessian of the Rayleigh quotient at ``v``.

    ``R`` is evaluated once directly and once through each of the two
    ``Rp`` calls; the call pattern is kept so that ``data`` receives the
    same entries as before.
    """
    shifted = A - R(v)*B
    first = outer(B*v, Rp(v))
    second = outer(Rp(v), B*v)
    #print "Rpp: ", result
    return 2*(shifted - first - second)/dot(v.T, B*v)
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/sandbox/nonparametric/densityorthopoly.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/sandbox/nonparametric/densityorthopoly.py -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/sandbox/nonparametric/tests/ex_smoothers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Nov 04 10:51:39 2011 4 | 5 | @author: josef 6 | """ 7 | 8 | import numpy as np 9 | from numpy.testing import assert_almost_equal, assert_equal 10 | 11 | from gwstatsmodels.sandbox.nonparametric import smoothers, kernels 12 | from gwstatsmodels.regression.linear_model import OLS, WLS 13 | 14 | 15 | #DGP: simple polynomial 16 | order = 3 17 | sigma_noise = 0.5 18 | nobs = 100 19 | lb, ub = -1, 2 20 | x = np.linspace(lb, ub, nobs) 21 | x = np.sin(x) 22 | exog = x[:,None]**np.arange(order+1) 23 | y_true = exog.sum(1) 24 | y = y_true + sigma_noise * np.random.randn(nobs) 25 | 26 | 27 | 28 | #xind = np.argsort(x) 29 | pmod = smoothers.PolySmoother(2, x) 30 | pmod.fit(y) #no return 31 | y_pred = pmod.predict(x) 32 | error = y - y_pred 33 | mse = (error*error).mean() 34 | print mse 35 | res_ols = OLS(y, exog[:,:3]).fit() 36 | print np.squeeze(pmod.coef) - res_ols.params 37 | 38 | 39 | weights = np.ones(nobs) 40 | weights[:nobs//3] = 0.1 41 | weights[-nobs//5:] = 2 42 | 43 | pmodw = smoothers.PolySmoother(2, x) 44 | pmodw.fit(y, weights=weights) #no return 45 | y_predw = pmodw.predict(x) 46 | error = y - y_predw 47 | mse = (error*error).mean() 48 | print mse 49 | res_wls = WLS(y, exog[:,:3], 
weights=weights).fit() 50 | print np.squeeze(pmodw.coef) - res_wls.params 51 | 52 | 53 | 54 | doplot = 1 55 | if doplot: 56 | import matplotlib.pyplot as plt 57 | plt.plot(y, '.') 58 | plt.plot(y_true, 'b-', label='true') 59 | plt.plot(y_pred, '-', label='poly') 60 | plt.plot(y_predw, '-', label='poly -w') 61 | plt.legend(loc='upper left') 62 | 63 | plt.close() 64 | #plt.show() 65 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/sandbox/panel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/sandbox/panel/__init__.py -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/sandbox/panel/ex_sandwich2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Created on Fri Dec 16 12:52:13 2011 5 | 6 | Author: Josef Perktold 7 | """ 8 | 9 | import numpy as np 10 | from numpy.testing import assert_almost_equal 11 | 12 | import gwstatsmodels.api as sm 13 | 14 | import gwstatsmodels.sandbox.panel.sandwich_covariance as sw 15 | import gwstatsmodels.sandbox.panel.sandwich_covariance_generic as swg 16 | 17 | 18 | #http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta 19 | import gwstatsmodels.iolib.foreign as dta 20 | 21 | srs = dta.genfromdta("srs.dta") 22 | y = srs['api00'] 23 | #x = srs[['growth', 'emer', 'yr_rnd']].view(float).reshape(len(y), -1) 24 | #force sequence 25 | x = np.column_stack([srs[ii] for ii in ['growth', 'emer', 'yr_rnd']]) 26 | group = srs['dnum'] 27 | 28 | #xx = sm.add_constant(x, prepend=True) 29 | xx = sm.add_constant(x, prepend=False) #for Stata compatibility 30 | 31 | #remove nan observation 32 | mask = (xx!=-999.0).all(1) #nan code in dta file 33 | mask.shape 34 | y = y[mask] 35 | xx = xx[mask] 36 | group = 
group[mask] 37 | 38 | res_srs = sm.OLS(y, xx).fit() 39 | print res_srs.params 40 | print res_srs.bse 41 | 42 | bse_cr = sw.cov_cluster(res_srs, group.astype(int))[1] 43 | print bse_cr 44 | 45 | res_stata = np.rec.array( 46 | [ ('growth', '|', -0.1027121, 0.22917029999999999, -0.45000000000000001, 0.65500000000000003, -0.55483519999999997, 0.34941109999999997), 47 | ('emer', '|', -5.4449319999999997, 0.72939690000000001, -7.46, 0.0, -6.8839379999999997, -4.0059269999999998), 48 | ('yr_rnd', '|', -51.075690000000002, 22.83615, -2.2400000000000002, 0.027, -96.128439999999998, -6.0229350000000004), 49 | ('_cons', '|', 740.3981, 13.460760000000001, 55.0, 0.0, 713.84180000000003, 766.95439999999996)], 50 | dtype=[('exogname', '|S6'), ('del', '|S1'), ('params', '= 2.9.0 (working on 2.9.2 but not on 2.8.1 at least) 6 | 7 | library( systemfit ) 8 | data( "Kmenta" ) 9 | eqDemand <- consump ~ price + income 10 | eqSupply <- consump ~ price + farmPrice + trend 11 | system <- list( demand = eqDemand, supply = eqSupply ) 12 | 13 | ## performs OLS on each of the equations in the system 14 | fitols <- systemfit( system, data = Kmenta ) 15 | 16 | # all coefficients 17 | coef( fitols ) 18 | coef( summary ( fitols ) ) 19 | 20 | modReg <- matrix(0,7,6) 21 | colnames( modReg ) <- c( "demIntercept", "demPrice", "demIncome", 22 | "supIntercept", "supPrice2", "supTrend" ) 23 | 24 | # a lot of typing for a model 25 | modReg[ 1, "demIntercept" ] <- 1 26 | modReg[ 2, "demPrice" ] <- 1 27 | modReg[ 3, "demIncome" ] <- 1 28 | modReg[ 4, "supIntercept" ] <- 1 29 | modReg[ 5, "supPrice2" ] <- 1 30 | modReg[ 6, "supPrice2" ] <- 1 31 | modReg[ 7, "supTrend" ] <- 1 32 | fitols3 <- systemfit( system, data = Kmenta, restrict.regMat = modReg ) 33 | print(coef( fitols3, modified.regMat = TRUE )) 34 | # it seems to me like regMat does the opposite of what it says it does 35 | # in python 36 | # coef1 = np.array([99.8954229, -0.3162988, 0.3346356, 51.9296460, 0.2361566, 0.2361566, 0.2409308]) 37 | # i = 
np.eye(7,6) 38 | # i[-1,-1] = 1 39 | # i[-2,-1] = 0 40 | # i[-2,-2] = 1 41 | # np.dot(coef,i) # regMat = TRUE? 42 | print(coef( fitols3 )) 43 | 44 | ### SUR ### 45 | data("GrunfeldGreene") 46 | library(plm) 47 | GGPanel <- plm.data( GrunfeldGreene, c("firm","year") ) 48 | formulaGrunfeld <- invest ~ value + capital 49 | greeneSUR <- systemfit( formulaGrunfeld, "SUR", data = GGPanel, 50 | methodResidCov = "noDfCor" ) 51 | 52 | #usinvest <- as.matrix(invest[81:100]) 53 | #usvalue <- as.matrix(value 54 | col5tbl14_2 <- lm(invest[81:100] ~ value[81:100] + capital[81:100]) 55 | 56 | 57 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/sandbox/tests/test_bspline.py.txt: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import numpy as np 4 | from nipy.testing import * 5 | 6 | bsp = None 7 | 8 | def setup(): 9 | # Suppress warnings during tests to reduce noise 10 | warnings.simplefilter("ignore") 11 | # import bspline module after suppressing UserWarnings 12 | global bsp 13 | import nipy.fixes.scipy.stats.models.bspline as bsp 14 | 15 | def teardown(): 16 | # Clear list of warning filters 17 | warnings.resetwarnings() 18 | 19 | 20 | class TestBSpline(TestCase): 21 | 22 | def test1(self): 23 | b = bsp.BSpline(np.linspace(0,10,11), x=np.linspace(0,10,101)) 24 | old = b._basisx.shape 25 | b.x = np.linspace(0,10,51) 26 | new = b._basisx.shape 27 | self.assertEqual((old[0], 51), new) 28 | 29 | # FIXME: Have no idea what this test does. It's here to simply verify the 30 | # C extension is working (in a technical sense, not functional). 31 | def test_basis(self): 32 | b = bsp.BSpline(np.linspace(0,1,11)) 33 | x = np.array([0.4, 0.5]) 34 | v = b.basis(x, lower=0, upper=13) 35 | t = np.array([[ 0. , 0. ], 36 | [ 0. , 0. ], 37 | [ 0. , 0. ], 38 | [ 0. , 0. ], 39 | [ 0.16666667, 0. ], 40 | [ 0.66666667, 0.16666667], 41 | [ 0.16666667, 0.66666667], 42 | [ 0. 
def check_pca_svd(pcares, pcasvdres):
    """Assert that eigen-decomposition and SVD based PCA results agree.

    Both arguments are 4-tuples ``(xreduced, factors, evals, evecs)``.
    Eigenvectors and factors are compared after aligning their signs using
    the first row of the eigenvector matrices.
    """
    xreduced, factors, evals, evecs = pcares
    xred_svd, factors_svd, evals_svd, evecs_svd = pcasvdres

    assert_array_almost_equal(evals_svd, evals, 14)

    # column-wise sign correction, taken from the first row
    msign = (evecs/evecs_svd)[0]

    # (svd value, reference value, decimals, apply sign correction)
    checks = (
        (evecs_svd, evecs, 14, True),
        (factors_svd, factors, 13, True),
        (xred_svd, xreduced, 13, False),
    )
    for estimate, reference, decimal, flip in checks:
        if flip:
            estimate = msign*estimate
        assert_array_almost_equal(estimate, reference, decimal)
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Sep 30 15:20:45 2011 4 | 5 | @author: josef 6 | """ 7 | import numpy as np 8 | from scipy import stats 9 | from gwstatsmodels.sandbox.tools.mctools import StatTestMC 10 | from gwstatsmodels.sandbox.stats.diagnostic import ( 11 | acorr_ljungbox, unitroot_adf) 12 | 13 | def normalnoisesim(nobs=500, loc=0.0): 14 | return (loc+np.random.randn(nobs)) 15 | 16 | 17 | def lb(x): 18 | s,p = acorr_ljungbox(x, lags=4) 19 | return np.r_[s, p] 20 | 21 | 22 | mc1 = StatTestMC(normalnoisesim, lb) 23 | mc1.run(5000, statindices=range(4)) 24 | 25 | print mc1.summary_quantiles([1,2,3], stats.chi2([2,3,4]).ppf, 26 | varnames=['lag 1', 'lag 2', 'lag 3'], 27 | title='acorr_ljungbox') 28 | print '\n\n' 29 | 30 | frac = [0.01, 0.025, 0.05, 0.1, 0.975] 31 | crit = stats.chi2([2,3,4]).ppf(np.atleast_2d(frac).T) 32 | print mc1.summary_cdf([1,2,3], frac, crit, 33 | varnames=['lag 1', 'lag 2', 'lag 3'], 34 | title='acorr_ljungbox') 35 | print mc1.cdf(crit, [1,2,3])[1] 36 | 37 | #---------------------- 38 | 39 | def randwalksim(nobs=500, drift=0.0): 40 | return (drift+np.random.randn(nobs)).cumsum() 41 | 42 | def adf20(x): 43 | return unitroot_adf(x, 2, trendorder=0, autolag=None) 44 | 45 | print adf20(np.random.randn(100)) 46 | 47 | mc2 = StatTestMC(randwalksim, adf20) 48 | mc2.run(10000, statindices=[0,1]) 49 | frac = [0.01, 0.05, 0.1] 50 | #bug 51 | crit = np.array([-3.4996365338407074, -2.8918307730370025, -2.5829283377617176])[:,None] 52 | print mc2.summary_cdf([0], frac, crit, 53 | varnames=['adf'], 54 | title='adf') 55 | #bug 56 | #crit2 = np.column_stack((crit, frac)) 57 | #print mc2.summary_cdf([0, 1], frac, crit, 58 | # varnames=['adf'], 59 | # title='adf') 60 | 61 | print mc2.quantiles([0]) 62 | print mc2.cdf(crit, [0]) 63 | 64 | doplot=1 65 | if doplot: 66 | import matplotlib.pyplot as plt 67 | mc1.plot_hist([3],stats.chi2([4]).pdf) 68 | 
import os

try:
    from os.path import relpath
except ImportError:  # python 2.5 has no os.path.relpath

    def relpath(path, start=os.curdir):
        """Return a relative version of a path"""
        if not path:
            raise ValueError("no path specified")
        start_list = os.path.abspath(start).split(os.path.sep)
        path_list = os.path.abspath(path).split(os.path.sep)
        # Work out how much of the filepath is shared by start and path.
        i = len(os.path.commonprefix([start_list, path_list]))
        rel_list = [os.path.pardir] * (len(start_list)-i) + path_list[i:]
        if not rel_list:
            return os.curdir
        return os.path.join(*rel_list)


def configuration(parent_package='', top_path=None):
    """Build the numpy.distutils configuration for the gwstatsmodels package.

    Registers the ``nonparametric`` and ``tsa`` subpackages, a few
    individual data files, every non-Python file below ``datasets`` and
    every directory named ``tests`` or ``results`` below this file's
    directory.

    Parameters
    ----------
    parent_package : str
        Passed through to ``Configuration``.
    top_path : str or None
        Passed through to ``Configuration``.

    Returns
    -------
    config : numpy.distutils.misc_util.Configuration
    """
    from numpy.distutils.misc_util import Configuration

    config = Configuration('gwstatsmodels', parent_package, top_path)

    # these are subpackages because they have Cython code
    config.add_subpackage('nonparametric')
    config.add_subpackage('tsa')

    #TODO: delegate the non-test stuff to subpackages
    config.add_data_files('sandbox/panel/test_data.txt')
    config.add_data_files('stats/libqsturng/tests/bootleg.dat')
    config.add_data_files('stats/libqsturng/CH.r')
    config.add_data_files('stats/libqsturng/LICENSE.txt')

    curdir = os.path.abspath(os.path.dirname(__file__))

    # every file below datasets/ that is not Python source or bytecode,
    # expressed relative to this directory
    extradatafiles = [relpath(os.path.join(dirpath, fname), start=curdir)
                      for dirpath, _, fnames
                      in os.walk(os.path.join(curdir, 'datasets'))
                      for fname in fnames
                      if os.path.splitext(fname)[1] not in ['.py', '.pyc']]
    for datafile in extradatafiles:
        config.add_data_files(datafile)

    # add all the test and results directories for non *.py files
    for root, dirnames, filenames in os.walk(curdir):
        for dir_name in dirnames:
            # NOTE(review): root is a path below the absolute curdir, so the
            # comparison with 'sandbox' is always true and sandbox
            # directories are included as well; behaviour kept as is.
            if dir_name in ['tests', 'results'] and root != 'sandbox':
                config.add_data_dir(relpath(
                                    os.path.join(root, dir_name),
                                    start=curdir)
                                    )

    return config


if __name__ == '__main__':
    from numpy.distutils.core import setup
    setup(**configuration(top_path='').todict())
-------------------------------------------------------------------------------- /pygwr/gwstatsmodels/stats/api.py: -------------------------------------------------------------------------------- 1 | 2 | import diagnostic 3 | from .diagnostic import (acorr_ljungbox, breaks_cusumolsresid, breaks_hansen, 4 | CompareCox, CompareJ, compare_cox, compare_j, het_breushpagan, 5 | HetGoldfeldQuandt, het_goldfeldquandt, het_white, 6 | recursive_olsresiduals) 7 | import multicomp 8 | from .multitest import (multipletests, fdrcorrection, fdrcorrection_twostage) 9 | from .multicomp import tukeyhsd 10 | import gof 11 | from .gof import powerdiscrepancy, gof_chisquare_discrete 12 | import stattools 13 | from .stattools import durbin_watson, omni_normtest, jarque_bera 14 | 15 | from weightstats import DescrStatsW 16 | 17 | from descriptivestats import Describe 18 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/stats/diagnostic.py: -------------------------------------------------------------------------------- 1 | #collect some imports of verified (at least one example) functions 2 | from gwstatsmodels.sandbox.stats.diagnostic import ( 3 | acorr_ljungbox, breaks_cusumolsresid, breaks_hansen, CompareCox, CompareJ, 4 | compare_cox, compare_j, het_breushpagan, HetGoldfeldQuandt, 5 | het_goldfeldquandt, het_arch, 6 | het_white, recursive_olsresiduals, acorr_breush_godfrey, 7 | linear_harvey_collier, linear_rainbow, linear_lm, 8 | unitroot_adf) 9 | 10 | from .lilliefors import kstest_normal, lillifors 11 | from .adnorm import normal_ad 12 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/stats/libqsturng/CH.r: -------------------------------------------------------------------------------- 1 | % Copyright (c) 2011, Roger Lew BSD [see LICENSE.txt] 2 | % This software is funded in part by NIH Grant P20 RR016454. 
3 | 4 | 5 | % This is a collection of scripts used to generate C-H comparisons 6 | % for qsturng. As you can probably guess, my R's skills aren't all that good. 7 | 8 | setwd('D:\\USERS\\roger\\programming\\python\\development\\qsturng') 9 | 10 | ps = seq(length=100, from=.5, to=.999) 11 | 12 | for (r in c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21, 13 | 22,23,24,25,26,27,28,29,30,35,40,50,60,70,80,90,100,200)) { 14 | for (v in c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20, 15 | 22,24,26,30,35,40,50,60,90,120,240,480,1e38)) { 16 | m = qtukey(ps, r, v) 17 | fname = sprintf('CH_r=%i,v=%.0f.dat',r,v) 18 | print(fname) 19 | write(rbind(ps, m), 20 | file=fname, 21 | ncolumns=2, 22 | append=FALSE, 23 | sep=',') 24 | } 25 | } 26 | 27 | rs = c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,30,40,60,80,100) 28 | 29 | for (v in c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, 30 | 17,18,19,20,24,30,40,60,120,1e38)) { 31 | m = qtukey(0.30, rs, v) 32 | fname = sprintf('CH_p30.dat') 33 | print(fname) 34 | write(rbind(m), 35 | file=fname, 36 | ncolumns=26, 37 | append=TRUE, 38 | sep=' ') 39 | } 40 | 41 | for i in 42 | for (v in c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, 43 | 17,18,19,20,24,30,40,60,120,1e38)) { 44 | m = qtukey(0.675, rs, v) 45 | fname = sprintf('CH_p675.dat',r,v) 46 | print(fname) 47 | write(rbind(m), 48 | file=fname, 49 | ncolumns=26, 50 | append=TRUE, 51 | sep=' ') 52 | } 53 | 54 | for (v in c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, 55 | 17,18,19,20,24,30,40,60,120,1e38)) { 56 | m = qtukey(0.75, rs, v) 57 | fname = sprintf('CH_p75.dat',r,v) 58 | print(fname) 59 | write(rbind(m), 60 | file=fname, 61 | ncolumns=26, 62 | append=TRUE, 63 | sep=' ') 64 | } 65 | 66 | for (v in c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, 67 | 17,18,19,20,24,30,40,60,120,1e38)) { 68 | m = qtukey(0.975, rs, v) 69 | fname = sprintf('CH_p975.dat') 70 | print(fname) 71 | write(rbind(m), 72 | file=fname, 73 | ncolumns=26, 74 | append=TRUE, 75 | sep=' ') 76 | } 77 | 78 | i = 0; 79 | for (i in 
0:9999) { 80 | p = runif(1, .5, .95); 81 | r = sample(2:100, 1); 82 | v = runif(1, 2, 1000); 83 | q = qtukey(p,r,v); 84 | if (!is.nan(q)) { 85 | write(c(p,r,v,q), 86 | file='bootleg.dat', 87 | ncolumns=4, 88 | append=TRUE, 89 | sep=','); 90 | i = i + 1; 91 | } 92 | } -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/stats/libqsturng/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011, Roger Lew [see LICENSE.txt] 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following 11 | disclaimer in the documentation and/or other materials provided 12 | with the distribution. 13 | * Neither the name of the organizations affiliated with the 14 | contributors or the names of its contributors themselves may be 15 | used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 21 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 22 | COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 28 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/stats/libqsturng/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from qsturng import * 3 | 4 | from numpy.testing import Tester 5 | test = Tester().test 6 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/stats/libqsturng/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/stats/libqsturng/tests/__init__.py -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/stats/multicomp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Created on Fri Mar 30 18:27:25 2012 5 | Author: Josef Perktold 6 | """ 7 | 8 | from gwstatsmodels.sandbox.stats.multicomp import tukeyhsd, MultiComparison 9 | 10 | def pairwise_tukeyhsd(endog, groups, alpha=0.05): 11 | '''calculate all pairwise comparisons with TukeyHSD confidence intervals 12 | 13 | this is just a wrapper around tukeyhsd method of MultiComparison 14 | 15 | Parameters 16 | ---------- 17 | endog : ndarray, float, 1d 18 | response variable 19 | groups : ndarray, 1d 20 | array with groups, can be 
#TODO: these are pretty straightforward but they should be tested
def durbin_watson(resids):
    """
    Calculate the Durbin-Watson statistic.

    Parameters
    ----------
    resids : array-like
        Residuals; the statistic is formed with ``np.dot``, so a 1d
        sequence is expected.

    Returns
    -------
    dw : float
        The Durbin-Watson statistic, defined as
        sum_(t=2)^(T)((e_t - e_(t-1))^(2))/sum_(t=1)^(T)e_t^(2)
    """
    resids = np.asarray(resids)
    diff_resids = np.diff(resids, 1)
    return np.dot(diff_resids, diff_resids) / np.dot(resids, resids)


def omni_normtest(resids, axis=0):
    """
    Omnibus test for normality.

    Parameters
    ----------
    resids : array-like
        Data to test.
    axis : int, optional
        Axis along which the test is computed. Default is 0.

    Returns
    -------
    Chi^2 score, two-tail probability
        ``(nan, nan)`` is returned when there are fewer than 8 observations
        along `axis`.
    """
    #TODO: change to exception in summary branch and catch in summary()
    #behavior changed between scipy 0.9 and 0.10
    resids = np.asarray(resids)
    n = resids.shape[axis]
    if n < 8:
        # skewtest is not valid with less than 8 observations; report NaNs
        # instead of raising (see the TODO above).
        return np.nan, np.nan

    # Pass the caller's axis through: the length check above is made along
    # `axis`, so the test has to run along the same axis.
    return stats.normaltest(resids, axis=axis)


def jarque_bera(resids):
    """
    Calculate residual skewness, kurtosis, and do the JB test for normality.

    Parameters
    ----------
    resids : array-like

    Returns
    -------
    JB, JBpv, skew, kurtosis

    JB = n/6*(S^2 + (K-3)^2/4)

    JBpv is the Chi^2 two-tail probability value

    skew is the measure of skewness

    kurtosis is the measure of kurtosis

    """
    resids = np.asarray(resids)
    # Calculate residual skewness and kurtosis; stats.kurtosis is shifted
    # by 3 here and the JB formula below subtracts the 3 again.
    skew = stats.skew(resids)
    kurtosis = 3 + stats.kurtosis(resids)

    # Calculate the Jarque-Bera test for normality
    JB = (resids.shape[0] / 6.) * (skew**2 + (1 / 4.) * (kurtosis - 3)**2)
    JBpv = stats.chi2.sf(JB, 2)

    return JB, JBpv, skew, kurtosis
38 | 39 | if __name__=="__main__": 40 | run_module_suite() 41 | 42 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/stats/tests/test_qsturng.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 4 | Created on Wed Mar 28 13:49:11 2012 5 | 6 | Author: Josef Perktold 7 | """ 8 | 9 | import numpy as np 10 | from numpy.testing import assert_almost_equal 11 | 12 | from gwstatsmodels.stats.libqsturng import qsturng, psturng 13 | from gwstatsmodels.sandbox.stats.multicomp import get_tukeyQcrit 14 | 15 | def test_qstrung(): 16 | rows = [ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17 | 16, 17, 18, 19, 20, 24, 30, 40, 60, 120, 9999] 18 | cols = np.arange(2,11) 19 | 20 | for alpha in [0.01, 0.05]: 21 | for k in cols: 22 | c1 = get_tukeyQcrit(k, rows, alpha=alpha) 23 | c2 = qsturng(1-alpha, k, rows) 24 | assert_almost_equal(c1, c2, decimal=2) 25 | #roundtrip 26 | assert_almost_equal(psturng(qsturng(1-alpha, k, rows), k, rows), alpha, 5) 27 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tests/R_ig.s: -------------------------------------------------------------------------------- 1 | ### SETUP ### 2 | d <- read.table("./inv_gaussian.csv",sep=",", header=T, nrows=5000) 3 | attach(d) 4 | 5 | ### MODEL ### 6 | library(nlme) 7 | m1 <- glm(xig ~ x1 + x2, family=inverse.gaussian) 8 | results <- summary.glm(m1) 9 | results 10 | results['coefficients'] 11 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tests/R_lbw.s: -------------------------------------------------------------------------------- 1 | ### SETUP ### 2 | d <- read.table("./stata_lbw_glm.csv",sep=",", header=T) 3 | attach(d) 4 | race.f <- factor(race) 5 | contrasts(race.f) <- contr.treatment(3, base = 3) # make white the intercept 6 | 7 | ### MODEL ### 8 | m1 <- glm(low ~ age + lwt + race.f + 
def skip_rpy():
    """Return True when ``rpy`` cannot be imported, False otherwise."""
    try:
        import rpy
    except Exception:
        # Any failure while importing rpy means "skip". Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit propagate.
        return True
    return False
import os
import sys
import gwstatsmodels as sm
from coverage import coverage

# the generated html report will be placed in the tests directory
report_directory = 'coverage_report_html'

cov = coverage()
cov.start() # start logging coverage
sm.test()
cov.stop() # stop the logging coverage
cov.save() # save the logging coverage to ./.coverage

# Install directory with a trailing separator. str.strip() removes a *set of
# characters* from both ends rather than a suffix, so
# strip('__init__.pyc') could also eat leading or trailing path characters;
# dirname() drops exactly the file name.
modpath = os.path.dirname(sm.__file__) + os.sep

# set the module names to gwstatsmodels.path.to.module
modnames = ['gwstatsmodels.' +
            os.path.splitext(f.replace(modpath, ''))[0].replace(os.sep, '.')
            for f in cov.data.executed_files() if 'gwstatsmodels' in f]

# save only the used modules to the html report
cov.html_report([sys.modules[mn] for mn in modnames if mn in sys.modules],
                directory=report_directory)
-.2119243,-.2290713,-21.83861,-.228953,-2.151309 12 | -.310989,-.3507393,-41.92495,-.3503004,-3.610845 13 | -.0092225,-.009251,-1.136797,-.009251,-.1023921 14 | .2394025,.2225897,18.85773,.2224935,2.124756 15 | .0335166,.0331493,4.518424,.0331489,.3891554 16 | .8498294,.6851802,35.76275,.6826899,5.512915 17 | .4129342,.3667852,46.53926,.3663704,4.383795 18 | .4641484,.4071232,62.57265,.4065619,5.389155 19 | 1.711046,1.194748,112.6765,1.183119,13.88505 20 | 1.265718,.946389,130.431,.9402446,12.84869 21 | -.3485326,-.3999883,-29.56381,-.3993286,-3.209977 22 | -.4043403,-.4769601,-41.02543,-.475818,-4.072865 23 | -.4920579,-.6088183,-93.45096,-.6063572,-6.781097 24 | -.4028764,-.4748784,-91.52262,-.4737519,-6.072257 25 | -.5150567,-.6460133,-219.0146,-.6430435,-10.62097 26 | -.0870423,-.0897043,-12.63614,-.0896975,-1.048751 27 | .1283623,.1232478,17.03833,.123231,1.478878 28 | -.2392719,-.2615621,-93.02833,-.2613844,-4.717951 29 | .7372465,.608186,62.53596,.6064097,6.790023 30 | -.036411,-.0368626,-14.15653,-.0368621,-.7179512 31 | .268833,.2479331,66.79341,.247801,4.237484 32 | .5963896,.5072377,113.2655,.5061801,8.218903 33 | .0198218,.0196923,10.08209,.0196923,.4470407 34 | .774936,.6343053,251.8839,.6323037,13.97118 35 | -.7639251,-1.165917,-493.4617,-1.14588,-19.41566 36 | -.6237717,-.8411748,-440.6796,-.8342663,-16.57961 37 | -.1632729,-.1731151,-67.39759,-.1730648,-3.317258 38 | -.4285625,-.5119329,-473.7878,-.5105074,-14.24948 39 | .0800693,.0780269,39.53534,.0780226,1.779205 40 | -.2136744,-.2311274,-215.987,-.2310057,-6.793446 41 | -.0163544,-.0164444,-10.56421,-.0164444,-.4156576 42 | .2049005,.1923721,110.6513,.1923094,4.761566 43 | -.1947589,-.2090677,-235.4841,-.2089782,-6.772194 44 | .3167274,.2883678,187.0656,.2881621,7.697324 45 | .6242349,.5276325,257.6785,.5264484,12.68274 46 | .8302411,.6720021,286.5137,.6696448,15.42321 47 | .0065514,.0065371,7.921307,.0065371,.2278058 48 | .3415952,.308985,288.6676,.3087333,9.930129 49 | 
def add_indep(x, varnames, dtype=None):
    '''
    construct array with independent columns

    x is either iterable (list, tuple) or instance of ndarray or a subclass of it.
    If x is an ndarray, then each column is assumed to represent a variable with
    observations in rows.

    Parameters
    ----------
    x : iterable of 1d sequences or 2d ndarray
        Candidate variables. A 2d ndarray is transposed so that iteration
        runs over its columns.
    varnames : sequence
        One name per candidate variable, paired with `x` by position.
    dtype : dtype, optional
        dtype of the returned array. If None, the dtype of the first
        variable is used.

    Returns
    -------
    xout : ndarray
        Array whose columns are the candidates that increased the rank,
        in their original order.
    varnames_new : list
        Names of the columns that were kept.
    '''
    #TODO: this needs tests for subclasses

    if isinstance(x, np.ndarray) and x.ndim == 2:
        x = x.T

    nvars_orig = len(x)
    nobs = len(x[0])
    # Compare against None explicitly: non-structured numpy dtype objects are
    # falsy, so ``if not dtype`` silently discarded a dtype passed as np.dtype.
    if dtype is None:
        dtype = np.asarray(x[0]).dtype
    xout = np.zeros((nobs, nvars_orig), dtype=dtype)
    count = 0
    rank_old = 0
    varnames_new = []
    for (xi, ni) in zip(x, varnames):
        # Tentatively place the candidate in the next free column; a rejected
        # candidate is overwritten by the next one or cut off by the slice.
        xout[:, count] = xi
        rank_new = smrank(xout)
        if rank_new > rank_old:
            varnames_new.append(ni)
            rank_old = rank_new
            count += 1

    return xout[:, :count], varnames_new
def parallel_func(func, n_jobs, verbose=5):
    """Return parallel instance with delayed function

    Util function to use joblib only if available

    Parameters
    ----------
    func: callable
        A function
    n_jobs: int
        Number of jobs to run in parallel
    verbose: int
        Verbosity level

    Returns
    -------
    parallel: instance of joblib.Parallel or list
        The parallel object
    my_func: callable
        func if not parallel or delayed(func)
    n_jobs: int
        Number of jobs >= 0

    Examples
    --------
    >>> from math import sqrt
    >>> from gwstatsmodels.tools.parallel import parallel_func
    >>> parallel, p_func, n_jobs = parallel_func(sqrt, n_jobs=-1, verbose=0)
    >>> print(n_jobs)
    >>> parallel(p_func(i**2) for i in range(10))
    """
    try:
        try:
            from joblib import Parallel, delayed
        except ImportError:
            from sklearn.externals.joblib import Parallel, delayed

        parallel = Parallel(n_jobs, verbose=verbose)
        my_func = delayed(func)

        if n_jobs == -1:
            # Report the actual worker count instead of the -1 placeholder.
            try:
                import multiprocessing
                n_jobs = multiprocessing.cpu_count()
            except ImportError:
                # print(...) with one argument behaves the same on Python 2
                # and 3; the statement form is a syntax error on Python 3.
                print("multiprocessing not installed. Cannot run in parallel.")
                n_jobs = 1

    except ImportError:
        # Serial fallback: `list` consumes the generator of plain func calls.
        print("joblib not installed. Cannot run in parallel.")
        n_jobs = 1
        my_func = func
        parallel = list
    return parallel, my_func, n_jobs
test_missing_data_pandas(): 7 | """ 8 | Fixes GH: #144 9 | """ 10 | X = np.random.random((10,5)) 11 | X[1,2] = np.nan 12 | df = pandas.DataFrame(X) 13 | vals, cnames, rnames = data.interpret_data(df) 14 | np.testing.assert_equal(rnames.tolist(), [0,2,3,4,5,6,7,8,9]) 15 | 16 | def test_structarray(): 17 | X = np.random.random((9,)).view([('var1', 'f8'), 18 | ('var2', 'f8'), 19 | ('var3', 'f8')]) 20 | vals, cnames, rnames = data.interpret_data(X) 21 | np.testing.assert_equal(cnames, X.dtype.names) 22 | np.testing.assert_equal(vals, X.view((float,3))) 23 | np.testing.assert_equal(rnames, None) 24 | 25 | def test_recarray(): 26 | X = np.random.random((9,)).view([('var1', 'f8'), 27 | ('var2', 'f8'), 28 | ('var3', 'f8')]) 29 | vals, cnames, rnames = data.interpret_data(X.view(np.recarray)) 30 | np.testing.assert_equal(cnames, X.dtype.names) 31 | np.testing.assert_equal(vals, X.view((float,3))) 32 | np.testing.assert_equal(rnames, None) 33 | 34 | 35 | def test_dataframe(): 36 | X = np.random.random((10,5)) 37 | df = pandas.DataFrame(X) 38 | vals, cnames, rnames = data.interpret_data(df) 39 | np.testing.assert_equal(vals, df.values) 40 | np.testing.assert_equal(rnames.tolist(), df.index.tolist()) 41 | np.testing.assert_equal(cnames, df.columns.tolist()) 42 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tools/tests/test_parallel.py: -------------------------------------------------------------------------------- 1 | from gwstatsmodels.tools.parallel import parallel_func 2 | from numpy import arange, testing 3 | from math import sqrt 4 | 5 | def test_parallel(): 6 | x = arange(10.) 
def remove_nanrows(y, x):
    '''remove common rows in [y,x] that contain at least one nan

    Parameters
    ----------
    y : ndarray, 1d
        First array; entries that are nan mark rows to drop.
    x : ndarray, 1d or 2d
        Second array with the same number of rows as `y`. For 2d input a
        row is dropped if any of its entries is nan.

    Returns
    -------
    y, x : arrays
        The inputs restricted to the rows without nan in either array.

    TODO: this should be made more flexible,
    arbitrary number of arrays and 1d or 2d arrays

    duplicate: Skipper added sm.tools.drop_missing

    '''
    y_nan = np.isnan(np.asarray(y))
    x_nan = np.isnan(np.asarray(x))
    if x_nan.ndim > 1:
        # Collapse columns to one flag per row. For 1d x the reduction is
        # skipped: any(-1) would collapse everything to a single scalar and
        # one nan would then drop every row.
        x_nan = x_nan.any(-1)
    mask = ~(y_nan | x_nan)
    y = y[mask]
    x = x[mask]
    return y, x
38 | ''' 39 | 40 | if filter_missing: 41 | y, x = remove_nanrows(y, x) 42 | #do the same for masked arrays 43 | 44 | if add_const: 45 | x = sm.add_constant(x, prepend=True) 46 | 47 | if not sigma is None: 48 | return GLS(y, x, sigma=sigma, **kwds) 49 | elif not weights is None: 50 | return WLS(y, x, weights=weights, **kwds) 51 | else: 52 | return OLS(y, x, **kwds) 53 | 54 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/__init__.py: -------------------------------------------------------------------------------- 1 | from gwstatsmodels import NoseWrapper as Tester 2 | test = Tester().test 3 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/api.py: -------------------------------------------------------------------------------- 1 | from .ar_model import AR 2 | from .arima_model import ARMA 3 | import vector_ar as var 4 | from .vector_ar.var_model import VAR 5 | from .vector_ar.svar_model import SVAR 6 | from .vector_ar.dynamic import DynamicVAR 7 | import filters 8 | import tsatools 9 | from .tsatools import (add_trend, detrend, lagmat, lagmat2ds, add_lag) 10 | import interp 11 | import stattools 12 | from .stattools import (adfuller, acovf, q_stat, acf, pacf_yw, pacf_ols, pacf, 13 | ccovf, ccf, periodogram, grangercausalitytests) 14 | from .base import datetools 15 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/tsa/base/__init__.py -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/base/tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/tsa/base/tests/__init__.py -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/descriptivestats.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Descriptive Statistics for Time Series 3 | 4 | Created on Sat Oct 30 14:24:08 2010 5 | 6 | Author: josef-pktd 7 | License: BSD(3clause) 8 | """ 9 | 10 | import numpy as np 11 | import stattools as stt 12 | 13 | 14 | #todo: check subclassing for descriptive stats classes 15 | class TsaDescriptive(object): 16 | '''collection of descriptive statistical methods for time series 17 | 18 | ''' 19 | 20 | def __init__(self, data, label=None, name=''): 21 | self.data = data 22 | self.label = label 23 | self.name = name 24 | 25 | def filter(self, num, den): 26 | from scipy.signal import lfilter 27 | xfiltered = lfilter(num, den, self.data) 28 | return self.__class__(xfiltered, self.label, self.name + '_filtered') 29 | 30 | def detrend(self, order=1): 31 | import tsatools 32 | xdetrended = tsatools.detrend(self.data, order=order) 33 | return self.__class__(xdetrended, self.label, self.name + '_detrended') 34 | 35 | def fit(self, order=(1,0,1), **kwds): 36 | from arima_model import ARMA 37 | self.mod = ARMA(self.data) 38 | self.res = self.mod.fit(order=order, **kwds) 39 | #self.estimated_process = 40 | return self.res 41 | 42 | def acf(self, nlags=40): 43 | return stt.acf(self.data, nlags=nlags) 44 | 45 | def pacf(self, nlags=40): 46 | return stt.pacf(self.data, nlags=nlags) 47 | 48 | def periodogram(self): 49 | #doesn't return frequesncies 50 | return stt.periodogram(self.data) 51 | 52 | # copied from fftarma.py 53 | def plot4(self, fig=None, nobs=100, nacf=20, nfreq=100): 54 | data = self.data 55 | acf = self.acf(nacf) 56 | pacf = self.pacf(nacf) 57 | w = np.linspace(0, np.pi, nfreq, endpoint=False) 58 | 
spdr = self.periodogram()[:nfreq] #(w) 59 | 60 | if fig is None: 61 | import matplotlib.pyplot as plt 62 | fig = plt.figure() 63 | ax = fig.add_subplot(2,2,1) 64 | namestr = ' for %s' % self.name if self.name else '' 65 | ax.plot(data) 66 | ax.set_title('Time series' + namestr) 67 | 68 | ax = fig.add_subplot(2,2,2) 69 | ax.plot(acf) 70 | ax.set_title('Autocorrelation' + namestr) 71 | 72 | ax = fig.add_subplot(2,2,3) 73 | ax.plot(spdr) # (wr, spdr) 74 | ax.set_title('Power Spectrum' + namestr) 75 | 76 | ax = fig.add_subplot(2,2,4) 77 | ax.plot(pacf) 78 | ax.set_title('Partial Autocorrelation' + namestr) 79 | 80 | return fig 81 | 82 | 83 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/filters/__init__.py: -------------------------------------------------------------------------------- 1 | from .bk_filter import bkfilter 2 | from .hp_filter import hpfilter 3 | from .cf_filter import cffilter 4 | from .filtertools import miso_lfilter, arfilter 5 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/filters/hp_filter.py: -------------------------------------------------------------------------------- 1 | from scipy import sparse 2 | from scipy.sparse import dia_matrix, eye as speye 3 | from scipy.sparse.linalg import spsolve 4 | import numpy as np 5 | 6 | def hpfilter(X, lamb=1600): 7 | """ 8 | Hodrick-Prescott filter 9 | 10 | Parameters 11 | ---------- 12 | X : array-like 13 | The 1d ndarray timeseries to filter of length (nobs,) or (nobs,1) 14 | lamb : float 15 | The Hodrick-Prescott smoothing parameter. A value of 1600 is 16 | suggested for quarterly data. Ravn and Uhlig suggest using a value 17 | of 6.25 (1600/4**4) for annual data and 129600 (1600*3**4) for monthly 18 | data. 19 | 20 | Returns 21 | ------- 22 | cycle : array 23 | The estimated cycle in the data given lamb. 24 | trend : array 25 | The estimated trend in the data given lamb. 
def hpfilter(X, lamb=1600):
    """
    Hodrick-Prescott filter

    Splits a series into a smooth trend and the remaining cycle.

    Parameters
    ----------
    X : array-like
        The 1d ndarray timeseries to filter of length (nobs,) or (nobs,1)
    lamb : float
        The Hodrick-Prescott smoothing parameter. A value of 1600 is
        suggested for quarterly data. Ravn and Uhlig suggest using a value
        of 6.25 (1600/4**4) for annual data and 129600 (1600*3**4) for monthly
        data.

    Returns
    -------
    cycle : array
        The estimated cycle in the data given lamb (``X - trend``).
    trend : array
        The estimated trend in the data given lamb.

    Examples
    ---------
    >>> import gwstatsmodels.api as sm
    >>> dta = sm.datasets.macrodata.load()
    >>> X = dta.data['realgdp']
    >>> cycle, trend = sm.tsa.filters.hpfilter(X,1600)

    Notes
    -----
    The HP filter removes a smooth trend, `T`, from the data `X` by solving

    min sum((X[t] - T[t])**2 + lamb*((T[t+1] - T[t]) - (T[t] - T[t-1]))**2)
     T   t

    Here the HP filter is implemented as a ridge-regression rule using
    scipy.sparse. In this sense, the solution can be written as

    T = inv(I + lamb*K'K)X

    where I is a nobs x nobs identity matrix, and K is a (nobs-2) x nobs
    second-difference matrix such that

    K[i,j] = 1 if j == i or j == i + 2
    K[i,j] = -2 if j == i + 1
    K[i,j] = 0 otherwise

    References
    ----------
    Hodrick, R.J, and E. C. Prescott. 1980. "Postwar U.S. Business Cycles: An
        Empirical Investigation." `Carnegie Mellon University discussion
        paper no. 451`.
    Ravn, M.O and H. Uhlig. 2002. "On Adjusting the Hodrick-Prescott
        Filter for the Frequency of Observations." `The Review of Economics and
        Statistics`, 84(2), 371-80.
    """
    X = np.asarray(X)
    if X.ndim > 1:
        # (nobs, 1) input is flattened to (nobs,)
        X = X.squeeze()
    nobs = len(X)

    # One row of constant values per diagonal of K: the main diagonal and
    # the first two diagonals above it.
    bands = np.tile([[1], [-2], [1]], (1, nobs))
    diagonals = np.array([0, 1, 2])
    K = dia_matrix((bands, diagonals), shape=(nobs-2, nobs))

    identity = speye(nobs, nobs)
    penalty = lamb*K.T.dot(K)
    trend = spsolve(identity + penalty, X)
    return X - trend, trend
def configuration(parent_package='', top_path=None):
    """Return the numpy.distutils configuration of the ``kalmanf`` package.

    Registers the ``kalman_loglike`` extension, built from
    ``kalman_loglike.c`` with the numpy include directories.

    Parameters
    ----------
    parent_package : str
        Passed on to ``Configuration``.
    top_path : str or None
        Passed on to ``Configuration``.

    Returns
    -------
    config : numpy.distutils.misc_util.Configuration
    """
    from numpy.distutils.misc_util import Configuration
    from numpy.distutils.misc_util import get_numpy_include_dirs

    config = Configuration('kalmanf', parent_package, top_path)

    # Per the original author's note: try to create the C file from the
    # .pyx file; if that does not happen, the checked-in .c file is built.
    pyx_sources = ['kalman_loglike.pyx']
    if has_c_compiler():
        cython(pyx_sources, working_path=cur_dir)

    numpy_includes = get_numpy_include_dirs()
    config.add_extension('kalman_loglike',
                         sources=['kalman_loglike.c'],
                         include_dirs=[numpy_includes])

    return config
class TSMLEModel(LikelihoodModel):
    """
    Univariate time series model for estimation with maximum likelihood

    Subclasses have to provide ``loglike`` (and ``geterrors``); score and
    hessian are then obtained by numerical differentiation.

    Note: This is not working yet
    """

    def __init__(self, endog, exog=None):
        #need to override p,q (nar,nma) correctly
        super(TSMLEModel, self).__init__(endog, exog)
        #default lag orders correspond to an arma(1,1)
        self.nar = 1
        self.nma = 1
        #self.initialize()

    def geterrors(self, params):
        """Not implemented here; raises NotImplementedError."""
        raise NotImplementedError

    def loglike(self, params):
        """
        Loglikelihood for timeseries model

        Notes
        -----
        needs to be overwritten by subclass
        """
        raise NotImplementedError

    def score(self, params):
        """
        Score vector for Arma model

        Differentiates ``loglike`` numerically with numdifftools and
        returns the last row of the resulting Jacobian.
        """
        numerical_jac = ndt.Jacobian(self.loglike, stepMax=1e-4)
        jac_at_params = numerical_jac(params)
        return jac_at_params[-1]

    def hessian(self, params):
        """
        Hessian of arma model.  Currently uses numdifftools

        Differentiates ``score`` numerically and returns the last row of
        the resulting Jacobian.
        """
        numerical_hess = ndt.Jacobian(self.score, stepMax=1e-4)
        hess_at_params = numerical_hess(params)
        return hess_at_params[-1]

    def fit(self, start_params=None, maxiter=5000, method='fmin', tol=1e-08):
        '''estimate model by minimizing negative loglikelihood

        If no ``start_params`` are given and the instance has a
        ``_start_params`` attribute, that attribute is used.  The actual
        fitting is delegated to the parent class.

        does this need to be overwritten ?
        '''
        use_stored = start_params is None and hasattr(self, '_start_params')
        if use_stored:
            start_params = self._start_params
        #start_params = np.concatenate((0.05*np.ones(self.nar + self.nma), [1]))
        return super(TSMLEModel, self).fit(start_params=start_params,
                                           maxiter=maxiter, method=method,
                                           tol=tol)
def configuration(parent_package='', top_path=None):
    """Return the numpy.distutils configuration of the ``tsa`` package.

    Registers the ``kalmanf`` subpackage and the ``vector_ar/data/*.dat``
    data files.

    Parameters
    ----------
    parent_package : str
        Passed on to ``Configuration``.
    top_path : str or None
        Passed on to ``Configuration``.

    Returns
    -------
    config : numpy.distutils.misc_util.Configuration
    """
    # Only Configuration is needed here; the formerly imported
    # get_numpy_include_dirs was never used in this function.
    from numpy.distutils.misc_util import Configuration

    config = Configuration('tsa', parent_package, top_path)

    config.add_subpackage('kalmanf')

    config.add_data_files('vector_ar/data/*.dat')

    return config
nsample=250) 14 | 15 | y_arma50 = arma_generate_sample([1., -.75, .35, -.3, -.2, .1], [1.], 16 | nsample=250) 17 | 18 | y_arma02 = arma_generate_sample([1.], [1., .35, -.75], nsample=250) 19 | 20 | 21 | # constant 22 | constant = 4.5 23 | y_arma11c = arma_generate_sample([1., -.75],[1., .35], nsample=250) + constant 24 | y_arma14c = arma_generate_sample([1., -.75],[1., .35, -.75, .1, .35], 25 | nsample=250) + constant 26 | y_arma41c = arma_generate_sample([1., -.75, .25, .25, -.75], [1., .35], 27 | nsample=250) + constant 28 | y_arma22c = arma_generate_sample([1., -.75, .45],[1., .35, -.9], nsample=250) + \ 29 | constant 30 | 31 | y_arma50c = arma_generate_sample([1., -.75, .35, -.3, -.2, .1], [1.], 32 | nsample=250) + constant 33 | 34 | y_arma02c = arma_generate_sample([1.], [1., .35, -.75], nsample=250) + constant 35 | 36 | savetxt('y_arma_data.csv', np.column_stack((y_arma11, y_arma14, y_arma41, 37 | y_arma22,y_arma50, y_arma02,y_arma11c,y_arma14c,y_arma41c,y_arma22c, 38 | y_arma50c,y_arma02c)), names=['y_arma11','y_arma14','y_arma41', 39 | 'y_arma22','y_arma50', 'y_arma02','y_arma11c','y_arma14c', 40 | 'y_arma41c','y_arma22c', 'y_arma50c','y_arma02c'], delimiter=",") 41 | 42 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/tests/results/results_arma_forecasts.csv: -------------------------------------------------------------------------------- 1 | "fc11","fe11","fc41","fe41","fc50","fe50","fc11c","fe11c","fc41c","fe41c","fc50c","fe50c" 2 | -0.0931139795,0.994743,-0.4298095393,0.91141,1.24361,0.938375,4.2276592922,1.03917,-1.3985445949,0.915488,4.779739852,0.973931 3 | -0.073415913,1.53122,-1.4961938371,1.22521,0.484195,1.16009,4.4387131881,1.52336,3.0228581596,1.29504,4.692859693,1.21992 4 | -0.057884931,1.78523,-3.770193621,1.2607,0.722602,1.17762,4.5789296469,1.6936,4.071345984,1.37495,3.8280155603,1.24654 5 | 
-0.0456394955,1.92633,-3.6148341892,1.27585,1.09664,1.20138,4.6720843135,1.76352,0.8457602276,1.38243,3.9467079457,1.26508 6 | -0.0359845562,2.00905,-1.5553772244,1.29131,0.854109,1.31833,4.7339728534,1.79351,-2.3126724383,1.39737,4.5998286564,1.37737 7 | -0.028372099,2.05881,-0.3162209074,1.49296,0.510239,1.39506,4.7750893268,1.80659,-0.833875753,1.5901,4.59173986,1.4486 8 | -0.0223700411,2.08914,-1.6253254747,1.67599,0.579206,1.41288,4.802405602,1.81234,2.8363368346,1.83677,4.1878148976,1.45879 9 | -0.017637706,2.10777,-3.3986372227,1.68533,0.721688,1.43171,4.8205535323,1.81487,3.7095186811,1.86802,4.2337457799,1.46621 10 | -0.0139064864,2.11928,-3.150618693,1.69048,0.60394,1.47332,4.8326103507,1.81598,0.6042207231,1.86845,4.553063531,1.49431 11 | -0.0109645984,2.1264,-1.2381045022,1.70353,0.43997,1.50507,4.8406204582,1.81647,-1.9229410947,1.87871,4.562562544,1.51482 12 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/tests/results/results_corrgram.csv: -------------------------------------------------------------------------------- 1 | acvar,acvarfft,Q1,PACOLS,PACYW 2 | .98685781,.98685781,200.6355,1.002988,.98685781 3 | .97371846,.97371846,396.9356,-.3956237,-.00650592 4 | .96014366,.96014366,588.7549,-.1769759,-.0233345 5 | .94568545,.94568545,775.7759,.0742275,-.04078457 6 | .93054425,.93054425,957.7706,-.0580367,-.0334311 7 | .91484806,.91484806,1134.57,.0935246,-.02818214 8 | .89901438,.89901438,1306.174,.0034315,-.01168091 9 | .8827679,.8827679,1472.48,.0385079,-.02192074 10 | .86649857,.86649857,1633.539,.1050301,-.00748351 11 | .8503736,.8503736,1789.463,-.1834061,-.00143193 12 | .83435254,.83435254,1940.348,-.0210358,-.00310717 13 | .81829961,.81829961,2086.244,.0992143,-.00921395 14 | .80240987,.80240987,2227.266,.2540452,-.0025009 15 | .78635609,.78635609,2363.419,-.0080142,-.01557323 16 | .77024519,.77024519,2494.745,-.0524568,-.01164751 17 | .75437469,.75437469,2621.388,.11607,-.00030521 18 
| .73850631,.73850631,2743.412,-.1211871,-.00893237 19 | .72274736,.72274736,2860.916,.092306,-.00507808 20 | .70701465,.70701465,2973.971,.0317384,-.00829011 21 | .69141553,.69141553,3082.682,-.0422092,-.00460458 22 | .67600225,.67600225,3187.173,.0628712,-.00246537 23 | .66067686,.66067686,3287.53,.0086186,-.00610282 24 | .64543953,.64543953,3383.844,-.0044266,-.0068184 25 | .63020869,.63020869,3476.178,.001695,-.00994534 26 | .61518569,.61518569,3564.658,-.128714,-.00216407 27 | .60054983,.60054983,3649.454,.0279399,.00497323 28 | .58608693,.58608693,3730.674,.0982797,-.00302243 29 | .57172016,.57172016,3808.402,.0937156,-.00642863 30 | .55730727,.55730727,3882.685,-.1677933,-.0125645 31 | .54281535,.54281535,3953.562,-.116249,-.01382639 32 | .5282993,.5282993,4021.09,.1120091,-.01135471 33 | .51386026,.51386026,4085.351,-.1551635,-.00700203 34 | .49934969,.49934969,4146.39,-.0644834,-.01242531 35 | .48452688,.48452688,4204.199,.2141102,-.02166683 36 | .4697521,.4697521,4258.86,.0573164,-.0078988 37 | .45473224,.45473224,4310.389,.1685996,-.01871847 38 | .43987621,.43987621,4358.896,-.0405992,-.00300827 39 | .42500013,.42500013,4404.451,.041648,-.0100618 40 | .41057995,.41057995,4447.228,.1566605,.0076425 41 | .3960725,.3960725,4487.278,-.0708302,-.01332486 42 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/tests/results/results_process.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy import array 3 | 4 | 5 | class Holder(object): 6 | pass 7 | 8 | 9 | armarep = Holder() 10 | armarep.comment = 'mlab.garchma(-res_armarep.ar[1:], res_armarep.ma[1:], 20)' +\ 11 | 'mlab.garchar(-res_armarep.ar[1:], res_armarep.ma[1:], 20)' 12 | armarep.marep = array([[-0.1 ], 13 | [-0.77 ], 14 | [-0.305 ], 15 | [ 0.4635 ], 16 | [ 0.47575 ], 17 | [-0.132925 ], 18 | [-0.4470625 ], 19 | [-0.11719125 ], 20 | [ 0.299054375 ], 21 | [ 0.2432801875 ], 22 | 
[-0.11760340625 ], 23 | [-0.253425853125 ], 24 | [-0.0326302015625 ], 25 | [ 0.18642558171875], 26 | [ 0.11931695210938], 27 | [-0.08948198932031], 28 | [-0.14019455634766], 29 | [ 0.00148831328242], 30 | [ 0.11289980171934], 31 | [ 0.05525925023373]]) 32 | armarep.ar = array([ 1. , -0.5, 0.8]) 33 | armarep.ma = array([ 1. , -0.6 , 0.08]) 34 | armarep.name = 'armarep' 35 | armarep.arrep = array([[ -1.00000000000000e-01], 36 | [ -7.80000000000000e-01], 37 | [ -4.60000000000000e-01], 38 | [ -2.13600000000000e-01], 39 | [ -9.13600000000000e-02], 40 | [ -3.77280000000000e-02], 41 | [ -1.53280000000000e-02], 42 | [ -6.17856000000000e-03], 43 | [ -2.48089600000000e-03], 44 | [ -9.94252799999999e-04], 45 | [ -3.98080000000000e-04], 46 | [ -1.59307776000000e-04], 47 | [ -6.37382655999999e-05], 48 | [ -2.54983372800000e-05], 49 | [ -1.01999411200000e-05], 50 | [ -4.08009768959999e-06], 51 | [ -1.63206332416000e-06], 52 | [ -6.52830179327999e-07], 53 | [ -2.61133041663999e-07], 54 | [ -1.04453410652160e-07]]) 55 | 56 | 57 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/tests/test_arima_process.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from numpy.testing import (assert_array_almost_equal, assert_almost_equal, 4 | assert_equal) 5 | 6 | 7 | from gwstatsmodels.tsa.arima_process import (arma_impulse_response, 8 | lpol_fiar, lpol_fima) 9 | from gwstatsmodels.sandbox.tsa.fftarma import ArmaFft 10 | 11 | from results.results_process import armarep #benchmarkdata 12 | 13 | arlist = [[1.], 14 | [1, -0.9], #ma representation will need many terms to get high precision 15 | [1, 0.9], 16 | [1, -0.9, 0.3]] 17 | 18 | malist = [[1.], 19 | [1, 0.9], 20 | [1, -0.9], 21 | [1, 0.9, -0.3]] 22 | 23 | 24 | 25 | def test_fi(): 26 | #test identity of ma and ar representation of fi lag polynomial 27 | n = 100 28 | mafromar = arma_impulse_response(lpol_fiar(0.4, n=n), 
def test_armafft():
    """Compare two autocovariance computations of ``ArmaFft``.

    For every AR/MA pair from ``arlist`` and ``malist`` the first ten
    values of ``invpowerspd(1024)`` have to agree with ``acovf(10)`` to
    7 decimals.
    """
    #test other methods
    for ar in arlist:
        for ma in malist:
            arma = ArmaFft(ar, ma, 20)
            ac1 = arma.invpowerspd(1024)[:10]
            ac2 = arma.acovf(10)[:10]
            assert_almost_equal(ac1, ac2, decimal=7,
                                err_msg='acovf not equal for %s, %s' % (ar, ma))
-------------------------------------------------------------------------------- 1 | # pylint: disable=W0611 2 | 3 | from .var_model import VAR 4 | from .svar_model import SVAR 5 | from .dynamic import DynamicVAR 6 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/vector_ar/data/e2.dat: -------------------------------------------------------------------------------- 1 | /*quarterly, seasonally adjusted, U.S. 2 | fixed investment (y1), change in business inventories (y2), 3 | 1947Q1-1972Q4; 4 | source: U.S. Department of Commerce, Bureau of Economic Analysis, 5 | The National Income and Product Accounts of the United States, 1929-1974 6 | */ 7 | 8 | <1947 Q1> 9 | y1 y2 10 | 69.6 0.1 11 | 67.6 -0.9 12 | 69.5 -2.9 13 | 74.7 2.7 14 | 77.1 4.1 15 | 77.4 5.6 16 | 76.6 6.9 17 | 76.1 5.3 18 | 71.8 -0.3 19 | 68.9 -7.1 20 | 68.5 -2.5 21 | 70.6 -7.7 22 | 75.4 4.4 23 | 82.3 7.7 24 | 88.2 8.0 25 | 86.9 22.1 26 | 83.4 13.4 27 | 80.3 19.9 28 | 79.4 14.6 29 | 78.6 7.0 30 | 79.3 7.3 31 | 80.3 -2.7 32 | 75.3 5.4 33 | 80.6 7.2 34 | 83.9 3.9 35 | 84.2 5.1 36 | 84.4 1.9 37 | 83.8 -5.0 38 | 82.8 -3.4 39 | 84.1 -4.1 40 | 87.0 -2.7 41 | 88.5 1.5 42 | 92.1 5.9 43 | 96.1 8.0 44 | 98.3 7.8 45 | 98.8 9.2 46 | 96.6 7.5 47 | 97.4 5.5 48 | 97.6 4.9 49 | 96.6 5.4 50 | 96.2 2.5 51 | 95.3 2.9 52 | 96.4 3.7 53 | 94.9 -3.0 54 | 90.0 -6.8 55 | 87.2 -6.2 56 | 88.0 0.3 57 | 93.0 5.3 58 | 98.3 5.0 59 | 101.6 13.0 60 | 102.6 -0.4 61 | 101.4 8.2 62 | 104.9 13.5 63 | 101.8 4.9 64 | 98.8 3.0 65 | 98.6 -3.9 66 | 97.7 -3.8 67 | 99.2 1.9 68 | 101.3 6.6 69 | 104.6 6.7 70 | 106.1 10.6 71 | 109.9 9.2 72 | 111.1 8.0 73 | 110.1 4.7 74 | 110.7 7.6 75 | 116.0 7.0 76 | 118.5 9.3 77 | 122.0 7.1 78 | 124.0 6.1 79 | 124.0 8.0 80 | 124.9 7.3 81 | 126.4 7.9 82 | 133.4 13.4 83 | 137.9 10.6 84 | 140.1 12.4 85 | 143.8 8.8 86 | 147.5 13.5 87 | 146.2 17.8 88 | 145.0 15.1 89 | 139.7 20.5 90 | 136.4 14.6 91 | 139.6 7.5 92 | 141.1 12.2 93 | 145.5 13.8 94 | 148.9 6.3 95 | 
148.9 11.8 96 | 150.7 9.2 97 | 155.0 7.6 98 | 159.1 9.8 99 | 158.4 12.2 100 | 158.1 13.4 101 | 154.3 6.8 102 | 151.8 2.9 103 | 150.0 4.8 104 | 150.4 6.3 105 | 149.5 3.3 106 | 154.3 7.9 107 | 158.4 10.0 108 | 162.1 5.0 109 | 166.0 3.7 110 | 174.3 4.8 111 | 176.1 10.1 112 | 178.2 12.1 113 | 186.7 10.8 114 | -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/vector_ar/data/e6.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/tsa/vector_ar/data/e6.dat -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/vector_ar/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkordi/pygwr/b3440687b8f44b23f6a813ef0eefa0664dfb9e75/pygwr/gwstatsmodels/tsa/vector_ar/tests/__init__.py -------------------------------------------------------------------------------- /pygwr/gwstatsmodels/tsa/vector_ar/tests/example_svar.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gwstatsmodels.api as sm 3 | from gwstatsmodels.tsa.api import VAR, SVAR 4 | import matplotlib.pyplot as plt 5 | import gwstatsmodels.tsa.vector_ar.util as util 6 | import pandas as px 7 | 8 | mdatagen = sm.datasets.macrodata.load().data 9 | mdata = mdatagen[['realgdp','realcons','realinv']] 10 | names = mdata.dtype.names 11 | start = px.datetime(1959, 3, 31) 12 | end = px.datetime(2009, 9, 30) 13 | qtr = px.DateRange(start, end, offset=px.datetools.BQuarterEnd()) 14 | data = px.DataFrame(mdata, index=qtr) 15 | data = (np.log(data)).diff().dropna() 16 | 17 | #define structural inputs 18 | A = np.asarray([[1, 0, 0],['E', 1, 0],['E', 'E', 1]]) 19 | B = np.asarray([['E', 0, 0], [0, 'E', 0], [0, 0, 'E']]) 20 | A_guess = 
class SVARdataResults(object):
    """Reference values for the SVAR tests.

    Holds the benchmark ``A`` and ``B`` matrices as nested lists (3 rows
    of 3 values each).  According to the module docstring they were
    obtained from R using svartest.R.
    """

    def __init__(self):
        a_rows = [
            [1.0, 0.0, 0],
            [-0.506802245, 1.0, 0],
            [-5.536056520, 3.04117686, 1.0],
        ]
        b_rows = [
            [0.0075756676, 0.0, 0.0],
            [0.0, 0.00512051886, 0.0],
            [0.0, 0.0, 0.020708948],
        ]
        self.A = a_rows
        self.B = b_rows
class TestSVAR(object):
    """Compare the estimated SVAR ``A`` and ``B`` matrices with stored results."""

    @classmethod
    def setupClass(cls):
        """Fit an AB-type SVAR with 3 lags on the macrodata set.

        The fit result is stored in ``cls.res1`` and the reference values
        in ``cls.res2``.
        """
        mdata = sm.datasets.macrodata.load().data
        mdata = mdata[['realgdp','realcons','realinv']]
        # View the three selected fields as rows of three floats.
        # NOTE(review): assumes all three fields are stored as floats -
        # TODO confirm against the dataset.
        data = mdata.view((float,3))
        # first differences of the logs
        data = np.diff(np.log(data), axis=0)
        # 'E' entries are presumably the elements the model estimates;
        # verify against SVAR.
        A = np.asarray([[1, 0, 0],['E', 1, 0],['E', 'E', 1]])
        B = np.asarray([['E', 0, 0], [0, 'E', 0], [0, 0, 'E']])
        results = SVAR(data, svar_type='AB', A=A, B=B).fit(maxlags=3)
        cls.res1 = results
        cls.res2 = results_svar.SVARdataResults()

    def test_A(self):
        """Estimated ``A`` matches the reference to 4 decimals."""
        assert_almost_equal(self.res1.A, self.res2.A, DECIMAL_4)

    def test_B(self):
        """Estimated ``B`` matches the reference to 4 decimals."""
        assert_almost_equal(self.res1.B, self.res2.B, DECIMAL_4)