├── .appveyor.yml ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── linux.yml │ └── osx.yml ├── .gitignore ├── .gitmodules ├── LICENSE ├── Makefile ├── README.md ├── build.py ├── build ├── _gtools_internal.ado ├── _gtools_internal.mata ├── changelog.md ├── fasterxtile.ado ├── fasterxtile.sthlp ├── gcollapse.ado ├── gcollapse.sthlp ├── gcontract.ado ├── gcontract.sthlp ├── gdistinct.ado ├── gdistinct.sthlp ├── gduplicates.ado ├── gduplicates.sthlp ├── gegen.ado ├── gegen.sthlp ├── gglm.ado ├── gglm.sthlp ├── gisid.ado ├── gisid.sthlp ├── givregress.ado ├── givregress.sthlp ├── glevelsof.ado ├── glevelsof.sthlp ├── gpoisson.ado ├── gpoisson.sthlp ├── gquantiles.ado ├── gquantiles.sthlp ├── greg.ado ├── greg.sthlp ├── gregress.ado ├── gregress.sthlp ├── greshape.ado ├── greshape.sthlp ├── gstats.ado ├── gstats.sthlp ├── gstats_hdfe.sthlp ├── gstats_moving.sthlp ├── gstats_range.sthlp ├── gstats_residualize.sthlp ├── gstats_sum.sthlp ├── gstats_summarize.sthlp ├── gstats_tab.sthlp ├── gstats_transform.sthlp ├── gstats_winsor.sthlp ├── gtools.ado ├── gtools.pkg ├── gtools.sthlp ├── gtools_macosx_v2.plugin ├── gtools_macosx_v3.plugin ├── gtools_tests.do ├── gtools_tests_macosx.log ├── gtools_tests_unix.log ├── gtools_tests_windows.log ├── gtools_unix_v2.plugin ├── gtools_unix_v3.plugin ├── gtools_windows_v2.plugin ├── gtools_windows_v3.plugin ├── gtop.ado ├── gtop.sthlp ├── gtoplevelsof.ado ├── gtoplevelsof.sthlp ├── gunique.ado ├── gunique.sthlp ├── hashsort.ado ├── hashsort.sthlp ├── lgtools.mlib └── stata.toc ├── changelog.md ├── docs ├── benchmarks.md ├── benchmarks │ ├── material.json │ ├── quick.do │ ├── quick.png │ ├── quick.py │ └── quickdark.png ├── compiling.md ├── css │ ├── extra-material-dark.css │ ├── extra-material-dark.scss │ ├── extra-material.css │ └── extra-rtd.css ├── examples │ ├── gcollapse.do │ ├── gcontract.do │ ├── gdistinct.do │ ├── gduplicates.do │ ├── gegen.do │ ├── gglm.do │ ├── gisid.do │ ├── 
givregress.do │ ├── glevelsof.do │ ├── gquantiles.do │ ├── gregress.do │ ├── greshape.do │ ├── gstats_hdfe.do │ ├── gstats_summarize.do │ ├── gstats_transform.do │ ├── gstats_winsor.do │ ├── gtoplevelsof.do │ ├── gunique.do │ └── hashsort.do ├── faqs.md ├── helpers.js ├── index.md ├── stata │ ├── fasterxtile.sthlp │ ├── gcollapse.sthlp │ ├── gcontract.sthlp │ ├── gdistinct.sthlp │ ├── gduplicates.sthlp │ ├── gegen.sthlp │ ├── generic.sthlp │ ├── gglm.sthlp │ ├── gisid.sthlp │ ├── givregress.sthlp │ ├── glevelsof.sthlp │ ├── gquantiles.sthlp │ ├── greg.sthlp │ ├── gregress.sthlp │ ├── greshape.sthlp │ ├── gstats.sthlp │ ├── gstats_hdfe.sthlp │ ├── gstats_moving.sthlp │ ├── gstats_range.sthlp │ ├── gstats_residualize.sthlp │ ├── gstats_sum.sthlp │ ├── gstats_summarize.sthlp │ ├── gstats_tab.sthlp │ ├── gstats_transform.sthlp │ ├── gstats_winsor.sthlp │ ├── gtools.sthlp │ ├── gtop.sthlp │ ├── gtoplevelsof.sthlp │ ├── gunique.sthlp │ └── hashsort.sthlp └── usage │ ├── gcollapse.md │ ├── gcontract.md │ ├── gdistinct.md │ ├── gduplicates.md │ ├── gegen.md │ ├── gglm.md │ ├── gisid.md │ ├── givregress.md │ ├── glevelsof.md │ ├── gquantiles.md │ ├── gregress.md │ ├── greshape.md │ ├── gstats_hdfe.md │ ├── gstats_summarize.md │ ├── gstats_transform.md │ ├── gstats_winsor.md │ ├── gtools.md │ ├── gtoplevelsof.md │ ├── gunique.md │ └── hashsort.md ├── lib ├── bumpver.py ├── id_rsa_travis.enc ├── material.json ├── plugin │ ├── gtools_macosx_v2.plugin │ ├── gtools_macosx_v3.plugin │ ├── gtools_unix_v2.plugin │ ├── gtools_unix_v3.plugin │ ├── gtools_windows_v2.plugin │ ├── gtools_windows_v3.plugin │ └── lgtools.mlib ├── spi-2.0 │ ├── stplugin.c │ └── stplugin.h ├── spi-3.0 │ ├── stplugin.c │ └── stplugin.h └── update_version.py ├── mkdocs.yml └── src ├── ado ├── _gtools_internal.ado ├── _gtools_internal.mata ├── fasterxtile.ado ├── gcollapse.ado ├── gcontract.ado ├── gdistinct.ado ├── gduplicates.ado ├── gegen.ado ├── gglm.ado ├── gisid.ado ├── givregress.ado ├── glevelsof.ado 
├── gpoisson.ado ├── gquantiles.ado ├── greg.ado ├── gregress.ado ├── greshape.ado ├── gstats.ado ├── gtools.ado ├── gtop.ado ├── gtoplevelsof.ado ├── gunique.ado └── hashsort.ado ├── github-issues ├── 29 │ └── reproduce.ado ├── 35 │ ├── Makefile │ ├── spookyhash.dll │ ├── spookyhash_api.h │ ├── stplugin.c │ ├── stplugin.h │ ├── test.do │ ├── test1.c │ ├── test1.plugin │ ├── test2.c │ └── test2.plugin ├── 40 │ ├── gtools-1.3.3.zip │ ├── plugin.zip │ ├── plugin │ │ ├── Makefile │ │ ├── libspookyhash.a │ │ ├── spookyhash_api.h │ │ ├── stplugin.c │ │ ├── stplugin.h │ │ ├── test.do │ │ ├── test1.c │ │ ├── test1.plugin │ │ ├── test2.c │ │ └── test2.plugin │ └── test.do ├── 45 │ └── test.do ├── 48 │ └── test.do ├── 60 │ ├── plugin.zip │ └── plugin │ │ ├── Makefile │ │ ├── lib │ │ └── spookyhash │ │ │ └── src │ │ │ ├── context.c │ │ │ ├── context.h │ │ │ ├── globals.c │ │ │ ├── globals.h │ │ │ ├── spookyhash.c │ │ │ ├── spookyhash.h │ │ │ └── spookyhash_api.h │ │ ├── stplugin.c │ │ ├── stplugin.h │ │ ├── test.do │ │ ├── test1.c │ │ ├── test1.plugin │ │ ├── test2.c │ │ └── test2.plugin ├── 65 │ ├── estpost.ado │ └── test.do ├── 67 │ ├── test-transform.do │ ├── test.do │ ├── test.orig.do │ └── test.short.do ├── 71 │ └── test.do ├── 72 │ └── issue.do ├── 78 │ ├── ifin.do │ └── test.do ├── 88 │ └── test.do ├── 30a │ └── replicate.do ├── 30b │ ├── replicate.do │ └── reply01.do ├── debug-1 │ └── savehdfe.do ├── debug-2 │ ├── test-median.do │ └── test-noobs.do ├── debug-3 │ └── test-excludeself.do ├── debug-4 │ ├── test_excludeself.do │ ├── test_gcollapse.do │ ├── test_gquantiles.do │ ├── test_greshape.do │ ├── test_gtop.do │ ├── test_gunique.do │ ├── test_hash.do │ └── test_replace.do ├── debug-5 │ └── test.do ├── debug-6 │ └── greg-coredump.do ├── debug-7 │ └── quantiles.do ├── debug-8 │ └── test.do ├── debug-9 │ └── test.do ├── email-10 │ ├── bug.do │ ├── fix.do │ ├── gtools_test.do │ └── mwe.do ├── email-11 │ └── test.do ├── email-12 │ ├── test.do │ └── test2.do ├── email-13 
│ ├── dec_gtools.dta │ ├── dec_manual.dta │ ├── dec_stata.dta │ └── example.do └── email-14 │ ├── gtools_bug.dta │ └── test.do ├── gtools.pkg ├── plugin ├── api │ ├── groupby.c │ ├── groupby.h │ ├── groupby │ │ ├── accelerators.c │ │ ├── alphas.c │ │ ├── base.c │ │ ├── berge.c │ │ ├── stats.c │ │ ├── stats_unweighted.c │ │ ├── stats_weights.c │ │ ├── transforms.c │ │ ├── transforms_unweighted.c │ │ └── transforms_weights.c │ ├── hashing.c │ ├── hashing.h │ └── hashing │ │ ├── bijection.c │ │ ├── panelsetup.c │ │ ├── radix.c │ │ └── utils.c ├── collapse │ ├── gegen.c │ ├── gegen_w.c │ ├── gtools_math.c │ ├── gtools_math.h │ ├── gtools_math_unw.c │ ├── gtools_math_unw.h │ ├── gtools_math_w.c │ ├── gtools_math_w.h │ ├── gtools_nunique.c │ ├── gtools_utils.c │ ├── gtools_utils.h │ └── qselect.c ├── common │ ├── RadixSort.c │ ├── RadixSortGeneric.c │ ├── RadixSortTesting │ ├── RadixSortTesting.c │ ├── RadixSortTyped.c │ ├── RadixSortTypedIndex.c │ ├── encode.c │ ├── fixes.c │ ├── gttypes.h │ ├── quicksort.c │ ├── quicksortComparators.c │ ├── quicksortMultiLevel.c │ ├── quicksortMultiLevelMlast.c │ ├── readWrite.c │ ├── sf_printf.c │ ├── sf_printf.h │ ├── sf_wrappers.c │ └── sf_wrappers.h ├── extra │ ├── gcontract.c │ ├── gisid.c │ ├── glevelsof.c │ ├── greshape.c │ ├── greshape_fast.c │ ├── gtop.c │ └── hashsort.c ├── gtools.c ├── gtools.h ├── hash │ ├── gtools_hash.c │ ├── gtools_hash.h │ ├── gtools_hash_fast.c │ ├── gtools_sort.c │ └── gtools_sort.h ├── lib ├── quantiles │ ├── gquantiles.c │ ├── gquantiles_by.c │ ├── gquantiles_math.c │ ├── gquantiles_math.h │ ├── gquantiles_math_w.c │ ├── gquantiles_math_w.h │ ├── gquantiles_utils.c │ └── gquantiles_utils.h ├── regress │ ├── gregress.c │ ├── gregress.h │ ├── linalg │ │ ├── colmajor.c │ │ ├── colmajor_ix.c │ │ ├── colmajor_w.c │ │ ├── common.c │ │ ├── decompositions.c │ │ ├── inverses.c │ │ ├── linalg.h │ │ └── rowmajor.c │ ├── models │ │ ├── glm.c │ │ ├── glm.h │ │ ├── ivregress.c │ │ ├── logit.c │ │ ├── models.h │ │ 
├── ols.c │ │ └── poisson.c │ ├── utils │ │ └── read.c │ └── vce │ │ ├── cluster.c │ │ ├── heteroskedastic.c │ │ ├── homoskedastic.c │ │ ├── vce.h │ │ └── vceadj.c ├── spi └── stats │ ├── gstats.c │ ├── gstats.h │ ├── hdfe.c │ ├── summarize.c │ ├── transform.c │ └── winsor.c ├── stata.toc └── test ├── bench_v2 ├── gcollapse_complex ├── gcollapse_simple ├── gcontract ├── gduplicates_drop ├── gegen ├── gisid ├── gisid_ix ├── glevelsof ├── gquantiles_by ├── gquantiles_pctile ├── gquantiles_xtile ├── gstats_sum ├── gstats_tab ├── gstats_winsor ├── gstats_winsor_by ├── gunique ├── hashsort_gsort ├── hashsort_sort └── material.json ├── gtools_tests.do ├── test_benchmarks.do ├── test_benchmarks.py ├── test_gcollapse.do ├── test_gcontract.do ├── test_gduplicates.do ├── test_gegen.do ├── test_gisid.do ├── test_glevelsof.do ├── test_gquantiles.do ├── test_gquantiles_by.do ├── test_gregress.do ├── test_greshape.do ├── test_gstats.do ├── test_gtoplevelsof.do ├── test_gunique.do ├── test_hashsort.do └── test_pthreads.do /.appveyor.yml: -------------------------------------------------------------------------------- 1 | version: "generic-1.11.8-{build}" 2 | 3 | environment: 4 | matrix: 5 | - ARCH: x86_64 6 | CYGWIN: C:\Cygwin64 7 | CHOST: x86_64-w64-mingw32 8 | CC: x86_64-w64-mingw32-gcc 9 | 10 | build_script: 11 | - git submodule update --init --recursive 12 | - "%CYGWIN%\\bin\\bash -lc 'set -eux; cd /cygdrive/c/projects/%APPVEYOR_PROJECT_NAME%; cd lib/spookyhash; git checkout spookyhash-1.0.6; cd -'" 13 | - "%CYGWIN%\\bin\\bash -lc 'set -eux; cd /cygdrive/c/projects/%APPVEYOR_PROJECT_NAME%; make clean SPI=2.0 SPIVER=v2; make all SPI=2.0 SPIVER=v2'" 14 | - "%CYGWIN%\\bin\\bash -lc 'set -eux; cd /cygdrive/c/projects/%APPVEYOR_PROJECT_NAME%; make clean SPI=3.0 SPIVER=v3; make all SPI=3.0 SPIVER=v3'" 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | - A clear and concise description of what the bug is. 9 | - If possible, try `gtools, upgrade` before submitting the bug; your issue might have already been solved. 10 | 11 | **Code Sample** 12 | Include a code snippet to reproduce the problem; be sure to run the relevant `gtools` program(s) with options `verbose bench(3)`. 13 | 14 | ```stata 15 | // code snippet 16 | ``` 17 | 18 | ```stata 19 | // output (or attach a .log file) 20 | ``` 21 | 22 | **Version info** 23 | - OS: [e.g. Windows 10] 24 | - Version: [i.e. output of `gtools`] 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **What would you like gtools to add or change (and why)?** 8 | If this relates to a new feature, specify what behavior you would like to see. If this relates to an existing part of Gtools, explain how you would like that to change. 9 | 10 | **Please include a specific suggestion** 11 | Ex. Add option `foo` to function `gcollapse` to do "bar". 12 | Ex. Function `gquantiles` should do A by default instead of B. 13 | 14 | **Additional context** 15 | Add any other context related to your feature request. 
16 | -------------------------------------------------------------------------------- /.github/workflows/linux.yml: -------------------------------------------------------------------------------- 1 | # Test compile plugin 2 | 3 | name: linux 4 | on: 5 | push: 6 | branches: 7 | - master 8 | - develop 9 | pull_request: 10 | branches: [ master ] 11 | jobs: 12 | build: 13 | name: Compile Plugin 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Build Plugin 18 | run: | 19 | # brew install xx 20 | git submodule update --init 21 | cd lib/spookyhash && git checkout spookyhash-1.0.6 && cd - 22 | make clean SPI=2.0 SPIVER=v2 && make all SPI=2.0 SPIVER=v2 GCC=gcc 23 | make clean SPI=3.0 SPIVER=v3 && make all SPI=3.0 SPIVER=v3 GCC=gcc 24 | -------------------------------------------------------------------------------- /.github/workflows/osx.yml: -------------------------------------------------------------------------------- 1 | # Compile and pull osx plugin 2 | 3 | name: osx 4 | on: 5 | push: 6 | branches: 7 | - master 8 | - develop 9 | pull_request: 10 | branches: [ master ] 11 | jobs: 12 | build: 13 | name: Compile and Push Plugin 14 | runs-on: macos-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Build Plugin 18 | run: | 19 | # brew install xx 20 | git submodule update --init 21 | cd lib/spookyhash && git checkout spookyhash-1.0.6 && cd - 22 | make clean SPI=2.0 SPIVER=v2 && make osx SPI=2.0 SPIVER=v2 GCC=gcc 23 | make clean SPI=3.0 SPIVER=v3 && make osx SPI=3.0 SPIVER=v3 GCC=gcc 24 | - name: Commit OSX Plugin 25 | run: | 26 | shasum build/gtools_macosx_v3.plugin 27 | shasum build/gtools_macosx_v2.plugin 28 | # 29 | otool -L build/gtools_macosx_v3.plugin 30 | otool -L build/gtools_macosx_v2.plugin 31 | # 32 | cp -f build/gtools_macosx_v3.plugin lib/plugin/gtools_macosx_v3.plugin 33 | cp -f build/gtools_macosx_v2.plugin lib/plugin/gtools_macosx_v2.plugin 34 | # 35 | git config --global user.name 'Mauricio Caceres' 36 | 
git config --global user.email 'mauricio.caceres.bravo@gmail.com' 37 | git remote set-url origin https://x-access-token:${{ secrets.COMPILE_TOKEN }}@github.com/${{ github.repository }} 38 | # 39 | git add build/gtools_macosx_v3.plugin 40 | git add build/gtools_macosx_v2.plugin 41 | git add lib/plugin/gtools_macosx_v3.plugin 42 | git add lib/plugin/gtools_macosx_v2.plugin 43 | # 44 | echo ${GITHUB_REF##*/} 45 | [ -n "$(git status --porcelain)" ] && git commit -m "[Automated Commit] OSX plugin" 46 | git fetch 47 | git push -f origin HEAD:${GITHUB_REF##*/} 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | releases 2 | testing 3 | site/ 4 | docs/site/ 5 | build/gtools_macosx_v2.plugin.arm64 6 | build/gtools_macosx_v2.plugin.x86_64 7 | build/gtools_macosx_v3.plugin.arm64 8 | build/gtools_macosx_v3.plugin.x86_64 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib/spookyhash"] 2 | path = lib/spookyhash 3 | url = https://github.com/centaurean/spookyhash 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017, 2018 Mauricio Caceres Bravo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to 7 | deal in the Software without restriction, including without limitation the 8 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 9 | sell copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this 
permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /build/gglm.ado: -------------------------------------------------------------------------------- 1 | *! version 1.11.1 03Apr2023 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! Estimate GLM via IRLS by group and with HDFE 3 | 4 | cap program drop gglm 5 | program gglm, rclass 6 | version 13.1 7 | 8 | local 00: copy local 0 9 | if ( strpos(`"`0'"', ",") > 0 ) { 10 | local comma 11 | } 12 | else { 13 | local comma , 14 | } 15 | gregress `0' `comma' glm 16 | if ( ${GREG_RC} ) { 17 | global GREG_RC 18 | exit 0 19 | } 20 | local 0: copy local 00 21 | 22 | return local levels `"`r(levels)'"' 23 | return scalar N = r(N) 24 | return scalar J = r(J) 25 | return scalar minJ = r(minJ) 26 | return scalar maxJ = r(maxJ) 27 | end 28 | 29 | -------------------------------------------------------------------------------- /build/gisid.ado: -------------------------------------------------------------------------------- 1 | *! version 1.1.1 23Jan2019 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! 
-isid- implementation using C for faster processing 3 | 4 | capture program drop gisid 5 | program gisid 6 | version 13.1 7 | 8 | global GTOOLS_CALLER gisid 9 | syntax varlist /// Variables to check 10 | [if] [in] , /// [if condition] [in start / end] 11 | [ /// 12 | Missok /// Missing values in varlist are OK 13 | compress /// Try to compress strL variables 14 | forcestrl /// Force reading strL variables (stata 14 and above only) 15 | Verbose /// Print info during function execution 16 | _keepgreshape /// (Undocumented) Keep greshape scalars 17 | _CTOLerance(passthru) /// (Undocumented) Counting sort tolerance; default is radix 18 | BENCHmark /// Benchmark function 19 | BENCHmarklevel(int 0) /// Benchmark various steps of the plugin 20 | HASHmethod(passthru) /// Hashing method: 0 (default), 1 (biject), 2 (spooky) 21 | oncollision(passthru) /// error|fallback: On collision, use native command or throw error 22 | debug(passthru) /// Print debugging info to console 23 | /// 24 | /// Unsupported isid options 25 | /// ------------------------ 26 | Sort /// 27 | ] 28 | 29 | if ( `benchmarklevel' > 0 ) local benchmark benchmark 30 | local benchmarklevel benchmarklevel(`benchmarklevel') 31 | 32 | if ( "`sort'" != "" ) { 33 | di as err "Option -sort- is not implemented" 34 | exit 198 35 | } 36 | 37 | if ( "`missok'" == "" ) { 38 | local miss exitmissing 39 | } 40 | else { 41 | local miss missing 42 | } 43 | 44 | local opts `miss' `compress' `forcestrl' `_ctolerance' `_keepgreshape' 45 | local opts `opts' `verbose' `benchmark' `benchmarklevel' 46 | local opts `opts' `oncollision' `hashmethod' `debug' 47 | cap noi _gtools_internal `varlist' `if' `in', unsorted `opts' gfunction(isid) 48 | local rc = _rc 49 | global GTOOLS_CALLER "" 50 | 51 | if ( `rc' == 17999 ) { 52 | isid `varlist' `if' `in', `missok' 53 | exit 0 54 | } 55 | else if ( `rc' == 17001 ) { 56 | di as txt "(no observations)" 57 | exit 0 58 | } 59 | else if ( `rc' ) exit `rc' 60 | end 61 | 
-------------------------------------------------------------------------------- /build/givregress.ado: -------------------------------------------------------------------------------- 1 | *! version 1.11.1 03Apr2023 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! Estimate IV regression via 2SLS by group and with HDFE 3 | 4 | cap program drop givregress 5 | program givregress, rclass 6 | version 13.1 7 | 8 | local 00: copy local 0 9 | if ( strpos(`"`0'"', ",") > 0 ) { 10 | local comma 11 | } 12 | else { 13 | local comma , 14 | } 15 | gregress `0' `comma' ivregress 16 | if ( ${GREG_RC} ) { 17 | global GREG_RC 18 | exit 0 19 | } 20 | local 0: copy local 00 21 | 22 | return local levels `"`r(levels)'"' 23 | return scalar N = r(N) 24 | return scalar J = r(J) 25 | return scalar minJ = r(minJ) 26 | return scalar maxJ = r(maxJ) 27 | end 28 | -------------------------------------------------------------------------------- /build/gpoisson.ado: -------------------------------------------------------------------------------- 1 | *! version 1.11.1 03Apr2023 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! Estimate poisson regression via IRLS by group and with HDFE 3 | 4 | cap program drop gpoisson 5 | program gpoisson, rclass 6 | version 13.1 7 | 8 | local 00: copy local 0 9 | if ( strpos(`"`0'"', ",") > 0 ) { 10 | local comma 11 | } 12 | else { 13 | local comma , 14 | } 15 | gregress `0' `comma' glm family(poisson) 16 | if ( ${GREG_RC} ) { 17 | global GREG_RC 18 | exit 0 19 | } 20 | local 0: copy local 00 21 | 22 | return local levels `"`r(levels)'"' 23 | return scalar N = r(N) 24 | return scalar J = r(J) 25 | return scalar minJ = r(minJ) 26 | return scalar maxJ = r(maxJ) 27 | end 28 | -------------------------------------------------------------------------------- /build/greg.ado: -------------------------------------------------------------------------------- 1 | *! 
version 1.11.8 28Jun2024 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! Estimate linear regression via OLS by group and with HDFE 3 | 4 | cap program drop greg 5 | program greg, rclass 6 | version 13.1 7 | 8 | local 00: copy local 0 9 | gregress `0' 10 | if ( ${GREG_RC} ) { 11 | global GREG_RC 12 | exit 0 13 | } 14 | local 0: copy local 00 15 | 16 | return local cmd `"`r(cmd)'"' 17 | return local mata `"`r(mata)'"' 18 | return scalar N = r(N) 19 | return scalar J = r(J) 20 | return scalar minJ = r(minJ) 21 | return scalar maxJ = r(maxJ) 22 | end 23 | -------------------------------------------------------------------------------- /build/gstats.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *! version 0.4.0 09Jun2019}{...} 3 | {viewerdialog gstats "dialog gstats"}{...} 4 | {vieweralsosee "[R] gstats" "mansection R gstats"}{...} 5 | {viewerjumpto "Syntax" "gstats##syntax"}{...} 6 | {viewerjumpto "Description" "gstats##description"}{...} 7 | {title:Title} 8 | 9 | {p2colset 5 15 23 2}{...} 10 | {p2col :{cmd:gstats} {hline 2}} Various statistical functions and transformations. {p_end} 11 | {p2colreset}{...} 12 | 13 | {pstd} 14 | {it:Important}: Please run {stata gtools, upgrade} to update {cmd:gtools} to 15 | the latest stable version. 16 | 17 | {marker syntax}{...} 18 | {title:Syntax} 19 | 20 | {p 8 17 2} 21 | {cmd:gstats} 22 | {it:subcommand} 23 | {varlist} 24 | {ifin} 25 | [{it:{help gstats##weight:weight}}] 26 | [{cmd:,} {opth by(varlist)} {it:{help gstats##table_options:subcommand_options}}] 27 | 28 | {phang} 29 | {opt gstats} is a wrapper for various statistical functions and 30 | transformations, including: 31 | 32 | {p 8 17 2} 33 | {help gstats hdfe:{bf:hdfe}} 34 | (alias {help gstats hdfe:{bf:residualize}}) is a fast utility for residualizing variables (i.e. HDFE transform; accepts weights). 
{p_end} 35 | 36 | {p 8 17 2} 37 | {help gstats winsor:{bf:winsor}} 38 | as a fast {opt winsor2} alternative (accepts weights). {p_end} 39 | 40 | {p 8 17 2} 41 | {help gstats summarize:{bf:{ul:sum}marize}} and 42 | {help gstats summarize:{bf:{ul:tab}stat}} are fast, 43 | by-able alternatives to {opt summarize, detail} and {opt tabstat} (accept weights). {p_end} 44 | 45 | {p 8 17 2} 46 | {help gstats transform:{bf:transform}} 47 | to apply various statistical transformations (accepts weights). {p_end} 48 | 49 | {marker description}{...} 50 | {title:Description} 51 | 52 | {pstd} 53 | {opt gstats} is a wrapper to several statistical functions and 54 | transformations. In theory {opt gegen} would be the place to expand 55 | {opt gtools}; however, {opt gegen}'s internally implemented functions 56 | were written with two assumptions: first, the output is unique at the 57 | group level; second, there is always a target variable. {opt gstats} 58 | is written to be more flexible and allow arbitrary functions and 59 | transformations. 60 | 61 | {pstd} 62 | Weights are supported for the following subcommands: {it:winsor}, {it:summarize}, {it:tabstat}, {it:residualize}. 63 | 64 | {marker author}{...} 65 | {title:Author} 66 | 67 | {pstd}Mauricio Caceres{p_end} 68 | {pstd}{browse "mailto:mauricio.caceres.bravo@gmail.com":mauricio.caceres.bravo@gmail.com }{p_end} 69 | {pstd}{browse "https://mcaceresb.github.io":mcaceresb.github.io}{p_end} 70 | 71 | {title:Website} 72 | 73 | {pstd}{cmd:gstats} is maintained as part of the {manhelp gtools R:gtools} project at {browse "https://github.com/mcaceresb/stata-gtools":github.com/mcaceresb/stata-gtools}{p_end} 74 | 75 | {marker acknowledgment}{...} 76 | {title:Acknowledgment} 77 | 78 | {pstd} 79 | {opt gtools} was largely inspired by Sergio Correia's {it:ftools}: 80 | {browse "https://github.com/sergiocorreia/ftools"}. 
81 | {p_end} 82 | 83 | {pstd} 84 | The OSX version of gtools was implemented with invaluable help from @fbelotti; 85 | see {browse "https://github.com/mcaceresb/stata-gtools/issues/11"}. 86 | {p_end} 87 | 88 | {title:Also see} 89 | 90 | {p 4 13 2} 91 | help for 92 | {help gtools} 93 | -------------------------------------------------------------------------------- /build/gtools.pkg: -------------------------------------------------------------------------------- 1 | v 1.11.8 2 | d 3 | d 'GTOOLS': Faster implementation of common Stata commands optimized for large datasets 4 | d 5 | d Faster Stata for big data. Gtools provides a hash-based implementation 6 | d of common Stata commands using C plugins for a massive speed 7 | d improvement. Gtools implements gcollapse, greshape, gquantiles 8 | d (pctile, xtile, and _pctile), gcontract, gegen, gisid, glevelsof, 9 | d gunique, gdistinct, gduplicates, gtop, and gstats (winsor, summarize, 10 | d tabstat). Syntax is largely analogous to their native counterparts 11 | d (see help gtools for details). This package was inspired by Sergio 12 | d Correia's ftools. 
13 | d 14 | d KW: plugin 15 | d KW: gtools 16 | d KW: collapse 17 | d KW: reshape 18 | d KW: regress 19 | d KW: ivregress 20 | d KW: glm 21 | d KW: logit 22 | d KW: poisson 23 | d KW: residualize 24 | d KW: hdfe 25 | d KW: contract 26 | d KW: egen 27 | d KW: xtile 28 | d KW: fastxtile 29 | d KW: pctile 30 | d KW: _pctile 31 | d KW: levelsof 32 | d KW: toplevelsof 33 | d KW: isid 34 | d KW: duplicates 35 | d KW: unique 36 | d KW: distinct 37 | d KW: sort 38 | d KW: gsort 39 | d KW: gegen 40 | d KW: fasterxtile 41 | d KW: gquantiles 42 | d KW: gcollapse 43 | d KW: greshape 44 | d KW: gregress 45 | d KW: givregress 46 | d KW: gglm 47 | d KW: gpoisson 48 | d KW: glogit 49 | d KW: gcontract 50 | d KW: gisid 51 | d KW: gduplicates 52 | d KW: glevelsof 53 | d KW: gtoplevelsof 54 | d KW: gunique 55 | d KW: gdistinct 56 | d KW: hashsort 57 | d KW: winsor 58 | d KW: summarize 59 | d KW: tabstat 60 | d 61 | d Requires: Stata version 13.1 62 | d 63 | d Author: Mauricio Caceres Bravo 64 | d Support: email mauricio.caceres.bravo@gmail.com 65 | d 66 | d Distribution-Date: 20240628 67 | d 68 | f _gtools_internal.ado 69 | f _gtools_internal.mata 70 | f gcollapse.ado 71 | f gcontract.ado 72 | f gegen.ado 73 | f gquantiles.ado 74 | f fasterxtile.ado 75 | f gunique.ado 76 | f gdistinct.ado 77 | f glevelsof.ado 78 | f gtop.ado 79 | f gtoplevelsof.ado 80 | f gisid.ado 81 | f gduplicates.ado 82 | f hashsort.ado 83 | f greshape.ado 84 | f greg.ado 85 | f gregress.ado 86 | f givregress.ado 87 | f gglm.ado 88 | f gpoisson.ado 89 | f gstats.ado 90 | f gtools.ado 91 | f gcollapse.sthlp 92 | f gcontract.sthlp 93 | f gegen.sthlp 94 | f gquantiles.sthlp 95 | f fasterxtile.sthlp 96 | f gunique.sthlp 97 | f gdistinct.sthlp 98 | f glevelsof.sthlp 99 | f gtop.sthlp 100 | f gtoplevelsof.sthlp 101 | f gisid.sthlp 102 | f gduplicates.sthlp 103 | f hashsort.sthlp 104 | f greshape.sthlp 105 | f greg.sthlp 106 | f gregress.sthlp 107 | f givregress.sthlp 108 | f gglm.sthlp 109 | f gstats.sthlp 110 | f 
gstats_residualize.sthlp 111 | f gstats_hdfe.sthlp 112 | f gstats_winsor.sthlp 113 | f gstats_tab.sthlp 114 | f gstats_sum.sthlp 115 | f gstats_summarize.sthlp 116 | f gstats_transform.sthlp 117 | f gstats_range.sthlp 118 | f gstats_moving.sthlp 119 | f gtools.sthlp 120 | f lgtools.mlib 121 | f gtools_windows_v2.plugin 122 | f gtools_unix_v2.plugin 123 | f gtools_macosx_v2.plugin 124 | f gtools_windows_v3.plugin 125 | f gtools_unix_v3.plugin 126 | f gtools_macosx_v3.plugin 127 | -------------------------------------------------------------------------------- /build/gtools_macosx_v2.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/gtools_macosx_v2.plugin -------------------------------------------------------------------------------- /build/gtools_macosx_v3.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/gtools_macosx_v3.plugin -------------------------------------------------------------------------------- /build/gtools_unix_v2.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/gtools_unix_v2.plugin -------------------------------------------------------------------------------- /build/gtools_unix_v3.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/gtools_unix_v3.plugin -------------------------------------------------------------------------------- /build/gtools_windows_v2.plugin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/gtools_windows_v2.plugin -------------------------------------------------------------------------------- /build/gtools_windows_v3.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/gtools_windows_v3.plugin -------------------------------------------------------------------------------- /build/gtop.ado: -------------------------------------------------------------------------------- 1 | *! version 1.2.0 23Mar2019 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! Calculate the top groups by count of a varlist (jointly). 3 | 4 | cap program drop gtop 5 | program gtop, rclass 6 | version 13.1 7 | 8 | local 00 `0' 9 | gtoplevelsof `0' 10 | if ( ${GTOP_RC} ) { 11 | global GTOP_RC 12 | exit 0 13 | } 14 | local 0 `00' 15 | 16 | qui syntax [anything] [if] [in] [aw fw pw], [LOCal(str) MATrix(str) *] 17 | if ( "`local'" != "" ) c_local `local' `"`r(levels)'"' 18 | if ( "`matrix'" != "" ) matrix `matrix' = r(toplevels) 19 | return local levels `"`r(levels)'"' 20 | return scalar N = r(N) 21 | return scalar J = r(J) 22 | return scalar minJ = r(minJ) 23 | return scalar maxJ = r(maxJ) 24 | return scalar alpha = r(alpha) 25 | return scalar ntop = r(ntop) 26 | return scalar nrows = r(nrows) 27 | 28 | if ( `"`r(matalevels)'"' == "" ) { 29 | tempname gmat 30 | matrix `gmat' = r(toplevels) 31 | return matrix toplevels = `gmat' 32 | } 33 | else { 34 | return local matalevels = `"`r(matalevels)'"' 35 | } 36 | end 37 | -------------------------------------------------------------------------------- /build/hashsort.ado: -------------------------------------------------------------------------------- 1 | *! version 1.0.1 23Jan2019 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! 
Hash-based implementation of -sort- and -gsort- using C-plugins 3 | 4 | capture program drop hashsort 5 | program define hashsort 6 | version 13.1 7 | 8 | global GTOOLS_CALLER hashsort 9 | syntax anything, /// Variables to sort by: [+|-]varname [[+|-]varname ...] 10 | [ /// 11 | GENerate(passthru) /// Generate variable with sort order 12 | replace /// Replace generated variable, if it exists 13 | sortgen /// Sort by generated variable, if applicable 14 | skipcheck /// Turn off internal is sorted check 15 | /// 16 | compress /// Try to compress strL variables 17 | forcestrl /// Force reading strL variables (stata 14 and above only) 18 | Verbose /// Print info during function execution 19 | _CTOLerance(passthru) /// (Undocumented) Counting sort tolerance; default is radix 20 | BENCHmark /// Benchmark function 21 | BENCHmarklevel(int 0) /// Benchmark various steps of the plugin 22 | HASHmethod(passthru) /// Hashing method: 0 (default), 1 (biject), 2 (spooky) 23 | oncollision(passthru) /// error|fallback: On collision, use native command or throw error 24 | debug(passthru) /// Print debugging info to console 25 | /// 26 | tag(passthru) /// 27 | counts(passthru) /// 28 | fill(passthru) /// 29 | invertinmata /// 30 | /// 31 | /// Unsupported sort options 32 | /// ------------------------ 33 | /// 34 | stable /// Hashsort is always stable 35 | mlast /// 36 | Mfirst /// 37 | ] 38 | 39 | if ( `benchmarklevel' > 0 ) local benchmark benchmark 40 | local benchmarklevel benchmarklevel(`benchmarklevel') 41 | 42 | if ( "`stable'" != "" ) { 43 | di as txt "hashsort is always -stable-" 44 | } 45 | 46 | * mfirst and mlast are mutually exclusive: report the conflict and abort with rc 198 47 | if ( ("`mfirst'" != "") & ("`mlast'" != "") ) { 48 | di as err "Cannot request both {opt mfirst} and {opt mlast}" 49 | exit 198 50 | } 51 | * mfirst is set by default, unlike gsort 52 | if ( ("`mfirst'" == "") & ("`mlast'" == "") & (strpos("`anything'", "-") > 0) ) { 53 | di as txt "(note: missing values will be sorted first)" 54 | } 55 | 56 | * 
mfirst is set by default 57 | if ( ("`mfirst'" == "") & ("`mlast'" == "") ) { 58 | local mfirst mfirst 59 | } 60 | 61 | if ( "`generate'" != "" ) local skipcheck skipcheck 62 | 63 | local opts `compress' `forcestrl' nods 64 | local opts `opts' `verbose' `benchmark' `benchmarklevel' `_ctolerance' 65 | local opts `opts' `oncollision' `hashmethod' `debug' 66 | local eopts `invertinmata' `sortgen' `skipcheck' 67 | local gopts `generate' `tag' `counts' `fill' `replace' `mlast' 68 | cap noi _gtools_internal `anything', missing `opts' `gopts' `eopts' gfunction(sort) 69 | global GTOOLS_CALLER "" 70 | local rc = _rc 71 | 72 | if ( `rc' == 17999 ) { 73 | if regexm("`anything'", "[\+\-]") { 74 | gsort `anything', `generate' `mfirst' 75 | exit 0 76 | } 77 | else { 78 | sort `anything' 79 | exit 0 80 | } 81 | } 82 | else if ( `rc' == 17001 ) { 83 | exit 0 84 | } 85 | else if ( `rc' ) exit `rc' 86 | end 87 | -------------------------------------------------------------------------------- /build/lgtools.mlib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/lgtools.mlib -------------------------------------------------------------------------------- /build/stata.toc: -------------------------------------------------------------------------------- 1 | v 1.11.8 2 | d Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 3 | p 'GTOOLS': Faster implementation of common Stata commands for big data 4 | -------------------------------------------------------------------------------- /docs/benchmarks/quick.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/docs/benchmarks/quick.png -------------------------------------------------------------------------------- /docs/benchmarks/quickdark.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/docs/benchmarks/quickdark.png -------------------------------------------------------------------------------- /docs/css/extra-material-dark.css: -------------------------------------------------------------------------------- 1 | @media only screen and (max-width: 76.1875em) { 2 | .md-nav { 3 | background-color: #000; } } 4 | 5 | .md-header { 6 | background-color: #000; } 7 | 8 | .md-container { 9 | background-color: #333; } 10 | 11 | .md-typeset .footnote { 12 | color: #ccc; } 13 | 14 | body { 15 | color: #ccc; } 16 | 17 | .md-typeset pre { 18 | background-color: #000; } 19 | 20 | .md-typeset code { 21 | color: #ccc; 22 | box-shadow: 0.29412em 0 0 rgba(0, 0, 0, 0.5), -0.29412em 0 0 rgba(0, 0, 0, 0.5); 23 | background-color: rgba(0, 0, 0, 0.5); } 24 | 25 | .md-nav__link[data-md-state=blur] { 26 | color: rgba(230, 230, 230, 0.54); } 27 | 28 | .md-typeset h1 { 29 | color: #ccc; } 30 | 31 | a, .md-typeset a, md-nav__link, md-nav__link:hover { 32 | color: #00968c; } 33 | 34 | a footnote-ref::before { 35 | border-color: #00968c; } 36 | 37 | a footnote-ref::before { 38 | color: #00968c; } 39 | 40 | .md-nav__item--active > .md-nav__link, .md-nav__link:active { 41 | color: #00968c; } 42 | 43 | .md-nav--secondary { 44 | border-left: 0.4rem solid #00968c; } 45 | 46 | .md-nav__link:focus, .md-nav__link:hover { 47 | color: #00968c; } 48 | 49 | .md-typeset hr { 50 | border-bottom: .1rem dotted #666; } 51 | 52 | .md-footer-copyright { 53 | color: #666; } 54 | 55 | .md-typeset blockquote { 56 | padding-left: 1.2rem; 57 | border-left: 0.4rem solid rgba(230, 230, 230, 0.26); 58 | color: rgba(230, 230, 230, 0.54); } 59 | 60 | @media only screen and (max-width: 76.1875em) { 61 | html .md-nav--primary .md-nav__title--site { 62 | background-color: #000; } 63 | html .md-nav--primary .md-nav__title ~ 
.md-nav__list { 64 | background: linear-gradient(180deg, #000 10%, transparent), linear-gradient(180deg, rgba(0, 0, 0, 0.26), rgba(0, 0, 0, 0.07) 35%, transparent 60%); 65 | background-color: #333; } } 66 | 67 | pre { color: white !important; } 68 | 69 | .md-clipboard:before { 70 | color: rgb(255, 255, 255); 71 | } 72 | 73 | .codehilite:hover .md-clipboard:before,.md-typeset .highlight:hover .md-clipboard:before,pre:hover .md-clipboard:before { 74 | color: rgba(255, 255, 255, 0.54) !important 75 | } 76 | -------------------------------------------------------------------------------- /docs/css/extra-material-dark.scss: -------------------------------------------------------------------------------- 1 | // $link_color: #10cfff; 2 | $link_color: #00968c; 3 | $background_color1: #333; 4 | 5 | @media only screen and (max-width: 76.1875em) { 6 | .md-nav { 7 | background-color: #000; 8 | } 9 | } 10 | .md-header { 11 | background-color: #000; 12 | } 13 | .md-container { 14 | background-color: $background_color1; 15 | } 16 | .md-typeset .footnote { 17 | color: #ccc; 18 | } 19 | body { 20 | color: #ccc; 21 | } 22 | .md-typeset pre { 23 | background-color: #000; 24 | } 25 | .md-typeset code { 26 | color: #ccc; 27 | box-shadow: 0.29412em 0 0 hsla(0,0%,0%,.5), -0.29412em 0 0 hsla(0,0%,0%,.5); 28 | background-color: hsla(0,0%,0%,.5); 29 | } 30 | .md-nav__link[data-md-state=blur] { 31 | color: rgba(230,230,230,.54); 32 | } 33 | .md-typeset h1 { 34 | color: #ccc; 35 | } 36 | a, .md-typeset a, md-nav__link, md-nav__link:hover { 37 | color: $link_color; 38 | } 39 | a footnote-ref::before { 40 | border-color: $link_color; 41 | } 42 | a footnote-ref::before { 43 | color: $link_color; 44 | } 45 | .md-nav__item--active>.md-nav__link, .md-nav__link:active { 46 | color: $link_color; 47 | } 48 | .md-nav--secondary { 49 | border-left: .4rem solid $link_color; 50 | } 51 | .md-nav__link:focus, .md-nav__link:hover { 52 | color: $link_color; 53 | } 54 | .md-typeset hr { 55 | border-bottom: 
.1rem dotted #666; 56 | } 57 | .md-footer-copyright { 58 | color: #666; 59 | } 60 | .md-typeset blockquote { 61 | padding-left: 1.2rem; 62 | border-left: .4rem solid rgba(230,230,230,.26); 63 | color: rgba(230,230,230,.54); 64 | } 65 | @media only screen and (max-width: 76.1875em) { 66 | html .md-nav--primary .md-nav__title--site { 67 | background-color: #000; 68 | } 69 | html .md-nav--primary .md-nav__title~.md-nav__list { 70 | background: linear-gradient(180deg,#000 10%,hsla(0,0%,0%,0)),linear-gradient(180deg,rgba(0,0,0,.26),rgba(0,0,0,.07) 35%,transparent 60%); 71 | background-color: $background_color1; 72 | } 73 | } 74 | 75 | pre { color: white !important; } 76 | 77 | .md-clipboard:before { 78 | color: rgb(255, 255, 255); 79 | } 80 | 81 | .codehilite:hover .md-clipboard:before,.md-typeset .highlight:hover .md-clipboard:before,pre:hover .md-clipboard:before { 82 | color: rgba(255, 255, 255, 0.54) !important 83 | } 84 | -------------------------------------------------------------------------------- /docs/css/extra-material.css: -------------------------------------------------------------------------------- 1 | .codespan { 2 | font-family: monospace; 3 | font-size: 18px 4 | } 5 | -------------------------------------------------------------------------------- /docs/css/extra-rtd.css: -------------------------------------------------------------------------------- 1 | code { 2 | font-size: 13px 3 | } 4 | 5 | .codehilite { 6 | font-size: 14px 7 | } 8 | 9 | .codespan { 10 | font-family: monospace; 11 | font-size: 14px 12 | } 13 | 14 | li { 15 | font-size: 15px 16 | } 17 | 18 | table { 19 | font-size: 16px 20 | } 21 | -------------------------------------------------------------------------------- /docs/examples/gcontract.do: -------------------------------------------------------------------------------- 1 | * The options here are essentially the same as Stata's contract, 2 | * save for the standard gtools options. 
3 | 4 | sysuse auto, clear 5 | gen long id = _n * 1000 6 | expand id 7 | gcontract rep78, verbose 8 | 9 | l 10 | 11 | 12 | * You can add frequencies, percentages, and so on: 13 | sysuse auto, clear 14 | gen long id = _n * 1000 15 | expand id 16 | gcontract rep78, freq(f) cfreq(cf) percent(p) cpercent(cp) bench 17 | 18 | l 19 | 20 | 21 | * Last, with multiple variables you can "fill in" missing groups. This option 22 | * has not been implemented internally and as such is very slow: 23 | 24 | sysuse auto, clear 25 | gen long id = _n * 1000 26 | expand id 27 | gcontract foreign rep78, /// 28 | freq(f) cfreq(cf) percent(p) cpercent(cp) bench(3) zero 29 | 30 | l 31 | 32 | * You will note a few levels have 0 frequency, which means they did 33 | * not appear in the full data. 34 | -------------------------------------------------------------------------------- /docs/examples/gdistinct.do: -------------------------------------------------------------------------------- 1 | * gdistinct can function as a drop-in replacement for distinct. 
2 | 3 | sysuse auto, clear 4 | gdistinct 5 | matrix list r(distinct) 6 | 7 | gdistinct, sort(-distinct) 8 | 9 | gdistinct, max(10) 10 | 11 | gdistinct make-headroom 12 | 13 | gdistinct make-headroom, missing abbrev(6) 14 | 15 | gdistinct foreign rep78, joint 16 | 17 | gdistinct foreign rep78, joint missing 18 | -------------------------------------------------------------------------------- /docs/examples/gduplicates.do: -------------------------------------------------------------------------------- 1 | * Setup 2 | sysuse auto 3 | keep make price mpg rep78 foreign 4 | expand 2 in 1/2 5 | 6 | * Report duplicates 7 | gduplicates report 8 | 9 | * List one example for each group of duplicated observations 10 | sort mpg 11 | gduplicates examples 12 | gduplicates examples, sorted 13 | 14 | * List all duplicated observations 15 | gduplicates list 16 | 17 | * Create variable dup containing the number of duplicates (0 if 18 | * observation is unique) 19 | gduplicates tag, generate(dup) 20 | 21 | * List the duplicated observations 22 | list if dup == 1 23 | 24 | * Drop all but the first occurrence of each group of duplicated 25 | * observations 26 | gduplicates drop 27 | 28 | * List all duplicated observations 29 | gduplicates list 30 | 31 | -------------------------------------------------------------------------------- /docs/examples/gegen.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | gegen id = group(foreign) 3 | gegen tag = group(foreign) 4 | gegen sum = sum(mpg), by(foreign) 5 | gegen sum2 = sum(mpg rep78), by(foreign) 6 | gegen p5 = pctile(mpg rep78), p(5) by(foreign) 7 | gegen nuniq = nunique(mpg), by(foreign) 8 | 9 | * The function can be any of the supported functions above. 
10 | * It can also be any function supported by egen: 11 | 12 | webuse egenxmpl4, clear 13 | gegen hsum = rowtotal(a b c) 14 | 15 | sysuse auto, clear 16 | gegen seq = seq(), by(foreign) 17 | -------------------------------------------------------------------------------- /docs/examples/gglm.do: -------------------------------------------------------------------------------- 1 | * NOTE: gglm is in beta. To enable beta features, define 2 | * 3 | * global GTOOLS_BETA = 1 4 | 5 | * Showcase 6 | * -------- 7 | 8 | webuse lbw, clear 9 | gglm low age lwt smoke ptl ht ui, absorb(race) family(binomial) 10 | mata GtoolsLogit.print() 11 | 12 | gen w = _n 13 | gglm low age lwt smoke ptl ht ui [fw = w], absorb(race) family(binomial) 14 | mata GtoolsLogit.print() 15 | 16 | webuse ships, clear 17 | expand 2 18 | gen by = 1.5 - (_n < _N / 2) 19 | gen w = _n 20 | gen _co_75_79 = co_75_79 21 | qui tab ship, gen(_s) 22 | 23 | gglm accident op_75_79 co_65_69 co_70_74 co_75_79 [fw = w], robust family(poisson) 24 | mata GtoolsPoisson.print() 25 | 26 | gglm accident op_75_79 co_65_69 co_70_74 co_75_79 _co_75_79 [pw = w], cluster(ship) family(poisson) 27 | mata GtoolsPoisson.print() 28 | 29 | gglm accident op_75_79 co_65_69 co_70_74 co_75_79 _s*, absorb(ship) cluster(ship) family(poisson) 30 | mata GtoolsPoisson.print() 31 | 32 | gglm accident op_75_79 co_65_69 co_70_74 co_75_79, by(by) absorb(ship) robust family(poisson) 33 | mata GtoolsPoisson.print() 34 | 35 | * Basic Benchmark 36 | * --------------- 37 | 38 | clear 39 | local N 1000000 40 | local G 10000 41 | set obs `N' 42 | gen g1 = int(runiform() * `G') 43 | gen g2 = int(runiform() * `G') 44 | gen g3 = int(runiform() * `G') 45 | gen g4 = int(runiform() * `G') 46 | gen x3 = runiform() 47 | gen x4 = runiform() 48 | gen x1 = x3 + runiform() 49 | gen x2 = x4 + runiform() 50 | gen l = int(0.25 * x1 - 0.75 * x2 + g1 + g2 + g3 + g4 + 20 * rnormal()) 51 | 52 | timer clear 53 | timer on 1 54 | gglm l x1 x2, absorb(g1 g2 g3)
mata(greg) family(poisson) 55 | timer off 1 56 | mata greg.print() 57 | timer on 2 58 | ppmlhdfe l x1 x2, absorb(g1 g2 g3) 59 | timer off 2 60 | 61 | timer on 3 62 | gglm l x1 x2, absorb(g1 g2 g3) cluster(g4) mata(greg) family(poisson) 63 | timer off 3 64 | mata greg.print() 65 | timer on 4 66 | ppmlhdfe l x1 x2, absorb(g1 g2 g3) vce(cluster g4) 67 | timer off 4 68 | 69 | timer list 70 | 71 | * 1: 3.22 / 1 = 3.2160 72 | * 2: 29.64 / 1 = 29.6380 73 | * 3: 3.31 / 1 = 3.3140 74 | * 4: 31.32 / 1 = 31.3190 75 | -------------------------------------------------------------------------------- /docs/examples/gisid.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | 3 | gisid mpg // not an id 4 | gisid make 5 | 6 | replace make = "" in 1 7 | gisid make // should never be missing 8 | gisid make, missok 9 | 10 | * gisid can also take a range, that is 11 | gisid mpg in 1 12 | gisid mpg if _n == 1 13 | -------------------------------------------------------------------------------- /docs/examples/givregress.do: -------------------------------------------------------------------------------- 1 | * NOTE: givregress is in beta. 
To enable beta features, define 2 | * 3 | * global GTOOLS_BETA = 1 4 | 5 | * Showcase 6 | * -------- 7 | 8 | sysuse auto, clear 9 | gen _mpg = mpg 10 | qui tab headroom, gen(_h) 11 | 12 | givregress price (mpg = gear_ratio) weight turn 13 | givregress price (mpg = gear_ratio) _mpg, cluster(headroom) 14 | mata GtoolsIV.print() 15 | 16 | givregress price (mpg weight = gear_ratio turn displacement) _h*, absorb(rep78 headroom) 17 | mata GtoolsIV.print() 18 | 19 | givregress price (mpg = gear_ratio) weight [fw = rep78], absorb(headroom) 20 | mata GtoolsIV.print() 21 | 22 | givregress price (mpg = gear_ratio turn displacement) weight [aw = rep78], by(foreign) 23 | mata GtoolsIV.print() 24 | 25 | givregress price (mpg = gear_ratio turn) weight, by(foreign) mata(coefsOnly, nose) prefix(b(_b_) se(_se_)) 26 | givregress price (mpg weight = gear_ratio turn), mata(seOnly, nob) prefix(hdfe(_hdfe_)) 27 | givregress price (mpg weight = gear_ratio turn) displacement, mata(nothing, nob nose) 28 | 29 | mata coefsOnly.print() 30 | mata seOnly.print() 31 | mata nothing.print() 32 | 33 | * Basic Benchmark 34 | * --------------- 35 | 36 | clear 37 | local N 1000000 38 | local G 10000 39 | set obs `N' 40 | gen g1 = int(runiform() * `G') 41 | gen g2 = int(runiform() * `G') 42 | gen g3 = int(runiform() * `G') 43 | gen g4 = int(runiform() * `G') 44 | gen x3 = runiform() 45 | gen x4 = runiform() 46 | gen x1 = x3 + runiform() 47 | gen x2 = x4 + runiform() 48 | gen y = 0.25 * x1 - 0.75 * x2 + g1 + g2 + g3 + g4 + 20 * rnormal() 49 | 50 | timer clear 51 | timer on 1 52 | givregress y (x1 x2 = x3 x4), absorb(g1 g2 g3) mata(greg) 53 | timer off 1 54 | mata greg.print() 55 | timer on 2 56 | ivreghdfe y (x1 x2 = x3 x4), absorb(g1 g2 g3) 57 | timer off 2 58 | 59 | timer on 3 60 | givregress y (x1 x2 = x3 x4), absorb(g1 g2 g3) cluster(g4) mata(greg) 61 | timer off 3 62 | mata greg.print() 63 | timer on 4 64 | ivreghdfe y (x1 x2 = x3 x4), absorb(g1 g2 g3) cluster(g4) 65 | timer off 4 66 | 67 |
timer list 68 | 69 | * 1: 0.89 / 1 = 0.8920 70 | * 2: 17.62 / 1 = 17.6240 71 | * 3: 1.07 / 1 = 1.0670 72 | * 4: 23.17 / 1 = 23.1670 73 | -------------------------------------------------------------------------------- /docs/examples/glevelsof.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | glevelsof rep78 3 | qui glevelsof rep78, miss local(mylevs) 4 | display "`mylevs'" 5 | glevelsof rep78, sep(,) 6 | 7 | 8 | ************************************ 9 | * De-duplicating a variable list * 10 | ************************************ 11 | 12 | * `glevelsof` can store the unique levels of a varlist. This is 13 | * specially useful when the user wants to obtain the unique levels but 14 | * runs up against the stata macro variable limit. 15 | 16 | set seed 42 17 | clear 18 | set obs 100000 19 | gen x = "a long string appeared" + string(mod(_n, 10000)) 20 | gen y = int(10 * runiform()) 21 | glevelsof x 22 | glevelsof x, gen(uniq_) nolocal 23 | gisid uniq_* in 1 / `r(J)' 24 | 25 | * If the user prefers to work with mata, simply pass the option 26 | * `matasave[(name)]`. With mixed-types, numbers and strings are 27 | * stored in separate matrices as well as a single printed matrix, 28 | * but the latter can be suppressed to save memory. 29 | 30 | glevelsof x y, mata(xy) nolocal 31 | glevelsof x, mata(x) nolocal silent 32 | 33 | mata xy.desc() 34 | mata x.desc() 35 | 36 | * The user can also replace the source variables if need be. This is 37 | * faster and saves memory, but it dispenses with the original variables. 38 | 39 | glevelsof x y, gen(, replace) nolocal 40 | l in `r(J)' 41 | l in `=_N' 42 | 43 | 44 | ******************* 45 | * Number format * 46 | ******************* 47 | 48 | * `levelsof` by default shows many significant digits for numerical variables. 49 | 50 | sysuse auto, clear 51 | replace headroom = headroom + 0.1 52 | levelsof headroom 53 | glevelsof headroom 54 | 55 | * This is cumbersome. 
You can specify a number format to compress this: 56 | glevelsof headroom, numfmt(%.3g) 57 | 58 | 59 | ************************ 60 | * Multiple variables * 61 | ************************ 62 | 63 | * `glevelsof` can parse multiple variables: 64 | local varlist foreign rep78 65 | glevelsof `varlist', sep("|") colsep(", ") 66 | 67 | * If you know a bit of mata, you can parse this string! 68 | mata: 69 | string scalar function unquote_str(string scalar quoted_str) 70 | { 71 | if ( substr(quoted_str, 1, 1) == `"""' ) { 72 | quoted_str = substr(quoted_str, 2, strlen(quoted_str) - 2) 73 | } 74 | else if (substr(quoted_str, 1, 2) == "`" + `"""') { 75 | quoted_str = substr(quoted_str, 3, strlen(quoted_str) - 4) 76 | } 77 | return (quoted_str); 78 | } 79 | 80 | t = tokeninit(`"`r(sep)'"', (""), (`""""', `"`""'"'), 1) 81 | tokenset(t, `"`r(levels)'"') 82 | 83 | rows = tokengetall(t) 84 | for (i = 1; i <= cols(rows); i++) { 85 | rows[i] = unquote_str(rows[i]); 86 | } 87 | 88 | levels = J(cols(rows), `:list sizeof varlist', "") 89 | 90 | t = tokeninit(`"`r(colsep)'"', (""), (`""""', `"`""'"'), 1) 91 | for (i = 1; i <= cols(rows); i++) { 92 | tokenset(t, rows[i]) 93 | levels[i, .] = tokengetall(t) 94 | for (k = 1; k <= `:list sizeof varlist'; k++) { 95 | levels[i, k] = unquote_str(levels[i, k]) 96 | } 97 | } 98 | end 99 | 100 | mata: levels 101 | 102 | * While this looks cumbersome, this mechanism is used internally by 103 | * `gtoplevelsof` to display its results. 104 | -------------------------------------------------------------------------------- /docs/examples/gregress.do: -------------------------------------------------------------------------------- 1 | * NOTE: gregress is in beta. 
To enable beta features, define 2 | * 3 | * global GTOOLS_BETA = 1 4 | * global GTOOLS_GREGTABLE = 1 5 | 6 | * Showcase 7 | * -------- 8 | 9 | sysuse auto, clear 10 | gen _mpg = mpg 11 | qui tab headroom, gen(_h) 12 | 13 | greg price mpg 14 | greg price mpg, by(foreign) robust 15 | mata GtoolsRegress.print() 16 | 17 | greg price mpg _h* [fw = rep78] 18 | mata GtoolsRegress.print() 19 | 20 | greg price mpg _h* [fw = rep78], absorb(headroom) 21 | mata GtoolsRegress.print() 22 | 23 | greg price mpg _mpg, cluster(headroom) 24 | greg price mpg _mpg [aw = rep78], by(foreign) absorb(rep78 headroom) cluster(headroom) 25 | mata GtoolsRegress.print() 26 | 27 | greg price mpg, mata(coefsOnly, nose) 28 | greg price mpg, mata(seOnly, nob) 29 | greg price mpg, mata(nothing, nob nose) 30 | 31 | mata coefsOnly.print() 32 | mata seOnly.print() 33 | mata nothing.print() 34 | 35 | greg price mpg, prefix(b(_b_)) replace 36 | greg price mpg, prefix(se(_se_)) replace 37 | greg price mpg _mpg, absorb(rep78 headroom) prefix(b(_b_) se(_se_) hdfe(_hdfe_)) replace 38 | drop _* 39 | 40 | greg price mpg, gen(b(_b_mpg _b_cons)) 41 | greg price mpg, gen(se(_se_mpg _se_cons)) 42 | greg price mpg, absorb(rep78 headroom) gen(hdfe(_hdfe_price _hdfe_mpg)) 43 | 44 | * Basic Benchmark 45 | * --------------- 46 | 47 | clear 48 | local N 1000000 49 | local G 10000 50 | set obs `N' 51 | gen g1 = int(runiform() * `G') 52 | gen g2 = int(runiform() * `G') 53 | gen g3 = int(runiform() * `G') 54 | gen g4 = int(runiform() * `G') 55 | gen x3 = runiform() 56 | gen x4 = runiform() 57 | gen x1 = x3 + runiform() 58 | gen x2 = x4 + runiform() 59 | gen y = 0.25 * x1 - 0.75 * x2 + g1 + g2 + g3 + g4 + 20 * rnormal() 60 | 61 | timer clear 62 | timer on 1 63 | greg y x1 x2, absorb(g1 g2 g3) mata(greg) 64 | timer off 1 65 | mata greg.print() 66 | timer on 2 67 | reghdfe y x1 x2, absorb(g1 g2 g3) 68 | timer off 2 69 | 70 | timer on 3 71 | greg y x1 x2, absorb(g1 g2 g3) cluster(g4) mata(greg) 72 | timer off 3 73 |
mata greg.print() 74 | timer on 4 75 | reghdfe y x1 x2, absorb(g1 g2 g3) vce(cluster g4) 76 | timer off 4 77 | 78 | timer on 5 79 | greg y x1 x2, by(g4) prefix(b(_b_)) 80 | timer off 5 81 | drop _* 82 | timer on 6 83 | asreg y x1 x2, by(g4) 84 | timer off 6 85 | drop _* 86 | 87 | timer list 88 | 89 | * 1: 0.64 / 1 = 0.6380 90 | * 2: 11.77 / 1 = 11.7730 91 | * 3: 0.91 / 1 = 0.9140 92 | * 4: 15.74 / 1 = 15.7370 93 | * 5: 0.46 / 1 = 0.4570 94 | * 6: 2.09 / 1 = 2.0890 95 | -------------------------------------------------------------------------------- /docs/examples/gstats_hdfe.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | gstats hdfe demean_price = price, absorb(foreign) 3 | gstats hdfe hdfe_price = price, absorb(foreign rep78) 4 | assert mi(hdfe_price) if mi(rep78) 5 | gstats hdfe hdfe_price = price, absorb(foreign rep78) replace absorbmissing 6 | assert !mi(hdfe_price) 7 | 8 | gstats hdfe price mpg [aw = rep78], by(foreign) absorb(rep78 headroom) gen(v1 v2) mata 9 | mata GtoolsByLevels.desc() 10 | mata GtoolsByLevels.nj 11 | mata GtoolsByLevels.njabsorb 12 | 13 | gstats hdfe price mpg, absorb(foreign rep78) prefix(res_) 14 | gstats hdfe price mpg, absorb(foreign rep78) replace 15 | assert price == res_price if !mi(rep78) 16 | assert mpg == res_mpg if !mi(rep78) 17 | 18 | gstats hdfe price mpg, absorb(foreign make) replace 19 | assert abs(price) < 1e-8 if !mi(rep78) 20 | assert abs(price) < 1e-8 if !mi(rep78) 21 | 22 | * Basic Benchmark 23 | * --------------- 24 | 25 | clear 26 | local N 10000000 27 | set obs `N' 28 | gen g1 = int(runiform() * 10000) 29 | gen g2 = int(runiform() * 100) 30 | gen g3 = int(runiform() * 10) 31 | gen x = rnormal() 32 | 33 | timer clear 34 | timer on 1 35 | gstats hdfe x1 = x, absorb(g1 g2 g3) algorithm(squarem) bench(2) 36 | disp r(feval) 37 | timer off 1 38 | 39 | timer on 2 40 | gstats hdfe x2 = x, absorb(g1 g2 g3) algorithm(cg) bench(2) 41 | disp r(feval) 42 | timer off 
2 43 | 44 | timer on 3 45 | gstats hdfe x3 = x, absorb(g1 g2 g3) algorithm(map) bench(2) 46 | disp r(feval) 47 | timer off 3 48 | 49 | timer on 4 50 | gstats hdfe x4 = x, absorb(g1 g2 g3) algorithm(it) bench(2) 51 | disp r(feval) 52 | timer off 4 53 | 54 | timer on 5 55 | * equivalent to cg 56 | qui reghdfe x, absorb(g1 g2 g3) resid(x5) acceleration(cg) 57 | timer off 5 58 | 59 | timer on 6 60 | * equivalent to map 61 | qui reghdfe x, absorb(g1 g2 g3) resid(x6) acceleration(none) 62 | timer off 6 63 | 64 | assert reldif(x1, x2) < 1e-6 65 | assert reldif(x1, x3) < 1e-6 66 | assert reldif(x1, x4) < 1e-6 67 | assert reldif(x1, x5) < 1e-6 68 | assert reldif(x1, x6) < 1e-6 69 | 70 | timer list 71 | 72 | * 1: 2.73 / 1 = 2.7260 73 | * 2: 2.94 / 1 = 2.9430 74 | * 3: 2.46 / 1 = 2.4620 75 | * 4: 2.90 / 1 = 2.8980 76 | * 5: 41.24 / 1 = 41.2390 77 | * 6: 44.05 / 1 = 44.0450 78 | -------------------------------------------------------------------------------- /docs/examples/gstats_summarize.do: -------------------------------------------------------------------------------- 1 | ************* 2 | * Tabstat * 3 | ************* 4 | 5 | * Basic usage 6 | sysuse auto, clear 7 | gstats tab price 8 | gstats tab price, s(mean sd min max) by(foreign) 9 | gstats tab price, by(foreign rep78) 10 | 11 | * Custom printing 12 | gstats tab price mpg, s(p5 q p95 select7 select-3 gini) pretty 13 | gstats tab price mpg, s(p5 q p95 select7 select-3 gini) col(var) 14 | gstats tab price mpg, s(p5 q p95 select7 select-3 gini) col(stat) 15 | 16 | * Mata API 17 | gen strvar = "string" + string(rep78) 18 | gstats tab price mpg, by(foreign strvar) matasave 19 | 20 | mata 21 | GstatsOutput.getf(1, 1, .) 22 | GstatsOutput.getnum(., 1) 23 | GstatsOutput.getchar((2, 5, 6), .) 
24 | 25 | GstatsOutput.getOutputRow(1) 26 | GstatsOutput.getOutputCol(1) 27 | GstatsOutput.getOutputVar("price") 28 | GstatsOutput.getOutputVar("mpg") 29 | GstatsOutput.getOutputGroup(1) 30 | end 31 | 32 | mata: st_matrix("output", GstatsOutput.output) 33 | matrix list output 34 | 35 | * The mata API allows the user to compute several runs of summary 36 | * statistics and keep them in memory: 37 | 38 | gstats tab price mpg, by(foreign) noprint matasave(StatsByForeign) 39 | gstats tab price mpg, by(rep78) noprint matasave(StatsByRep) 40 | 41 | mata StatsByRep.desc() 42 | mata StatsByForeign.desc() 43 | mata StatsByForeign.printOutput() 44 | 45 | * It is also especially useful for a large number of groups 46 | 47 | clear 48 | set obs 100000 49 | gen g = mod(_n, 10000) 50 | gen x = runiform() 51 | gstats tab x, by(g) noprint matasave 52 | mata GstatsOutput.J 53 | mata GstatsOutput.getOutputGroup(13) 54 | 55 | *************** 56 | * Summarize * 57 | *************** 58 | 59 | * Basic usage 60 | sysuse auto, clear 61 | gstats sum price 62 | gstats sum price [pw = gear_ratio / 5] 63 | gstats sum price mpg, f 64 | 65 | * In the style of tabstat 66 | gstats sum price mpg, tab nod 67 | gstats sum price mpg, tab meanonly 68 | gstats sum price mpg, by(foreign) tab 69 | gstats sum price mpg, by(foreign) nod 70 | gstats sum price mpg, by(foreign) meanonly 71 | 72 | * Pool inputs 73 | gstats sum price *, nod 74 | gstats sum price *, nod pool 75 | -------------------------------------------------------------------------------- /docs/examples/gstats_winsor.do: -------------------------------------------------------------------------------- 1 | * _Note_: These examples are taken verbatim from `help winsor2`.
2 | 3 | * winsor at (p1 p99), get new variable "wage_w" 4 | sysuse nlsw88, clear 5 | gstats winsor wage 6 | 7 | * winsor 3 variables at 0.5th and 99.5th percentiles, and overwrite the 8 | * old variables 9 | 10 | gstats winsor wage age hours, cuts(0.5 99.5) replace 11 | 12 | * winsor 3 variables at (p1 p99), gen new variables with suffix _win, 13 | * and add variable labels 14 | 15 | gstats winsor wage age hours, suffix(_win) label 16 | 17 | * left-winsorizing only, at 1th percentile 18 | 19 | cap noi gstats winsor wage, cuts(1 100) 20 | gstats winsor wage, cuts(1 100) s(_w2) 21 | 22 | * right-trimming only, at 99th percentile 23 | 24 | gstats winsor wage, cuts(0 99) trim 25 | 26 | * winsor variables at (p1 p99) by (industry), overwrite the old 27 | * variables 28 | 29 | gstats winsor wage hours, replace by(industry) 30 | -------------------------------------------------------------------------------- /docs/examples/gtoplevelsof.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | 3 | gtoplevelsof rep78 4 | 5 | gtop rep78 [fw = weight] 6 | 7 | gtop rep78 [w = gear_ratio] 8 | 9 | gtop rep78, missrow 10 | 11 | gtop rep78, colsep(", ") 12 | 13 | gtop rep78, pctfmt(%7.3f) 14 | 15 | gtop mpg, numfmt(%7.3f) 16 | 17 | gtop foreign 18 | 19 | gtop foreign, colmax(3) 20 | 21 | gtop foreign, novaluelab 22 | 23 | gtop foreign rep78, ntop(4) missrow colstrmax(2) 24 | 25 | gtop foreign rep78, ntop(4) missrow groupmiss 26 | 27 | gtop foreign rep78, ntop(4) missrow groupmiss noother 28 | 29 | gtop foreign rep78, cols(<<) missrow("I am missing") matrix(lvl) 30 | matrix list lvl 31 | 32 | gtop foreign rep78, mata(lvl) ntop(3) 33 | mata lvl.desc() 34 | mata lvl.printed 35 | mata lvl.toplevels 36 | -------------------------------------------------------------------------------- /docs/examples/gunique.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | 3 | gunique * 4 
| 5 | gunique *, miss 6 | 7 | gunique make-headroom 8 | 9 | gunique rep78, d 10 | 11 | gunique rep78, by(foreign) 12 | -------------------------------------------------------------------------------- /docs/examples/hashsort.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | hashsort price 3 | hashsort +price 4 | hashsort rep78 -price 5 | hashsort make 6 | hashsort foreign -make 7 | 8 | * One thing that is useful is that hashsort can encode a set of variables and 9 | * set the encoded variable as the sorting variable: 10 | 11 | sysuse auto, clear 12 | 13 | hashsort foreign -rep78, gen(id) sortgen 14 | 15 | disp "`: sortedby'" 16 | 17 | tab id 18 | -------------------------------------------------------------------------------- /docs/helpers.js: -------------------------------------------------------------------------------- 1 | MathJax.Hub.Config({ 2 | tex2jax: { 3 | inlineMath: [ ['$','$'], ["\\(","\\)"] ], 4 | processEscapes: true 5 | } 6 | }); 7 | -------------------------------------------------------------------------------- /docs/stata/gstats.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *! version 0.4.0 09Jun2019}{...} 3 | {viewerdialog gstats "dialog gstats"}{...} 4 | {vieweralsosee "[R] gstats" "mansection R gstats"}{...} 5 | {viewerjumpto "Syntax" "gstats##syntax"}{...} 6 | {viewerjumpto "Description" "gstats##description"}{...} 7 | {title:Title} 8 | 9 | {p2colset 5 15 23 2}{...} 10 | {p2col :{cmd:gstats} {hline 2}} Various statistical functions and transformations. {p_end} 11 | {p2colreset}{...} 12 | 13 | {pstd} 14 | {it:Important}: Please run {stata gtools, upgrade} to update {cmd:gtools} to 15 | the latest stable version.
16 | 17 | {marker syntax}{...} 18 | {title:Syntax} 19 | 20 | {p 8 17 2} 21 | {cmd:gstats} 22 | {it:subcommand} 23 | {varlist} 24 | {ifin} 25 | [{it:{help gstats##weight:weight}}] 26 | [{cmd:,} {opth by(varlist)} {it:{help gstats##table_options:subcommand_options}}] 27 | 28 | {phang} 29 | {opt gstats} is a wrapper for various statistical functions and 30 | transformations, including: 31 | 32 | {p 8 17 2} 33 | {help gstats hdfe:{bf:hdfe}} 34 | (alias {help gstats hdfe:{bf:residualize}}) is a fast utility for residualizing variables (i.e. HDFE transform; accepts weights). {p_end} 35 | 36 | {p 8 17 2} 37 | {help gstats winsor:{bf:winsor}} 38 | as a fast {opt winsor2} alternative (accepts weights). {p_end} 39 | 40 | {p 8 17 2} 41 | {help gstats summarize:{bf:{ul:sum}marize}} and 42 | {help gstats summarize:{bf:{ul:tab}stat}} are fast, 43 | by-able alternatives to {opt summarize, detail} and {opt tabstat} (accept weights). {p_end} 44 | 45 | {p 8 17 2} 46 | {help gstats transform:{bf:transform}} 47 | to apply various statistical transformations (accepts weights). {p_end} 48 | 49 | {marker description}{...} 50 | {title:Description} 51 | 52 | {pstd} 53 | {opt gstats} is a wrapper to several statistical functions and 54 | transformations. In theory {opt gegen} would be the place to expand 55 | {opt gtools}; however, {opt gegen}'s internally implemented functions 56 | were written with two assumptions: first, the output is unique at the 57 | group level; second, there is always a target variable. {opt gstats} 58 | is written to be more flexible and allow arbitrary functions and 59 | transformations. 60 | 61 | {pstd} 62 | Weights are supported for the following subcommands: {it:winsor}, {it:summarize}, {it:tabstat}, {it:residualize}.
63 | 64 | {marker author}{...} 65 | {title:Author} 66 | 67 | {pstd}Mauricio Caceres{p_end} 68 | {pstd}{browse "mailto:mauricio.caceres.bravo@gmail.com":mauricio.caceres.bravo@gmail.com }{p_end} 69 | {pstd}{browse "https://mcaceresb.github.io":mcaceresb.github.io}{p_end} 70 | 71 | {title:Website} 72 | 73 | {pstd}{cmd:gstats} is maintained as part of the {manhelp gtools R:gtools} project at {browse "https://github.com/mcaceresb/stata-gtools":github.com/mcaceresb/stata-gtools}{p_end} 74 | 75 | {marker acknowledgment}{...} 76 | {title:Acknowledgment} 77 | 78 | {pstd} 79 | {opt gtools} was largely inspired by Sergio Correia's {it:ftools}: 80 | {browse "https://github.com/sergiocorreia/ftools"}. 81 | {p_end} 82 | 83 | {pstd} 84 | The OSX version of gtools was implemented with invaluable help from @fbelotti; 85 | see {browse "https://github.com/mcaceresb/stata-gtools/issues/11"}. 86 | {p_end} 87 | 88 | {title:Also see} 89 | 90 | {p 4 13 2} 91 | help for 92 | {help gtools} 93 | -------------------------------------------------------------------------------- /docs/usage/gisid.md: -------------------------------------------------------------------------------- 1 | gisid 2 | ===== 3 | 4 | Efficiently check for unique identifiers using C plugins. This is a fast 5 | option to Stata's isid. It checks whether a set of variables uniquely 6 | identifies observations in a dataset. It can additionally take `if` and 7 | `in` but it cannot check an external data set or sort the data. 8 | 9 | !!! tip "Important" 10 | Run `gtools, upgrade` to update `gtools` to the latest stable version. 11 | 12 | Syntax 13 | ------ 14 | 15 |

gisid varlist [if] [in] [, missok ]

16 | 17 | Options 18 | ------- 19 | 20 | missok indicates that missing values are permitted in varlist. 21 | 22 | ### Gtools options 23 | 24 | (Note: These are common to every gtools command.) 25 | 26 | - `compress` Try to compress strL to str#. The Stata Plugin Interface has 27 | only limited support for strL variables. In Stata 13 and 28 | earlier (version 2.0) there is no support, and in Stata 14 29 | and later (version 3.0) there is read-only support. The user 30 | can try to compress strL variables using this option. 31 | 32 | - `forcestrl` Skip binary variable check and force gtools to read strL variables 33 | (14 and above only). __Gtools gives incorrect results when there is 34 | binary data in strL variables__. This option was included because on 35 | some windows systems Stata detects binary data even when there is none. 36 | Only use this option if you are sure you do not have binary data in your 37 | strL variables. 38 | 39 | - `verbose` prints some useful debugging info to the console. 40 | 41 | - `benchmark` or `bench(level)` prints how long in seconds various parts of the 42 | program take to execute. Level 1 is the same as `benchmark`. Levels 43 | 2 and 3 additionally print benchmarks for internal plugin steps. 44 | 45 | - `hashmethod(str)` Hash method to use. `default` automagically chooses the 46 | algorithm. `biject` tries to biject the inputs into the 47 | natural numbers. `spooky` hashes the data and then uses the 48 | hash. 49 | 50 | - `oncollision(str)` How to handle collisions. A collision should never happen 51 | but just in case it does `gtools` will try to use native 52 | commands. The user can specify that it throw an error instead by 53 | passing `oncollision(error)`. 54 | 55 | Examples 56 | -------- 57 | 58 | You can download the raw code for the examples below 59 | [here](https://raw.githubusercontent.com/mcaceresb/stata-gtools/master/docs/examples/gisid.do) 60 | 61 | ```stata 62 | . 
sysuse auto, clear 63 | (1978 Automobile Data) 64 | 65 | . gisid mpg 66 | variable mpg does not uniquely identify the observations 67 | r(459); 68 | 69 | . gisid make 70 | 71 | . replace make = "" in 1 72 | (1 real change made) 73 | 74 | . gisid make 75 | variable make should never be missing 76 | r(459); 77 | 78 | . gisid make, missok 79 | ``` 80 | 81 | gisid can also take a range, that is 82 | ``` 83 | . gisid mpg in 1 84 | . gisid mpg if _n == 1 85 | ``` 86 | -------------------------------------------------------------------------------- /docs/usage/gtools.md: -------------------------------------------------------------------------------- 1 | gtools 2 | ====== 3 | 4 | The gtools command is merely a wrapper for some high-level operations to 5 | do with package maintenance. See any of the commands below for details 6 | on how to use the programs provided by this package, or the 7 | [introduction](index) for an overview of the package and available 8 | commands. 9 | 10 | Succinctly, gtools is a Stata package that provides a fast implementation 11 | of common group commands like collapse, egen, isid, levelsof, contract, 12 | distinct, and so on using C plugins for a massive speed improvement. 13 | 14 | This program helps the user manage their gtools installation. 15 | 16 | !!! tip "Important" 17 | Run `gtools, upgrade` to update `gtools` to the latest stable version. 18 | 19 | Syntax 20 | ------ 21 | 22 | ```stata 23 | gtools [, options] 24 | ``` 25 | 26 | See `gtools, examples` for examples of how to use available gtools functions. 27 | 28 | Options 29 | ------- 30 | 31 | - `upgrade` (alias `install_latest`) Upgrades gtools to the latest github version (default is master). 32 | 33 | - `licenses` Prints the open source projects used in `gtools` 34 | 35 | - `verbose` With `licenses`, prints the licenses of the open source projects used in `gtools` 36 | 37 | - `examples` (alias `showcase`) Print examples of how to use available gtools functions. 
38 | 39 | - `test[(str)]` Run unit tests, optionally specifying which tests to run. Tests 40 | available are: `dependencies`, `basic_checks`, `bench_test`, 41 | `comparisons`, `switches`, `bench_full`. A good set of "small" tests 42 | which take 10-20 minutes are `dependencies basic_checks bench_test`. By 43 | default, however, the first 5 tests are run, which take 1-3h. The bulk 44 | of that time is from `comparisons`, which compares the results from 45 | gtools to that of various native counterparts under several different 46 | conditions. `bench_full` is not run by default because this benchmarks 47 | gtools against stata using modestly-sized data (millions). Some stata 48 | commands are very slow under some of the benchmarks, meaning this can 49 | take well over a day. 50 | 51 | - `branch(str)` Github branch to use (default is master). 52 | -------------------------------------------------------------------------------- /lib/id_rsa_travis.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/id_rsa_travis.enc -------------------------------------------------------------------------------- /lib/material.json: -------------------------------------------------------------------------------- 1 | ../docs/benchmarks/material.json -------------------------------------------------------------------------------- /lib/plugin/gtools_macosx_v2.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/gtools_macosx_v2.plugin -------------------------------------------------------------------------------- /lib/plugin/gtools_macosx_v3.plugin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/gtools_macosx_v3.plugin -------------------------------------------------------------------------------- /lib/plugin/gtools_unix_v2.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/gtools_unix_v2.plugin -------------------------------------------------------------------------------- /lib/plugin/gtools_unix_v3.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/gtools_unix_v3.plugin -------------------------------------------------------------------------------- /lib/plugin/gtools_windows_v2.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/gtools_windows_v2.plugin -------------------------------------------------------------------------------- /lib/plugin/gtools_windows_v3.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/gtools_windows_v3.plugin -------------------------------------------------------------------------------- /lib/plugin/lgtools.mlib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/lgtools.mlib -------------------------------------------------------------------------------- /lib/spi-2.0/stplugin.c: -------------------------------------------------------------------------------- 1 | /* 2 | stplugin.c, version 2.0 3 | 
copyright (c) 2003, 2006 StataCorp 4 | */ 5 | 6 | #include "stplugin.h" 7 | 8 | ST_plugin *_stata_ ; 9 | 10 | STDLL pginit(ST_plugin *p) 11 | { 12 | _stata_ = p ; 13 | return(SD_PLUGINVER) ; 14 | } 15 | -------------------------------------------------------------------------------- /lib/spi-3.0/stplugin.c: -------------------------------------------------------------------------------- 1 | /* 2 | stplugin.c, version 3.0 3 | copyright (c) 2003, 2006, 2015 StataCorp LP 4 | */ 5 | 6 | #include "stplugin.h" 7 | 8 | ST_plugin *_stata_ ; 9 | 10 | STDLL pginit(ST_plugin *p) 11 | { 12 | _stata_ = p ; 13 | return(SD_PLUGINVER) ; 14 | } 15 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | # Project information 2 | site_name: 'Gtools' 3 | site_description: 'Faster Stata for Big Data' 4 | 5 | pages: 6 | - Home: index.md 7 | - FAQs: faqs.md 8 | - Benchmarks: benchmarks.md 9 | - Compiling: compiling.md 10 | - Usage: 11 | - gtools: usage/gtools.md 12 | - Data manipulation: 13 | - gcollapse: usage/gcollapse.md 14 | - greshape: usage/greshape.md 15 | - gcontract: usage/gcontract.md 16 | - gisid: usage/gisid.md 17 | - glevelsof: usage/glevelsof.md 18 | - gtop: usage/gtoplevelsof.md 19 | - hashsort: usage/hashsort.md 20 | - Statistics: 21 | - gegen: usage/gegen.md 22 | - gquantiles: usage/gquantiles.md 23 | - gstats residualize: usage/gstats_hdfe.md 24 | - gstats winsor: usage/gstats_winsor.md 25 | - gstats sum/tab: usage/gstats_summarize.md 26 | - gstats transform: usage/gstats_transform.md 27 | - gdistinct: usage/gdistinct.md 28 | - gunique: usage/gunique.md 29 | - gduplicates: usage/gduplicates.md 30 | - Regression models: 31 | - gregress: usage/gregress.md 32 | - givregress: usage/givregress.md 33 | - gglm: usage/gglm.md 34 | 35 | theme: readthedocs 36 | 37 | # name: 'material' 38 | # primary: 'Teal' 39 | # accent: 'Teal' 40 | 41 | extra_javascript: 
42 | - https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML 43 | - helpers.js 44 | 45 | markdown_extensions: 46 | - meta 47 | - toc 48 | - tables 49 | - fenced_code 50 | - admonition 51 | - codehilite: 52 | guess_lang: false 53 | 54 | # use_pygments: True 55 | # noclasses: True 56 | # pygments_style: monokai 57 | 58 | use_directory_urls: false 59 | 60 | extra_css: 61 | - css/extra-rtd.css 62 | 63 | # extra_css: 64 | # - css/extra-material.css 65 | # - css/extra-material-dark.css 66 | 67 | # Repository 68 | # repo_name: 'mcaceresb/stata-gtools' 69 | # repo_url: 'https://github.com/mcaceresb/stata-gtools' 70 | # edit_uri: 'blob/master/docs/' 71 | # site_url: 'https://gtools.readthedocs.io/en/latest/' 72 | -------------------------------------------------------------------------------- /src/ado/gglm.ado: -------------------------------------------------------------------------------- 1 | *! version 1.11.1 03Apr2023 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! Estimate GLM via IRLS by group and with HDFE 3 | 4 | cap program drop gglm 5 | program gglm, rclass 6 | version 13.1 7 | 8 | local 00: copy local 0 9 | if ( strpos(`"`0'"', ",") > 0 ) { 10 | local comma 11 | } 12 | else { 13 | local comma , 14 | } 15 | gregress `0' `comma' glm 16 | if ( ${GREG_RC} ) { 17 | global GREG_RC 18 | exit 0 19 | } 20 | local 0: copy local 00 21 | 22 | return local levels `"`r(levels)'"' 23 | return scalar N = r(N) 24 | return scalar J = r(J) 25 | return scalar minJ = r(minJ) 26 | return scalar maxJ = r(maxJ) 27 | end 28 | 29 | -------------------------------------------------------------------------------- /src/ado/gisid.ado: -------------------------------------------------------------------------------- 1 | *! version 1.1.1 23Jan2019 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! 
-isid- implementation using C for faster processing 3 | 4 | capture program drop gisid 5 | program gisid 6 | version 13.1 7 | * Record the calling command in a global before parsing the arguments 8 | global GTOOLS_CALLER gisid 9 | syntax varlist /// Variables to check 10 | [if] [in] , /// [if condition] [in start / end] 11 | [ /// 12 | Missok /// Missing values in varlist are OK 13 | compress /// Try to compress strL variables 14 | forcestrl /// Force reading strL variables (stata 14 and above only) 15 | Verbose /// Print info during function execution 16 | _keepgreshape /// (Undocumented) Keep greshape scalars 17 | _CTOLerance(passthru) /// (Undocumented) Counting sort tolerance; default is radix 18 | BENCHmark /// Benchmark function 19 | BENCHmarklevel(int 0) /// Benchmark various steps of the plugin 20 | HASHmethod(passthru) /// Hashing method: 0 (default), 1 (biject), 2 (spooky) 21 | oncollision(passthru) /// error|fallback: On collision, use native command or throw error 22 | debug(passthru) /// Print debugging info to console 23 | /// 24 | /// Unsupported isid options 25 | /// ------------------------ 26 | Sort /// 27 | ] 28 | * Any positive benchmarklevel() also switches -benchmark- on 29 | if ( `benchmarklevel' > 0 ) local benchmark benchmark 30 | local benchmarklevel benchmarklevel(`benchmarklevel') 31 | * -sort- is accepted by the parser only so it can be rejected with a clear message 32 | if ( "`sort'" != "" ) { 33 | di as err "Option -sort- is not implemented" 34 | exit 198 35 | } 36 | * Without -missok- the internals are passed exitmissing; with it they are passed missing 37 | if ( "`missok'" == "" ) { 38 | local miss exitmissing 39 | } 40 | else { 41 | local miss missing 42 | } 43 | * Collect every option and run the check through _gtools_internal, keeping its return code 44 | local opts `miss' `compress' `forcestrl' `_ctolerance' `_keepgreshape' 45 | local opts `opts' `verbose' `benchmark' `benchmarklevel' 46 | local opts `opts' `oncollision' `hashmethod' `debug' 47 | cap noi _gtools_internal `varlist' `if' `in', unsorted `opts' gfunction(isid) 48 | local rc = _rc 49 | global GTOOLS_CALLER "" 50 | * rc 17999: re-run with native -isid-; rc 17001: report no observations; other non-zero rc is passed on 51 | if ( `rc' == 17999 ) { 52 | isid `varlist' `if' `in', `missok' 53 | exit 0 54 | } 55 | else if ( `rc' == 17001 ) { 56 | di as txt "(no observations)" 57 | exit 0 58 | } 59 | else if ( `rc' ) exit `rc' 60 | end 61 | 
-------------------------------------------------------------------------------- /src/ado/givregress.ado: -------------------------------------------------------------------------------- 1 | *! version 1.11.1 03Apr2023 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! Estimate IV regression via 2SLS by group and with HDFE 3 | 4 | cap program drop givregress 5 | program givregress, rclass 6 | version 13.1 7 | * NOTE(review): strpos() matches a comma anywhere in the command line, not only the options comma; confirm that is intended 8 | local 00: copy local 0 9 | if ( strpos(`"`0'"', ",") > 0 ) { 10 | local comma 11 | } 12 | else { 13 | local comma , 14 | } 15 | gregress `0' `comma' ivregress 16 | if ( ${GREG_RC} ) { 17 | global GREG_RC 18 | exit 0 19 | } 20 | local 0: copy local 00 21 | * Copy the r() results available after the gregress call into the return values 22 | return local levels `"`r(levels)'"' 23 | return scalar N = r(N) 24 | return scalar J = r(J) 25 | return scalar minJ = r(minJ) 26 | return scalar maxJ = r(maxJ) 27 | end 28 | -------------------------------------------------------------------------------- /src/ado/gpoisson.ado: -------------------------------------------------------------------------------- 1 | *! version 1.11.1 03Apr2023 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! Estimate poisson regression via IRLS by group and with HDFE 3 | 4 | cap program drop gpoisson 5 | program gpoisson, rclass 6 | version 13.1 7 | * NOTE(review): strpos() matches a comma anywhere in the command line, not only the options comma; confirm that is intended 8 | local 00: copy local 0 9 | if ( strpos(`"`0'"', ",") > 0 ) { 10 | local comma 11 | } 12 | else { 13 | local comma , 14 | } 15 | gregress `0' `comma' glm family(poisson) 16 | if ( ${GREG_RC} ) { 17 | global GREG_RC 18 | exit 0 19 | } 20 | local 0: copy local 00 21 | * Copy the r() results available after the gregress call into the return values 22 | return local levels `"`r(levels)'"' 23 | return scalar N = r(N) 24 | return scalar J = r(J) 25 | return scalar minJ = r(minJ) 26 | return scalar maxJ = r(maxJ) 27 | end 28 | -------------------------------------------------------------------------------- /src/ado/greg.ado: -------------------------------------------------------------------------------- 1 | *! 
version 1.11.8 28Jun2024 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! Estimate linear regression via OLS by group and with HDFE 3 | 4 | cap program drop greg 5 | program greg, rclass 6 | version 13.1 7 | * Keep a verbatim copy of the command line, run gregress on it, then restore it 8 | local 00: copy local 0 9 | gregress `0' 10 | if ( ${GREG_RC} ) { 11 | global GREG_RC 12 | exit 0 13 | } 14 | local 0: copy local 00 15 | * Copy the r() results available after the gregress call into the return values 16 | return local cmd `"`r(cmd)'"' 17 | return local mata `"`r(mata)'"' 18 | return scalar N = r(N) 19 | return scalar J = r(J) 20 | return scalar minJ = r(minJ) 21 | return scalar maxJ = r(maxJ) 22 | end 23 | -------------------------------------------------------------------------------- /src/ado/gtop.ado: -------------------------------------------------------------------------------- 1 | *! version 1.2.0 23Mar2019 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! Calculate the top groups by count of a varlist (jointly). 3 | 4 | cap program drop gtop 5 | program gtop, rclass 6 | version 13.1 7 | * Keep a verbatim copy of the command line (copy local avoids re-parsing it), run gtoplevelsof, then restore it 8 | local 00: copy local 0 9 | gtoplevelsof `0' 10 | if ( ${GTOP_RC} ) { 11 | global GTOP_RC 12 | exit 0 13 | } 14 | local 0: copy local 00 15 | * Re-parse the restored command line only to pick up the local() and matrix() options 16 | qui syntax [anything] [if] [in] [aw fw pw], [LOCal(str) MATrix(str) *] 17 | if ( "`local'" != "" ) c_local `local' `"`r(levels)'"' 18 | if ( "`matrix'" != "" ) matrix `matrix' = r(toplevels) 19 | return local levels `"`r(levels)'"' 20 | return scalar N = r(N) 21 | return scalar J = r(J) 22 | return scalar minJ = r(minJ) 23 | return scalar maxJ = r(maxJ) 24 | return scalar alpha = r(alpha) 25 | return scalar ntop = r(ntop) 26 | return scalar nrows = r(nrows) 27 | * Return the toplevels matrix unless r(matalevels) is set, in which case return r(matalevels) instead 28 | if ( `"`r(matalevels)'"' == "" ) { 29 | tempname gmat 30 | matrix `gmat' = r(toplevels) 31 | return matrix toplevels = `gmat' 32 | } 33 | else { 34 | return local matalevels = `"`r(matalevels)'"' 35 | } 36 | end 37 | -------------------------------------------------------------------------------- /src/ado/hashsort.ado: -------------------------------------------------------------------------------- 1 | *! 
version 1.0.1 23Jan2019 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 2 | *! Hash-based implementation of -sort- and -gsort- using C-plugins 3 | 4 | capture program drop hashsort 5 | program define hashsort 6 | version 13.1 7 | * Record the calling command in a global before parsing the arguments 8 | global GTOOLS_CALLER hashsort 9 | syntax anything, /// Variables to sort by: [+|-]varname [[+|-]varname ...] 10 | [ /// 11 | GENerate(passthru) /// Generate variable with sort order 12 | replace /// Replace generated variable, if it exists 13 | sortgen /// Sort by generated variable, if applicable 14 | skipcheck /// Turn off internal is sorted check 15 | /// 16 | compress /// Try to compress strL variables 17 | forcestrl /// Force reading strL variables (stata 14 and above only) 18 | Verbose /// Print info during function execution 19 | _CTOLerance(passthru) /// (Undocumented) Counting sort tolerance; default is radix 20 | BENCHmark /// Benchmark function 21 | BENCHmarklevel(int 0) /// Benchmark various steps of the plugin 22 | HASHmethod(passthru) /// Hashing method: 0 (default), 1 (biject), 2 (spooky) 23 | oncollision(passthru) /// error|fallback: On collision, use native command or throw error 24 | debug(passthru) /// Print debugging info to console 25 | /// 26 | tag(passthru) /// 27 | counts(passthru) /// 28 | fill(passthru) /// 29 | invertinmata /// 30 | /// 31 | /// Unsupported sort options 32 | /// ------------------------ 33 | /// 34 | stable /// Hashsort is always stable 35 | mlast /// 36 | Mfirst /// 37 | ] 38 | * Any positive benchmarklevel() also switches -benchmark- on 39 | if ( `benchmarklevel' > 0 ) local benchmark benchmark 40 | local benchmarklevel benchmarklevel(`benchmarklevel') 41 | 42 | if ( "`stable'" != "" ) { 43 | di as txt "hashsort is always -stable-" 44 | } 45 | 46 | * mfirst and mlast are mutually exclusive: stop with a syntax error instead of sorting anyway 47 | if ( ("`mfirst'" != "") & ("`mlast'" != "") ) { 48 | di as err "Cannot request both {opt mfirst} and {opt mlast}" 49 | exit 198 50 | } 51 | * mfirst is set by default, unlike gsort 52 | if ( ("`mfirst'" == "") & ("`mlast'" == "") & (strpos("`anything'", "-") > 
0) ) { 53 | di as txt "(note: missing values will be sorted first)" 54 | } 55 | 56 | * mfirst is set by default 57 | if ( ("`mfirst'" == "") & ("`mlast'" == "") ) { 58 | local mfirst mfirst 59 | } 60 | 61 | if ( "`generate'" != "" ) local skipcheck skipcheck 62 | 63 | local opts `compress' `forcestrl' nods 64 | local opts `opts' `verbose' `benchmark' `benchmarklevel' `_ctolerance' 65 | local opts `opts' `oncollision' `hashmethod' `debug' 66 | local eopts `invertinmata' `sortgen' `skipcheck' 67 | local gopts `generate' `tag' `counts' `fill' `replace' `mlast' 68 | cap noi _gtools_internal `anything', missing `opts' `gopts' `eopts' gfunction(sort) 69 | global GTOOLS_CALLER "" 70 | local rc = _rc 71 | 72 | if ( `rc' == 17999 ) { 73 | if regexm("`anything'", "[\+\-]") { 74 | gsort `anything', `generate' `mfirst' 75 | exit 0 76 | } 77 | else { 78 | sort `anything' 79 | exit 0 80 | } 81 | } 82 | else if ( `rc' == 17001 ) { 83 | exit 0 84 | } 85 | else if ( `rc' ) exit `rc' 86 | end 87 | -------------------------------------------------------------------------------- /src/github-issues/30b/replicate.do: -------------------------------------------------------------------------------- 1 | Stata's `altdef` formula in `pctile` gives the wrong result for certain numbers in IC and SE (this will also affect `xtile` once the bug with `altdef` there is fixed). 2 | 3 | clear 4 | set obs 89750 5 | gen double x = 7.2439548890446011 6 | 7 | pctile fp = x, nq(500) altdef 8 | pctile double dp = x, nq(500) altdef 9 | _pctile x, nq(500) altdef 10 | 11 | assert (x[1] == fp) | mi(fp) 12 | assert (x[1] == dp) | mi(dp) 13 | 14 | The above assertions should be true, or at least the second one, but both fail. (Note that in Stata/MP, the second assertion goes through; at least that was the case for me in testing). We can see that 15 | 16 | . levelsof fp 17 | 7.243954658508301 18 | 19 | . 
levelsof dp 20 | 7.2439548890446 7.243954889044601 7.243954889044602 21 | 22 | This happens because `altdef` takes an average. The formula is: 23 | 24 | scalar perc = 100 * 148 / 500 25 | scalar ith = (_N + 1) * perc / 100 26 | scalar i = floor(ith) 27 | scalar h = ith - i 28 | scalar q = (1 - h) * x[i] + h * x[i + 1] 29 | 30 | assert x[i] == x[i - 1] 31 | assert q == dp[148] 32 | assert q == x[i] 33 | 34 | The first two assertions succeeded but the third fails. Stata's `pctile` fails to recognize that `x[i]` is equal to `x[i - 1]`. 35 | 36 | (Note: Naturally my actual use case involved a variable that had different values, but one of them was `7.2439548890446011` and that caused the problem.) 37 | -------------------------------------------------------------------------------- /src/github-issues/35/Makefile: -------------------------------------------------------------------------------- 1 | # GCC = x86_64-w64-mingw32-gcc-5.4.0.exe 2 | GCC = x86_64-w64-mingw32-gcc.exe 3 | FLAGS = -Wall -shared 4 | SPOOKY = -L./ -l:spookyhash.dll 5 | 6 | all: clean test1 test2 7 | 8 | test1: test1.c stplugin.c 9 | $(GCC) $(FLAGS) -o test1.plugin stplugin.c test1.c 10 | 11 | test2: test2.c stplugin.c 12 | $(GCC) $(FLAGS) -o test2.plugin stplugin.c test2.c $(SPOOKY) 13 | 14 | .PHONY: clean 15 | clean: 16 | rm -f test1.plugin 17 | rm -f test2.plugin 18 | -------------------------------------------------------------------------------- /src/github-issues/35/spookyhash.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/35/spookyhash.dll -------------------------------------------------------------------------------- /src/github-issues/35/stplugin.c: -------------------------------------------------------------------------------- 1 | /* 2 | stplugin.c, version 2.0 3 | copyright (c) 2003, 2006 StataCorp 4 | */ 5 | 6 | #include "stplugin.h" 7 
| 8 | ST_plugin *_stata_ ; 9 | 10 | STDLL pginit(ST_plugin *p) 11 | { 12 | _stata_ = p ; 13 | return(SD_PLUGINVER) ; 14 | } 15 | -------------------------------------------------------------------------------- /src/github-issues/35/test.do: -------------------------------------------------------------------------------- 1 | program test1, plugin using(test1.plugin) 2 | plugin call test1 3 | 4 | program test2, plugin using(test2.plugin) 5 | plugin call test2 6 | -------------------------------------------------------------------------------- /src/github-issues/35/test1.c: -------------------------------------------------------------------------------- 1 | #include "stplugin.h" 2 | 3 | int main() 4 | { 5 | return(0); 6 | } 7 | 8 | int WinMain() 9 | { 10 | return(0); 11 | } 12 | 13 | STDLL stata_call(int argc, char *argv[]) 14 | { 15 | SF_display("Hello World\n") ; 16 | return(0) ; 17 | } 18 | 19 | /* 20 | 21 | cd /home/mauricio/code/stata-gtools/src/github-issues/35/ 22 | !gcc -Wall -shared -fPIC -DSYSTEM=OPUNIX -o test1.plugin stplugin.c test1.c 23 | capture program drop test1 24 | program test1, plugin using(test1.plugin) 25 | plugin call test1 26 | 27 | */ 28 | -------------------------------------------------------------------------------- /src/github-issues/35/test1.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/35/test1.plugin -------------------------------------------------------------------------------- /src/github-issues/35/test2.c: -------------------------------------------------------------------------------- 1 | #include "stplugin.h" 2 | #include "spookyhash_api.h" 3 | 4 | #include <stdlib.h> 5 | #include <stdio.h> 6 | #include <string.h> 7 | #include <stdint.h> 8 | #include <inttypes.h> 9 | 10 | int main() 11 | { 12 | return(0); 13 | } 14 | 15 | int WinMain() 16 | { 17 | return(0); 18 | } 19 | 20 | /* Plugin entry point: displays a formatted double, then the two 64-bit values produced by spookyhash_128 for it. */ 21 | STDLL stata_call(int argc, char *argv[]) 22 | { 23 | 
char * buffer = malloc(1024 * sizeof(char)); 24 | char * string = strdup("foo"); 25 | 26 | /* calloc(1, ...) allocates exactly one element, so the value belongs at index 0 */ 27 | ST_double * number = calloc(1, sizeof(ST_double)); 28 | number[0] = 1729.42; 29 | 30 | sprintf (buffer, "%s: %9.2f\n", string, *number); 31 | SF_display (buffer); 32 | 33 | /* zero-initialized in case spookyhash_128 reads these as seeds -- TODO confirm against spookyhash_api.h */ 34 | uint64_t h1 = 0, h2 = 0; 35 | spookyhash_128(number, sizeof(ST_double), &h1, &h2); 36 | 37 | sprintf (buffer, "hash: %"PRIu64", %"PRIu64"\n", h1, h2); 38 | SF_display (buffer); 39 | 40 | /* release every allocation made above, not just the display buffer */ 41 | free (number); 42 | free (string); 43 | free (buffer); 44 | return(0) ; 45 | } 46 | 47 | // gcc -Wall -O3 -o test2.plugin stplugin.c test2.c 48 | -------------------------------------------------------------------------------- /src/github-issues/35/test2.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/35/test2.plugin -------------------------------------------------------------------------------- /src/github-issues/40/gtools-1.3.3.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/40/gtools-1.3.3.zip -------------------------------------------------------------------------------- /src/github-issues/40/plugin.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/40/plugin.zip -------------------------------------------------------------------------------- /src/github-issues/40/plugin/Makefile: -------------------------------------------------------------------------------- 1 | GCC = gcc 2 | FLAGS = -Wall -shared -fPIC -DSYSTEM=OPUNIX 3 | SPOOKY = -L./ -l:libspookyhash.a 4 | 5 | all: clean test1 test2 6 | 7 | test1: test1.c stplugin.c 8 | $(GCC) $(FLAGS) -o test1.plugin stplugin.c test1.c 9 | 10 | test2: test2.c stplugin.c 
11 | $(GCC) $(FLAGS) -o test2.plugin stplugin.c test2.c $(SPOOKY) 12 | 13 | .PHONY: clean 14 | clean: 15 | rm -f test1.plugin 16 | rm -f test2.plugin 17 | -------------------------------------------------------------------------------- /src/github-issues/40/plugin/libspookyhash.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/40/plugin/libspookyhash.a -------------------------------------------------------------------------------- /src/github-issues/40/plugin/stplugin.c: -------------------------------------------------------------------------------- 1 | /* 2 | stplugin.c, version 2.0 3 | copyright (c) 2003, 2006 StataCorp 4 | */ 5 | 6 | #include "stplugin.h" 7 | 8 | ST_plugin *_stata_ ; 9 | 10 | STDLL pginit(ST_plugin *p) 11 | { 12 | _stata_ = p ; 13 | return(SD_PLUGINVER) ; 14 | } 15 | -------------------------------------------------------------------------------- /src/github-issues/40/plugin/test.do: -------------------------------------------------------------------------------- 1 | clear all 2 | program test1, plugin using(test1.plugin) 3 | plugin call test1 4 | syntax, [foo(cilevel)] 5 | disp "`foo'" 6 | 7 | program test2, plugin using(test2.plugin) 8 | plugin call test2 9 | syntax, [foo(cilevel)] 10 | disp "`foo'" 11 | 12 | set obs 1 13 | global GTOOLS_CALLER ghash 14 | _gtools_internal 15 | syntax, [foo(cilevel)] 16 | disp "`foo'" 17 | -------------------------------------------------------------------------------- /src/github-issues/40/plugin/test1.c: -------------------------------------------------------------------------------- 1 | #include "stplugin.h" 2 | 3 | int main() 4 | { 5 | return(0); 6 | } 7 | 8 | int WinMain() 9 | { 10 | return(0); 11 | } 12 | 13 | STDLL stata_call(int argc, char *argv[]) 14 | { 15 | SF_display("Hello World\n") ; 16 | return(0) ; 17 | } 18 | 19 | /* 20 | 21 | cd 
/home/mauricio/code/stata-gtools/src/github-issues/35/ 22 | !gcc -Wall -shared -fPIC -DSYSTEM=OPUNIX -o test1.plugin stplugin.c test1.c 23 | capture program drop test1 24 | program test1, plugin using(test1.plugin) 25 | plugin call test1 26 | 27 | */ 28 | -------------------------------------------------------------------------------- /src/github-issues/40/plugin/test1.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/40/plugin/test1.plugin -------------------------------------------------------------------------------- /src/github-issues/40/plugin/test2.c: -------------------------------------------------------------------------------- 1 | #include "stplugin.h" 2 | #include "spookyhash_api.h" 3 | 4 | #include <stdlib.h> 5 | #include <stdio.h> 6 | #include <string.h> 7 | #include <stdint.h> 8 | #include <inttypes.h> 9 | 10 | int main() 11 | { 12 | return(0); 13 | } 14 | 15 | int WinMain() 16 | { 17 | return(0); 18 | } 19 | 20 | /* Plugin entry point: displays a formatted double, then the two 64-bit values produced by spookyhash_128 for it. */ 21 | STDLL stata_call(int argc, char *argv[]) 22 | { 23 | char * buffer = malloc(1024 * sizeof(char)); 24 | char * string = strdup("foo"); 25 | 26 | /* calloc(1, ...) allocates exactly one element, so the value belongs at index 0 */ 27 | ST_double * number = calloc(1, sizeof(ST_double)); 28 | number[0] = 1729.42; 29 | 30 | sprintf (buffer, "%s: %9.2f\n", string, *number); 31 | SF_display (buffer); 32 | 33 | /* zero-initialized in case spookyhash_128 reads these as seeds -- TODO confirm against spookyhash_api.h */ 34 | uint64_t h1 = 0, h2 = 0; 35 | spookyhash_128(number, sizeof(ST_double), &h1, &h2); 36 | 37 | sprintf (buffer, "hash: %"PRIu64", %"PRIu64"\n", h1, h2); 38 | SF_display (buffer); 39 | 40 | /* release every allocation made above, not just the display buffer */ 41 | free (number); 42 | free (string); 43 | free (buffer); 44 | return(0) ; 45 | } 46 | 47 | // gcc -Wall -O3 -o test2.plugin stplugin.c test2.c 48 | -------------------------------------------------------------------------------- /src/github-issues/40/plugin/test2.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/40/plugin/test2.plugin 
-------------------------------------------------------------------------------- /src/github-issues/40/test.do: -------------------------------------------------------------------------------- 1 | ssc install parallel 2 | 3 | clear all 4 | sysuse auto, clear 5 | 6 | parallel setclusters 2, f 7 | capture program drop pargegen 8 | program pargegen 9 | version 13 10 | syntax varlist [if] 11 | marksample touse 12 | gegen test = sum(price) 13 | disp "`level'" 14 | reg `varlist' if `touse' 15 | drop test 16 | end 17 | 18 | parallel bs, reps(50) nodots: pargegen price weight foreign rep78 19 | bs, reps(50) nodots: pargegen price weight foreign rep78 20 | -------------------------------------------------------------------------------- /src/github-issues/45/test.do: -------------------------------------------------------------------------------- 1 | * clear 2 | * input long id1 int id2 3 | * 1225800 179 4 | * 1226197 162 5 | * 1245415 167 6 | * 1245415 204 7 | * 1249196 158 8 | * 1246805 226 9 | * 1247361 189 10 | * 1248872 203 11 | * 1249196 158 12 | * end 13 | * tostring id1 id2, gen(sid1 sid2) 14 | * cap noi gisid id1 id2, v 15 | * assert _rc == 459 16 | * cap noi gisid sid1 sid2, v 17 | * assert _rc == 459 18 | * 19 | * clear 20 | * input long id1 int id2 21 | * 1 13 22 | * 2 11 23 | * 3 12 24 | * 3 16 25 | * 9 10 26 | * 4 17 27 | * 5 14 28 | * 6 15 29 | * 9 10 30 | * end 31 | * tostring id1 id2, gen(sid1 sid2) 32 | * cap noi gisid id1 id2, v 33 | * assert _rc == 459 34 | * cap noi gisid sid1 sid2, v 35 | * assert _rc == 459 36 | 37 | clear 38 | input long id1 int id2 39 | 3 6 40 | 3 7 41 | 9 1 42 | 4 1 43 | 9 1 44 | end 45 | gen id3 = _n 46 | tostring id1 id2, gen(sid1 sid2) 47 | cap noi gisid id1 id2, v 48 | assert _rc == 459 49 | cap noi gisid sid1 sid2, v 50 | assert _rc == 459 51 | 52 | sort id1 id2 53 | cap noi gisid id1 id2, v 54 | assert _rc == 459 55 | sort sid1 sid2 56 | cap noi gisid sid1 sid2, v 57 | assert _rc == 459 58 | 59 | gen sid3 = string(_n) 60 | cap 
noi gisid id1 id2 id3, v 61 | assert _rc == 0 62 | cap noi gisid sid1 sid2 sid3, v 63 | assert _rc == 0 64 | 65 | /* 66 | set obs 10000000 67 | replace id1 = 10 + mod(_n, 123) in 6 / `=_N' 68 | replace id2 = 10 + mod(_n, 543) in 6 / `=_N' 69 | hashsort id3 id1 id2 70 | gisid id1 id2, v 71 | replace sid1 = string(id1) 72 | replace sid2 = string(id2) 73 | gisid sid1 sid2, v 74 | hashsort id1 id2 75 | gisid id1 id2, v 76 | hashsort sid1 sid2 77 | gisid sid1 sid2, v 78 | */ 79 | -------------------------------------------------------------------------------- /src/github-issues/48/test.do: -------------------------------------------------------------------------------- 1 | clear 2 | set obs 10 3 | gen x = "a" 4 | gen z = 0 5 | gen a = 1 6 | gen b = 1 7 | gen c = 1 8 | 9 | gcontract color 10 | gcontract color pink 11 | gcontract color _.pink 12 | 13 | preserve 14 | gcollapse x 15 | restore, preserve 16 | gcollapse z, by(a) 17 | restore, preserve 18 | gcollapse z, by(a b c) 19 | restore, preserve 20 | gcollapse z, by(a b c zz) 21 | restore, preserve 22 | gcollapse z, by(a-zz) 23 | restore, preserve 24 | gcollapse z, by(a-zz) nods 25 | restore, preserve 26 | gcollapse z, by(a-zz) ds 27 | restore, preserve 28 | gcollapse z, by(a-c) 29 | restore, preserve 30 | gcollapse z, by(a-c) nods 31 | restore, preserve 32 | gcollapse z, by(a-c) ds 33 | restore, preserve 34 | gcollapse z, by(a - c) 35 | restore, preserve 36 | gcollapse z, by(a - c) nods 37 | restore, preserve 38 | gcollapse z, by(a - c) ds 39 | restore 40 | 41 | preserve 42 | gcontract a 43 | restore, preserve 44 | gcontract a b c 45 | restore, preserve 46 | gcontract a b c zz 47 | restore, preserve 48 | gcontract a-zz 49 | restore, preserve 50 | gcontract a-zz, nods 51 | restore, preserve 52 | gcontract a-zz, ds 53 | restore, preserve 54 | gcontract a-c 55 | restore, preserve 56 | gcontract a-c, nods 57 | restore, preserve 58 | gcontract a-c, ds 59 | restore, preserve 60 | gcontract a - c 61 | restore, preserve 62 | 
gcontract a - c, nods 63 | restore, preserve 64 | gcontract a - c, ds 65 | restore 66 | 67 | glevelsof a 68 | glevelsof a b c 69 | glevelsof a b c zz 70 | glevelsof a-zz 71 | glevelsof a-zz, nods 72 | glevelsof a-zz, ds 73 | glevelsof a-c 74 | glevelsof a-c, nods 75 | glevelsof a-c, ds 76 | glevelsof a - c, 77 | glevelsof a - c, nods 78 | glevelsof a - c, ds 79 | 80 | gtop a 81 | gtop a b c 82 | gtop a b c zz 83 | gtop a-zz 84 | gtop a-zz, nods 85 | gtop a-zz, ds 86 | gtop a-c 87 | gtop a-c, nods 88 | gtop a-c, ds 89 | gtop a - c, 90 | gtop a - c, nods 91 | gtop a - c, ds 92 | gtop a*c 93 | -------------------------------------------------------------------------------- /src/github-issues/60/plugin.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/60/plugin.zip -------------------------------------------------------------------------------- /src/github-issues/60/plugin/Makefile: -------------------------------------------------------------------------------- 1 | OSFLAGS = -shared 2 | GCC = x86_64-w64-mingw32-gcc.exe 3 | 4 | TEST1_SRC=stplugin.c test1.c 5 | 6 | TEST2_SRC=stplugin.c test2.c 7 | 8 | SPOOKYHASH_SRC=lib/spookyhash/src/context.c \ 9 | lib/spookyhash/src/globals.c \ 10 | lib/spookyhash/src/spookyhash.c 11 | 12 | SPOOKYHASH_INC=-Ilib/spookyhash/src 13 | 14 | all: clean test1 test2 15 | 16 | test1: $(TEST1_SRC) 17 | $(GCC) $(OSFLAGS) -o test1.plugin $^ 18 | 19 | test2: $(TEST2_SRC) $(SPOOKYHASH_SRC) 20 | $(GCC) $(OSFLAGS) -o test2.plugin $(SPOOKYHASH_INC) $^ 21 | 22 | .PHONY: clean 23 | clean: 24 | rm -f test1.plugin 25 | rm -f test2.plugin 26 | -------------------------------------------------------------------------------- /src/github-issues/60/plugin/lib/spookyhash/src/context.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Centaurean SpookyHash 3 | * 4 | * 
Copyright (c) 2015, Guillaume Voirin 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * 1. Redistributions of source code must retain the above copyright notice, this 11 | * list of conditions and the following disclaimer. 12 | * 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * 3. Neither the name of the copyright holder nor the names of its 18 | * contributors may be used to endorse or promote products derived from 19 | * this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | * 32 | * 25/01/15 12:19 33 | * 34 | * ---------- 35 | * SpookyHash 36 | * ---------- 37 | * 38 | * Author(s) 39 | * Bob Jenkins (http://burtleburtle.net/bob/hash/spooky.html) 40 | * 41 | * Description 42 | * Very fast non cryptographic hash 43 | */ 44 | 45 | #include "context.h" 46 | 47 | SPOOKYHASH_WINDOWS_EXPORT SPOOKYHASH_FORCE_INLINE void spookyhash_context_init(spookyhash_context *context, uint64_t seed1, uint64_t seed2) { 48 | context->m_length = 0; 49 | context->m_remainder = 0; 50 | context->m_state[0] = seed1; 51 | context->m_state[1] = seed2; 52 | } 53 | -------------------------------------------------------------------------------- /src/github-issues/60/plugin/lib/spookyhash/src/context.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Centaurean SpookyHash 3 | * 4 | * Copyright (c) 2015, Guillaume Voirin 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * 1. Redistributions of source code must retain the above copyright notice, this 11 | * list of conditions and the following disclaimer. 12 | * 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * 3. Neither the name of the copyright holder nor the names of its 18 | * contributors may be used to endorse or promote products derived from 19 | * this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | * 32 | * 24/01/15 22:32 33 | * 34 | * ---------- 35 | * SpookyHash 36 | * ---------- 37 | * 38 | * Author(s) 39 | * Bob Jenkins (http://burtleburtle.net/bob/hash/spooky.html) 40 | * 41 | * Description 42 | * Very fast non cryptographic hash 43 | */ 44 | 45 | #ifndef SPOOKYHASH_CONTEXT_H 46 | #define SPOOKYHASH_CONTEXT_H 47 | 48 | #include "globals.h" 49 | 50 | #define SPOOKYHASH_BLOCK_SIZE (SPOOKYHASH_VARIABLES * 8) 51 | #define SPOOKYHASH_BUFFER_SIZE (2 * SPOOKYHASH_BLOCK_SIZE) 52 | #define SPOOKYHASH_CONSTANT (0xdeadbeefdeadbeefLL) 53 | 54 | SPOOKYHASH_WINDOWS_EXPORT void spookyhash_context_init(spookyhash_context *, uint64_t, uint64_t); 55 | 56 | #endif -------------------------------------------------------------------------------- /src/github-issues/60/plugin/lib/spookyhash/src/globals.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Centaurean SpookyHash 3 | * 4 | * Copyright (c) 2015, Guillaume Voirin 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * 1. Redistributions of source code must retain the above copyright notice, this 11 | * list of conditions and the following disclaimer. 12 | * 13 | * 2. 
Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * 3. Neither the name of the copyright holder nor the names of its 18 | * contributors may be used to endorse or promote products derived from 19 | * this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | * 32 | * 26/06/15 1:08 33 | * 34 | * ---------- 35 | * SpookyHash 36 | * ---------- 37 | * 38 | * Author(s) 39 | * Bob Jenkins (http://burtleburtle.net/bob/hash/spooky.html) 40 | * 41 | * Description 42 | * Very fast non cryptographic hash 43 | */ 44 | 45 | #include "globals.h" 46 | 47 | SPOOKYHASH_WINDOWS_EXPORT uint8_t spookyhash_version_major() { 48 | return SPOOKYHASH_MAJOR_VERSION; 49 | } 50 | 51 | SPOOKYHASH_WINDOWS_EXPORT uint8_t spookyhash_version_minor() { 52 | return SPOOKYHASH_MINOR_VERSION; 53 | } 54 | 55 | SPOOKYHASH_WINDOWS_EXPORT uint8_t spookyhash_version_revision() { 56 | return SPOOKYHASH_REVISION; 57 | } -------------------------------------------------------------------------------- /src/github-issues/60/plugin/lib/spookyhash/src/spookyhash.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Centaurean SpookyHash 3 | * 4 | * Copyright (c) 2015, Guillaume Voirin 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * 1. Redistributions of source code must retain the above copyright notice, this 11 | * list of conditions and the following disclaimer. 12 | * 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * 3. Neither the name of the copyright holder nor the names of its 18 | * contributors may be used to endorse or promote products derived from 19 | * this software without specific prior written permission. 
20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | * 32 | * 24/01/15 22:32 33 | * 34 | * ---------- 35 | * SpookyHash 36 | * ---------- 37 | * 38 | * Author(s) 39 | * Bob Jenkins (http://burtleburtle.net/bob/hash/spooky.html) 40 | * 41 | * Description 42 | * Very fast non cryptographic hash 43 | */ 44 | 45 | #ifndef SPOOKYHASH_H 46 | #define SPOOKYHASH_H 47 | 48 | #include "context.h" 49 | 50 | #define SPOOKYHASH_ALLOW_UNALIGNED_READS 0 51 | #define SPOOKYHASH_ROTATE(x, k) (((x) << (k)) | (((x) >> (64 - (k))))) 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /src/github-issues/60/plugin/stplugin.c: -------------------------------------------------------------------------------- 1 | /* 2 | stplugin.c, version 2.0 3 | copyright (c) 2003, 2006 StataCorp 4 | */ 5 | 6 | #include "stplugin.h" 7 | 8 | ST_plugin *_stata_ ; 9 | 10 | STDLL pginit(ST_plugin *p) 11 | { 12 | _stata_ = p ; 13 | return(SD_PLUGINVER) ; 14 | } 15 | -------------------------------------------------------------------------------- /src/github-issues/60/plugin/test.do: -------------------------------------------------------------------------------- 1 | clear all 2 | program 
test1, plugin using(test1.plugin) 3 | plugin call test1 4 | 5 | program test2, plugin using(test2.plugin) 6 | plugin call test2 7 | -------------------------------------------------------------------------------- /src/github-issues/60/plugin/test1.c: -------------------------------------------------------------------------------- 1 | #include "stplugin.h" 2 | 3 | int main() 4 | { 5 | return(0); 6 | } 7 | 8 | int WinMain() 9 | { 10 | return(0); 11 | } 12 | 13 | STDLL stata_call(int argc, char *argv[]) 14 | { 15 | SF_display("Hello World\n") ; 16 | return(0) ; 17 | } 18 | -------------------------------------------------------------------------------- /src/github-issues/60/plugin/test1.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/60/plugin/test1.plugin -------------------------------------------------------------------------------- /src/github-issues/60/plugin/test2.c: -------------------------------------------------------------------------------- 1 | #include "stplugin.h" 2 | #include "spookyhash_api.h" 3 | 4 | #include <stdlib.h> 5 | #include <stdio.h> 6 | #include <string.h> 7 | #include <stdint.h> 8 | #include <inttypes.h> 9 | 10 | int main() 11 | { 12 | return(0); 13 | } 14 | 15 | int WinMain() 16 | { 17 | return(0); 18 | } 19 | 20 | /* Plugin entry point: displays a formatted double, then the two 64-bit values produced by spookyhash_128 for it. */ 21 | STDLL stata_call(int argc, char *argv[]) 22 | { 23 | char * buffer = malloc(1024 * sizeof(char)); 24 | char * string = strdup("foo"); 25 | 26 | /* calloc(1, ...) allocates exactly one element, so the value belongs at index 0 */ 27 | ST_double * number = calloc(1, sizeof(ST_double)); 28 | number[0] = 1729.42; 29 | 30 | sprintf (buffer, "%s: %9.2f\n", string, *number); 31 | SF_display (buffer); 32 | 33 | /* zero-initialized in case spookyhash_128 reads these as seeds -- TODO confirm against spookyhash_api.h */ 34 | uint64_t h1 = 0, h2 = 0; 35 | spookyhash_128(number, sizeof(ST_double), &h1, &h2); 36 | 37 | sprintf (buffer, "hash: %"PRIu64", %"PRIu64"\n", h1, h2); 38 | SF_display (buffer); 39 | 40 | /* release every allocation made above, not just the display buffer */ 41 | free (number); 42 | free (string); 43 | free (buffer); 44 | return(0) ; 45 | } 46 | --------------------------------------------------------------------------------
/src/github-issues/60/plugin/test2.plugin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/60/plugin/test2.plugin -------------------------------------------------------------------------------- /src/github-issues/65/test.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | estpost gtabstat price mpg rep78, statistics(mean sd) 3 | esttab ., cells("price mpg rep78") 4 | estpost gtabstat price mpg rep78, statistics(mean sd) columns(statistics) 5 | esttab ., cells("mean(fmt(a3)) sd") 6 | estpost gtabstat price mpg rep78, by(foreign) statistics(mean sd) columns(variables) 7 | estpost gtabstat price mpg rep78, by(foreign) statistics(mean sd) columns(statistics) 8 | esttab ., main(mean) aux(sd) nostar unstack noobs nonote label 9 | 10 | estpost gtabstat price, by(foreign) statistics(mean sd) columns(variables) 11 | estpost gtabstat price, by(foreign) statistics(mean sd) columns(statistics) 12 | 13 | estpost gtabstat price, statistics(mean sd) columns(variables) 14 | estpost gtabstat price, statistics(mean sd) columns(statistics) 15 | -------------------------------------------------------------------------------- /src/github-issues/67/test-transform.do: -------------------------------------------------------------------------------- 1 | clear all 2 | 3 | capture program drop bench 4 | program bench 5 | gettoken timer call: 0, p(:) 6 | gettoken colon call: call, p(:) 7 | cap timer clear `timer' 8 | timer on `timer' 9 | `call' 10 | timer off `timer' 11 | qui timer list 12 | c_local r`timer' `=r(t`timer')' 13 | end 14 | 15 | clear 16 | set obs 10000000 17 | gen x = ceil(runiform() * 10000) 18 | gen g = round(_n / 100) 19 | 20 | bench 1: egen double rankx_def1 = rank(x) 21 | bench 2: gegen double rankx_def2 = rank(x) 22 | 23 | bench 3: egen rankx_track1 = rank(x), track 
24 | bench 4: gegen rankx_track2 = rank(x), ties(track) 25 | 26 | bench 5: egen rankx_field1 = rank(x), field 27 | bench 6: gegen rankx_field2 = rank(x), ties(field) 28 | 29 | bench 7: egen long rankx_uniq1 = rank(x), uniq 30 | bench 8: gegen long rankx_uniq2 = rank(x), ties(uniq) 31 | 32 | gegen rankx_uniq3 = rank(x), ties(stable) 33 | 34 | bench 11: egen double rankx_group_def1 = rank(x), by(g) 35 | bench 12: gegen double rankx_group_def2 = rank(x), by(g) 36 | 37 | bench 13: egen rankx_group_track1 = rank(x), by(g) track 38 | bench 14: gegen rankx_group_track2 = rank(x), by(g) ties(track) 39 | 40 | bench 15: egen rankx_group_field1 = rank(x), by(g) field 41 | bench 16: gegen rankx_group_field2 = rank(x), by(g) ties(field) 42 | 43 | bench 17: egen long rankx_group_uniq1 = rank(x), by(g) uniq 44 | bench 18: gegen long rankx_group_uniq2 = rank(x), by(g) ties(uniq) 45 | 46 | gegen rankx_group_uniq3 = rank(x), by(g) ties(stable) 47 | 48 | assert (rankx_def1 == rankx_def2) 49 | assert (rankx_track1 == rankx_track2) 50 | assert (rankx_field1 == rankx_field2) 51 | 52 | sort x, stable 53 | assert rankx_uniq3 == _n 54 | 55 | gisid rankx_uniq1 56 | gisid rankx_uniq2 57 | 58 | assert (rankx_group_def1 == rankx_group_def2) 59 | assert (rankx_group_track1 == rankx_group_track2) 60 | assert (rankx_group_field1 == rankx_group_field2) 61 | 62 | cap drop ix 63 | sort g x, stable 64 | by g: gen long ix = _n 65 | assert rankx_group_uniq3 == ix 66 | 67 | gisid g rankx_group_uniq1 68 | gisid g rankx_group_uniq2 69 | 70 | local bench_table `" Versus | Native | gtools | % faster "' 71 | local bench_table `"`bench_table'"' _n(1) `" ---------- | ------ | ------ | -------- "' 72 | 73 | local commands default track field unique 74 | forvalues i = 1(2)7 { 75 | gettoken cmd commands: commands 76 | local pct "`:disp %7.2f 100 * (`r`i'' - `r`=`i'+1'') / `r`i'''" 77 | local dnative "`:disp %6.2f `r`i'''" 78 | local dgtools "`:disp %6.2f `r`=`i'+1'''" 79 | local cmd `"`:disp %10s "`cmd'"'"' 80 | 
local bench_table `"`bench_table'"' _n(1) `" `cmd' | `dnative' | `dgtools' | `pct'% "' 81 | } 82 | 83 | local bench_table `"`bench_table'"' _n(1) `" ---------- | ------ | ------ | -------- "' 84 | local bench_table `"`bench_table'"' _n(1) `" by group "' 85 | local bench_table `"`bench_table'"' _n(1) `" ---------- | ------ | ------ | -------- "' 86 | 87 | local commands default track field unique 88 | forvalues i = 11(2)17 { 89 | gettoken cmd commands: commands 90 | local pct "`:disp %7.2f 100 * (`r`i'' - `r`=`i'+1'') / `r`i'''" 91 | local dnative "`:disp %6.2f `r`i'''" 92 | local dgtools "`:disp %6.2f `r`=`i'+1'''" 93 | local cmd `"`:disp %10s "`cmd'"'"' 94 | local bench_table `"`bench_table'"' _n(1) `" `cmd' | `dnative' | `dgtools' | `pct'% "' 95 | } 96 | disp _n(1) `"`bench_table'"' 97 | -------------------------------------------------------------------------------- /src/github-issues/67/test.do: -------------------------------------------------------------------------------- 1 | * Create simulated data 2 | clear all 3 | * set obs 10000000 4 | set obs 1000000 5 | gen x = ceil(runiform()*10000) 6 | gen g = round(_n / 100) 7 | tempfile data 8 | save `data' 9 | 10 | *--------------------------------------------- 11 | * egen rank 12 | *--------------------------------------------- 13 | 14 | * Load simulated data 15 | use `data', clear 16 | 17 | * With egen 18 | timer on 1 19 | * egen rank_x = rank(x) 20 | timer off 1 21 | 22 | * With gtools 23 | timer on 2 24 | tempvar t1 t2 25 | gegen `t1' = group(x), counts(`t2') 26 | gen rank2_x = `t1' + `t2' / 2 - 0.5 27 | timer off 2 28 | 29 | * Validate 30 | * gen same = rank_x==rank2_x 31 | * sum 32 | 33 | *--------------------------------------------- 34 | * egen rank, track 35 | *--------------------------------------------- 36 | 37 | * Load simulated data 38 | use `data', clear 39 | 40 | * With egen 41 | timer on 3 42 | * egen rank_x = rank(x), track 43 | timer off 3 44 | 45 | * With gtools 46 | timer on 4 47 | tempvar t1 
t2 48 | gen `t1' = x 49 | fasterxtile `t2' = x, nq(`=_N') 50 | timer off 4 51 | 52 | * Validate 53 | * gen same = rank_x==rank2_x 54 | * sum 55 | 56 | *--------------------------------------------- 57 | * egen rank, field 58 | *--------------------------------------------- 59 | 60 | * Load simulated data 61 | use `data', clear 62 | 63 | * With egen 64 | timer on 5 65 | * egen rank_x = rank(x), field 66 | timer off 5 67 | 68 | * With gtools 69 | timer on 6 70 | tempvar t1 t2 71 | gegen `t1' = group(x), counts(`t2') 72 | gen rank2_x = `r(N)' - `t1' - `t2' + 2 73 | timer off 6 74 | 75 | * Validate they produce same results 76 | * gen same = rank_x==rank2_x 77 | * sum 78 | 79 | *--------------------------------------------- 80 | * Display relative speeds 81 | *--------------------------------------------- 82 | 83 | * Display benchmark speeds 84 | timer list 85 | timer clear 86 | -------------------------------------------------------------------------------- /src/github-issues/67/test.orig.do: -------------------------------------------------------------------------------- 1 | * Create simulated data 2 | clear all 3 | set obs 10000000 4 | gen x = ceil(runiform()*10000) 5 | tempfile data 6 | save `data' 7 | 8 | *--------------------------------------------- 9 | * egen rank 10 | *--------------------------------------------- 11 | 12 | * Load simulated data 13 | use `data', clear 14 | 15 | * With egen 16 | timer on 1 17 | egen rank_x = rank(x) 18 | timer off 1 19 | 20 | * With gtools 21 | timer on 2 22 | tempvar t1 t2 t3 23 | gen `t1' = x 24 | gdistinct x 25 | fasterxtile `t2' = x, nq(`r(N)') 26 | gegen `t3' = count(x), by(`t1') 27 | gen rank2_x = `t2' + `t3'/2 - 0.5 28 | timer off 2 29 | 30 | * Validate 31 | gen same = rank_x==rank2_x 32 | sum 33 | 34 | *--------------------------------------------- 35 | * egen rank, track 36 | *--------------------------------------------- 37 | 38 | * Load simulated data 39 | use `data', clear 40 | 41 | * With egen 42 | timer on 3 43 | 
egen rank_x = rank(x), track 44 | timer off 3 45 | 46 | * With gtools 47 | timer on 4 48 | tempvar t1 t2 t3 49 | gen `t1' = x 50 | gdistinct x 51 | local Nd = r(ndistinct) 52 | fasterxtile `t2' = x, nq(`r(N)') 53 | gen rank2_x = `t2' 54 | timer off 4 55 | 56 | * Validate 57 | gen same = rank_x==rank2_x 58 | sum 59 | 60 | *--------------------------------------------- 61 | * egen rank, field 62 | *--------------------------------------------- 63 | 64 | * Load simulated data 65 | use `data', clear 66 | 67 | * With egen 68 | timer on 5 69 | egen rank_x = rank(x), field 70 | timer off 5 71 | 72 | * With gtools 73 | timer on 6 74 | tempvar t1 t2 t3 75 | gen `t1' = x 76 | gdistinct x 77 | local N = r(N) 78 | fasterxtile `t2' = x, nq(`N') 79 | gegen `t3' = count(x), by(`t1') 80 | gen rank2_x = `N' - `t2' - `t3' + 2 81 | timer off 6 82 | 83 | * Validate they produce same results 84 | gen same = rank_x==rank2_x 85 | sum 86 | 87 | *--------------------------------------------- 88 | * Display relative speeds 89 | *--------------------------------------------- 90 | 91 | * Display benchmark speeds 92 | timer list 93 | timer clear 94 | -------------------------------------------------------------------------------- /src/github-issues/67/test.short.do: -------------------------------------------------------------------------------- 1 | * Create simulated data 2 | clear all 3 | set obs 1000000 4 | gen x = ceil(runiform() * 1000) 5 | qui gunique x 6 | assert r(J) < r(N) 7 | 8 | tempvar N 9 | gegen `N' = count(1), by(x) 10 | local nonmi = `r(N)' 11 | fasterxtile rankTrack = x, nq(`nonmi') 12 | gen rankField = `nonmi' - rankTrack - `N' + 2 13 | gen rankDefault = rankTrack + `N' / 2 - 0.5 14 | 15 | egen _rankDefault = rank(x) 16 | egen _rankTrack = rank(x), track 17 | egen _rankField = rank(x), field 18 | 19 | assert (_rankDefault == rankDefault) 20 | assert (_rankTrack == rankTrack) 21 | assert (_rankField == rankField) 22 | 
-------------------------------------------------------------------------------- /src/github-issues/72/issue.do: -------------------------------------------------------------------------------- 1 | clear 2 | set obs 9 3 | gen id=strofreal(floor((_n+2)/3)) 4 | 5 | g cat="none" if id=="1" 6 | replace cat="one" if id!="1" 7 | 8 | gegen gtot=total(cat!=cat[_n-1]), by(id) 9 | egen tot=total(cat!=cat[_n-1]), by(id) 10 | -------------------------------------------------------------------------------- /src/github-issues/78/ifin.do: -------------------------------------------------------------------------------- 1 | capture program drop test 2 | program test 3 | sysuse auto, clear 4 | level1 if !strpos(make, ")") 5 | end 6 | 7 | capture program drop level1 8 | program level1 9 | syntax [if] 10 | macro dir _if 11 | level2 `if' in 1/10 12 | end 13 | 14 | capture program drop level2 15 | program level2 16 | syntax [if] [in] 17 | macro dir _if 18 | mata st_local("ifin", st_local("if") + " " + st_local("in")) 19 | macro dir _ifin 20 | local ifin: copy local ifin 21 | level3 `ifin', ifin(`ifin') ifintest(`ifin') 22 | end 23 | 24 | capture program drop level3 25 | program level3 26 | syntax [if] [in], ifin(str asis) ifintest(str) 27 | macro dir _if 28 | macro dir _in 29 | macro dir _ifin 30 | macro dir _ifintest 31 | end 32 | 33 | test 34 | -------------------------------------------------------------------------------- /src/github-issues/78/test.do: -------------------------------------------------------------------------------- 1 | sysuse auto 2 | gdistinct make if !strpos(make, ")") 3 | gunique make if !strpos(make, ")") 4 | gegen x = group(foreign) if !strpos(make, "x)") 5 | -------------------------------------------------------------------------------- /src/github-issues/88/test.do: -------------------------------------------------------------------------------- 1 | clear 2 | set obs 5 3 | g units=1 4 | g weight=5 5 | 6 | gegen total =total(units) [w=weight] 7 | gegen totalf 
=total(units) [fw=weight] 8 | gegen totalp =total(units) [pw=weight] 9 | sum total* 10 | 11 | collapse (sum) units [aw=weight] 12 | disp units 13 | 14 | clear 15 | set obs 5 16 | g units=_n 17 | g weight=_n 18 | gegen total =total(units) [w=weight] 19 | gegen totalu=total(units) 20 | sum total* 21 | -------------------------------------------------------------------------------- /src/github-issues/debug-1/savehdfe.do: -------------------------------------------------------------------------------- 1 | global GTOOLS_BETA = 1 2 | sysuse auto, clear 3 | drop _hdfe_* 4 | gglm foreign price, family(binomial) absorb(rep78) mata(GLM) prefix(hdfe(_hdfe_)) 5 | 6 | sysuse auto, clear 7 | gtop rep78 if mi(rep78), by(foreign) gen(a) 8 | -------------------------------------------------------------------------------- /src/github-issues/debug-2/test-median.do: -------------------------------------------------------------------------------- 1 | clear 2 | set obs 1000 3 | gen double xx = int(100 * runiform()) / 100 4 | gegen yy1 = median(xx) 5 | gegen yy2 = pctile(xx), p(50) 6 | gcollapse (median) zz1 = xx, merge 7 | gcollapse (p50) zz2 = xx, merge 8 | tab yy1 yy2 9 | tab zz1 zz2 10 | gquantiles xx, _pctile 11 | disp r(r1) 12 | -------------------------------------------------------------------------------- /src/github-issues/debug-2/test-noobs.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | tempvar yy 3 | tempname zz 4 | gen `yy' = . 
5 | set tracedepth 1 6 | * set trace on 7 | * gegen `xx' = mean(price) if `yy' == 1, by(foreign) 8 | 9 | capture program drop cc 10 | program cc 11 | sort foreign 12 | xtset foreign 13 | tempvar xx 14 | gegen `xx' = count(price), by(foreign) replace 15 | 16 | end 17 | frame put price foreign if `yy' == 1, into(`zz') 18 | frame `zz' { 19 | cc 20 | } 21 | -------------------------------------------------------------------------------- /src/github-issues/debug-3/test-excludeself.do: -------------------------------------------------------------------------------- 1 | clear 2 | set obs 15 3 | gen x = _n 4 | gen y = mod(_n, 2) 5 | replace y = 2 if _n > 10 6 | replace x = . in 12 7 | gstats transform (range mean . .) z = x, excludeself by(y) replace 8 | -------------------------------------------------------------------------------- /src/github-issues/debug-4/test_excludeself.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | gstats transform (moving mean -6 .) x=rep78, excludeself replace 3 | gstats transform (range mean -6 .) x=rep78, excludeself replace 4 | gstats transform (range mean -6 .) x=rep78, replace 5 | gstats transform (moving mean -6 .) 
x=rep78 (range mean -6 6) y=rep78, excludeself replace 6 | -------------------------------------------------------------------------------- /src/github-issues/debug-4/test_gcollapse.do: -------------------------------------------------------------------------------- 1 | cap mata mata drop ahaRename() 2 | mata 3 | void function ahaRename(real scalar i) 4 | { 5 | (void) st_addvar(st_vartype(i), st_local("revar")) 6 | if ( strpos(st_vartype(i), "str") ) { 7 | st_sstore(., st_local("revar"), st_sdata(., i)) 8 | } 9 | else { 10 | st_store(., st_local("revar"), st_data(., i)) 11 | } 12 | st_dropvar(i) 13 | } 14 | end 15 | 16 | use /home/mauricio/bulk/data/ra/doyle/cms-ambulance/aha/aha-data-120617.dta, clear 17 | local i 0 18 | unab vars: _all 19 | foreach var of local vars { 20 | local ++i 21 | if strpos("`var'", ".") { 22 | disp "`var'" 23 | local revar: subinstr local var "." "_", all 24 | mata: ahaRename(`i') 25 | if (`=`i'-1') { 26 | order `revar', after(`cached') 27 | local cached `revar' 28 | } 29 | else { 30 | order `revar' 31 | } 32 | } 33 | else { 34 | local cached `var' 35 | } 36 | } 37 | 38 | rename abs_hcahps_cmp_yr3 satis 39 | rename abs_proc_cmp_yr3 process 40 | rename abs_mort_cmp_yr3 Hmort30 41 | rename abs_readm_cmp_yr3 Hreadm30 42 | rename mort_30_ami_yr3 AMImort30 43 | rename mort_30_pn_yr3 PNmort30 44 | rename mort_30_hf_yr3 HFmort30 45 | rename readm_30_ami_yr3 AMIreadm30 46 | rename readm_30_pn_yr3 PNreadm30 47 | rename readm_30_hf_yr3 HFreadm30 48 | rename hospbd volume 49 | rename year diag_year 50 | rename low_profit lowpr 51 | rename high_profit hipr 52 | desc teach forpr nonpr gov coth 53 | local keepvars satis process Hmort30 Hreadm30 AMImort30 PNmort30 HFmort30 AMIreadm30 PNreadm30 HFreadm30 volume teach forpr nonpr gov lowpr hipr coth 54 | 55 | rename provider_id prvnumgrp 56 | keep `keepvars' prvnumgrp diag_year 57 | gcollapse (mean) `keepvars', by(prvnumgrp diag_year) 58 | 59 | set varabbrev on 60 | set more off 61 | clear 62 | set obs 10 
63 | gen aa = 0 64 | gen bb = runiform() 65 | gen cc = runiform() 66 | gen dd = runiform() 67 | gegen x = mean(b c), by(a d* e) 68 | gen dz = runiform() 69 | gegen x = mean(b c), by(a d e) 70 | -------------------------------------------------------------------------------- /src/github-issues/debug-4/test_gquantiles.do: -------------------------------------------------------------------------------- 1 | set varabbrev on 2 | set more off 3 | clear 4 | set obs 10 5 | gen aa = 0 6 | gen bb = runiform() 7 | gen cc = runiform() 8 | gen dd = runiform() 9 | gquantiles a b c d, pctile 10 | -------------------------------------------------------------------------------- /src/github-issues/debug-4/test_greshape.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | gen i = _n 3 | greshape wide price mpg price, i(i) j(foreign) xi(drop) 4 | greshape wide mpg price, i(i) j(foreign) xi(drop) 5 | 6 | sysuse auto, clear 7 | gen i = _n 8 | gen mp0 = price 9 | gen pr0 = price 10 | rename price pr1 11 | rename mpg mp1 12 | greshape long pr mp pr, i(i) j(j) xi(drop) 13 | greshape long pr mp, i(i) j(j) xi(drop) 14 | 15 | webuse reshape3, clear 16 | greshape long inc([0-9]+).+ (ue)(.+)/2 inc([0-9]+).+, by(id) keys(year) match(regex) 17 | greshape long inc([0-9]+).+ (ue)(.+)/2 inc(.+)r, by(id) keys(year) match(regex) 18 | greshape long inc([0-9]+).+ (ue)(.+)/2 waff, by(id) keys(year) match(regex) 19 | greshape long inc([0-9]+).+ (ue)(.+)/2, by(id) keys(year) match(regex) 20 | -------------------------------------------------------------------------------- /src/github-issues/debug-4/test_gtop.do: -------------------------------------------------------------------------------- 1 | use /home/mauricio/bulk/data/research/census-correctionalss/raw/ICPSR_07852/DS0001/07852-0001-Data.dta, clear 2 | gtop V1 V2 3 | 4 | clear 5 | set obs 100 6 | gen x = mod(_n, 2) 7 | label define x 1 hi 8 | label values x x 9 | gtoplevelsof x 10 | 11 | use 
/home/mauricio/bulk/data/ra/doyle/cms-ambulance/cepr_acs_2005.dta, clear 12 | gtop socp05 if inlist(socp05, 292040, 292041, 292042) 13 | desc *soc* 14 | 15 | clear 16 | set obs 1000 17 | gen x = ceil(runiform() * 100) 18 | gtop x 19 | gtop x, missrow 20 | gtop x, ntop(1) 21 | gtop x, ntop(-1) 22 | gtop x, ntop(1000) 23 | gtop x, nooth 24 | replace x = . in 20/43 25 | gtop x 26 | gtop x, missrow 27 | gtop x, nomiss 28 | replace x = .a in 50/100 29 | replace x = .b in 200/300 30 | gtop x 31 | gtop x, nomiss 32 | gunique x if !mi(x) 33 | gtop x, missrow 34 | gtop x, missrow ntop(99) 35 | 36 | clear 37 | set obs 1000000 38 | gen x = ceil(runiform() * 10000) 39 | gtop x 40 | -------------------------------------------------------------------------------- /src/github-issues/debug-4/test_gunique.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | set obs 40 4 | gen g = mod(_n, 5) 5 | gen x = ceil(runiform() * 10) 6 | gunique x, by(g) gen(y) 7 | l 8 | gunique x if inlist(g, 2, 3, 4), by(g) gen(z) 9 | l 10 | gunique x if inlist(g, 2, 3, 4), by(g) gen(z) 11 | gunique x if inlist(g, 1, 2, 3), by(g) gen(z) replace 12 | l 13 | 14 | clear 15 | set obs 10 16 | gen x = 1 17 | gegen y = group(x) if x > 1 18 | gegen z = tag(x) if x > 1 19 | egen _y = group(x) if x > 1 20 | egen _z = tag(x) if x > 1 21 | l 22 | -------------------------------------------------------------------------------- /src/github-issues/debug-4/test_hash.do: -------------------------------------------------------------------------------- 1 | clear 2 | set obs 100000 3 | gen x = ceil(runiform() * 10) 4 | gen y = char(32 + ceil(runiform() * 96)) + char(32 + ceil(runiform() * 96)) 5 | gegen `c(obs_t)' z1 = group(x y), hash(1) 6 | gegen `c(obs_t)' z2 = group(x y), hash(2) 7 | gegen `c(obs_t)' z3 = group(x y), hash(3) 8 | sort x y z1 9 | gen `c(obs_t)' id = (x != x[_n-1]) | (y != y[_n-1]) 10 | replace id = sum(id) 11 | assert id == z1 12 | assert z1 == 
z2 13 | assert z2 == z3 14 | 15 | clear 16 | set obs 100000 17 | gen x = ceil(runiform() * 10) 18 | gen y = char(32 + ceil(runiform() * 96)) + char(32 + ceil(runiform() * 96)) 19 | gegen `c(obs_t)' z1 = group(y x), hash(1) 20 | gegen `c(obs_t)' z2 = group(y x), hash(2) 21 | gegen `c(obs_t)' z3 = group(y x), hash(3) 22 | sort y x z1 23 | gen `c(obs_t)' id = (x != x[_n-1]) | (y != y[_n-1]) 24 | replace id = sum(id) 25 | assert id == z1 26 | assert z1 == z2 27 | assert z2 == z3 28 | -------------------------------------------------------------------------------- /src/github-issues/debug-5/test.do: -------------------------------------------------------------------------------- 1 | global GTOOLS_BETA=1 2 | global GTOOLS_GREGTABLE=1 3 | sysuse auto, clear 4 | greg price mpg rep78 5 | matlist e(V) 6 | reg price mpg rep78 7 | matlist e(V) 8 | -------------------------------------------------------------------------------- /src/github-issues/debug-6/greg-coredump.do: -------------------------------------------------------------------------------- 1 | local nobs 10000000 2 | 3 | clear 4 | set obs `nobs' 5 | gen groups = int(runiform() * 1000) 6 | gen rsort = rnormal() 7 | gen rvar = rnormal() 8 | gen ix = _n 9 | sort rsort 10 | local nprocessors = c(processors) 11 | gen e = rnormal() 12 | gen x = rnormal() 13 | gen y = x + e + groups/100 14 | gen g = mod(groups, 10) 15 | 16 | set rmsg on 17 | global GTOOLS_BETA=1 18 | global GTOOLS_TABLE=1 19 | greg y x, by(g) 20 | mata GtoolsRegress.b 21 | mata GtoolsRegress.se 22 | -------------------------------------------------------------------------------- /src/github-issues/debug-7/quantiles.do: -------------------------------------------------------------------------------- 1 | clear 2 | set obs 1000 3 | gen x = rnormal() 4 | gen e = rnormal() 5 | gen fe = mod(_n, 10) 6 | gen y = 3 * x^2 - x + fe + e 7 | gquantiles xbins = x, nq(252) xtile replace 8 | count if mi(xbins) 9 | * I'm not sure what happened but this seems fine? 
10 | -------------------------------------------------------------------------------- /src/github-issues/debug-8/test.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | gegen fe = group(rep78) 3 | l fe rep78 4 | gegen fe = group(rep78), missing replace hash(1) 5 | l fe rep78 6 | 7 | sysuse auto, clear 8 | replace make = "" if mod(_n, 7) == 0 9 | gegen fe = group(make) 10 | l fe make 11 | gegen fe = group(make), missing replace 12 | l fe make 13 | -------------------------------------------------------------------------------- /src/github-issues/debug-9/test.do: -------------------------------------------------------------------------------- 1 | exit, clear 2 | ./build.py --replace 3 | stata16-mp 4 | global GTOOLS_BETA = 1 5 | global GTOOLS_GREGTABLE = 1 6 | sysuse auto, clear 7 | gen _mpg = mpg 8 | 9 | greg price mpg _mpg, absorb(rep78) savecons 10 | reghdfe price mpg _mpg, absorb(rep78) 11 | mata GtoolsRegress.consest 12 | 13 | greg price mpg _mpg [aw=rep78], absorb(rep78) savecons 14 | reghdfe price mpg _mpg [aw=rep78], absorb(rep78) 15 | mata GtoolsRegress.consest 16 | 17 | greg price mpg _mpg , absorb(rep78 headroom) savecons 18 | reghdfe price mpg _mpg , absorb(rep78 headroom) 19 | mata GtoolsRegress.consest 20 | 21 | * Somehow this fails but generally works OK ): 22 | sysuse auto, clear 23 | foreach var in _a* _b* _c* _d* _e* _h* { 24 | cap drop `var' 25 | } 26 | reghdfe price mpg, absorb(_aa=rep78 _bb=headroom) resid(_hh) 27 | greg price mpg [aw=weight], absorb(rep78 headroom) savecons alphas(_cc _dd) resid(_ee) replace algorithm(it) 28 | greg price mpg [aw=weight], absorb(rep78 headroom) savecons alphas(_cc _dd) resid(_ee) replace algorithm(squarem) 29 | greg price mpg [aw=weight], absorb(rep78 headroom) savecons alphas(_cc _dd) resid(_ee) replace algorithm(cg) 30 | mata GtoolsRegress.consest 31 | mata reldif(12225.5, GtoolsRegress.consest) 32 | gstats tab _*, s(mean) 33 | 34 | cap drop _* 35 | 
reghdfe price [aw=weight], absorb(rep78 headroom) resid(_hdfe_price) 36 | reghdfe mpg [aw=weight], absorb(rep78 headroom) resid(_hdfe_mpg) 37 | gstats hdfe price mpg [aw=weight], absorb(rep78 headroom) gen(_g_price _g_mpg) replace 38 | reg _hdfe_price _g_price 39 | reg _hdfe_mpg _g_mpg 40 | gstats tab _* [aw=weight], s(mean) 41 | reg _hdfe_price _hdfe_mpg [aw=weight] 42 | predict _zz 43 | reg _g_price _g_mpg [aw=weight] 44 | 45 | * for example all these are fine 46 | clear 47 | set obs 100000 48 | gen group = mod(_n, 2) 49 | gen double f1 = round(12.2 * mod(_n, 3), 0.1) if runiform() > 0.05 50 | gen double f2 = round(20 * c(pi) * mod(_n, 5), 0.1) if runiform() > 0.05 51 | gen double f3 = round(9.72 * mod(_n, 41), 0.1) if runiform() > 0.05 52 | gen double x = round(mod(_n, 100), 0.1) if runiform() > 0.05 53 | gen double y = 123 * x + f1 + f2 + round(10000 * runiform(), 1) 54 | cap drop _* 55 | reghdfe y x if group == 1, absorb(_aa=f1 _bb=f2 _cc=f3) resid(_hh) 56 | reghdfe y x if group == 0, absorb( f1 f2 f3) 57 | greg y x, absorb(f1 f2 f3) savecons alphas(_dd _ee _ff) resid(_gg) replace by(group) 58 | mata GtoolsRegress.consest \ GtoolsRegress.r2 59 | gstats tab _*, s(mean) 60 | reg _aa _dd 61 | reg _bb _ee 62 | reg _cc _ff 63 | reg _hh _gg 64 | -------------------------------------------------------------------------------- /src/github-issues/email-10/bug.do: -------------------------------------------------------------------------------- 1 | !clear 2 | clear 3 | set more off 4 | graph drop _all 5 | 6 | set seed 1 7 | set obs 20 8 | g x = _n 9 | expand 500 10 | 11 | * Case 1: - collapsed means and SDs from gcollapse in line 36 are zero 12 | * g y = .01*(x)^1.2 + .1*invnorm(uniform()) 13 | * Case 2: - Now the collapsed means aren't zero but wrong 14 | g y = .01*(x)^1.2 + invnorm(uniform()) 15 | 16 | preserve 17 | gcollapse (count) obsy=y (sd) sdy=y (mean) meany=y, by(x) 18 | l 19 | restore 20 | * replace y = int(y) 21 | gcollapse (sd) sdy=y (mean) meany=y , by(x) 22 
| l 23 | -------------------------------------------------------------------------------- /src/github-issues/email-10/fix.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | set seed 1 4 | set obs 2 5 | g y = 1.23 6 | g o = 9 7 | l 8 | 9 | * clear 10 | * set obs 10000000 11 | * gen x = abs(runiform()) 12 | * gen y = abs(rnormal()) 13 | * set rmsg on 14 | * sum x y, meanonly 15 | * global GTOOLS_CALLER ghash 16 | * _gtools_internal, sumcheck(x y) 17 | * matrix list r(sumcheck) 18 | * sum x y 19 | 20 | preserve 21 | gcollapse (count) cy = y (first) fy = y (mean) o, freq(z) 22 | l 23 | restore, preserve 24 | gcollapse (count) y (first) fy = y (nunique) o, freq(z) 25 | l 26 | restore, preserve 27 | gcollapse (first) fy = y (count) y (mean) o, freq(z) 28 | l 29 | restore, preserve 30 | gcollapse (first) fy = y (count) cy = y (count) o, freq(z) 31 | l 32 | restore 33 | -------------------------------------------------------------------------------- /src/github-issues/email-10/gtools_test.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | graph drop _all 4 | 5 | set seed 1 // the seed doesn't matter as far as I can tell 6 | set obs 20 7 | g x = _n 8 | expand 500 9 | 10 | // Case 1: - collapsed means and SDs from gcollapse in line 36 are zero 11 | g y = .01*(x)^1.2 + .1*invnorm(uniform()) 12 | // Case 2: - Now the collapsed means aren't zero but wrong 13 | // g y = .01*(x)^1.2 + invnorm(uniform()) 14 | 15 | preserve 16 | collapse (count) obsy=y (sd) sdy=y (mean) meany=y , by(x) 17 | foreach v in obsy sdy meany { 18 | rename `v' `v'_stata_builtin 19 | label var `v' "" 20 | } 21 | tempfile 1 22 | save `1' 23 | restore 24 | 25 | preserve 26 | collapse (count) obsy=y (sd) sdy=y (mean) meany=y , by(x) 27 | foreach v in obsy sdy meany { 28 | rename `v' `v'_ftools 29 | label var `v' "" 30 | } 31 | tempfile 2 32 | save `2' 33 | restore 34 | 35 | preserve 
36 | gcollapse (count) obsy=y (sd) sdy=y (mean) meany=y , by(x) 37 | foreach v in obsy sdy meany { 38 | rename `v' `v'_gtools 39 | label var `v' "" 40 | } 41 | tempfile 3 42 | save `3' 43 | restore 44 | 45 | 46 | preserve 47 | gcollapse (sd) sdy=y (mean) meany=y , by(x) 48 | foreach v in sdy meany { 49 | rename `v' `v'_gtools1 50 | label var `v' "" 51 | } 52 | tempfile 4 53 | save `4' 54 | restore 55 | 56 | 57 | use `1', clear 58 | merge 1:1 x using `2' 59 | drop _merge 60 | merge 1:1 x using `3' 61 | drop _merge 62 | merge 1:1 x using `4' 63 | drop _merge 64 | order x meany* sd* obs* 65 | br 66 | local i 100 67 | twoway (line meany_s x) (line meany_f x) /// 68 | (line meany_gtools x) (line meany_gtools1 x) , name(g`i++') 69 | 70 | 71 | foreach var of varlist mean* { 72 | twoway scatter `var' meany_stata, name(g`i++') 73 | } 74 | -------------------------------------------------------------------------------- /src/github-issues/email-10/mwe.do: -------------------------------------------------------------------------------- 1 | clear 2 | set more off 3 | set seed 1 4 | set obs 2 5 | g y = 1.23 6 | l 7 | preserve 8 | gcollapse (count) cy = y (first) fy = y, freq(z) 9 | l 10 | restore 11 | gcollapse (first) fy = y (count) cy = y, freq(z) 12 | l 13 | -------------------------------------------------------------------------------- /src/github-issues/email-11/test.do: -------------------------------------------------------------------------------- 1 | sysuse auto, clear 2 | by foreign: egen _mean1 = mean(price - price[1]) 3 | by foreign: gegen _mean2 = mean(price - price[1]) 4 | gen zz = abs((_mean1 - _mean2) / _mean1) 5 | gstats sum zz 6 | 7 | capture program drop test 8 | program define test, byable(onecall) 9 | disp _by(), "`_byvars'" 10 | desc 11 | end 12 | test 13 | by foreign: test 14 | bysort mpg: test 15 | by foreign (price), sort: test 16 | 17 | clear 18 | set obs 10 19 | gen var = mod(_n, 3) 20 | gen y = _n 21 | gen u = runiform() 22 | cap noi by var: gegen x 
= mean(max(y, y[1])) 23 | by var (u), sort: gegen x = mean(max(y, y[1])) 24 | sort y 25 | bys var (u): gegen z = mean(max(y, y[1])) 26 | bys var (u): egen w = mean(max(y, y[1])) 27 | assert x == z 28 | assert x == w 29 | -------------------------------------------------------------------------------- /src/github-issues/email-12/test.do: -------------------------------------------------------------------------------- 1 | clear all 2 | set obs 100000 3 | 4 | gen x = rnormal() 5 | gen n = round(_n/10) 6 | 7 | * pause on 8 | * pause 9 | * 10 | * forvalues x = 1/100000{ 11 | * di "`x'" 12 | * gcollapse (max) maxx = x, by(n) merge 13 | * drop maxx 14 | * } 15 | 16 | * forvalues x = 1 / 100{ 17 | * di "`x'" 18 | * gcollapse (max) maxx = x, by(n) merge 19 | * drop maxx 20 | * } 21 | -------------------------------------------------------------------------------- /src/github-issues/email-12/test2.do: -------------------------------------------------------------------------------- 1 | * Notes: 2 | * 3 | * You commented out select chunks until you narrowed down the memory leak 4 | * 5 | * The following are not freed on purpose because they are standing by for strL vars and such 6 | * 7 | * allocated: st_info->strL_bybytes 8 | * allocated: st_info->strL_bytes 9 | * 10 | * This was the issue (they were not being freed): 11 | * 12 | * allocated: st_info->st_by_charx 13 | * allocated: st_info->st_by_numx 14 | 15 | clear all 16 | set obs 1000000 17 | 18 | gen x = rnormal() 19 | gen n = round(_n/10) 20 | 21 | gcollapse (max) maxx = x, by(n) merge forcemem v bench(3) 22 | drop maxx 23 | sleep 100 24 | 25 | forvalues i = 1 / 100 { 26 | di "`i'" 27 | gcollapse (max) maxx = x, by(n) merge forcemem 28 | drop maxx 29 | sleep 100 30 | } 31 | -------------------------------------------------------------------------------- /src/github-issues/email-13/dec_gtools.dta: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/email-13/dec_gtools.dta -------------------------------------------------------------------------------- /src/github-issues/email-13/dec_manual.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/email-13/dec_manual.dta -------------------------------------------------------------------------------- /src/github-issues/email-13/dec_stata.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/email-13/dec_stata.dta -------------------------------------------------------------------------------- /src/github-issues/email-13/example.do: -------------------------------------------------------------------------------- 1 | * use "test.dta", clear 2 | 3 | clear 4 | set obs 5111 5 | gen group = mod(_n, 26) 6 | gen w = abs(round(rnormal() * 13 + 20, 0.1)) 7 | gen inc = abs(round(rnormal() * 7000 + 1200)) 8 | 9 | local nq 10 10 | bysort group (inc): gen w_cum=sum(w) 11 | bysort group (inc): egen w_tot=sum(w) 12 | gen cum_share=w_cum/w_tot 13 | 14 | * Percentiles Manual 15 | preserve 16 | gen dec=floor((w_cum/w_tot)*`nq')*(100/`nq') 17 | bysort group dec: egen min_dec=min(w_cum) 18 | gen dec_manual=inc if min_dec==w_cum 19 | 20 | * gen dec=ceil(cum_share * `nq') * (100/`nq') 21 | * bysort group dec (inc): gen dec_manual = inc[_N] 22 | 23 | keep if !missing(dec_manual) 24 | keep group dec dec_manual 25 | duplicates drop 26 | isid group dec 27 | save "dec_manual.dta", replace 28 | restore 29 | 30 | * Percentiles Stata 31 | preserve 32 | levelsof group, local(group) 33 | foreach g of local group{ 34 | pctile dec_stata`g'=inc [aw=w] if group==`g', nq(`nq') genp(dec`g') 35 | } 
36 | keep if !missing(dec1) 37 | drop group 38 | reshape long dec_stata dec, i(cum_share) j(group) 39 | keep group dec dec_stata 40 | isid group dec 41 | save "dec_stata.dta", replace 42 | restore 43 | 44 | * Percentiles Gtools 45 | preserve 46 | gquantiles dec_gtools=inc [aw=w], pctile cutby strict by(group) nq(`nq') genp(dec) 47 | keep if !missing(dec) 48 | keep group dec dec_gtools 49 | isid group dec 50 | save "dec_gtools.dta", replace 51 | restore 52 | 53 | * Merge for Comparison 54 | use "dec_manual.dta", clear 55 | merge 1:1 group dec using "dec_stata.dta", keepusing(dec_stata) nogen 56 | merge 1:1 group dec using "dec_gtools.dta", keepusing(dec_gtools) nogen 57 | 58 | *********************************************************************** 59 | * Debugging * 60 | *********************************************************************** 61 | 62 | * Narrowed the issue to this: 63 | local nq = 10 64 | sysuse auto, clear 65 | keep if foreign 66 | gen w = 1 67 | gquantiles g1=price [fw = 1], cutby pctile nq(`nq') by(foreign) strict 68 | gquantiles g2=price [fw = 1], cutby pctile nq(`nq') by(foreign) strict xtile(x1) 69 | 70 | * g2 is correct but g1 is not. It turns out there was a bug in the code 71 | * to read in the data with by() and weights when only pctile requested. 
72 | -------------------------------------------------------------------------------- /src/github-issues/email-14/gtools_bug.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/email-14/gtools_bug.dta -------------------------------------------------------------------------------- /src/github-issues/email-14/test.do: -------------------------------------------------------------------------------- 1 | use ./gtools_bug.dta, clear 2 | gegen a=sd(g12) [aw=share], by(time) replace 3 | gegen b=sd(g36) [aw=share], by(time) replace 4 | gegen s=total(share), by(time) 5 | -------------------------------------------------------------------------------- /src/gtools.pkg: -------------------------------------------------------------------------------- 1 | v 1.11.8 2 | d 3 | d 'GTOOLS': Faster implementation of common Stata commands optimized for large datasets 4 | d 5 | d Faster Stata for big data. Gtools provides a hash-based implementation 6 | d of common Stata commands using C plugins for a massive speed 7 | d improvement. Gtools implements gcollapse, greshape, gquantiles 8 | d (pctile, xtile, and _pctile), gcontract, gegen, gisid, glevelsof, 9 | d gunique, gdistinct, gduplicates, gtop, and gstats (winsor, summarize, 10 | d tabstat). Syntax is largely analogous to their native counterparts 11 | d (see help gtools for details). This package was inspired by Sergio 12 | d Correia's ftools. 
13 | d 14 | d KW: plugin 15 | d KW: gtools 16 | d KW: collapse 17 | d KW: reshape 18 | d KW: regress 19 | d KW: ivregress 20 | d KW: glm 21 | d KW: logit 22 | d KW: poisson 23 | d KW: residualize 24 | d KW: hdfe 25 | d KW: contract 26 | d KW: egen 27 | d KW: xtile 28 | d KW: fastxtile 29 | d KW: pctile 30 | d KW: _pctile 31 | d KW: levelsof 32 | d KW: toplevelsof 33 | d KW: isid 34 | d KW: duplicates 35 | d KW: unique 36 | d KW: distinct 37 | d KW: sort 38 | d KW: gsort 39 | d KW: gegen 40 | d KW: fasterxtile 41 | d KW: gquantiles 42 | d KW: gcollapse 43 | d KW: greshape 44 | d KW: gregress 45 | d KW: givregress 46 | d KW: gglm 47 | d KW: gpoisson 48 | d KW: glogit 49 | d KW: gcontract 50 | d KW: gisid 51 | d KW: gduplicates 52 | d KW: glevelsof 53 | d KW: gtoplevelsof 54 | d KW: gunique 55 | d KW: gdistinct 56 | d KW: hashsort 57 | d KW: winsor 58 | d KW: summarize 59 | d KW: tabstat 60 | d 61 | d Requires: Stata version 13.1 62 | d 63 | d Author: Mauricio Caceres Bravo 64 | d Support: email mauricio.caceres.bravo@gmail.com 65 | d 66 | d Distribution-Date: 20240628 67 | d 68 | f _gtools_internal.ado 69 | f _gtools_internal.mata 70 | f gcollapse.ado 71 | f gcontract.ado 72 | f gegen.ado 73 | f gquantiles.ado 74 | f fasterxtile.ado 75 | f gunique.ado 76 | f gdistinct.ado 77 | f glevelsof.ado 78 | f gtop.ado 79 | f gtoplevelsof.ado 80 | f gisid.ado 81 | f gduplicates.ado 82 | f hashsort.ado 83 | f greshape.ado 84 | f greg.ado 85 | f gregress.ado 86 | f givregress.ado 87 | f gglm.ado 88 | f gpoisson.ado 89 | f gstats.ado 90 | f gtools.ado 91 | f gcollapse.sthlp 92 | f gcontract.sthlp 93 | f gegen.sthlp 94 | f gquantiles.sthlp 95 | f fasterxtile.sthlp 96 | f gunique.sthlp 97 | f gdistinct.sthlp 98 | f glevelsof.sthlp 99 | f gtop.sthlp 100 | f gtoplevelsof.sthlp 101 | f gisid.sthlp 102 | f gduplicates.sthlp 103 | f hashsort.sthlp 104 | f greshape.sthlp 105 | f greg.sthlp 106 | f gregress.sthlp 107 | f givregress.sthlp 108 | f gglm.sthlp 109 | f gstats.sthlp 110 | f 
gstats_residualize.sthlp 111 | f gstats_hdfe.sthlp 112 | f gstats_winsor.sthlp 113 | f gstats_tab.sthlp 114 | f gstats_sum.sthlp 115 | f gstats_summarize.sthlp 116 | f gstats_transform.sthlp 117 | f gstats_range.sthlp 118 | f gstats_moving.sthlp 119 | f gtools.sthlp 120 | f lgtools.mlib 121 | f gtools_windows_v2.plugin 122 | f gtools_unix_v2.plugin 123 | f gtools_macosx_v2.plugin 124 | f gtools_windows_v3.plugin 125 | f gtools_unix_v3.plugin 126 | f gtools_macosx_v3.plugin 127 | -------------------------------------------------------------------------------- /src/plugin/api/groupby/transforms.c: -------------------------------------------------------------------------------- 1 | void GtoolsTransformScaleVector ( 2 | ST_double *source, 3 | ST_double *target, 4 | GT_size N, 5 | ST_double scale) 6 | { /* Writes source[i] * scale into target[i] for i in [0, N); source and target may be the same buffer. */ 7 | GT_size i; 8 | if ( source == target ) { /* in place: scale the shared buffer directly */ 9 | for (i = 0; i < N; i++) 10 | target[i] *= scale; 11 | } 12 | else { /* out of place: read source (left untouched), never the prior contents of target */ 13 | for (i = 0; i < N; i++) 14 | target[i] = source[i] * scale; 15 | } 16 | } 17 | 18 | void GtoolsTransformScaleMatrix ( 19 | ST_double *source, 20 | ST_double *target, 21 | GT_size K, 22 | GT_size N, 23 | ST_double *scale) 24 | { 25 | GT_size k; 26 | ST_double *src = source, *trg = target; 27 | for (k = 0; k < K; k++, src += N, trg += N) { 28 | GtoolsTransformScaleVector(src, trg, N, scale[k]); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/plugin/api/groupby/transforms_unweighted.c: -------------------------------------------------------------------------------- 1 | void GtoolsTransformIndex ( 2 | ST_double *source, 3 | ST_double *target, 4 | GT_size *index, 5 | GT_size N, 6 | ST_double statcode) 7 | { 8 | if ( statcode == -2 ) { 9 | GtoolsTransformDeMeanIndex(source, target, index, N); 10 | } 11 | } 12 | 13 | void GtoolsTransformDeMeanIndex ( 14 | ST_double *source, 15 | ST_double *target, 16 | GT_size *index, 17 | GT_size N) 18 | { 19 | GT_size i; 20 | ST_double z = 
GtoolsStatsMeanIndex(source, index, N); 21 | for (i = 0; i < N; i++) { 22 | target[index[i]] = source[index[i]] - z; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/plugin/api/groupby/transforms_weights.c: -------------------------------------------------------------------------------- 1 | void GtoolsTransformIndexWeighted ( 2 | ST_double *source, 3 | ST_double *weights, 4 | ST_double *target, 5 | GT_size *index, 6 | GT_size N, 7 | ST_double statcode) 8 | { 9 | if ( statcode == -2 ) { 10 | GtoolsTransformDeMeanIndexWeighted(source, weights, target, index, N); 11 | } 12 | } 13 | 14 | void GtoolsTransformDeMeanIndexWeighted ( 15 | ST_double *source, 16 | ST_double *weights, 17 | ST_double *target, 18 | GT_size *index, 19 | GT_size N) 20 | { 21 | GT_size i; 22 | ST_double z = GtoolsStatsMeanIndexWeighted(source, weights, index, N); 23 | for (i = 0; i < N; i++) { 24 | target[index[i]] = source[index[i]] - z; 25 | } 26 | } 27 | 28 | /********************************************************************** 29 | * Weighted * 30 | **********************************************************************/ 31 | 32 | void GtoolsTransformBiasedStandardizeVector ( 33 | ST_double *source, 34 | ST_double *target, 35 | ST_double *weights, 36 | GT_size N, 37 | ST_double *sd) 38 | { 39 | GT_size i; 40 | ST_double z = GtoolsStatsBiasedStd(source, N, weights); 41 | if ( source == target ) { 42 | for (i = 0; i < N; i++) 43 | if ( z != 0 ) target[i] = source[i] / z; 44 | } 45 | else { 46 | for (i = 0; i < N; i++) 47 | if ( z != 0 ) target[i] /= z; 48 | } 49 | if ( sd != NULL ) *sd = z; 50 | } 51 | 52 | void GtoolsTransformBiasedStandardizeMatrix ( 53 | ST_double *source, 54 | ST_double *target, 55 | ST_double *weights, 56 | GT_size K, 57 | GT_size N, 58 | ST_double *sd) 59 | { 60 | GT_size k; 61 | ST_double *src = source, *trg = target; 62 | for (k = 0; k < K; k++, src += N, trg += N) { 63 | GtoolsTransformBiasedStandardizeVector(src, 
trg, weights, N, sd == NULL? NULL: sd + k); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/plugin/api/hashing.h: -------------------------------------------------------------------------------- 1 | #ifndef GTOOLSHASHINGAPI 2 | #define GTOOLSHASHINGAPI 3 | 4 | #ifndef GTOOLS_TYPES 5 | #include <../common/gttypes.h> 6 | #endif 7 | 8 | #ifndef GTOOLSOMP 9 | #define GTOOLSOMP 0 10 | #endif 11 | 12 | struct GtoolsHash { 13 | // Pointers to existing objects 14 | void *x; 15 | void *xptr; 16 | GT_size offset; 17 | GT_size nobs; 18 | GT_size _nobspanel; 19 | GT_size _nobsinit; 20 | GT_size kvars; 21 | GT_int *types; 22 | GT_bool *invert; 23 | // Variables to be computed 24 | GT_bool radixOK; 25 | GT_bool bijectOK; 26 | GT_bool sorted; 27 | GT_bool allNumeric; 28 | GT_bool allInteger; 29 | GT_size rowbytes; 30 | GT_size max1; 31 | GT_size nlevels; 32 | // Aux variables to be allocated 33 | GT_size *sizes; 34 | GT_bool allocSizes; 35 | GT_size *positions; 36 | GT_bool allocPositions; 37 | GT_size *index; 38 | GT_bool allocIndex; 39 | GT_size *indexj; 40 | GT_bool allocIndexj; 41 | GT_size *nj; 42 | GT_bool allocNj; 43 | GT_size *info; 44 | GT_bool allocInfo; 45 | // Hash 46 | uint64_t *h1ptr; 47 | uint64_t *h2ptr; 48 | uint64_t *h3ptr; 49 | uint64_t *hash1; 50 | uint64_t *hash2; 51 | uint64_t *hash3; 52 | GT_bool allocHash1; 53 | GT_bool allocHash2; 54 | GT_bool allocHash3; 55 | // Misc 56 | ST_double *hdfeMeanBuffer; 57 | ST_double *hdfeBuffer; 58 | ST_double *hdfeGammaSource; 59 | ST_double *hdfeGammaTarget; 60 | GT_bool hdfeMeanBufferAlloc; 61 | GT_bool hdfeBufferAlloc; 62 | GT_bool hdfeFallback; 63 | GT_bool hdfeTraceIter; 64 | GT_bool hdfeStandardize; 65 | GT_size hdfeIter; 66 | GT_size hdfeFeval; 67 | GT_size hdfeMaxIter; 68 | GT_size hdfeRc; 69 | }; 70 | 71 | void GtoolsHashInit ( 72 | struct GtoolsHash *GtoolsHashInfo, 73 | void *x, 74 | GT_size nobs, 75 | GT_size kvars, 76 | GT_int *types, 77 | GT_bool 
*invert 78 | ); 79 | 80 | void GtoolsHashAbsorbByLoop (struct GtoolsHash *GtoolsHashInfo, GT_size K); 81 | GT_int GtoolsHashPanelAbsorb (struct GtoolsHash *GtoolsHashInfo, GT_size K, GT_size N); 82 | GT_int GtoolsHashSetupAbsorb ( 83 | void *FE, 84 | struct GtoolsHash *GtoolsHashInfo, 85 | GT_size N, 86 | GT_size K, 87 | GT_int *types, 88 | GT_size *offsets 89 | ); 90 | 91 | GT_int GtoolsMapIndex (struct GtoolsHash *GtoolsHashInfo); 92 | GT_int GtoolsHashSetup (struct GtoolsHash *GtoolsHashInfo); 93 | GT_int GtoolsHashSort (struct GtoolsHash *GtoolsHashInfo); 94 | GT_int GtoolsHashPanel (struct GtoolsHash *GtoolsHashInfo); 95 | GT_int GtoolsHashPanelBijection (struct GtoolsHash *GtoolsHashInfo); 96 | GT_int GtoolsHashPanel128 (struct GtoolsHash *GtoolsHashInfo); 97 | GT_int GtoolsHashPanelSorted (struct GtoolsHash *GtoolsHashInfo); 98 | 99 | void GtoolsHashCheckNumeric (struct GtoolsHash *GtoolsHashInfo); 100 | void GtoolsHashCheckInteger (struct GtoolsHash *GtoolsHashInfo); 101 | void GtoolsHashCheckSorted (struct GtoolsHash *GtoolsHashInfo); 102 | void GtoolsHashFree (struct GtoolsHash *GtoolsHashInfo); 103 | 104 | void GtoolsHashCheckBijection ( 105 | struct GtoolsHash *GtoolsHashInfo, 106 | GT_int *maxs, 107 | GT_int *mins, 108 | GT_bool *allMiss, 109 | GT_bool *anyMiss 110 | ); 111 | 112 | void GtoolsHashBijection ( 113 | struct GtoolsHash *GtoolsHashInfo, 114 | GT_int *maxs, 115 | GT_int *mins, 116 | GT_bool *allMiss, 117 | GT_bool *anyMiss 118 | ); 119 | 120 | #endif 121 | -------------------------------------------------------------------------------- /src/plugin/collapse/gtools_math.h: -------------------------------------------------------------------------------- 1 | #ifndef GTOOLS_MATH 2 | #define GTOOLS_MATH 3 | 4 | ST_double gf_switch_fun (char *fname, ST_double v[], const GT_size start, const GT_size end); 5 | ST_double gf_switch_fun_code (ST_double fcode, ST_double v[], const GT_size start, const GT_size end); 6 | ST_double gf_code_fun (char * 
fname); 7 | 8 | ST_double gf_array_dquantile_range ( 9 | ST_double v[], 10 | const GT_size start, 11 | const GT_size end, 12 | const ST_double quantile 13 | ); 14 | 15 | ST_double gf_array_dsum_range (const ST_double v[], const GT_size start, const GT_size end); 16 | ST_double gf_array_dmean_range (const ST_double v[], const GT_size start, const GT_size end); 17 | ST_double gf_array_dgeomean_range (const ST_double v[], const GT_size start, const GT_size end); 18 | ST_double gf_array_dsd_range (const ST_double v[], const GT_size start, const GT_size end); 19 | ST_double gf_array_dvar_range (const ST_double v[], const GT_size start, const GT_size end); 20 | ST_double gf_array_dcv_range (const ST_double v[], const GT_size start, const GT_size end); 21 | ST_double gf_array_dmax_range (const ST_double v[], const GT_size start, const GT_size end); 22 | ST_double gf_array_dmin_range (const ST_double v[], const GT_size start, const GT_size end); 23 | ST_double gf_array_drange_range (const ST_double v[], const GT_size start, const GT_size end); 24 | 25 | ST_double gf_array_dsemean_range (const ST_double v[], const GT_size start, const GT_size end); 26 | ST_double gf_array_dsebinom_range (const ST_double v[], const GT_size start, const GT_size end); 27 | ST_double gf_array_dsepois_range (const ST_double v[], const GT_size start, const GT_size end); 28 | 29 | ST_double gf_array_dskew_range (const ST_double v[], const GT_size start, const GT_size end); 30 | ST_double gf_array_dkurt_range (const ST_double v[], const GT_size start, const GT_size end); 31 | 32 | ST_double gf_array_dmedian_range (ST_double v[], const GT_size start, const GT_size end); 33 | ST_double gf_array_diqr_range (ST_double v[], const GT_size start, const GT_size end); 34 | 35 | ST_double gf_array_dgini_range (ST_double v[], const GT_size start, const GT_size end); 36 | ST_double gf_array_dginidrop_range (ST_double v[], const GT_size start, const GT_size end); 37 | ST_double gf_array_dginikeep_range 
(ST_double v[], const GT_size start, const GT_size end); 38 | 39 | int gf_qsort_compare (const void * a, const void * b); 40 | GT_bool gf_array_dsorted_range (const ST_double v[], const GT_size start, const GT_size end); 41 | GT_bool gf_array_dsame (const ST_double *v, const GT_size N); 42 | 43 | #endif 44 | 45 | // -23 // variance 46 | // -24 // cv 47 | // -25 // range 48 | // 1000 + # // #th smallest 49 | // -1000 - # // #th largest 50 | // 1000.5 + # // raw #th smallest 51 | // -1000.5 - # // raw #th largest 52 | -------------------------------------------------------------------------------- /src/plugin/collapse/gtools_math_unw.h: -------------------------------------------------------------------------------- 1 | #ifndef GTOOLS_MATH_UNW 2 | #define GTOOLS_MATH_UNW 3 | 4 | ST_double gf_switch_fun_code_unw ( 5 | ST_double fcode, 6 | ST_double *v, 7 | GT_size N, 8 | GT_size vcount, 9 | ST_double *p_buffer 10 | ); 11 | 12 | ST_double gf_array_dquantile_unweighted ( 13 | ST_double *v, 14 | GT_size N, 15 | ST_double quantile, 16 | ST_double *p_buffer 17 | ); 18 | 19 | ST_double gf_array_dselect_unweighted ( 20 | ST_double *v, 21 | GT_size N, 22 | GT_int sth, 23 | GT_size end, 24 | ST_double *p_buffer 25 | ); 26 | 27 | ST_double gf_array_diqr_unweighted ( 28 | ST_double *v, 29 | GT_size N, 30 | ST_double *p_buffer 31 | ); 32 | 33 | ST_double gf_array_dmean_unweighted ( 34 | ST_double *v, 35 | GT_size N 36 | ); 37 | 38 | ST_double gf_array_dgeomean_unweighted ( 39 | ST_double *v, 40 | GT_size N 41 | ); 42 | 43 | ST_double gf_array_dsd_unweighted ( 44 | ST_double *v, 45 | GT_size N 46 | ); 47 | 48 | ST_double gf_array_dvar_unweighted ( 49 | ST_double *v, 50 | GT_size N 51 | ); 52 | 53 | ST_double gf_array_dcv_unweighted ( 54 | ST_double *v, 55 | GT_size N 56 | ); 57 | 58 | ST_double gf_array_dsemean_unweighted ( 59 | ST_double *v, 60 | GT_size N 61 | ); 62 | 63 | ST_double gf_array_dsebinom_unweighted ( 64 | ST_double *v, 65 | GT_size N 66 | ); 67 | 68 | ST_double 
gf_array_dsepois_unweighted ( 69 | ST_double *v, 70 | GT_size N 71 | ); 72 | 73 | ST_double gf_array_dkurt_unweighted ( 74 | ST_double *v, 75 | GT_size N 76 | ); 77 | 78 | ST_double gf_array_dskew_unweighted ( 79 | ST_double *v, 80 | GT_size N 81 | ); 82 | 83 | ST_double gf_array_dgini_unweighted ( 84 | ST_double *v, 85 | GT_size N, 86 | ST_double *p_buffer 87 | ); 88 | 89 | ST_double gf_array_dginidrop_unweighted ( 90 | ST_double *v, 91 | GT_size N, 92 | ST_double *p_buffer 93 | ); 94 | 95 | ST_double gf_array_dginikeep_unweighted ( 96 | ST_double *v, 97 | GT_size N, 98 | ST_double *p_buffer 99 | ); 100 | 101 | #endif 102 | -------------------------------------------------------------------------------- /src/plugin/collapse/gtools_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef GTOOLS_UTILS 2 | #define GTOOLS_UTILS 3 | 4 | ST_double gf_benchmark (char *fname); 5 | ST_double gf_query_free_space (char *fname); 6 | ST_double gf_dipow(ST_double base, GT_size exp); 7 | GT_size gf_iipow(GT_size base, GT_size exp); 8 | void gf_split_path_file(char** p, char** f, char *pf); 9 | 10 | void gf_write_collapsed( 11 | char *collapsed_file, 12 | ST_double *collapsed_data, 13 | GT_size kstart, 14 | GT_size kend, 15 | GT_size J 16 | ); 17 | 18 | void gf_read_collapsed( 19 | char *collapsed_file, 20 | ST_double *collapsed_data, 21 | GT_size knum, 22 | GT_size J 23 | ); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/plugin/common/RadixSortTesting: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/plugin/common/RadixSortTesting -------------------------------------------------------------------------------- /src/plugin/common/RadixSortTypedIndex.c: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/plugin/common/RadixSortTypedIndex.c -------------------------------------------------------------------------------- /src/plugin/common/fixes.c: -------------------------------------------------------------------------------- 1 | #ifdef __APPLE__ 2 | #else 3 | void * memcpy (void *dest, const void *src, size_t n); 4 | 5 | /** 6 | * @brief Implement memcpy as a dummy function for memset (not on OSX) 7 | * 8 | * Stata requires plugins to be compied as shared executables. Since 9 | * this is being compiled on a relatively new linux system (by 2017 10 | * standards), some of the dependencies set in this way cannot be 11 | * fulfilled by older Linux systems. In particular, using memcpy as 12 | * provided by my system creates a dependency to Glib 2.14, which cannot 13 | * be fulfilled on some older systems (notably the servers where I 14 | * intend to use the plugin; hence I implement memcpy and get rid of 15 | * that particular dependency). 
16 | * 17 | * @param dest pointer to place in memory to copy @src 18 | * @param src pointer to place in memory that is source of data 19 | * @param n how many bytes to copy 20 | * @return move @src to @dest 21 | */ 22 | void * memcpy (void *dest, const void *src, size_t n) 23 | { 24 | return memmove(dest, src, n); 25 | } 26 | #endif 27 | 28 | // TODO: nice platform-specific way to profile time; the below is hack-ish 29 | 30 | #if defined(_WIN64) || defined(_WIN64) || defined(__MINGW32__) || defined(__MINGW64__) 31 | 32 | #define GTOOLS_TIMER(GtoolsTimerVariable) clock_t (GtoolsTimerVariable) = clock(); 33 | #define GTOOLS_RUNNING_TIMER(GtoolsTimerVariable, msg) sf_running_timer(&GtoolsTimerVariable, msg) 34 | #define GTOOLS_UPDATE_TIMER(GtoolsTimerVariable) GtoolsTimerVariable = clock() 35 | 36 | #elif defined(__APPLE__) 37 | 38 | #define GTOOLS_TIMER(GtoolsTimerVariable) clock_t (GtoolsTimerVariable) = clock(); 39 | #define GTOOLS_RUNNING_TIMER(GtoolsTimerVariable, msg) sf_running_timer(&GtoolsTimerVariable, msg) 40 | #define GTOOLS_UPDATE_TIMER(GtoolsTimerVariable) GtoolsTimerVariable = clock() 41 | 42 | #else 43 | 44 | void sf_running_timespec (struct timespec *timer, const char *msg); 45 | void sf_running_timespec (struct timespec *timer, const char *msg) 46 | { 47 | struct timespec update; clock_gettime(CLOCK_REALTIME, &update); 48 | double diff = (double) (update.tv_nsec - timer->tv_nsec) / 1e9 + 49 | (double) (update.tv_sec - timer->tv_sec); 50 | 51 | sf_printf (msg); 52 | sf_printf (" (%.3f seconds).\n", diff); 53 | *timer = update; 54 | } 55 | 56 | #define GTOOLS_TIMER(GtoolsTimerVariable) \ 57 | struct timespec (GtoolsTimerVariable); \ 58 | clock_gettime(CLOCK_REALTIME, &GtoolsTimerVariable) 59 | 60 | #define GTOOLS_RUNNING_TIMER(GtoolsTimerVariable, msg) \ 61 | sf_running_timespec(&GtoolsTimerVariable, msg) 62 | 63 | #define GTOOLS_UPDATE_TIMER(GtoolsTimerVariable) \ 64 | clock_gettime(CLOCK_REALTIME, &GtoolsTimerVariable) 65 | 66 | #endif 67 | 
-------------------------------------------------------------------------------- /src/plugin/common/gttypes.h: -------------------------------------------------------------------------------- 1 | #ifndef GTOOLS_TYPES 2 | #define GTOOLS_TYPES 3 | 4 | #include 5 | #include 6 | 7 | typedef uint8_t GT_bool ; 8 | typedef uint64_t GT_size ; 9 | typedef int64_t GT_int ; 10 | typedef uint16_t GT_smallsize ; 11 | typedef int16_t GT_smallint ; 12 | 13 | // #if defined(_WIN64) || defined(_WIN32) 14 | // # define GT_size_cfmt "%lu" 15 | // # define GT_size_sfmt "lu" 16 | // # define GT_int_cfmt "%ld" 17 | // # define GT_int_sfmt "ld" 18 | // #else 19 | // # define GT_size_cfmt "%'lu" 20 | // # define GT_size_sfmt "lu" 21 | // # define GT_int_cfmt "%'ld" 22 | // # define GT_int_sfmt "ld" 23 | // #endif 24 | 25 | // #if defined(_WIN64) || defined(_WIN32) 26 | // # define GT_size_cfmt "%I64u" 27 | // # define GT_size_sfmt "I64u" 28 | // # define GT_int_cfmt "%I64d" 29 | // # define GT_int_sfmt "I64d" 30 | // #else 31 | // # define GT_size_cfmt "%'I64u" 32 | // # define GT_size_sfmt "I64u" 33 | // # define GT_int_cfmt "%'I64d" 34 | // # define GT_int_sfmt "I64d" 35 | // #endif 36 | 37 | // NOTE: Comma-printing via locale messes up on some systems, presumably 38 | // because their locale is reset in a way that makes Stata give errors. 39 | // Best to not rely on locale at all. 
#define BUF_MAX 4096

/**
 * @brief Print a formatted message to both stdout and Stata (debug aid)
 *
 * The message is formatted into a BUF_MAX-byte buffer; anything longer
 * is truncated rather than written past the end of the buffer.
 *
 * @param *fmt a string to format
 * @param ... Arguments to pass to printf
 * @return Prints to stdout and to Stata's console
 */
void sf_printf_debug (const char *fmt, ...)
{
    va_list args;
    char buf[BUF_MAX];

    va_start (args, fmt);
    // vsnprintf writes at most sizeof buf bytes, terminator included;
    // the old vsprintf call could overrun the stack buffer.
    vsnprintf (buf, sizeof buf, fmt, args);
    va_end (args);

    printf ("%s", buf);
    SF_display (buf);
}

/**
 * @brief Short wrapper to print to Stata
 *
 * Basic wrapper to print formatted strings to Stata. The message is
 * formatted into a BUF_MAX-byte buffer; anything longer is truncated.
 *
 * @param *fmt a string to format
 * @param ... Arguments to pass to printf
 * @return Prints to Stata's console
 */
void sf_printf (const char *fmt, ...)
{
    va_list args;
    char buf[BUF_MAX];

    va_start (args, fmt);
    vsnprintf (buf, sizeof buf, fmt, args);
    va_end (args);

    SF_display (buf);
}

/**
 * @brief Short wrapper to print error to Stata
 *
 * Basic wrapper to print formatted error strings to Stata. The message
 * is formatted into a BUF_MAX-byte buffer; anything longer is truncated.
 *
 * @param *fmt a string to format
 * @param ... Arguments to pass to printf
 * @return Prints to Stata's console as an error
 */
void sf_errprintf (const char *fmt, ...)
{
    va_list args;
    char buf[BUF_MAX];

    va_start (args, fmt);
    vsnprintf (buf, sizeof buf, fmt, args);
    va_end (args);

    SF_error (buf);
}
(struct StataInfo *st_info); 26 | ST_retcode sf_byx_save_top (struct StataInfo *st_info, GT_size ntop, GT_size *topix); 27 | 28 | void sf_format_size (GT_size n, char *out); 29 | 30 | ST_retcode sf_scalar_int (char *st_scalar, GT_int *sval); 31 | ST_retcode sf_scalar_size (char *st_scalar, GT_size *sval); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/plugin/hash/gtools_hash.h: -------------------------------------------------------------------------------- 1 | #ifndef GTOOLS_HASH 2 | #define GTOOLS_HASH 3 | 4 | #define RADIX_SHIFT 24 5 | 6 | int gf_hash ( 7 | uint64_t *h1, 8 | uint64_t *h2, 9 | struct StataInfo *st_info, 10 | GT_size *ix 11 | ); 12 | 13 | int gf_biject_varlist (uint64_t *h1, struct StataInfo *st_info); 14 | 15 | int gf_panelsetup ( 16 | uint64_t *h1, 17 | uint64_t *h2, 18 | struct StataInfo *st_info, 19 | GT_size *ix, 20 | const GT_bool hash_level 21 | ); 22 | 23 | int gf_check_allequal (uint64_t *hash, GT_size start, GT_size end); 24 | int gf_panelsetup_bijection (uint64_t *h1, struct StataInfo *st_info); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/plugin/hash/gtools_sort.h: -------------------------------------------------------------------------------- 1 | #ifndef GTOOLS_SORT 2 | #define GTOOLS_SORT 3 | 4 | typedef union { 5 | struct { 6 | uint32_t c8[256]; 7 | uint32_t c7[256]; 8 | uint32_t c6[256]; 9 | uint32_t c5[256]; 10 | uint32_t c4[256]; 11 | uint32_t c3[256]; 12 | uint32_t c2[256]; 13 | uint32_t c1[256]; 14 | }; 15 | uint32_t counts[256 * 8]; 16 | } radixCounts8; 17 | 18 | struct radixCounts16 { 19 | uint32_t *c4; 20 | uint32_t *c3; 21 | uint32_t *c2; 22 | uint32_t *c1; 23 | }; 24 | 25 | struct radixCounts16_32 { 26 | uint32_t *c2; 27 | uint32_t *c1; 28 | }; 29 | 30 | struct radixCounts12_24 { 31 | uint32_t *c2; 32 | uint32_t *c1; 33 | }; 34 | 35 | struct radixCounts8_16 { 36 | uint32_t *c2; 37 | uint32_t *c1; 38 
| }; 39 | 40 | ST_retcode gf_sort_hash (uint64_t *hash, GT_size *index, GT_size N, GT_bool verbose, GT_size ctol); 41 | ST_retcode gf_radix_sort8 (uint64_t *hash, GT_size *index, GT_size N); 42 | ST_retcode gf_radix_sort16 (uint64_t *hash, GT_size *index, GT_size N); 43 | ST_retcode gf_radix_sort16_32 (uint64_t *hash, GT_size *index, GT_size N); 44 | ST_retcode gf_radix_sort12_24 (uint64_t *hash, GT_size *index, GT_size N); 45 | ST_retcode gf_radix_sort8_16 (uint64_t *hash, GT_size *index, GT_size N); 46 | ST_retcode gf_counting_sort (uint64_t *hash, GT_size *index, GT_size N, uint64_t min, uint64_t max); 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /src/plugin/lib: -------------------------------------------------------------------------------- 1 | ../../lib -------------------------------------------------------------------------------- /src/plugin/quantiles/gquantiles_math.h: -------------------------------------------------------------------------------- 1 | #ifndef GQUANTILES_MATH 2 | #define GQUANTILES_MATH 3 | 4 | void gf_quantiles_nq ( 5 | ST_double *qout, 6 | ST_double *x, 7 | GT_size nquants, 8 | GT_size N, 9 | GT_size kx 10 | ); 11 | 12 | void gf_quantiles ( 13 | ST_double *qout, 14 | ST_double *x, 15 | ST_double *quants, 16 | GT_size nquants, 17 | GT_size N, 18 | GT_size kx 19 | ); 20 | 21 | void gf_quantiles_nq_altdef ( 22 | ST_double *qout, 23 | ST_double *x, 24 | GT_size nquants, 25 | GT_size N, 26 | GT_size kx 27 | ); 28 | 29 | void gf_quantiles_altdef ( 30 | ST_double *qout, 31 | ST_double *x, 32 | ST_double *quants, 33 | GT_size nquants, 34 | GT_size N, 35 | GT_size kx 36 | ); 37 | 38 | void gf_quantiles_nq_qselect ( 39 | ST_double *qout, 40 | ST_double *x, 41 | GT_size nquants, 42 | GT_size N 43 | ); 44 | 45 | void gf_quantiles_qselect ( 46 | ST_double *qout, 47 | ST_double *x, 48 | ST_double *quants, 49 | GT_size nquants, 50 | GT_size N 51 | ); 52 | 53 | void 
gf_quantiles_nq_qselect_altdef ( 54 | ST_double *qout, 55 | ST_double *x, 56 | GT_size nquants, 57 | GT_size N 58 | ); 59 | 60 | void gf_quantiles_qselect_altdef ( 61 | ST_double *qout, 62 | ST_double *x, 63 | ST_double *quants, 64 | GT_size nquants, 65 | GT_size N 66 | ); 67 | 68 | GT_size gf_quantiles_gcd ( 69 | GT_size a, 70 | GT_size b 71 | ); 72 | 73 | #endif 74 | -------------------------------------------------------------------------------- /src/plugin/quantiles/gquantiles_math_w.h: -------------------------------------------------------------------------------- 1 | #ifndef GQUANTILES_MATH_W 2 | #define GQUANTILES_MATH_W 3 | 4 | void gf_quantiles_nq_w ( 5 | ST_double *qout, 6 | ST_double *x, 7 | GT_size nquants, 8 | GT_size N, 9 | GT_size kx 10 | ); 11 | 12 | void gf_quantiles_w ( 13 | ST_double *qout, 14 | ST_double *x, 15 | ST_double *quants, 16 | GT_size nquants, 17 | GT_size N, 18 | GT_size kx 19 | ); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/plugin/quantiles/gquantiles_utils.c: -------------------------------------------------------------------------------- 1 | GT_size gf_xtile_clean ( 2 | ST_double *x, 3 | GT_size lsize, 4 | GT_bool dropmiss, 5 | GT_bool dedup) 6 | { 7 | GT_size i, _lsize; 8 | GT_bool sortme, dedupcheck; 9 | 10 | if ( lsize > 1 ) { 11 | _lsize = lsize; 12 | sortme = 0; 13 | 14 | for (i = 1; i < lsize; i++) { 15 | if ( x[i] < x[i - 1] ) { 16 | sortme = 1; 17 | break; 18 | } 19 | else if ( x[i] == x[i - 1] ) { 20 | dedupcheck = 1; 21 | } 22 | } 23 | 24 | if ( sortme ) { 25 | quicksort_bsd ( 26 | x, 27 | lsize, 28 | sizeof *x, 29 | xtileCompare, 30 | NULL 31 | ); 32 | dedupcheck = 1; 33 | sortme = 0; 34 | } 35 | 36 | if ( dedup & dedupcheck ) { 37 | _lsize = 0; 38 | if ( dropmiss ) { 39 | if ( SF_is_missing(x[0]) ) return (0); 40 | for (i = 1; i < lsize; i++) { 41 | if ( SF_is_missing(x[i]) ) break; 42 | else if ( x[_lsize] == x[i] ) continue; 43 | x[++_lsize] = x[i]; 44 | 
} 45 | } 46 | else { 47 | for (i = 1; i < lsize; i++) { 48 | if ( x[_lsize] == x[i] ) continue; 49 | x[++_lsize] = x[i]; 50 | } 51 | } 52 | _lsize++; 53 | } 54 | else if ( dropmiss ) { 55 | for (i = 0; i < lsize; i++) { 56 | if ( SF_is_missing(x[i]) ) return (i); 57 | } 58 | } 59 | 60 | return (_lsize); 61 | } 62 | else if ( (lsize == 1) & dropmiss ) { 63 | if ( SF_is_missing(x[0]) ) return (0); 64 | return (lsize); 65 | } 66 | else { 67 | return (lsize); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/plugin/quantiles/gquantiles_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef GTOOLS_GQUANTILES_UTILS 2 | #define GTOOLS_GQUANTILES_UTILS 3 | 4 | GT_size gf_xtile_clean ( 5 | ST_double *x, 6 | GT_size lsize, 7 | GT_bool dropmiss, 8 | GT_bool dedup 9 | ); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /src/plugin/regress/gregress.h: -------------------------------------------------------------------------------- 1 | #ifndef GREGRESS 2 | #define GREGRESS 3 | 4 | ST_retcode sf_regress (struct StataInfo *st_info, int level, char *fname); 5 | 6 | ST_retcode sf_regress_read_colmajor ( 7 | struct StataInfo *st_info, 8 | ST_double *y, 9 | ST_double *X, 10 | ST_double *w, 11 | void *G, 12 | void *FE, 13 | ST_double *I, 14 | GT_size *nj 15 | ); 16 | 17 | ST_retcode (*sf_regress_read)( 18 | struct StataInfo *, 19 | ST_double *, 20 | ST_double *, 21 | ST_double *, 22 | void *, 23 | void *, 24 | ST_double *, 25 | GT_size * 26 | ); 27 | 28 | ST_retcode gf_regress_absorb ( 29 | struct GtoolsHash *AbsorbHashes, 30 | GtoolsAlgorithmHDFE GtoolsAlgorithmHDFE, 31 | ST_double *stats, 32 | GT_size *maps, 33 | GT_size nj, 34 | GT_size kabs, 35 | GT_size kx, 36 | GT_size *kmodel, 37 | ST_double **njabsptr, 38 | ST_double *xptr, 39 | ST_double *yptr, 40 | ST_double *wptr, 41 | ST_double *xtarget, 42 | ST_double *ytarget, 43 | 
GT_bool setup, 44 | ST_double hdfetol 45 | ); 46 | 47 | ST_retcode gf_regress_absorb_iter( 48 | struct GtoolsHash *AbsorbHashes, 49 | GtoolsAlgorithmHDFE GtoolsAlgorithmHDFE, 50 | ST_double *stats, 51 | GT_size *maps, 52 | GT_size J, 53 | GT_size *nj, 54 | GT_size kabs, 55 | GT_size kx, 56 | ST_double **njabsptr, 57 | ST_double *xptr, 58 | ST_double *yptr, 59 | ST_double *wptr, 60 | ST_double hdfetol 61 | ); 62 | 63 | ST_retcode gf_regress_iv_notidentified ( 64 | GT_size nj, 65 | GT_size kabs, 66 | GT_size ivkendog, 67 | GT_size ivkexog, 68 | GT_size ivkz, 69 | GT_size kmodel, 70 | char *buf1, 71 | char *buf2, 72 | char *buf3 73 | ); 74 | 75 | ST_retcode gf_regress_notidentified ( 76 | GT_size nj, 77 | GT_size kabs, 78 | GT_size kx, 79 | GT_size kmodel, 80 | char *buf1, 81 | char *buf2, 82 | char *buf3 83 | ); 84 | 85 | void gf_regress_warnings ( 86 | GT_size J, 87 | GT_size warncollinear, 88 | GT_size warnsingular, 89 | GT_size warnivnotiden, 90 | GT_size warnnocols, 91 | GT_size warnalpha, 92 | char *buf1, 93 | char *buf2, 94 | char *buf3, 95 | char *buf4, 96 | char *buf5 97 | ); 98 | 99 | 100 | void gf_regress_adjust_collinear_b ( 101 | ST_double *b, 102 | ST_double *buffer, 103 | GT_size *colix, 104 | GT_size k1, 105 | GT_size k2 106 | ); 107 | 108 | void gf_regress_adjust_collinear_se ( 109 | ST_double *se, 110 | ST_double *buffer, 111 | GT_size *colix, 112 | GT_size k1, 113 | GT_size k2 114 | ); 115 | 116 | void gf_regress_adjust_collinear_V ( 117 | ST_double *V, 118 | ST_double *buffer, 119 | GT_size *colix, 120 | GT_size k1, 121 | GT_size k2 122 | ); 123 | 124 | #endif 125 | -------------------------------------------------------------------------------- /src/plugin/regress/linalg/common.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @brief Computes the transpose of a matrix B = A' 3 | * 4 | * @A k1 x k2 matrix to be transposed 5 | * @B k2 x k1 transpose of A 6 | * @k1 Number of rows in A 7 | * @k2 Number 
of columns in A 8 | * @return Store A' in @B 9 | */ 10 | void gf_regress_linalg_dtrans_colmajor (ST_double *A, ST_double *B, GT_size k1, GT_size k2) 11 | { 12 | GT_size i, j; 13 | for (j = 0; j < k2; j++) { 14 | for (i = 0; i < k1; i++) { 15 | B[j * k2 + i] = A[i * k2 + j]; 16 | } 17 | } 18 | } 19 | 20 | /** 21 | * @brief Print matrix A 22 | * 23 | * @A k1 x k2 matrix to be printed 24 | * @k1 Number of rows in A 25 | * @k2 Number of columns in A 26 | * @return Prints entries of matrix A 27 | */ 28 | void gf_regress_dprintf_colmajor ( 29 | ST_double *matrix, 30 | GT_size k1, 31 | GT_size k2, 32 | char *name) 33 | { 34 | GT_size i, j; 35 | sf_printf_debug("%s\n", name); 36 | for (i = 0; i < k1; i++) { 37 | for (j = 0; j < k2; j++) { 38 | sf_printf_debug("%.8g\t", matrix[i + k1 * j]); 39 | } 40 | sf_printf_debug("\n"); 41 | } 42 | sf_printf_debug("\n"); 43 | } 44 | 45 | /** 46 | * @brief Print matrix A 47 | * 48 | * @A k1 x k2 matrix to be printed 49 | * @k1 Number of rows in A 50 | * @k2 Number of columns in A 51 | * @return Prints entries of matrix A 52 | */ 53 | void gf_regress_lprintf_colmajor ( 54 | GT_size *matrix, 55 | GT_size k1, 56 | GT_size k2, 57 | char *name) 58 | { 59 | GT_size i, j; 60 | sf_printf_debug("%s\n", name); 61 | for (i = 0; i < k1; i++) { 62 | for (j = 0; j < k2; j++) { 63 | sf_printf_debug("%lu\t", matrix[i + k1 * j]); 64 | } 65 | sf_printf_debug("\n"); 66 | } 67 | sf_printf_debug("\n"); 68 | } 69 | -------------------------------------------------------------------------------- /src/plugin/regress/linalg/rowmajor.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @brief Compute C = AB assuming that either both A and B are symmetric or that A = B 3 | * 4 | * @A N x K matrix (symmetric, or A = B) 5 | * @B N x K matrix (symmetric, or A = B) 6 | * @C K x K array where to store AB 7 | * @N Number of rows in A, B 8 | * @K Number of columns in A, B 9 | * @return Store AB in @C 10 | */ 11 | void 
gf_regress_linalg_dsymm_rowmajor( 12 | ST_double *A, 13 | ST_double *B, 14 | ST_double *C, 15 | GT_size N, 16 | GT_size K) 17 | { 18 | GT_size i, j, l; 19 | ST_double *aptr, *bptr; 20 | 21 | for (i = 0; i < K; i++) { 22 | for (j = 0; j < K; j++) { 23 | C[i * K + j] = 0; 24 | } 25 | } 26 | 27 | bptr = B; 28 | for (i = 0; i < N; i++) { 29 | for (j = 0; j < K; j++, bptr++) { 30 | aptr = A + i * K + j; 31 | for (l = j; l < K; l++, aptr++) { 32 | C[j * K + l] += (*aptr) * (*bptr); 33 | } 34 | } 35 | } 36 | 37 | // Since C is symmetric, we only compute the upper triangle and then 38 | // copy it back into the lower triangle 39 | 40 | for (i = 0; i < K; i++) { 41 | for (j = i + 1; j < K; j++) { 42 | C[j * K + i] = C[i * K + j]; 43 | } 44 | } 45 | } 46 | 47 | void gf_regress_linalg_dsymm_ixrowmajor( 48 | ST_double *A, 49 | ST_double *B, 50 | ST_double *C, 51 | GT_size *ix, 52 | GT_size N, 53 | GT_size K) 54 | { 55 | GT_size i, j, l, m; 56 | ST_double *aptr, *bptr; 57 | 58 | for (i = 0; i < K; i++) { 59 | for (j = 0; j < K; j++) { 60 | C[i * K + j] = 0; 61 | } 62 | } 63 | 64 | for (i = 0; i < N; i++) { 65 | m = ix[i]; 66 | bptr = B + m * K; 67 | for (j = 0; j < K; j++, bptr++) { 68 | aptr = A + m * K + j; 69 | for (l = j; l < K; l++, aptr++) { 70 | C[j * K + l] += (*aptr) * (*bptr); 71 | } 72 | } 73 | } 74 | 75 | // Since C is symmetric, we only compute the upper triangle and then 76 | // copy it back into the lower triangle 77 | 78 | for (i = 0; i < K; i++) { 79 | for (j = i + 1; j < K; j++) { 80 | C[j * K + i] = C[i * K + j]; 81 | } 82 | } 83 | } 84 | 85 | void gf_regress_linalg_dsymm_wrowmajor( 86 | ST_double *A, 87 | ST_double *B, 88 | ST_double *C, 89 | ST_double *w, 90 | GT_size N, 91 | GT_size K) 92 | { 93 | GT_size i, j, l; 94 | ST_double *aptr, *bptr, *wptr; 95 | 96 | for (i = 0; i < K; i++) { 97 | for (j = 0; j < K; j++) { 98 | C[i * K + j] = 0; 99 | } 100 | } 101 | 102 | bptr = B; 103 | wptr = w; 104 | for (i = 0; i < N; i++, wptr++) { 105 | for (j = 0; j < K; 
j++, bptr++) { 106 | aptr = A + i * K + j; 107 | for (l = j; l < K; l++, aptr++) { 108 | C[j * K + l] += (*aptr) * (*bptr) * (*wptr); 109 | } 110 | } 111 | } 112 | 113 | // Since C is symmetric, we only compute the upper triangle and then 114 | // copy it back into the lower triangle 115 | 116 | for (i = 0; i < K; i++) { 117 | for (j = i + 1; j < K; j++) { 118 | C[j * K + i] = C[i * K + j]; 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/plugin/regress/models/glm.c: -------------------------------------------------------------------------------- 1 | ST_retcode gf_regress_glm_post( 2 | GT_bool wcode, 3 | ST_double *wptr, 4 | ST_double *e, 5 | ST_double *wgt, 6 | GT_size nj, 7 | ST_double diff, 8 | ST_double poistol, 9 | GT_size poisiter, 10 | char *buf1) 11 | { 12 | GT_size i; 13 | if ( diff < poistol ) { 14 | if ( wcode == 2 ) { 15 | for (i = 0; i < nj; i++) { 16 | e[i] *= wgt[i] / wptr[i]; 17 | } 18 | memcpy(wgt, wptr, nj * sizeof(ST_double)); 19 | } 20 | return(0); 21 | } 22 | else { 23 | sf_format_size(poisiter, buf1); 24 | sf_errprintf("max iter (%s) reached; tolerance not achieved (%15.9g > %15.9g)\n", 25 | buf1, diff, poistol); 26 | return(198); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/plugin/regress/models/glm.h: -------------------------------------------------------------------------------- 1 | #ifndef GREGRESS_GLM 2 | #define GREGRESS_GLM 3 | 4 | // GLM 5 | // --- 6 | 7 | void (*gf_regress_glm_init)( 8 | ST_double *, 9 | ST_double *, 10 | ST_double *, 11 | ST_double *, 12 | ST_double *, 13 | ST_double *, 14 | ST_double *, 15 | GT_size 16 | ); 17 | 18 | ST_double (*gf_regress_glm_iter)( 19 | ST_double *, 20 | ST_double *, 21 | ST_double *, 22 | ST_double *, 23 | ST_double *, 24 | ST_double *, 25 | ST_double *, 26 | ST_double *, 27 | ST_double *, 28 | GT_size 29 | ); 30 | 31 | ST_retcode gf_regress_glm_post( 32 | GT_bool wcode, 
33 | ST_double *wptr, 34 | ST_double *e, 35 | ST_double *wgt, 36 | GT_size nj, 37 | ST_double diff, 38 | ST_double glmtol, 39 | GT_size glmiter, 40 | char *buf1 41 | ); 42 | 43 | // Logit 44 | // ----- 45 | 46 | void gf_regress_logit_init_w( 47 | ST_double *yptr, 48 | ST_double *wptr, 49 | ST_double *mu, 50 | ST_double *wgt, 51 | ST_double *eta, 52 | ST_double *dev, 53 | ST_double *lhs, 54 | GT_size nj 55 | ); 56 | 57 | void gf_regress_logit_init_unw( 58 | ST_double *yptr, 59 | ST_double *wptr, 60 | ST_double *mu, 61 | ST_double *wgt, 62 | ST_double *eta, 63 | ST_double *dev, 64 | ST_double *lhs, 65 | GT_size nj 66 | ); 67 | 68 | ST_double gf_regress_logit_iter_unw( 69 | ST_double *yptr, 70 | ST_double *wptr, 71 | ST_double *e, 72 | ST_double *mu, 73 | ST_double *wgt, 74 | ST_double *eta, 75 | ST_double *dev, 76 | ST_double *dev0, 77 | ST_double *lhs, 78 | GT_size nj 79 | ); 80 | 81 | ST_double gf_regress_logit_iter_w( 82 | ST_double *yptr, 83 | ST_double *wptr, 84 | ST_double *e, 85 | ST_double *mu, 86 | ST_double *wgt, 87 | ST_double *eta, 88 | ST_double *dev, 89 | ST_double *dev0, 90 | ST_double *lhs, 91 | GT_size nj 92 | ); 93 | 94 | // Poisson 95 | // ------- 96 | 97 | void gf_regress_poisson_init_w( 98 | ST_double *yptr, 99 | ST_double *wptr, 100 | ST_double *mu, 101 | ST_double *wgt, 102 | ST_double *eta, 103 | ST_double *dev, 104 | ST_double *lhs, 105 | GT_size nj 106 | ); 107 | 108 | void gf_regress_poisson_init_unw( 109 | ST_double *yptr, 110 | ST_double *wptr, 111 | ST_double *mu, 112 | ST_double *wgt, 113 | ST_double *eta, 114 | ST_double *dev, 115 | ST_double *lhs, 116 | GT_size nj 117 | ); 118 | 119 | ST_double gf_regress_poisson_iter_unw( 120 | ST_double *yptr, 121 | ST_double *wptr, 122 | ST_double *e, 123 | ST_double *mu, 124 | ST_double *wgt, 125 | ST_double *eta, 126 | ST_double *dev, 127 | ST_double *dev0, 128 | ST_double *lhs, 129 | GT_size nj 130 | ); 131 | 132 | ST_double gf_regress_poisson_iter_w( 133 | ST_double *yptr, 134 | ST_double 
*wptr, 135 | ST_double *e, 136 | ST_double *mu, 137 | ST_double *wgt, 138 | ST_double *eta, 139 | ST_double *dev, 140 | ST_double *dev0, 141 | ST_double *lhs, 142 | GT_size nj 143 | ); 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /src/plugin/regress/models/logit.c: -------------------------------------------------------------------------------- 1 | void gf_regress_logit_init_unw( 2 | ST_double *yptr, 3 | ST_double *wptr, 4 | ST_double *mu, 5 | ST_double *wgt, 6 | ST_double *eta, 7 | ST_double *dev, 8 | ST_double *lhs, 9 | GT_size nj) 10 | { 11 | GT_size i; 12 | ST_double mean = 0; 13 | for (i = 0; i < nj; i++) { 14 | mean += yptr[i]; 15 | } 16 | mean /= (ST_double) nj; 17 | 18 | for (i = 0; i < nj; i++) { 19 | mu[i] = (yptr[i] + mean) / 2; 20 | eta[i] = log(mu[i] / (1 - mu[i])); 21 | wgt[i] = mu[i] * (1 - mu[i]); 22 | dev[i] = 0; 23 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i]; 24 | } 25 | } 26 | 27 | void gf_regress_logit_init_w( 28 | ST_double *yptr, 29 | ST_double *wptr, 30 | ST_double *mu, 31 | ST_double *wgt, 32 | ST_double *eta, 33 | ST_double *dev, 34 | ST_double *lhs, 35 | GT_size nj) 36 | { 37 | GT_size i; 38 | ST_double mean = 0; 39 | ST_double W = 0; 40 | for (i = 0; i < nj; i++) { 41 | mean += yptr[i] * wptr[i]; 42 | W += wptr[i]; 43 | } 44 | mean /= (ST_double) W; 45 | for (i = 0; i < nj; i++) { 46 | mu[i] = (yptr[i] + mean) / 2; 47 | eta[i] = log(mu[i] / (1 - mu[i])); 48 | wgt[i] = mu[i] * (1 - mu[i]); 49 | dev[i] = 0; 50 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i]; 51 | wgt[i] *= wptr[i]; 52 | } 53 | } 54 | 55 | ST_double gf_regress_logit_iter_unw( 56 | ST_double *yptr, 57 | ST_double *wptr, 58 | ST_double *e, 59 | ST_double *mu, 60 | ST_double *wgt, 61 | ST_double *eta, 62 | ST_double *dev, 63 | ST_double *dev0, 64 | ST_double *lhs, 65 | GT_size nj) 66 | { 67 | GT_size i; 68 | ST_double diff = 0; 69 | 70 | for (i = 0; i < nj; i++) { 71 | eta[i] = lhs[i] - e[i]; 72 | mu[i] = 1 / (1 + 
exp(-eta[i])); 73 | wgt[i] = mu[i] * (1 - mu[i]); 74 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i]; 75 | dev0[i] = dev[i]; 76 | dev[i] = - 2 * (yptr[i] * log(mu[i]) + (1 - yptr[i]) * log(1 - mu[i])); 77 | diff = GTOOLS_PWMAX(diff, fabs(dev[i] - dev0[i]) / (fabs(dev0[i]) + 1)); 78 | } 79 | return (diff); 80 | } 81 | 82 | ST_double gf_regress_logit_iter_w( 83 | ST_double *yptr, 84 | ST_double *wptr, 85 | ST_double *e, 86 | ST_double *mu, 87 | ST_double *wgt, 88 | ST_double *eta, 89 | ST_double *dev, 90 | ST_double *dev0, 91 | ST_double *lhs, 92 | GT_size nj) 93 | { 94 | GT_size i; 95 | ST_double diff = 0; 96 | for (i = 0; i < nj; i++) { 97 | eta[i] = lhs[i] - e[i]; 98 | mu[i] = 1 / (1 + exp(-eta[i])); 99 | wgt[i] = mu[i] * (1 - mu[i]); 100 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i]; 101 | dev0[i] = dev[i]; 102 | dev[i] = - 2 * (yptr[i] * log(mu[i]) + (1 - yptr[i]) * log(1 - mu[i])); 103 | diff = GTOOLS_PWMAX(diff, fabs(dev[i] - dev0[i]) / (fabs(dev0[i]) + 1)); 104 | wgt[i] *= wptr[i]; 105 | } 106 | return (diff); 107 | } 108 | -------------------------------------------------------------------------------- /src/plugin/regress/models/models.h: -------------------------------------------------------------------------------- 1 | #ifndef GREGRESS_MODELS 2 | #define GREGRESS_MODELS 3 | 4 | // OLS 5 | // --- 6 | 7 | GT_bool (*gf_regress_ols) ( 8 | ST_double *, 9 | ST_double *, 10 | ST_double *, 11 | ST_double *, 12 | ST_double *, 13 | ST_double *, 14 | ST_double *, 15 | GT_size *, 16 | GT_size, 17 | GT_size 18 | ); 19 | 20 | GT_bool gf_regress_ols_colmajor( 21 | ST_double *X, 22 | ST_double *y, 23 | ST_double *w, 24 | ST_double *XX, 25 | ST_double *Xy, 26 | ST_double *e, 27 | ST_double *b, 28 | GT_size *colix, 29 | GT_size N, 30 | GT_size kx 31 | ); 32 | 33 | GT_bool gf_regress_ols_wcolmajor( 34 | ST_double *X, 35 | ST_double *y, 36 | ST_double *w, 37 | ST_double *XX, 38 | ST_double *Xy, 39 | ST_double *e, 40 | ST_double *b, 41 | GT_size *colix, 42 | GT_size N, 43 | 
GT_size kx 44 | ); 45 | 46 | // IV regression 47 | // ------------- 48 | 49 | GT_bool (*gf_regress_iv) ( 50 | ST_double *, 51 | ST_double *, 52 | ST_double *, 53 | ST_double *, 54 | ST_double *, 55 | ST_double *, 56 | ST_double *, 57 | ST_double *, 58 | ST_double *, 59 | ST_double *, 60 | GT_size *, 61 | GT_size, 62 | GT_size, 63 | GT_size, 64 | GT_size 65 | ); 66 | 67 | GT_bool gf_regress_iv_unw( 68 | ST_double *Xendog, 69 | ST_double *Xexog, 70 | ST_double *Z, 71 | ST_double *y, 72 | ST_double *w, 73 | ST_double *XX, 74 | ST_double *XZ, 75 | ST_double *BZ, 76 | ST_double *e, 77 | ST_double *b, 78 | GT_size *colix, 79 | GT_size N, 80 | GT_size kendog, 81 | GT_size kexog, 82 | GT_size kz 83 | ); 84 | 85 | GT_bool gf_regress_iv_w( 86 | ST_double *Xendog, 87 | ST_double *Xexog, 88 | ST_double *Z, 89 | ST_double *y, 90 | ST_double *w, 91 | ST_double *XX, 92 | ST_double *XZ, 93 | ST_double *BZ, 94 | ST_double *e, 95 | ST_double *b, 96 | GT_size *colix, 97 | GT_size N, 98 | GT_size kendog, 99 | GT_size kexog, 100 | GT_size kz 101 | ); 102 | 103 | void gf_regress_linalg_iverror( 104 | ST_double *y, 105 | ST_double *A1, 106 | ST_double *A2, 107 | ST_double *b, 108 | ST_double *c, 109 | GT_size N, 110 | GT_size k1, 111 | GT_size k2 112 | ); 113 | 114 | void gf_regress_linalg_iverror_ix( 115 | ST_double *y, 116 | ST_double *A1, 117 | ST_double *A2, 118 | ST_double *b, 119 | ST_double *c, 120 | GT_size *colix, 121 | GT_size N, 122 | GT_size koffset, 123 | GT_size k1, 124 | GT_size k2 125 | ); 126 | 127 | void gf_regress_linalg_ivcollinear_ix( 128 | GT_size *colix, 129 | GT_size kendog, 130 | GT_size kexog, 131 | GT_size kz 132 | ); 133 | 134 | #endif 135 | -------------------------------------------------------------------------------- /src/plugin/regress/models/ols.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @brief Run basic OLS 3 | * 4 | * @X Independent variables; array of length N x kx 5 | * @y Dependent variable; 
array of length N 6 | * @XX Array of length kx x kx where to store X' X and (X' X)^-1 7 | * @Xy Array of length kx where to store X y 8 | * @b Array of length kx where to store the coefficients 9 | * @N Number of observations 10 | * @kx Number of X variables 11 | * @return Store OLS coefficients in @b 12 | */ 13 | GT_bool gf_regress_ols_colmajor( 14 | ST_double *X, 15 | ST_double *y, 16 | ST_double *w, 17 | ST_double *XX, 18 | ST_double *Xy, 19 | ST_double *e, 20 | ST_double *b, 21 | GT_size *colix, 22 | GT_size N, 23 | GT_size kx) 24 | { 25 | GT_size kindep; 26 | GT_bool singular = 0; 27 | 28 | gf_regress_linalg_dsymm_colmajor (X, X, XX, N, kx); 29 | gf_regress_linalg_dsyldu (XX, kx, XX + kx * kx, colix, &singular); 30 | 31 | // gf_regress_dprintf_colmajor (XX, kx, kx, "XX"); 32 | // gf_regress_linalg_dsysv (XX, kx, &singular); 33 | // gf_regress_dprintf_colmajor (XX, kindep, kindep, "XX^-1"); 34 | 35 | kindep = colix[kx]; 36 | if ( kindep > 0 ) { 37 | if ( kindep < kx ) { 38 | gf_regress_linalg_dgemTv_colmajor_ix1 (X, y, Xy, colix, N, kindep); 39 | gf_regress_linalg_dgemTv_colmajor (XX, Xy, b, kindep, kindep); 40 | gf_regress_linalg_error_colmajor_ix1 (y, X, b, e, colix, N, kindep); 41 | } 42 | else { 43 | gf_regress_linalg_dgemTv_colmajor (X, y, Xy, N, kx); 44 | gf_regress_linalg_dgemTv_colmajor (XX, Xy, b, kx, kx); 45 | gf_regress_linalg_error_colmajor (y, X, b, e, N, kx); 46 | } 47 | } 48 | 49 | // gf_regress_dprintf_colmajor (Xy, 1, kindep, "Xy"); 50 | // gf_regress_dprintf_colmajor (b, 1, kindep, "b"); 51 | 52 | return(singular); 53 | } 54 | 55 | GT_bool gf_regress_ols_wcolmajor( 56 | ST_double *X, 57 | ST_double *y, 58 | ST_double *w, 59 | ST_double *XX, 60 | ST_double *Xy, 61 | ST_double *e, 62 | ST_double *b, 63 | GT_size *colix, 64 | GT_size N, 65 | GT_size kx) 66 | { 67 | GT_size kindep; 68 | GT_bool singular = 0; 69 | gf_regress_linalg_dsymm_wcolmajor (X, X, XX, w, N, kx); 70 | gf_regress_linalg_dsyldu (XX, kx, XX + kx * kx, colix, &singular); 71 | 72 
| kindep = colix[kx]; 73 | if ( kindep > 0 ) { 74 | if ( kindep < kx ) { 75 | gf_regress_linalg_dgemTv_wcolmajor_ix1 (X, y, Xy, w, colix, N, kindep); 76 | gf_regress_linalg_dgemTv_colmajor (XX, Xy, b, kindep, kindep); 77 | gf_regress_linalg_error_colmajor_ix1 (y, X, b, e, colix, N, kindep); 78 | } 79 | else { 80 | gf_regress_linalg_dgemTv_wcolmajor (X, y, Xy, w, N, kx); 81 | gf_regress_linalg_dgemTv_colmajor (XX, Xy, b, kx, kx); 82 | gf_regress_linalg_error_colmajor (y, X, b, e, N, kx); 83 | } 84 | } 85 | 86 | return (singular); 87 | } 88 | -------------------------------------------------------------------------------- /src/plugin/regress/models/poisson.c: -------------------------------------------------------------------------------- 1 | void gf_regress_poisson_init_unw( 2 | ST_double *yptr, 3 | ST_double *wptr, 4 | ST_double *mu, 5 | ST_double *wgt, 6 | ST_double *eta, 7 | ST_double *dev, 8 | ST_double *lhs, 9 | GT_size nj) 10 | { 11 | GT_size i; 12 | ST_double mean = 0; 13 | for (i = 0; i < nj; i++) { 14 | mean += yptr[i]; 15 | } 16 | mean /= (ST_double) nj; 17 | 18 | for (i = 0; i < nj; i++) { 19 | mu[i] = (yptr[i] + mean) / 2; 20 | eta[i] = log(mu[i]); 21 | wgt[i] = mu[i]; 22 | dev[i] = 0; 23 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i]; 24 | } 25 | } 26 | 27 | void gf_regress_poisson_init_w( 28 | ST_double *yptr, 29 | ST_double *wptr, 30 | ST_double *mu, 31 | ST_double *wgt, 32 | ST_double *eta, 33 | ST_double *dev, 34 | ST_double *lhs, 35 | GT_size nj) 36 | { 37 | GT_size i; 38 | ST_double mean = 0; 39 | ST_double W = 0; 40 | for (i = 0; i < nj; i++) { 41 | mean += yptr[i] * wptr[i]; 42 | W += wptr[i]; 43 | } 44 | mean /= (ST_double) W; 45 | for (i = 0; i < nj; i++) { 46 | mu[i] = (yptr[i] + mean) / 2; 47 | eta[i] = log(mu[i]); 48 | wgt[i] = mu[i]; 49 | dev[i] = 0; 50 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i]; 51 | wgt[i] *= wptr[i]; 52 | } 53 | } 54 | 55 | ST_double gf_regress_poisson_iter_unw( 56 | ST_double *yptr, 57 | ST_double *wptr, 58 | 
ST_double *e, 59 | ST_double *mu, 60 | ST_double *wgt, 61 | ST_double *eta, 62 | ST_double *dev, 63 | ST_double *dev0, 64 | ST_double *lhs, 65 | GT_size nj) 66 | { 67 | GT_size i; 68 | ST_double diff = 0; 69 | 70 | for (i = 0; i < nj; i++) { 71 | eta[i] = lhs[i] - e[i]; 72 | mu[i] = exp(eta[i]); 73 | wgt[i] = mu[i]; 74 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i]; 75 | dev0[i] = dev[i]; 76 | // is dropping these OK? 77 | dev[i] = yptr[i] > 0? 2 * (yptr[i] * log(yptr[i] / mu[i]) - (yptr[i] - mu[i])): 0; 78 | diff = GTOOLS_PWMAX(diff, fabs(dev[i] - dev0[i]) / (fabs(dev0[i]) + 1)); 79 | } 80 | return (diff); 81 | } 82 | 83 | ST_double gf_regress_poisson_iter_w( 84 | ST_double *yptr, 85 | ST_double *wptr, 86 | ST_double *e, 87 | ST_double *mu, 88 | ST_double *wgt, 89 | ST_double *eta, 90 | ST_double *dev, 91 | ST_double *dev0, 92 | ST_double *lhs, 93 | GT_size nj) 94 | { 95 | GT_size i; 96 | ST_double diff = 0; 97 | for (i = 0; i < nj; i++) { 98 | eta[i] = lhs[i] - e[i]; 99 | mu[i] = exp(eta[i]); 100 | wgt[i] = mu[i]; 101 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i]; 102 | dev0[i] = dev[i]; 103 | // is dropping these OK? 104 | dev[i] = yptr[i] > 0? 
2 * (yptr[i] * log(yptr[i] / mu[i]) - (yptr[i] - mu[i])): 0; 105 | diff = GTOOLS_PWMAX(diff, fabs(dev[i] - dev0[i]) / (fabs(dev0[i]) + 1)); 106 | wgt[i] *= wptr[i]; 107 | } 108 | return (diff); 109 | } 110 | -------------------------------------------------------------------------------- /src/plugin/regress/vce/cluster.c: -------------------------------------------------------------------------------- 1 | void gf_regress_ols_cluster_colmajor( 2 | ST_double *e, 3 | ST_double *w, 4 | GT_size *info, 5 | GT_size *index, 6 | GT_size J, 7 | ST_double *U, 8 | GT_size *ux, 9 | ST_double *V, 10 | ST_double *VV, 11 | ST_double *X, 12 | ST_double *XX, 13 | ST_double *se, 14 | GT_size *colix, 15 | GT_size N, 16 | GT_size kx, 17 | GT_size kmodel, 18 | gf_regress_vceadj vceadj) 19 | { 20 | GT_size i, j, k, start, end, kindep; 21 | ST_double qc, *aptr, *bptr; 22 | 23 | kindep = colix[kx]; 24 | memset(U, '\0', J * kindep * sizeof(ST_double)); 25 | for (j = 0; j < J; j++) { 26 | start = info[j]; 27 | end = info[j + 1]; 28 | for (i = start; i < end; i++) { 29 | ux[index[i]] = j; 30 | } 31 | } 32 | 33 | if ( kindep < kx ) { 34 | for (k = 0; k < kindep; k++) { 35 | aptr = X + colix[k] * N; 36 | bptr = e; 37 | for (i = 0; i < N; i++, aptr++, bptr++) { 38 | U[ux[i] * kindep + k] += (*aptr) * (*bptr); 39 | } 40 | } 41 | } 42 | else { 43 | aptr = X; 44 | for (k = 0; k < kindep; k++) { 45 | bptr = e; 46 | for (i = 0; i < N; i++, aptr++, bptr++) { 47 | U[ux[i] * kindep + k] += (*aptr) * (*bptr); 48 | } 49 | } 50 | } 51 | 52 | gf_regress_linalg_dsymm_rowmajor (U, U, V, J, kindep); 53 | gf_regress_linalg_dgemm_colmajor (XX, V, VV, kindep, kindep, kindep); 54 | gf_regress_linalg_dgemm_colmajor (VV, XX, V, kindep, kindep, kindep); 55 | 56 | qc = vceadj(N, kmodel, J, w); 57 | for (i = 0; i < kindep; i++) { 58 | se[i] = sqrt(V[i * kindep + i] * qc); 59 | } 60 | } 61 | 62 | void gf_regress_ols_cluster_wcolmajor( 63 | ST_double *e, 64 | ST_double *w, 65 | GT_size *info, 66 | GT_size *index, 67 | 
GT_size J, 68 | ST_double *U, 69 | GT_size *ux, 70 | ST_double *V, 71 | ST_double *VV, 72 | ST_double *X, 73 | ST_double *XX, 74 | ST_double *se, 75 | GT_size *colix, 76 | GT_size N, 77 | GT_size kx, 78 | GT_size kmodel, 79 | gf_regress_vceadj vceadj) 80 | { 81 | GT_size i, j, k, start, end, kindep; 82 | ST_double qc, *aptr, *bptr, *wptr; 83 | 84 | kindep = colix[kx]; 85 | memset(U, '\0', J * kindep * sizeof(ST_double)); 86 | for (j = 0; j < J; j++) { 87 | start = info[j]; 88 | end = info[j + 1]; 89 | for (i = start; i < end; i++) { 90 | ux[index[i]] = j; 91 | } 92 | } 93 | 94 | if ( kindep < kx ) { 95 | for (k = 0; k < kindep; k++) { 96 | aptr = X + colix[k] * N; 97 | bptr = e; 98 | wptr = w; 99 | for (i = 0; i < N; i++, aptr++, bptr++, wptr++) { 100 | U[ux[i] * kindep + k] += (*aptr) * (*bptr) * (*wptr); 101 | } 102 | } 103 | } 104 | else { 105 | aptr = X; 106 | for (k = 0; k < kindep; k++) { 107 | bptr = e; 108 | wptr = w; 109 | for (i = 0; i < N; i++, aptr++, bptr++, wptr++) { 110 | U[ux[i] * kindep + k] += (*aptr) * (*bptr) * (*wptr); 111 | } 112 | } 113 | } 114 | 115 | gf_regress_linalg_dsymm_rowmajor (U, U, V, J, kindep); 116 | gf_regress_linalg_dgemm_colmajor (XX, V, VV, kindep, kindep, kindep); 117 | gf_regress_linalg_dgemm_colmajor (VV, XX, V, kindep, kindep, kindep); 118 | 119 | qc = vceadj(N, kmodel, J, w); 120 | for (i = 0; i < kindep; i++) { 121 | se[i] = sqrt(V[i * kindep + i] * qc); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/plugin/regress/vce/heteroskedastic.c: -------------------------------------------------------------------------------- 1 | void gf_regress_ols_robust_colmajor( 2 | ST_double *e, 3 | ST_double *w, 4 | ST_double *V, 5 | ST_double *VV, 6 | ST_double *X, 7 | ST_double *XX, 8 | ST_double *se, 9 | GT_size *colix, 10 | GT_size N, 11 | GT_size kx, 12 | GT_size kmodel, 13 | gf_regress_vceadj vceadj) 14 | { 15 | GT_size i; 16 | ST_double qc = vceadj(N, kmodel, 0, w); 17 | 
GT_size kindep = colix[kx]; 18 | 19 | // Compute D = X' diag(e) X 20 | if ( kindep < kx ) { 21 | gf_regress_linalg_dsymm_w2colmajor_ix (X, X, V, e, colix, N, kindep); 22 | } 23 | else { 24 | gf_regress_linalg_dsymm_w2colmajor (X, X, V, e, N, kx); 25 | } 26 | 27 | // Compute V = (X' X)^-1 D (X' X)^-1 28 | gf_regress_linalg_dgemm_colmajor (XX, V, VV, kindep, kindep, kindep); 29 | gf_regress_linalg_dgemm_colmajor (VV, XX, V, kindep, kindep, kindep); 30 | 31 | // Extract standard errors from diag(V) 32 | for (i = 0; i < kindep; i++) { 33 | se[i] = sqrt(V[i * kindep + i] * qc); 34 | } 35 | } 36 | 37 | void gf_regress_ols_robust_wcolmajor( 38 | ST_double *e, 39 | ST_double *w, 40 | ST_double *V, 41 | ST_double *VV, 42 | ST_double *X, 43 | ST_double *XX, 44 | ST_double *se, 45 | GT_size *colix, 46 | GT_size N, 47 | GT_size kx, 48 | GT_size kmodel, 49 | gf_regress_vceadj vceadj) 50 | { 51 | GT_size i; 52 | ST_double qc = vceadj(N, kmodel, 0, w); 53 | GT_size kindep = colix[kx]; 54 | 55 | if ( kindep < kx ) { 56 | gf_regress_linalg_dsymm_we2colmajor_ix (X, X, V, e, w, colix, N, kindep); 57 | } 58 | else { 59 | gf_regress_linalg_dsymm_we2colmajor (X, X, V, e, w, N, kx); 60 | } 61 | 62 | gf_regress_linalg_dgemm_colmajor (XX, V, VV, kindep, kindep, kindep); 63 | gf_regress_linalg_dgemm_colmajor (VV, XX, V, kindep, kindep, kindep); 64 | 65 | for (i = 0; i < kindep; i++) { 66 | se[i] = sqrt(V[i * kindep + i] * qc); 67 | } 68 | } 69 | 70 | void gf_regress_ols_robust_fwcolmajor( 71 | ST_double *e, 72 | ST_double *w, 73 | ST_double *V, 74 | ST_double *VV, 75 | ST_double *X, 76 | ST_double *XX, 77 | ST_double *se, 78 | GT_size *colix, 79 | GT_size N, 80 | GT_size kx, 81 | GT_size kmodel, 82 | gf_regress_vceadj vceadj) 83 | { 84 | GT_size i; 85 | ST_double qc = vceadj(N, kmodel, 0, w); 86 | GT_size kindep = colix[kx]; 87 | 88 | if ( kindep < kx ) { 89 | gf_regress_linalg_dsymm_fwe2colmajor_ix (X, X, V, e, w, colix, N, kindep); 90 | } 91 | else { 92 | 
gf_regress_linalg_dsymm_fwe2colmajor (X, X, V, e, w, N, kx); 93 | } 94 | 95 | gf_regress_linalg_dgemm_colmajor (XX, V, VV, kindep, kindep, kindep); 96 | gf_regress_linalg_dgemm_colmajor (VV, XX, V, kindep, kindep, kindep); 97 | 98 | for (i = 0; i < kindep; i++) { 99 | se[i] = sqrt(V[i * kindep + i] * qc); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/plugin/regress/vce/homoskedastic.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @brief Compute homo SE for OLS 3 | * 4 | * @e N length array of error terms 5 | * @V kx by kx matrix with (X' X)^-1 6 | * @se Array where to store SE 7 | * @N Number of obs 8 | * @kx Number of columns in A 9 | * @return Store sqrt(diag(sum(@e^2 / (@N - @kx)) * @V)) in @se 10 | */ 11 | void gf_regress_ols_seunw ( 12 | ST_double *e, 13 | ST_double *w, 14 | ST_double *V, 15 | ST_double *se, 16 | GT_size *colix, 17 | GT_size N, 18 | GT_size kx, 19 | GT_size kmodel) 20 | { 21 | GT_size i; 22 | ST_double z, *eptr; 23 | GT_size kindep = colix[kx]; 24 | 25 | z = 0; 26 | for (eptr = e; eptr < e + N; eptr++) { 27 | z += (*eptr) * (*eptr); 28 | } 29 | z /= ((ST_double) (N - kmodel)); 30 | 31 | for (i = 0; i < kindep; i++) { 32 | se[i] = sqrt(V[i * kindep + i] * z); 33 | } 34 | } 35 | 36 | void gf_regress_ols_sew ( 37 | ST_double *e, 38 | ST_double *w, 39 | ST_double *V, 40 | ST_double *se, 41 | GT_size *colix, 42 | GT_size N, 43 | GT_size kx, 44 | GT_size kmodel) 45 | { 46 | GT_size i; 47 | ST_double *eptr; 48 | ST_double z = 0; 49 | ST_double *wptr = w; 50 | GT_size kindep = colix[kx]; 51 | 52 | for (eptr = e; eptr < e + N; eptr++, wptr++) { 53 | z += (*eptr) * (*eptr) * (*wptr); 54 | } 55 | z /= ((ST_double) (N - kmodel)); 56 | 57 | for (i = 0; i < kindep; i++) { 58 | se[i] = sqrt(V[i * kindep + i] * z); 59 | } 60 | } 61 | 62 | void gf_regress_ols_sefw ( 63 | ST_double *e, 64 | ST_double *w, 65 | ST_double *V, 66 | ST_double *se, 67 | 
GT_size *colix, 68 | GT_size N, 69 | GT_size kx, 70 | GT_size kmodel) 71 | { 72 | GT_size i; 73 | ST_double *eptr; 74 | ST_double z = 0; 75 | ST_double Ndbl = 0; 76 | ST_double *wptr = w; 77 | GT_size kindep = colix[kx]; 78 | 79 | for (eptr = e; eptr < e + N; eptr++, wptr++) { 80 | z += (*eptr) * (*eptr) * (*wptr); 81 | Ndbl += *wptr; 82 | } 83 | z /= (Ndbl - kmodel); 84 | 85 | for (i = 0; i < kindep; i++) { 86 | se[i] = sqrt(V[i * kindep + i] * z); 87 | } 88 | } 89 | 90 | void gf_regress_ols_copyvcov ( 91 | ST_double *V, 92 | ST_double *XX, 93 | GT_size kx, 94 | GT_size *colix) 95 | { 96 | GT_size i, j, kindep = colix[kx]; 97 | for (i = 0; i < kindep; i++) { 98 | for (j = 0; j < kindep; j++) { 99 | V[i * kindep + j] = XX[i * kindep + j]; 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/plugin/regress/vce/vceadj.c: -------------------------------------------------------------------------------- 1 | ST_double gf_regress_vceadj_ols_robust( 2 | GT_size N, 3 | GT_size kmodel, 4 | GT_size J, 5 | ST_double *w) 6 | { 7 | ST_double Ndbl = N; 8 | return(Ndbl / (Ndbl - kmodel)); 9 | } 10 | 11 | ST_double gf_regress_vceadj_ols_cluster( 12 | GT_size N, 13 | GT_size kmodel, 14 | GT_size J, 15 | ST_double *w) 16 | { 17 | ST_double Ndbl = N; 18 | ST_double Jdbl = J; 19 | return(((Ndbl - 1) / (Ndbl - kmodel)) * (Jdbl / (Jdbl - 1))); 20 | } 21 | 22 | ST_double gf_regress_vceadj_mle_robust( 23 | GT_size N, 24 | GT_size kmodel, 25 | GT_size J, 26 | ST_double *w) 27 | { 28 | ST_double Ndbl = N; 29 | return(Ndbl / (Ndbl - 1)); 30 | } 31 | 32 | ST_double gf_regress_vceadj_mle_cluster( 33 | GT_size N, 34 | GT_size kmodel, 35 | GT_size J, 36 | ST_double *w) 37 | { 38 | ST_double Jdbl = J; 39 | return(Jdbl / (Jdbl - 1)); 40 | } 41 | 42 | ST_double gf_regress_vceadj_ols_robust_fw( 43 | GT_size N, 44 | GT_size kmodel, 45 | GT_size J, 46 | ST_double *w) 47 | { 48 | GT_size i; 49 | ST_double Ndbl = 0; 50 | for (i = 0; i < N; 
i++) { 51 | Ndbl += w[i]; 52 | } 53 | return(Ndbl / (Ndbl - kmodel)); 54 | } 55 | 56 | ST_double gf_regress_vceadj_ols_cluster_fw( 57 | GT_size N, 58 | GT_size kmodel, 59 | GT_size J, 60 | ST_double *w) 61 | { 62 | GT_size i; 63 | ST_double Ndbl = 0; 64 | ST_double Jdbl = J; 65 | for (i = 0; i < N; i++) { 66 | Ndbl += w[i]; 67 | } 68 | return(((Ndbl - 1) / (Ndbl - kmodel)) * (Jdbl / (Jdbl - 1))); 69 | } 70 | 71 | ST_double gf_regress_vceadj_mle_robust_fw( 72 | GT_size N, 73 | GT_size kmodel, 74 | GT_size J, 75 | ST_double *w) 76 | { 77 | GT_size i; 78 | ST_double Ndbl = 0; 79 | for (i = 0; i < N; i++) { 80 | Ndbl += w[i]; 81 | } 82 | return(Ndbl / (Ndbl - 1)); 83 | } 84 | 85 | ST_double gf_regress_vceadj_mle_cluster_fw( 86 | GT_size N, 87 | GT_size kmodel, 88 | GT_size J, 89 | ST_double *w) 90 | { 91 | ST_double Jdbl = J; 92 | return(Jdbl / (Jdbl - 1)); 93 | } 94 | -------------------------------------------------------------------------------- /src/plugin/spi: -------------------------------------------------------------------------------- 1 | lib/spi-3.0 -------------------------------------------------------------------------------- /src/plugin/stats/gstats.c: -------------------------------------------------------------------------------- 1 | #include "gstats.h" 2 | #include "hdfe.c" 3 | #include "winsor.c" 4 | #include "summarize.c" 5 | #include "transform.c" 6 | 7 | ST_retcode sf_stats (struct StataInfo *st_info, int level, char *fname) 8 | { 9 | 10 | if ( st_info->gstats_code == 1 ) { 11 | return (sf_stats_winsor(st_info, level)); 12 | } 13 | else if ( st_info->gstats_code == 2 ) { 14 | return (sf_stats_summarize(st_info, level, fname)); 15 | } 16 | else if ( st_info->gstats_code == 3 ) { 17 | return (sf_stats_transform(st_info, level)); 18 | } 19 | else if ( st_info->gstats_code == 4 ) { 20 | return (sf_stats_hdfe(st_info, level)); 21 | } 22 | else { 23 | sf_errprintf("Unknown gstats code; error in sf_stats."); 24 | return (198); 25 | } 26 | } 27 | 
-------------------------------------------------------------------------------- /src/plugin/stats/gstats.h: -------------------------------------------------------------------------------- 1 | #ifndef GSTATS 2 | #define GSTATS 3 | 4 | ST_retcode sf_stats (struct StataInfo *st_info, int level, char *fname); 5 | ST_retcode sf_stats_winsor (struct StataInfo *st_info, int level); 6 | ST_retcode sf_stats_summarize (struct StataInfo *st_info, int level, char *fname); 7 | ST_retcode sf_stats_summarize_p (struct StataInfo *st_info, int level, char *fname); 8 | ST_retcode sf_stats_summarize_w (struct StataInfo *st_info, int level, char *fname); 9 | ST_retcode sf_stats_transform (struct StataInfo *st_info, int level); 10 | ST_retcode sf_stats_hdfe (struct StataInfo *st_info, int level); 11 | 12 | void sf_stats_hdfe_index ( 13 | struct StataInfo *st_info, 14 | GT_size *index_st); 15 | 16 | ST_retcode sf_stats_hdfe_read ( 17 | struct StataInfo *st_info, 18 | ST_double *X, 19 | ST_double *w, 20 | void *FE, 21 | GT_size *nj, 22 | GT_size *index_st); 23 | 24 | ST_retcode sf_stats_hdfe_write ( 25 | struct StataInfo *st_info, 26 | ST_double *X, 27 | GT_size *nj, 28 | GT_size *index_st); 29 | 30 | ST_retcode sf_stats_hdfe_absorb( 31 | struct GtoolsHash *AbsorbHashes, 32 | GtoolsAlgorithmHDFE AlgorithmHDFE, 33 | ST_double *stats, 34 | GT_size *maps, 35 | GT_size J, 36 | GT_size kabs, 37 | GT_size kx, 38 | GT_size *nj, 39 | GT_size *njptr, 40 | ST_double *xptr, 41 | ST_double *wptr, 42 | ST_double hdfetol, 43 | GT_size benchmark); 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /src/stata.toc: -------------------------------------------------------------------------------- 1 | v 1.11.8 2 | d Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com 3 | p 'GTOOLS': Faster implementation of common Stata commands for big data 4 | -------------------------------------------------------------------------------- 
/src/test/bench_v2/glevelsof: -------------------------------------------------------------------------------- 1 | version| id| N| J| stata| gtools| ratio 2 | 2| 1| 100000| 10| .087| .133| .65413533834586 3 | 2| 2| 100000| 10| .023| .018| 1.2777777777778 4 | 2| 3| 100000| 10| .025| .024| 1.0416666666667 5 | 2| 4| 100000| 10| .024| .024| 1 6 | 2| 5| 100000| 10| .171| .022| 7.7727272727273 7 | 2| 6| 100000| 10| .153| .021| 7.2857142857143 8 | 2| 7| 1000000| 10| .208| .106| 1.9622641509434 9 | 2| 8| 1000000| 10| .174| .103| 1.6893203883495 10 | 2| 9| 1000000| 10| .206| .149| 1.3825503355705 11 | 2| 10| 1000000| 10| .206| .144| 1.4305555555556 12 | 2| 11| 1000000| 10| 2.848| .141| 20.198581560284 13 | 2| 12| 1000000| 10| 3.063| .141| 21.723404255319 14 | 2| 13| 10000000| 10| 2.066| .964| 2.143153526971 15 | 2| 14| 10000000| 10| 1.868| 1.085| 1.7216589861751 16 | 2| 15| 10000000| 10| 2.242| 1.617| 1.3865182436611 17 | 2| 16| 10000000| 10| 2.058| 1.275| 1.6141176470588 18 | 2| 17| 10000000| 10| 52.763| 1.461| 36.114305270363 19 | 2| 18| 10000000| 10| 52.36| 1.387| 37.7505407354 20 | -------------------------------------------------------------------------------- /src/test/bench_v2/gquantiles_by: -------------------------------------------------------------------------------- 1 | version| id| N| J| stata| gtools| ratio 2 | 2| 1| 100000| 10| .242| .116| 2.0862068965517 3 | 2| 2| 100000| 10| .191| .053| 3.6037735849057 4 | 2| 3| 100000| 10| .164| .057| 2.8771929824561 5 | 2| 4| 100000| 10| .173| .059| 2.9322033898305 6 | 2| 5| 100000| 10| .176| .058| 3.0344827586207 7 | 2| 6| 100000| 10| .195| .061| 3.1967213114754 8 | 2| 7| 100000| 10| .19| .061| 3.1147540983607 9 | 2| 8| 1000000| 10| 2.109| .486| 4.3395061728395 10 | 2| 9| 1000000| 10| 2.118| .516| 4.1046511627907 11 | 2| 10| 1000000| 10| 2.063| .51| 4.0450980392157 12 | 2| 11| 1000000| 10| 2.132| .579| 3.6822107081174 13 | 2| 12| 1000000| 10| 2.207| .521| 4.236084452975 14 | 2| 13| 1000000| 10| 2.412| .543| 
4.4419889502762 15 | 2| 14| 1000000| 10| 2.44| .544| 4.4852941176471 16 | 2| 15| 10000000| 10| 28.178| 5.258| 5.3590718904526 17 | 2| 16| 10000000| 10| 28.556| 5.396| 5.2920681986657 18 | 2| 17| 10000000| 10| 28.04| 5.297| 5.2935623938078 19 | 2| 18| 10000000| 10| 29.124| 5.883| 4.9505354411015 20 | 2| 19| 10000000| 10| 30.825| 5.443| 5.6632371853757 21 | 2| 20| 10000000| 10| 32.669| 5.638| 5.7944306491664 22 | 2| 21| 10000000| 10| 31.434| 5.63| 5.5833037300178 23 | -------------------------------------------------------------------------------- /src/test/bench_v2/gquantiles_pctile: -------------------------------------------------------------------------------- 1 | version| id| N| J| stata| gtools| ratio 2 | 2| 1| 100000| 10| .079| .024| 3.2916666666667 3 | 2| 2| 1000000| 10| .871| .154| 5.6558441558442 4 | 2| 3| 10000000| 10| 14.8| 1.605| 9.2211838006231 5 | -------------------------------------------------------------------------------- /src/test/bench_v2/gquantiles_xtile: -------------------------------------------------------------------------------- 1 | version| id| N| J| stata| gtools| ratio 2 | 2| 1| 100000| 10| .351| .098| 3.5816326530612 3 | 2| 2| 1000000| 10| 3.566| .288| 12.381944444444 4 | 2| 3| 10000000| 10| 50.886| 2.856| 17.817226890756 5 | -------------------------------------------------------------------------------- /src/test/bench_v2/gstats_sum: -------------------------------------------------------------------------------- 1 | version| id| N| J| stata| gtools| ratio 2 | 2| 1| 100000| 10| .09| .154| .58441558441558 3 | 2| 2| 1000000| 10| 1.219| .336| 3.6279761904762 4 | 2| 3| 10000000| 10| 19.135| 3.844| 4.9778876170656 5 | -------------------------------------------------------------------------------- /src/test/bench_v2/gstats_tab: -------------------------------------------------------------------------------- 1 | version| id| N| J| stata| gtools| ratio 2 | 2| 1| 100000| 10| .249| .292| .8527397260274 3 | 2| 2| 100000| 10| .235| .035| 
6.7142857142857 4 | 2| 3| 100000| 10| .252| .038| 6.6315789473684 5 | 2| 4| 100000| 10| .245| .039| 6.2820512820513 6 | 2| 5| 100000| 10| .242| .033| 7.3333333333333 7 | 2| 6| 100000| 10| .257| .035| 7.3428571428571 8 | 2| 7| 1000000| 10| 2.254| .224| 10.0625 9 | 2| 8| 1000000| 10| 1.708| .217| 7.8709677419355 10 | 2| 9| 1000000| 10| 2.1| .246| 8.5365853658537 11 | 2| 10| 1000000| 10| 2.008| .259| 7.7528957528958 12 | 2| 11| 1000000| 10| 2.382| .248| 9.6048387096774 13 | 2| 12| 1000000| 10| 2.792| .24| 11.633333333333 14 | 2| 13| 10000000| 10| 28.533| 2.405| 11.864033264033 15 | 2| 14| 10000000| 10| 23.831| 2.252| 10.58214920071 16 | 2| 15| 10000000| 10| 28.836| 2.711| 10.636665437108 17 | 2| 16| 10000000| 10| 27.756| 2.613| 10.622273249139 18 | 2| 17| 10000000| 10| 27.384| 2.492| 10.988764044944 19 | 2| 18| 10000000| 10| 33.357| 2.627| 12.69775409212 20 | -------------------------------------------------------------------------------- /src/test/bench_v2/gstats_winsor: -------------------------------------------------------------------------------- 1 | version| id| N| J| stata| gtools| ratio 2 | 2| 1| 100000| 10| .164| .027| 6.0740740740741 3 | 2| 2| 1000000| 10| 1.875| .628| 2.9856687898089 4 | 2| 3| 10000000| 10| 22.507| 4.983| 4.5167569737106 5 | -------------------------------------------------------------------------------- /src/test/bench_v2/gstats_winsor_by: -------------------------------------------------------------------------------- 1 | version| id| N| J| stata| gtools| ratio 2 | 2| 1| 100000| 10| .566| .059| 9.5932203389831 3 | 2| 2| 100000| 10| .547| .033| 16.575757575758 4 | 2| 3| 100000| 10| .479| .034| 14.088235294118 5 | 2| 4| 100000| 10| .517| .04| 12.925 6 | 2| 5| 100000| 10| .516| .034| 15.176470588235 7 | 2| 6| 100000| 10| .664| .037| 17.945945945946 8 | 2| 7| 100000| 10| .689| .041| 16.80487804878 9 | 2| 8| 1000000| 10| 7.242| .29| 24.972413793103 10 | 2| 9| 1000000| 10| 7.772| .343| 22.65889212828 11 | 2| 10| 1000000| 10| 6.87| .313| 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Load the v2 benchmark inputs.

Reads the palette definition from ``bench_v2/material.json`` and the
``gisid`` benchmark table from ``bench_v2/gisid`` (parsed with ``|`` as the
field delimiter).  Both paths are relative to the current working
directory, so the script must be run from the directory that contains
``bench_v2``.
"""

# import matplotlib.pyplot as plt
import pandas as pd
# import numpy as np
import json

# Open the palette inside a context manager so the file handle is closed
# deterministically rather than being left for the garbage collector.
with open('bench_v2/material.json', encoding='utf-8') as palette_file:
    palette = json.load(palette_file)

df = pd.read_csv('bench_v2/gisid', delimiter='|')

# df['ix'] = np.arange(df.shape[0])
# df[' '] = df[' '].astype('category')

# NOTE(review): presumably the variable combinations behind each benchmark
# row id -- confirm against the benchmark do-files.
# int1
# int1 int2
# double1
# double1 double2
# str_short
# str_short str_long
# int1 double1 str_mid
local c_os_ macosx 10 | } 11 | else { 12 | local c_os_: di lower("`c(os)'") 13 | } 14 | log using gtools_pthreads_`c_os_'.log, text replace name(gtools_pthreads) 15 | 16 | set obs 1000 17 | gen rand = runiform() 18 | expand 20000 19 | 20 | global GTOOLS_FORCE_PARALLEL = 1 21 | gunique rand, b 22 | log close gtools_pthreads 23 | --------------------------------------------------------------------------------