├── .appveyor.yml
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
└── workflows
│ ├── linux.yml
│ └── osx.yml
├── .gitignore
├── .gitmodules
├── LICENSE
├── Makefile
├── README.md
├── build.py
├── build
├── _gtools_internal.ado
├── _gtools_internal.mata
├── changelog.md
├── fasterxtile.ado
├── fasterxtile.sthlp
├── gcollapse.ado
├── gcollapse.sthlp
├── gcontract.ado
├── gcontract.sthlp
├── gdistinct.ado
├── gdistinct.sthlp
├── gduplicates.ado
├── gduplicates.sthlp
├── gegen.ado
├── gegen.sthlp
├── gglm.ado
├── gglm.sthlp
├── gisid.ado
├── gisid.sthlp
├── givregress.ado
├── givregress.sthlp
├── glevelsof.ado
├── glevelsof.sthlp
├── gpoisson.ado
├── gpoisson.sthlp
├── gquantiles.ado
├── gquantiles.sthlp
├── greg.ado
├── greg.sthlp
├── gregress.ado
├── gregress.sthlp
├── greshape.ado
├── greshape.sthlp
├── gstats.ado
├── gstats.sthlp
├── gstats_hdfe.sthlp
├── gstats_moving.sthlp
├── gstats_range.sthlp
├── gstats_residualize.sthlp
├── gstats_sum.sthlp
├── gstats_summarize.sthlp
├── gstats_tab.sthlp
├── gstats_transform.sthlp
├── gstats_winsor.sthlp
├── gtools.ado
├── gtools.pkg
├── gtools.sthlp
├── gtools_macosx_v2.plugin
├── gtools_macosx_v3.plugin
├── gtools_tests.do
├── gtools_tests_macosx.log
├── gtools_tests_unix.log
├── gtools_tests_windows.log
├── gtools_unix_v2.plugin
├── gtools_unix_v3.plugin
├── gtools_windows_v2.plugin
├── gtools_windows_v3.plugin
├── gtop.ado
├── gtop.sthlp
├── gtoplevelsof.ado
├── gtoplevelsof.sthlp
├── gunique.ado
├── gunique.sthlp
├── hashsort.ado
├── hashsort.sthlp
├── lgtools.mlib
└── stata.toc
├── changelog.md
├── docs
├── benchmarks.md
├── benchmarks
│ ├── material.json
│ ├── quick.do
│ ├── quick.png
│ ├── quick.py
│ └── quickdark.png
├── compiling.md
├── css
│ ├── extra-material-dark.css
│ ├── extra-material-dark.scss
│ ├── extra-material.css
│ └── extra-rtd.css
├── examples
│ ├── gcollapse.do
│ ├── gcontract.do
│ ├── gdistinct.do
│ ├── gduplicates.do
│ ├── gegen.do
│ ├── gglm.do
│ ├── gisid.do
│ ├── givregress.do
│ ├── glevelsof.do
│ ├── gquantiles.do
│ ├── gregress.do
│ ├── greshape.do
│ ├── gstats_hdfe.do
│ ├── gstats_summarize.do
│ ├── gstats_transform.do
│ ├── gstats_winsor.do
│ ├── gtoplevelsof.do
│ ├── gunique.do
│ └── hashsort.do
├── faqs.md
├── helpers.js
├── index.md
├── stata
│ ├── fasterxtile.sthlp
│ ├── gcollapse.sthlp
│ ├── gcontract.sthlp
│ ├── gdistinct.sthlp
│ ├── gduplicates.sthlp
│ ├── gegen.sthlp
│ ├── generic.sthlp
│ ├── gglm.sthlp
│ ├── gisid.sthlp
│ ├── givregress.sthlp
│ ├── glevelsof.sthlp
│ ├── gquantiles.sthlp
│ ├── greg.sthlp
│ ├── gregress.sthlp
│ ├── greshape.sthlp
│ ├── gstats.sthlp
│ ├── gstats_hdfe.sthlp
│ ├── gstats_moving.sthlp
│ ├── gstats_range.sthlp
│ ├── gstats_residualize.sthlp
│ ├── gstats_sum.sthlp
│ ├── gstats_summarize.sthlp
│ ├── gstats_tab.sthlp
│ ├── gstats_transform.sthlp
│ ├── gstats_winsor.sthlp
│ ├── gtools.sthlp
│ ├── gtop.sthlp
│ ├── gtoplevelsof.sthlp
│ ├── gunique.sthlp
│ └── hashsort.sthlp
└── usage
│ ├── gcollapse.md
│ ├── gcontract.md
│ ├── gdistinct.md
│ ├── gduplicates.md
│ ├── gegen.md
│ ├── gglm.md
│ ├── gisid.md
│ ├── givregress.md
│ ├── glevelsof.md
│ ├── gquantiles.md
│ ├── gregress.md
│ ├── greshape.md
│ ├── gstats_hdfe.md
│ ├── gstats_summarize.md
│ ├── gstats_transform.md
│ ├── gstats_winsor.md
│ ├── gtools.md
│ ├── gtoplevelsof.md
│ ├── gunique.md
│ └── hashsort.md
├── lib
├── bumpver.py
├── id_rsa_travis.enc
├── material.json
├── plugin
│ ├── gtools_macosx_v2.plugin
│ ├── gtools_macosx_v3.plugin
│ ├── gtools_unix_v2.plugin
│ ├── gtools_unix_v3.plugin
│ ├── gtools_windows_v2.plugin
│ ├── gtools_windows_v3.plugin
│ └── lgtools.mlib
├── spi-2.0
│ ├── stplugin.c
│ └── stplugin.h
├── spi-3.0
│ ├── stplugin.c
│ └── stplugin.h
└── update_version.py
├── mkdocs.yml
└── src
├── ado
├── _gtools_internal.ado
├── _gtools_internal.mata
├── fasterxtile.ado
├── gcollapse.ado
├── gcontract.ado
├── gdistinct.ado
├── gduplicates.ado
├── gegen.ado
├── gglm.ado
├── gisid.ado
├── givregress.ado
├── glevelsof.ado
├── gpoisson.ado
├── gquantiles.ado
├── greg.ado
├── gregress.ado
├── greshape.ado
├── gstats.ado
├── gtools.ado
├── gtop.ado
├── gtoplevelsof.ado
├── gunique.ado
└── hashsort.ado
├── github-issues
├── 29
│ └── reproduce.ado
├── 35
│ ├── Makefile
│ ├── spookyhash.dll
│ ├── spookyhash_api.h
│ ├── stplugin.c
│ ├── stplugin.h
│ ├── test.do
│ ├── test1.c
│ ├── test1.plugin
│ ├── test2.c
│ └── test2.plugin
├── 40
│ ├── gtools-1.3.3.zip
│ ├── plugin.zip
│ ├── plugin
│ │ ├── Makefile
│ │ ├── libspookyhash.a
│ │ ├── spookyhash_api.h
│ │ ├── stplugin.c
│ │ ├── stplugin.h
│ │ ├── test.do
│ │ ├── test1.c
│ │ ├── test1.plugin
│ │ ├── test2.c
│ │ └── test2.plugin
│ └── test.do
├── 45
│ └── test.do
├── 48
│ └── test.do
├── 60
│ ├── plugin.zip
│ └── plugin
│ │ ├── Makefile
│ │ ├── lib
│ │ └── spookyhash
│ │ │ └── src
│ │ │ ├── context.c
│ │ │ ├── context.h
│ │ │ ├── globals.c
│ │ │ ├── globals.h
│ │ │ ├── spookyhash.c
│ │ │ ├── spookyhash.h
│ │ │ └── spookyhash_api.h
│ │ ├── stplugin.c
│ │ ├── stplugin.h
│ │ ├── test.do
│ │ ├── test1.c
│ │ ├── test1.plugin
│ │ ├── test2.c
│ │ └── test2.plugin
├── 65
│ ├── estpost.ado
│ └── test.do
├── 67
│ ├── test-transform.do
│ ├── test.do
│ ├── test.orig.do
│ └── test.short.do
├── 71
│ └── test.do
├── 72
│ └── issue.do
├── 78
│ ├── ifin.do
│ └── test.do
├── 88
│ └── test.do
├── 30a
│ └── replicate.do
├── 30b
│ ├── replicate.do
│ └── reply01.do
├── debug-1
│ └── savehdfe.do
├── debug-2
│ ├── test-median.do
│ └── test-noobs.do
├── debug-3
│ └── test-excludeself.do
├── debug-4
│ ├── test_excludeself.do
│ ├── test_gcollapse.do
│ ├── test_gquantiles.do
│ ├── test_greshape.do
│ ├── test_gtop.do
│ ├── test_gunique.do
│ ├── test_hash.do
│ └── test_replace.do
├── debug-5
│ └── test.do
├── debug-6
│ └── greg-coredump.do
├── debug-7
│ └── quantiles.do
├── debug-8
│ └── test.do
├── debug-9
│ └── test.do
├── email-10
│ ├── bug.do
│ ├── fix.do
│ ├── gtools_test.do
│ └── mwe.do
├── email-11
│ └── test.do
├── email-12
│ ├── test.do
│ └── test2.do
├── email-13
│ ├── dec_gtools.dta
│ ├── dec_manual.dta
│ ├── dec_stata.dta
│ └── example.do
└── email-14
│ ├── gtools_bug.dta
│ └── test.do
├── gtools.pkg
├── plugin
├── api
│ ├── groupby.c
│ ├── groupby.h
│ ├── groupby
│ │ ├── accelerators.c
│ │ ├── alphas.c
│ │ ├── base.c
│ │ ├── berge.c
│ │ ├── stats.c
│ │ ├── stats_unweighted.c
│ │ ├── stats_weights.c
│ │ ├── transforms.c
│ │ ├── transforms_unweighted.c
│ │ └── transforms_weights.c
│ ├── hashing.c
│ ├── hashing.h
│ └── hashing
│ │ ├── bijection.c
│ │ ├── panelsetup.c
│ │ ├── radix.c
│ │ └── utils.c
├── collapse
│ ├── gegen.c
│ ├── gegen_w.c
│ ├── gtools_math.c
│ ├── gtools_math.h
│ ├── gtools_math_unw.c
│ ├── gtools_math_unw.h
│ ├── gtools_math_w.c
│ ├── gtools_math_w.h
│ ├── gtools_nunique.c
│ ├── gtools_utils.c
│ ├── gtools_utils.h
│ └── qselect.c
├── common
│ ├── RadixSort.c
│ ├── RadixSortGeneric.c
│ ├── RadixSortTesting
│ ├── RadixSortTesting.c
│ ├── RadixSortTyped.c
│ ├── RadixSortTypedIndex.c
│ ├── encode.c
│ ├── fixes.c
│ ├── gttypes.h
│ ├── quicksort.c
│ ├── quicksortComparators.c
│ ├── quicksortMultiLevel.c
│ ├── quicksortMultiLevelMlast.c
│ ├── readWrite.c
│ ├── sf_printf.c
│ ├── sf_printf.h
│ ├── sf_wrappers.c
│ └── sf_wrappers.h
├── extra
│ ├── gcontract.c
│ ├── gisid.c
│ ├── glevelsof.c
│ ├── greshape.c
│ ├── greshape_fast.c
│ ├── gtop.c
│ └── hashsort.c
├── gtools.c
├── gtools.h
├── hash
│ ├── gtools_hash.c
│ ├── gtools_hash.h
│ ├── gtools_hash_fast.c
│ ├── gtools_sort.c
│ └── gtools_sort.h
├── lib
├── quantiles
│ ├── gquantiles.c
│ ├── gquantiles_by.c
│ ├── gquantiles_math.c
│ ├── gquantiles_math.h
│ ├── gquantiles_math_w.c
│ ├── gquantiles_math_w.h
│ ├── gquantiles_utils.c
│ └── gquantiles_utils.h
├── regress
│ ├── gregress.c
│ ├── gregress.h
│ ├── linalg
│ │ ├── colmajor.c
│ │ ├── colmajor_ix.c
│ │ ├── colmajor_w.c
│ │ ├── common.c
│ │ ├── decompositions.c
│ │ ├── inverses.c
│ │ ├── linalg.h
│ │ └── rowmajor.c
│ ├── models
│ │ ├── glm.c
│ │ ├── glm.h
│ │ ├── ivregress.c
│ │ ├── logit.c
│ │ ├── models.h
│ │ ├── ols.c
│ │ └── poisson.c
│ ├── utils
│ │ └── read.c
│ └── vce
│ │ ├── cluster.c
│ │ ├── heteroskedastic.c
│ │ ├── homoskedastic.c
│ │ ├── vce.h
│ │ └── vceadj.c
├── spi
└── stats
│ ├── gstats.c
│ ├── gstats.h
│ ├── hdfe.c
│ ├── summarize.c
│ ├── transform.c
│ └── winsor.c
├── stata.toc
└── test
├── bench_v2
├── gcollapse_complex
├── gcollapse_simple
├── gcontract
├── gduplicates_drop
├── gegen
├── gisid
├── gisid_ix
├── glevelsof
├── gquantiles_by
├── gquantiles_pctile
├── gquantiles_xtile
├── gstats_sum
├── gstats_tab
├── gstats_winsor
├── gstats_winsor_by
├── gunique
├── hashsort_gsort
├── hashsort_sort
└── material.json
├── gtools_tests.do
├── test_benchmarks.do
├── test_benchmarks.py
├── test_gcollapse.do
├── test_gcontract.do
├── test_gduplicates.do
├── test_gegen.do
├── test_gisid.do
├── test_glevelsof.do
├── test_gquantiles.do
├── test_gquantiles_by.do
├── test_gregress.do
├── test_greshape.do
├── test_gstats.do
├── test_gtoplevelsof.do
├── test_gunique.do
├── test_hashsort.do
└── test_pthreads.do
/.appveyor.yml:
--------------------------------------------------------------------------------
1 | version: "generic-1.11.8-{build}"
2 |
3 | environment:
4 | matrix:
5 | - ARCH: x86_64
6 | CYGWIN: C:\Cygwin64
7 | CHOST: x86_64-w64-mingw32
8 | CC: x86_64-w64-mingw32-gcc
9 |
10 | build_script:
11 | - git submodule update --init --recursive
12 | - "%CYGWIN%\\bin\\bash -lc 'set -eux; cd /cygdrive/c/projects/%APPVEYOR_PROJECT_NAME%; cd lib/spookyhash; git checkout spookyhash-1.0.6; cd -'"
13 | - "%CYGWIN%\\bin\\bash -lc 'set -eux; cd /cygdrive/c/projects/%APPVEYOR_PROJECT_NAME%; make clean SPI=2.0 SPIVER=v2; make all SPI=2.0 SPIVER=v2'"
14 | - "%CYGWIN%\\bin\\bash -lc 'set -eux; cd /cygdrive/c/projects/%APPVEYOR_PROJECT_NAME%; make clean SPI=3.0 SPIVER=v3; make all SPI=3.0 SPIVER=v3'"
15 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 |
5 | ---
6 |
7 | **Describe the bug**
8 | - A clear and concise description of what the bug is.
9 | - If possible, try `gtools, upgrade` before submitting the bug; your issue might have already been solved.
10 |
11 | **Code Sample**
12 | Include a code snippet to reproduce the problem; be sure to run the relevant `gtools` program(s) with options `verbose bench(3)`.
13 |
14 | ```stata
15 | // code snippet
16 | ```
17 |
18 | ```stata
19 | // output (or attach a .log file)
20 | ```
21 |
22 | **Version info**
23 | - OS: [e.g. Windows 10]
24 | - Version: [i.e. output of `gtools`]
25 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 |
5 | ---
6 |
7 | **What would you like gtools to add or change (and why)?**
8 | If this relates to a new feature, specify what behavior you would like to see. If this relates to an existing part of Gtools, explain how you would like that to change.
9 |
10 | **Please include a specific suggestion**
11 | Ex. Add option `foo` to function `gcollapse` to do "bar".
12 | Ex. Function `gquantiles` should do A by default instead of B.
13 |
14 | **Additional context**
15 | Add any other context related to your feature request.
16 |
--------------------------------------------------------------------------------
/.github/workflows/linux.yml:
--------------------------------------------------------------------------------
1 | # Test compile plugin
2 |
3 | name: linux
4 | on:
5 | push:
6 | branches:
7 | - master
8 | - develop
9 | pull_request:
10 | branches: [ master ]
11 | jobs:
12 | build:
13 | name: Compile Plugin
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: actions/checkout@v2
17 | - name: Build Plugin
18 | run: |
19 | # brew install xx
20 | git submodule update --init
21 | cd lib/spookyhash && git checkout spookyhash-1.0.6 && cd -
22 | make clean SPI=2.0 SPIVER=v2 && make all SPI=2.0 SPIVER=v2 GCC=gcc
23 | make clean SPI=3.0 SPIVER=v3 && make all SPI=3.0 SPIVER=v3 GCC=gcc
24 |
--------------------------------------------------------------------------------
/.github/workflows/osx.yml:
--------------------------------------------------------------------------------
1 | # Compile and pull osx plugin
2 |
3 | name: osx
4 | on:
5 | push:
6 | branches:
7 | - master
8 | - develop
9 | pull_request:
10 | branches: [ master ]
11 | jobs:
12 | build:
13 | name: Compile and Push Plugin
14 | runs-on: macos-latest
15 | steps:
16 | - uses: actions/checkout@v2
17 | - name: Build Plugin
18 | run: |
19 | # brew install xx
20 | git submodule update --init
21 | cd lib/spookyhash && git checkout spookyhash-1.0.6 && cd -
22 | make clean SPI=2.0 SPIVER=v2 && make osx SPI=2.0 SPIVER=v2 GCC=gcc
23 | make clean SPI=3.0 SPIVER=v3 && make osx SPI=3.0 SPIVER=v3 GCC=gcc
24 | - name: Commit OSX Plugin
25 | run: |
26 | shasum build/gtools_macosx_v3.plugin
27 | shasum build/gtools_macosx_v2.plugin
28 | #
29 | otool -L build/gtools_macosx_v3.plugin
30 | otool -L build/gtools_macosx_v2.plugin
31 | #
32 | cp -f build/gtools_macosx_v3.plugin lib/plugin/gtools_macosx_v3.plugin
33 | cp -f build/gtools_macosx_v2.plugin lib/plugin/gtools_macosx_v2.plugin
34 | #
35 | git config --global user.name 'Mauricio Caceres'
36 | git config --global user.email 'mauricio.caceres.bravo@gmail.com'
37 | git remote set-url origin https://x-access-token:${{ secrets.COMPILE_TOKEN }}@github.com/${{ github.repository }}
38 | #
39 | git add build/gtools_macosx_v3.plugin
40 | git add build/gtools_macosx_v2.plugin
41 | git add lib/plugin/gtools_macosx_v3.plugin
42 | git add lib/plugin/gtools_macosx_v2.plugin
43 | #
44 | echo ${GITHUB_REF##*/}
45 | [ -n "$(git status --porcelain)" ] && git commit -m "[Automated Commit] OSX plugin"
46 | git fetch
47 | git push -f origin HEAD:${GITHUB_REF##*/}
48 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | releases
2 | testing
3 | site/
4 | docs/site/
5 | build/gtools_macosx_v2.plugin.arm64
6 | build/gtools_macosx_v2.plugin.x86_64
7 | build/gtools_macosx_v3.plugin.arm64
8 | build/gtools_macosx_v3.plugin.x86_64
9 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "lib/spookyhash"]
2 | path = lib/spookyhash
3 | url = https://github.com/centaurean/spookyhash
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017, 2018 Mauricio Caceres Bravo
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to
7 | deal in the Software without restriction, including without limitation the
8 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
9 | sell copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/build/gglm.ado:
--------------------------------------------------------------------------------
1 | *! version 1.11.1 03Apr2023 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! Estimate GLM via IRLS by group and with HDFE
3 |
4 | capture program drop gglm
5 | program define gglm, rclass
6 |     version 13.1
7 |
8 |     * Remember the arguments exactly as typed; they are restored further down.
9 |     local 00: copy local 0
10 |
11 |     * -glm- is handed to gregress as an option, so a comma is supplied here
12 |     * only when the arguments do not contain one already.
13 |     local comma ,
14 |     if ( strpos(`"`0'"', ",") > 0 ) local comma
15 |     gregress `0' `comma' glm
16 |
17 |     * When the GREG_RC global is non-zero, clear it and stop without error.
18 |     * NOTE(review): GREG_RC is presumably set by gregress; confirm there.
19 |     if ( ${GREG_RC} ) {
20 |         global GREG_RC
21 |         exit 0
22 |     }
23 |     local 0: copy local 00
24 |
25 |     * Pass the r() results left by gregress on to the caller.
26 |     return local levels `"`r(levels)'"'
27 |     return scalar N    = r(N)
28 |     return scalar J    = r(J)
29 |     return scalar minJ = r(minJ)
30 |     return scalar maxJ = r(maxJ)
31 | end
32 |
33 |
--------------------------------------------------------------------------------
/build/gisid.ado:
--------------------------------------------------------------------------------
1 | *! version 1.1.1 23Jan2019 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! -isid- implementation using C for faster processing
3 |
4 | capture program drop gisid
5 | program define gisid
6 |     version 13.1
7 |
8 |     * Record the calling command in a global; it is cleared again right
9 |     * after the internal call below.
10 |     global GTOOLS_CALLER gisid
11 |
12 |     syntax varlist [if] [in] ,   /// Variables to check, [if condition] [in start / end]
13 |     [                            ///
14 |         Missok                   /// Missing values in varlist are OK
15 |         compress                 /// Try to compress strL variables
16 |         forcestrl                /// Force reading strL variables (stata 14 and above only)
17 |         Verbose                  /// Print info during function execution
18 |         _keepgreshape            /// (Undocumented) Keep greshape scalars
19 |         _CTOLerance(passthru)    /// (Undocumented) Counting sort tolerance; default is radix
20 |         BENCHmark                /// Benchmark function
21 |         BENCHmarklevel(int 0)    /// Benchmark various steps of the plugin
22 |         HASHmethod(passthru)     /// Hashing method: 0 (default), 1 (biject), 2 (spooky)
23 |         oncollision(passthru)    /// error|fallback: On collision, use native command or throw error
24 |         debug(passthru)          /// Print debugging info to console
25 |                                  ///
26 |                                  /// Unsupported isid options
27 |                                  /// ------------------------
28 |         Sort                     ///
29 |     ]
30 |
31 |     * A positive benchmark level switches -benchmark- on; the level is then
32 |     * wrapped back into option form so it can be forwarded.
33 |     if ( `benchmarklevel' > 0 ) local benchmark benchmark
34 |     local benchmarklevel benchmarklevel(`benchmarklevel')
35 |
36 |     * -sort- is parsed only so that it can be rejected with a clear message.
37 |     if ( "`sort'" != "" ) {
38 |         di as err "Option -sort- is not implemented"
39 |         exit 198
40 |     }
41 |
42 |     * Without -missok- the internal routine is told to exit on missing values.
43 |     local miss missing
44 |     if ( "`missok'" == "" ) local miss exitmissing
45 |
46 |     * Collect everything that is forwarded to the internal routine.
47 |     local gopts `miss' `compress' `forcestrl' `_ctolerance' `_keepgreshape' ///
48 |         `verbose' `benchmark' `benchmarklevel' ///
49 |         `oncollision' `hashmethod' `debug'
50 |
51 |     cap noi _gtools_internal `varlist' `if' `in', unsorted `gopts' gfunction(isid)
52 |     local internal_rc = _rc
53 |     global GTOOLS_CALLER ""
54 |
55 |     * rc 17999: run the native -isid- instead.
56 |     if ( `internal_rc' == 17999 ) {
57 |         isid `varlist' `if' `in', `missok'
58 |         exit 0
59 |     }
60 |
61 |     * rc 17001: report that there are no observations and exit cleanly.
62 |     if ( `internal_rc' == 17001 ) {
63 |         di as txt "(no observations)"
64 |         exit 0
65 |     }
66 |
67 |     * Any other non-zero code is passed on to the caller unchanged.
68 |     if ( `internal_rc' ) exit `internal_rc'
69 | end
70 |
--------------------------------------------------------------------------------
/build/givregress.ado:
--------------------------------------------------------------------------------
1 | *! version 1.11.1 03Apr2023 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! Estimate IV regression via 2SLS by group and with HDFE
3 |
4 | capture program drop givregress
5 | program define givregress, rclass
6 |     version 13.1
7 |
8 |     * Remember the arguments exactly as typed; they are restored further down.
9 |     local 00: copy local 0
10 |
11 |     * -ivregress- is handed to gregress as an option, so a comma is supplied
12 |     * here only when the arguments do not contain one already.
13 |     local comma ,
14 |     if ( strpos(`"`0'"', ",") > 0 ) local comma
15 |     gregress `0' `comma' ivregress
16 |
17 |     * When the GREG_RC global is non-zero, clear it and stop without error.
18 |     * NOTE(review): GREG_RC is presumably set by gregress; confirm there.
19 |     if ( ${GREG_RC} ) {
20 |         global GREG_RC
21 |         exit 0
22 |     }
23 |     local 0: copy local 00
24 |
25 |     * Pass the r() results left by gregress on to the caller.
26 |     return local levels `"`r(levels)'"'
27 |     return scalar N    = r(N)
28 |     return scalar J    = r(J)
29 |     return scalar minJ = r(minJ)
30 |     return scalar maxJ = r(maxJ)
31 | end
32 |
--------------------------------------------------------------------------------
/build/gpoisson.ado:
--------------------------------------------------------------------------------
1 | *! version 1.11.1 03Apr2023 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! Estimate poisson regression via IRLS by group and with HDFE
3 |
4 | capture program drop gpoisson
5 | program define gpoisson, rclass
6 |     version 13.1
7 |
8 |     * Remember the arguments exactly as typed; they are restored further down.
9 |     local 00: copy local 0
10 |
11 |     * -glm family(poisson)- is handed to gregress as options, so a comma is
12 |     * supplied here only when the arguments do not contain one already.
13 |     local comma ,
14 |     if ( strpos(`"`0'"', ",") > 0 ) local comma
15 |     gregress `0' `comma' glm family(poisson)
16 |
17 |     * When the GREG_RC global is non-zero, clear it and stop without error.
18 |     * NOTE(review): GREG_RC is presumably set by gregress; confirm there.
19 |     if ( ${GREG_RC} ) {
20 |         global GREG_RC
21 |         exit 0
22 |     }
23 |     local 0: copy local 00
24 |
25 |     * Pass the r() results left by gregress on to the caller.
26 |     return local levels `"`r(levels)'"'
27 |     return scalar N    = r(N)
28 |     return scalar J    = r(J)
29 |     return scalar minJ = r(minJ)
30 |     return scalar maxJ = r(maxJ)
31 | end
32 |
--------------------------------------------------------------------------------
/build/greg.ado:
--------------------------------------------------------------------------------
1 | *! version 1.11.8 28Jun2024 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! Estimate linear regression via OLS by group and with HDFE
3 |
4 | capture program drop greg
5 | program define greg, rclass
6 |     version 13.1
7 |
8 |     * Remember the arguments exactly as typed; they are restored further down.
9 |     local 00: copy local 0
10 |     gregress `0'
11 |
12 |     * When the GREG_RC global is non-zero, clear it and stop without error.
13 |     * NOTE(review): GREG_RC is presumably set by gregress; confirm there.
14 |     if ( ${GREG_RC} ) {
15 |         global GREG_RC
16 |         exit 0
17 |     }
18 |     local 0: copy local 00
19 |
20 |     * Pass the r() results left by gregress on to the caller.
21 |     return local cmd `"`r(cmd)'"'
22 |     return local mata `"`r(mata)'"'
23 |     return scalar N    = r(N)
24 |     return scalar J    = r(J)
25 |     return scalar minJ = r(minJ)
26 |     return scalar maxJ = r(maxJ)
27 | end
28 |
--------------------------------------------------------------------------------
/build/gstats.sthlp:
--------------------------------------------------------------------------------
1 | {smcl}
2 | {* *! version 0.4.0 09Jun2019}{...}
3 | {viewerdialog gstats "dialog gstats"}{...}
4 | {vieweralsosee "[R] gstats" "mansection R gstats"}{...}
5 | {viewerjumpto "Syntax" "gstats##syntax"}{...}
6 | {viewerjumpto "Description" "gstats##description"}{...}
7 | {title:Title}
8 |
9 | {p2colset 5 15 23 2}{...}
10 | {p2col :{cmd:gstats} {hline 2}} Various statistical functions and transformations. {p_end}
11 | {p2colreset}{...}
12 |
13 | {pstd}
14 | {it:Important}: Please run {stata gtools, upgrade} to update {cmd:gtools} to
15 | the latest stable version.
16 |
17 | {marker syntax}{...}
18 | {title:Syntax}
19 |
20 | {p 8 17 2}
21 | {cmd:gstats}
22 | {it:subcommand}
23 | {varlist}
24 | {ifin}
25 | [{it:{help gstats##weight:weight}}]
26 | [{cmd:,} {opth by(varlist)} {it:{help gstats##table_options:subcommand_options}}]
27 |
28 | {phang}
29 | {opt gstats} is a wrapper for various statistical functions and
30 | transformations, including:
31 |
32 | {p 8 17 2}
33 | {help gstats hdfe:{bf:hdfe}}
34 | (alias {help gstats hdfe:{bf:residualize}}) is a fast utility for residualizing variables (i.e. HDFE transform; accepts weights). {p_end}
35 |
36 | {p 8 17 2}
37 | {help gstats winsor:{bf:winsor}}
38 | as a fast {opt winsor2} alternative (accepts weights). {p_end}
39 |
40 | {p 8 17 2}
41 | {help gstats summarize:{bf:{ul:sum}marize}} and
42 | {help gstats summarize:{bf:{ul:tab}stat}} are fast,
43 | by-able alternatives to {opt summarize, detail} and {opt tabstat} (accept weights). {p_end}
44 |
45 | {p 8 17 2}
46 | {help gstats transform:{bf:transform}}
47 | to apply various statistical transformations (accepts weights). {p_end}
48 |
49 | {marker description}{...}
50 | {title:Description}
51 |
52 | {pstd}
53 | {opt gstats} is a wrapper to several statistical functions and
54 | transformations. In theory {opt gegen} would be the place to expand
55 | {opt gtools}; however, {opt gegen}'s internally implemented functions
56 | were written with two assumptions: first, the output is unique at the
57 | group level; second, there is always a target variable. {opt gstats}
58 | is written to be more flexible and allow arbitrary functions and
59 | transformations.
60 |
61 | {pstd}
62 | Weights are supported for the following subcommands: {it:winsor}, {it:summarize}, {it:tabstat}, {it:residualize}.
63 |
64 | {marker author}{...}
65 | {title:Author}
66 |
67 | {pstd}Mauricio Caceres{p_end}
68 | {pstd}{browse "mailto:mauricio.caceres.bravo@gmail.com":mauricio.caceres.bravo@gmail.com }{p_end}
69 | {pstd}{browse "https://mcaceresb.github.io":mcaceresb.github.io}{p_end}
70 |
71 | {title:Website}
72 |
73 | {pstd}{cmd:gstats} is maintained as part of the {manhelp gtools R:gtools} project at {browse "https://github.com/mcaceresb/stata-gtools":github.com/mcaceresb/stata-gtools}{p_end}
74 |
75 | {marker acknowledgment}{...}
76 | {title:Acknowledgment}
77 |
78 | {pstd}
79 | {opt gtools} was largely inspired by Sergio Correia's {it:ftools}:
80 | {browse "https://github.com/sergiocorreia/ftools"}.
81 | {p_end}
82 |
83 | {pstd}
84 | The OSX version of gtools was implemented with invaluable help from @fbelotti;
85 | see {browse "https://github.com/mcaceresb/stata-gtools/issues/11"}.
86 | {p_end}
87 |
88 | {title:Also see}
89 |
90 | {p 4 13 2}
91 | help for
92 | {help gtools}
93 |
--------------------------------------------------------------------------------
/build/gtools.pkg:
--------------------------------------------------------------------------------
1 | v 1.11.8
2 | d
3 | d 'GTOOLS': Faster implementation of common Stata commands optimized for large datasets
4 | d
5 | d Faster Stata for big data. Gtools provides a hash-based implementation
6 | d of common Stata commands using C plugins for a massive speed
7 | d improvement. Gtools implements gcollapse, greshape, gquantiles
8 | d (pctile, xtile, and _pctile), gcontract, gegen, gisid, glevelsof,
9 | d gunique, gdistinct, gduplicates, gtop, and gstats (winsor, summarize,
10 | d tabstat). Syntax is largely analogous to their native counterparts
11 | d (see help gtools for details). This package was inspired by Sergio
12 | d Correia's ftools.
13 | d
14 | d KW: plugin
15 | d KW: gtools
16 | d KW: collapse
17 | d KW: reshape
18 | d KW: regress
19 | d KW: ivregress
20 | d KW: glm
21 | d KW: logit
22 | d KW: poisson
23 | d KW: residualize
24 | d KW: hdfe
25 | d KW: contract
26 | d KW: egen
27 | d KW: xtile
28 | d KW: fastxtile
29 | d KW: pctile
30 | d KW: _pctile
31 | d KW: levelsof
32 | d KW: toplevelsof
33 | d KW: isid
34 | d KW: duplicates
35 | d KW: unique
36 | d KW: distinct
37 | d KW: sort
38 | d KW: gsort
39 | d KW: gegen
40 | d KW: fasterxtile
41 | d KW: gquantiles
42 | d KW: gcollapse
43 | d KW: greshape
44 | d KW: gregress
45 | d KW: givregress
46 | d KW: gglm
47 | d KW: gpoisson
48 | d KW: glogit
49 | d KW: gcontract
50 | d KW: gisid
51 | d KW: gduplicates
52 | d KW: glevelsof
53 | d KW: gtoplevelsof
54 | d KW: gunique
55 | d KW: gdistinct
56 | d KW: hashsort
57 | d KW: winsor
58 | d KW: summarize
59 | d KW: tabstat
60 | d
61 | d Requires: Stata version 13.1
62 | d
63 | d Author: Mauricio Caceres Bravo
64 | d Support: email mauricio.caceres.bravo@gmail.com
65 | d
66 | d Distribution-Date: 20240628
67 | d
68 | f _gtools_internal.ado
69 | f _gtools_internal.mata
70 | f gcollapse.ado
71 | f gcontract.ado
72 | f gegen.ado
73 | f gquantiles.ado
74 | f fasterxtile.ado
75 | f gunique.ado
76 | f gdistinct.ado
77 | f glevelsof.ado
78 | f gtop.ado
79 | f gtoplevelsof.ado
80 | f gisid.ado
81 | f gduplicates.ado
82 | f hashsort.ado
83 | f greshape.ado
84 | f greg.ado
85 | f gregress.ado
86 | f givregress.ado
87 | f gglm.ado
88 | f gpoisson.ado
89 | f gstats.ado
90 | f gtools.ado
91 | f gcollapse.sthlp
92 | f gcontract.sthlp
93 | f gegen.sthlp
94 | f gquantiles.sthlp
95 | f fasterxtile.sthlp
96 | f gunique.sthlp
97 | f gdistinct.sthlp
98 | f glevelsof.sthlp
99 | f gtop.sthlp
100 | f gtoplevelsof.sthlp
101 | f gisid.sthlp
102 | f gduplicates.sthlp
103 | f hashsort.sthlp
104 | f greshape.sthlp
105 | f greg.sthlp
106 | f gregress.sthlp
107 | f givregress.sthlp
108 | f gglm.sthlp
109 | f gstats.sthlp
110 | f gstats_residualize.sthlp
111 | f gstats_hdfe.sthlp
112 | f gstats_winsor.sthlp
113 | f gstats_tab.sthlp
114 | f gstats_sum.sthlp
115 | f gstats_summarize.sthlp
116 | f gstats_transform.sthlp
117 | f gstats_range.sthlp
118 | f gstats_moving.sthlp
119 | f gtools.sthlp
120 | f lgtools.mlib
121 | f gtools_windows_v2.plugin
122 | f gtools_unix_v2.plugin
123 | f gtools_macosx_v2.plugin
124 | f gtools_windows_v3.plugin
125 | f gtools_unix_v3.plugin
126 | f gtools_macosx_v3.plugin
127 |
--------------------------------------------------------------------------------
/build/gtools_macosx_v2.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/gtools_macosx_v2.plugin
--------------------------------------------------------------------------------
/build/gtools_macosx_v3.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/gtools_macosx_v3.plugin
--------------------------------------------------------------------------------
/build/gtools_unix_v2.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/gtools_unix_v2.plugin
--------------------------------------------------------------------------------
/build/gtools_unix_v3.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/gtools_unix_v3.plugin
--------------------------------------------------------------------------------
/build/gtools_windows_v2.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/gtools_windows_v2.plugin
--------------------------------------------------------------------------------
/build/gtools_windows_v3.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/gtools_windows_v3.plugin
--------------------------------------------------------------------------------
/build/gtop.ado:
--------------------------------------------------------------------------------
1 | *! version 1.2.0 23Mar2019 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! Calculate the top groups by count of a varlist (jointly).
3 |
4 | cap program drop gtop
5 | program gtop, rclass
6 |     version 13.1
7 |
8 |     * Keep a verbatim copy of the arguments. The copy-local form stores the
9 |     * macro contents untouched, whereas a plain assignment of the expanded
10 |     * text would be parsed again (enclosing quotes can be stripped).
11 |     local 00: copy local 0
12 |     gtoplevelsof `0'
13 |
14 |     * When the GTOP_RC global is non-zero, clear it and stop without error.
15 |     * NOTE(review): GTOP_RC is presumably set by gtoplevelsof; confirm there.
16 |     if ( ${GTOP_RC} ) {
17 |         global GTOP_RC
18 |         exit 0
19 |     }
20 |     local 0: copy local 00
21 |
22 |     * Parse the arguments again only to pick up the local() and matrix()
23 |     * options; all remaining options fall into the catch-all and are not
24 |     * used here.
25 |     qui syntax [anything] [if] [in] [aw fw pw], [LOCal(str) MATrix(str) *]
26 |     if ( "`local'" != "" ) c_local `local' `"`r(levels)'"'
27 |     if ( "`matrix'" != "" ) matrix `matrix' = r(toplevels)
28 |
29 |     * Pass the r() results left by gtoplevelsof on to the caller.
30 |     return local levels `"`r(levels)'"'
31 |     return scalar N = r(N)
32 |     return scalar J = r(J)
33 |     return scalar minJ = r(minJ)
34 |     return scalar maxJ = r(maxJ)
35 |     return scalar alpha = r(alpha)
36 |     return scalar ntop = r(ntop)
37 |     return scalar nrows = r(nrows)
38 |
39 |     * Return the toplevels matrix unless r(matalevels) is set, in which
40 |     * case that macro is returned instead.
41 |     if ( `"`r(matalevels)'"' == "" ) {
42 |         tempname gmat
43 |         matrix `gmat' = r(toplevels)
44 |         return matrix toplevels = `gmat'
45 |     }
46 |     else {
47 |         return local matalevels = `"`r(matalevels)'"'
48 |     }
49 | end
50 |
--------------------------------------------------------------------------------
/build/hashsort.ado:
--------------------------------------------------------------------------------
1 | *! version 1.0.1 23Jan2019 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! Hash-based implementation of -sort- and -gsort- using C-plugins
3 |
4 | capture program drop hashsort
5 | program define hashsort // sort via _gtools_internal; falls back to sort/gsort on rc 17999
6 | version 13.1
7 |
8 | global GTOOLS_CALLER hashsort
9 | syntax anything, /// Variables to sort by: [+|-]varname [[+|-]varname ...]
10 | [ ///
11 | GENerate(passthru) /// Generate variable with sort order
12 | replace /// Replace generated variable, if it exists
13 | sortgen /// Sort by generated variable, if applicable
14 | skipcheck /// Turn off internal is sorted check
15 | ///
16 | compress /// Try to compress strL variables
17 | forcestrl /// Force reading strL variables (stata 14 and above only)
18 | Verbose /// Print info during function execution
19 | _CTOLerance(passthru) /// (Undocumented) Counting sort tolerance; default is radix
20 | BENCHmark /// Benchmark function
21 | BENCHmarklevel(int 0) /// Benchmark various steps of the plugin
22 | HASHmethod(passthru) /// Hashing method: 0 (default), 1 (biject), 2 (spooky)
23 | oncollision(passthru) /// error|fallback: On collision, use native command or throw error
24 | debug(passthru) /// Print debugging info to console
25 | ///
26 | tag(passthru) ///
27 | counts(passthru) ///
28 | fill(passthru) ///
29 | invertinmata ///
30 | ///
31 | /// Unsupported sort options
32 | /// ------------------------
33 | ///
34 | stable /// Hashsort is always stable
35 | mlast ///
36 | Mfirst ///
37 | ]
38 |
39 | if ( `benchmarklevel' > 0 ) local benchmark benchmark // any positive level switches benchmark on
40 | local benchmarklevel benchmarklevel(`benchmarklevel') // re-wrap as an option string to pass through
41 |
42 | if ( "`stable'" != "" ) {
43 | di as txt "hashsort is always -stable-"
44 | }
45 | * mfirst and mlast are mutually exclusive: reset the caller global and stop
46 | if ( ("`mfirst'" != "") & ("`mlast'" != "") ) {
47 | di as err "Cannot request both {opt mfirst} and {opt mlast}"
48 | global GTOOLS_CALLER ""
49 | exit 198
50 | }
51 | * mfirst is set by default, unlike gsort
52 | if ( ("`mfirst'" == "") & ("`mlast'" == "") & (strpos("`anything'", "-") > 0) ) {
53 | di as txt "(note: missing values will be sorted first)"
54 | }
55 |
56 | * mfirst is set by default
57 | if ( ("`mfirst'" == "") & ("`mlast'" == "") ) {
58 | local mfirst mfirst
59 | }
60 |
61 | if ( "`generate'" != "" ) local skipcheck skipcheck // generate() always forces skipcheck
62 |
63 | local opts `compress' `forcestrl' nods
64 | local opts `opts' `verbose' `benchmark' `benchmarklevel' `_ctolerance'
65 | local opts `opts' `oncollision' `hashmethod' `debug'
66 | local eopts `invertinmata' `sortgen' `skipcheck'
67 | local gopts `generate' `tag' `counts' `fill' `replace' `mlast'
68 | cap noi _gtools_internal `anything', missing `opts' `gopts' `eopts' gfunction(sort) // captured so the return code can be inspected below
69 | global GTOOLS_CALLER ""
70 | local rc = _rc
71 |
72 | if ( `rc' == 17999 ) { // NOTE(review): 17999 looks like the internal fall-back-to-native code -- confirm
73 | if regexm("`anything'", "[\+\-]") {
74 | gsort `anything', `generate' `mfirst'
75 | exit 0
76 | }
77 | else {
78 | sort `anything'
79 | exit 0
80 | }
81 | }
82 | else if ( `rc' == 17001 ) { // NOTE(review): 17001 is treated as success; presumably nothing to sort -- confirm
83 | exit 0
84 | }
85 | else if ( `rc' ) exit `rc'
86 | end
87 |
--------------------------------------------------------------------------------
/build/lgtools.mlib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/build/lgtools.mlib
--------------------------------------------------------------------------------
/build/stata.toc:
--------------------------------------------------------------------------------
1 | v 1.11.8
2 | d Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
3 | p 'GTOOLS': Faster implementation of common Stata commands for big data
4 |
--------------------------------------------------------------------------------
/docs/benchmarks/quick.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/docs/benchmarks/quick.png
--------------------------------------------------------------------------------
/docs/benchmarks/quickdark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/docs/benchmarks/quickdark.png
--------------------------------------------------------------------------------
/docs/css/extra-material-dark.css:
--------------------------------------------------------------------------------
1 | @media only screen and (max-width: 76.1875em) {
2 | .md-nav {
3 | background-color: #000; } }
4 |
5 | .md-header {
6 | background-color: #000; }
7 |
8 | .md-container {
9 | background-color: #333; }
10 |
11 | .md-typeset .footnote {
12 | color: #ccc; }
13 |
14 | body {
15 | color: #ccc; }
16 |
17 | .md-typeset pre {
18 | background-color: #000; }
19 |
20 | .md-typeset code {
21 | color: #ccc;
22 | box-shadow: 0.29412em 0 0 rgba(0, 0, 0, 0.5), -0.29412em 0 0 rgba(0, 0, 0, 0.5);
23 | background-color: rgba(0, 0, 0, 0.5); }
24 |
25 | .md-nav__link[data-md-state=blur] {
26 | color: rgba(230, 230, 230, 0.54); }
27 |
28 | .md-typeset h1 {
29 | color: #ccc; }
30 |
31 | a, .md-typeset a, md-nav__link, md-nav__link:hover {
32 | color: #00968c; }
33 |
34 | a footnote-ref::before {
35 | border-color: #00968c; }
36 |
37 | a footnote-ref::before {
38 | color: #00968c; }
39 |
40 | .md-nav__item--active > .md-nav__link, .md-nav__link:active {
41 | color: #00968c; }
42 |
43 | .md-nav--secondary {
44 | border-left: 0.4rem solid #00968c; }
45 |
46 | .md-nav__link:focus, .md-nav__link:hover {
47 | color: #00968c; }
48 |
49 | .md-typeset hr {
50 | border-bottom: .1rem dotted #666; }
51 |
52 | .md-footer-copyright {
53 | color: #666; }
54 |
55 | .md-typeset blockquote {
56 | padding-left: 1.2rem;
57 | border-left: 0.4rem solid rgba(230, 230, 230, 0.26);
58 | color: rgba(230, 230, 230, 0.54); }
59 |
60 | @media only screen and (max-width: 76.1875em) {
61 | html .md-nav--primary .md-nav__title--site {
62 | background-color: #000; }
63 | html .md-nav--primary .md-nav__title ~ .md-nav__list {
64 | background: linear-gradient(180deg, #000 10%, transparent), linear-gradient(180deg, rgba(0, 0, 0, 0.26), rgba(0, 0, 0, 0.07) 35%, transparent 60%);
65 | background-color: #333; } }
66 |
67 | pre { color: white !important; }
68 |
69 | .md-clipboard:before {
70 | color: rgb(255, 255, 255);
71 | }
72 |
73 | .codehilite:hover .md-clipboard:before,.md-typeset .highlight:hover .md-clipboard:before,pre:hover .md-clipboard:before {
74 | color: rgba(255, 255, 255, 0.54) !important
75 | }
76 |
--------------------------------------------------------------------------------
/docs/css/extra-material-dark.scss:
--------------------------------------------------------------------------------
1 | // $link_color: #10cfff;
2 | $link_color: #00968c;
3 | $background_color1: #333;
4 |
5 | @media only screen and (max-width: 76.1875em) {
6 | .md-nav {
7 | background-color: #000;
8 | }
9 | }
10 | .md-header {
11 | background-color: #000;
12 | }
13 | .md-container {
14 | background-color: $background_color1;
15 | }
16 | .md-typeset .footnote {
17 | color: #ccc;
18 | }
19 | body {
20 | color: #ccc;
21 | }
22 | .md-typeset pre {
23 | background-color: #000;
24 | }
25 | .md-typeset code {
26 | color: #ccc;
27 | box-shadow: 0.29412em 0 0 hsla(0,0%,0%,.5), -0.29412em 0 0 hsla(0,0%,0%,.5);
28 | background-color: hsla(0,0%,0%,.5);
29 | }
30 | .md-nav__link[data-md-state=blur] {
31 | color: rgba(230,230,230,.54);
32 | }
33 | .md-typeset h1 {
34 | color: #ccc;
35 | }
36 | a, .md-typeset a, md-nav__link, md-nav__link:hover {
37 | color: $link_color;
38 | }
39 | a footnote-ref::before {
40 | border-color: $link_color;
41 | }
42 | a footnote-ref::before {
43 | color: $link_color;
44 | }
45 | .md-nav__item--active>.md-nav__link, .md-nav__link:active {
46 | color: $link_color;
47 | }
48 | .md-nav--secondary {
49 | border-left: .4rem solid $link_color;
50 | }
51 | .md-nav__link:focus, .md-nav__link:hover {
52 | color: $link_color;
53 | }
54 | .md-typeset hr {
55 | border-bottom: .1rem dotted #666;
56 | }
57 | .md-footer-copyright {
58 | color: #666;
59 | }
60 | .md-typeset blockquote {
61 | padding-left: 1.2rem;
62 | border-left: .4rem solid rgba(230,230,230,.26);
63 | color: rgba(230,230,230,.54);
64 | }
65 | @media only screen and (max-width: 76.1875em) {
66 | html .md-nav--primary .md-nav__title--site {
67 | background-color: #000;
68 | }
69 | html .md-nav--primary .md-nav__title~.md-nav__list {
70 | background: linear-gradient(180deg,#000 10%,hsla(0,0%,0%,0)),linear-gradient(180deg,rgba(0,0,0,.26),rgba(0,0,0,.07) 35%,transparent 60%);
71 | background-color: $background_color1;
72 | }
73 | }
74 |
75 | pre { color: white !important; }
76 |
77 | .md-clipboard:before {
78 | color: rgb(255, 255, 255);
79 | }
80 |
81 | .codehilite:hover .md-clipboard:before,.md-typeset .highlight:hover .md-clipboard:before,pre:hover .md-clipboard:before {
82 | color: rgba(255, 255, 255, 0.54) !important
83 | }
84 |
--------------------------------------------------------------------------------
/docs/css/extra-material.css:
--------------------------------------------------------------------------------
1 | .codespan {
2 | font-family: monospace;
3 | font-size: 18px
4 | }
5 |
--------------------------------------------------------------------------------
/docs/css/extra-rtd.css:
--------------------------------------------------------------------------------
1 | code {
2 | font-size: 13px
3 | }
4 |
5 | .codehilite {
6 | font-size: 14px
7 | }
8 |
9 | .codespan {
10 | font-family: monospace;
11 | font-size: 14px
12 | }
13 |
14 | li {
15 | font-size: 15px
16 | }
17 |
18 | table {
19 | font-size: 16px
20 | }
21 |
--------------------------------------------------------------------------------
/docs/examples/gcontract.do:
--------------------------------------------------------------------------------
1 | * The options here are essentially the same as Stata's contract,
2 | * save for the standard gtools options.
3 |
4 | sysuse auto, clear
5 | gen long id = _n * 1000
6 | expand id
7 | gcontract rep78, verbose
8 |
9 | l
10 |
11 |
12 | * You can add frequencies, percentages, and so on:
13 | sysuse auto, clear
14 | gen long id = _n * 1000
15 | expand id
16 | gcontract rep78, freq(f) cfreq(cf) percent(p) cpercent(cp) bench
17 |
18 | l
19 |
20 |
21 | * Last, with multiple variables you can "fill in" missing groups. This option
22 | * has not been implemented internally and as such is very slow:
23 |
24 | sysuse auto, clear
25 | gen long id = _n * 1000
26 | expand id
27 | gcontract foreign rep78, ///
28 | freq(f) cfreq(cf) percent(p) cpercent(cp) bench(3) zero
29 |
30 | l
31 |
32 | * You will note a few levels have 0 frequency, which means they did
33 | * not appear in the full data.
34 |
--------------------------------------------------------------------------------
/docs/examples/gdistinct.do:
--------------------------------------------------------------------------------
1 | * gdistinct can function as a drop-in replacement for distinct.
2 |
3 | sysuse auto, clear
4 | gdistinct
5 | matrix list r(distinct)
6 |
7 | gdistinct, sort(-distinct)
8 |
9 | gdistinct, max(10)
10 |
11 | gdistinct make-headroom
12 |
13 | gdistinct make-headroom, missing abbrev(6)
14 |
15 | gdistinct foreign rep78, joint
16 |
17 | gdistinct foreign rep78, joint missing
18 |
--------------------------------------------------------------------------------
/docs/examples/gduplicates.do:
--------------------------------------------------------------------------------
1 | * Setup
2 | sysuse auto
3 | keep make price mpg rep78 foreign
4 | expand 2 in 1/2
5 |
6 | * Report duplicates
7 | gduplicates report
8 |
9 | * List one example for each group of duplicated observations
10 | sort mpg
11 | gduplicates examples
12 | gduplicates examples, sorted
13 |
14 | * List all duplicated observations
15 | gduplicates list
16 |
17 | * Create variable dup containing the number of duplicates (0 if
18 | * observation is unique)
19 | gduplicates tag, generate(dup)
20 |
21 | * List the duplicated observations
22 | list if dup == 1
23 |
24 | * Drop all but the first occurrence of each group of duplicated
25 | * observations
26 | gduplicates drop
27 |
28 | * List all duplicated observations
29 | gduplicates list
30 |
31 |
--------------------------------------------------------------------------------
/docs/examples/gegen.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear
2 | gegen id = group(foreign)
3 | gegen tag = tag(foreign) // tag() flags one observation per group; group() here would only duplicate id
4 | gegen sum = sum(mpg), by(foreign)
5 | gegen sum2 = sum(mpg rep78), by(foreign)
6 | gegen p5 = pctile(mpg rep78), p(5) by(foreign)
7 | gegen nuniq = nunique(mpg), by(foreign)
8 |
9 | * The function can be any of the supported functions above.
10 | * It can also be any function supported by egen:
11 |
12 | webuse egenxmpl4, clear
13 | gegen hsum = rowtotal(a b c)
14 |
15 | sysuse auto, clear
16 | gegen seq = seq(), by(foreign)
17 |
--------------------------------------------------------------------------------
/docs/examples/gglm.do:
--------------------------------------------------------------------------------
1 | * NOTE: gglm is in beta. To enable beta features, define
2 | *
3 | * global GTOOLS_BETA = 1
4 |
5 | * Showcase
6 | * --------
7 |
8 | webuse lbw, clear
9 | gglm low age lwt smoke ptl ht ui, absorb(race) family(binomial)
10 | mata GtoolsLogit.print()
11 |
12 | gen w = _n
13 | gglm low age lwt smoke ptl ht ui [fw = w], absorb(race) family(binomial)
14 | mata GtoolsLogit.print()
15 |
16 | webuse ships, clear
17 | expand 2
18 | gen by = 1.5 - (_n < _N / 2)
19 | gen w = _n
20 | gen _co_75_79 = co_75_79
21 | qui tab ship, gen(_s)
22 |
23 | gglm accident op_75_79 co_65_69 co_70_74 co_75_79 [fw = w], robust family(poisson)
24 | mata GtoolsPoisson.print()
25 |
26 | gglm accident op_75_79 co_65_69 co_70_74 co_75_79 _co_75_79 [pw = w], cluster(ship) family(poisson)
27 | mata GtoolsPoisson.print()
28 |
29 | gglm accident op_75_79 co_65_69 co_70_74 co_75_79 _s*, absorb(ship) cluster(ship) family(poisson)
30 | mata GtoolsPoisson.print()
31 |
32 | gglm accident op_75_79 co_65_69 co_70_74 co_75_79, by(by) absorb(ship) robust family(poisson)
33 | mata GtoolsPoisson.print()
34 |
35 | * Basic Benchmark
36 | * ---------------
37 |
38 | clear
39 | local N 1000000
40 | local G 10000
41 | set obs `N'
42 | gen g1 = int(runiform() * `G')
43 | gen g2 = int(runiform() * `G')
44 | gen g3 = int(runiform() * `G')
45 | gen g4 = int(runiform() * `G')
46 | gen x3 = runiform()
47 | gen x4 = runiform()
48 | gen x1 = x3 + runiform()
49 | gen x2 = x4 + runiform()
50 | gen l = int(0.25 * x1 - 0.75 * x2 + g1 + g2 + g3 + g4 + 20 * rnormal())
51 |
52 | timer clear
53 | timer on 1
54 | gglm l x1 x2, absorb(g1 g2 g3) mata(greg) family(poisson)
55 | timer off 1
56 | mata greg.print()
57 | timer on 2
58 | ppmlhdfe l x1 x2, absorb(g1 g2 g3)
59 | timer off 2
60 |
61 | timer on 3
62 | gglm l x1 x2, absorb(g1 g2 g3) cluster(g4) mata(greg) family(poisson)
63 | timer off 3
64 | mata greg.print()
65 | timer on 4
66 | ppmlhdfe l x1 x2, absorb(g1 g2 g3) vce(cluster g4)
67 | timer off 4
68 |
69 | timer list
70 |
71 | * 1: 3.22 / 1 = 3.2160
72 | * 2: 29.64 / 1 = 29.6380
73 | * 3: 3.31 / 1 = 3.3140
74 | * 4: 31.32 / 1 = 31.3190
75 |
--------------------------------------------------------------------------------
/docs/examples/gisid.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear
2 |
3 | gisid mpg // not an id
4 | gisid make
5 |
6 | replace make = "" in 1
7 | gisid make // should never be missing
8 | gisid make, missok
9 |
10 | * gisid can also take a range, that is
11 | gisid mpg in 1
12 | gisid mpg if _n == 1
13 |
--------------------------------------------------------------------------------
/docs/examples/givregress.do:
--------------------------------------------------------------------------------
1 | * NOTE: givregress is in beta. To enable beta features, define
2 | *
3 | * global GTOOLS_BETA = 1
4 |
5 | * Showcase
6 | * --------
7 |
8 | sysuse auto, clear
9 | gen _mpg = mpg
10 | qui tab headroom, gen(_h)
11 |
12 | givregress price (mpg = gear_ratio) weight turn
13 | givregress price (mpg = gear_ratio) _mpg, cluster(headroom)
14 | mata GtoolsIV.print()
15 |
16 | givregress price (mpg weight = gear_ratio turn displacement) _h*, absorb(rep78 headroom)
17 | mata GtoolsIV.print()
18 |
19 | givregress price (mpg = gear_ratio) weight [fw = rep78], absorb(headroom)
20 | mata GtoolsIV.print()
21 |
22 | givregress price (mpg = gear_ratio turn displacement) weight [aw = rep78], by(foreign)
23 | mata GtoolsIV.print()
24 |
25 | givregress price (mpg = gear_ratio turn) weight, by(foreign) mata(coefsOnly, nose) prefix(b(_b_) se(_se_))
26 | givregress price (mpg weight = gear_ratio turn), mata(seOnly, nob) prefix(hdfe(_hdfe_))
27 | givregress price (mpg weight = gear_ratio turn) displacement, mata(nothing, nob nose)
28 |
29 | mata coefsOnly.print()
30 | mata seOnly.print()
31 | mata nothing.print()
32 |
33 | * Basic Benchmark
34 | * ---------------
35 |
36 | clear
37 | local N 1000000
38 | local G 10000
39 | set obs `N'
40 | gen g1 = int(runiform() * `G')
41 | gen g2 = int(runiform() * `G')
42 | gen g3 = int(runiform() * `G')
43 | gen g4 = int(runiform() * `G')
44 | gen x3 = runiform()
45 | gen x4 = runiform()
46 | gen x1 = x3 + runiform()
47 | gen x2 = x4 + runiform()
48 | gen y = 0.25 * x1 - 0.75 * x2 + g1 + g2 + g3 + g4 + 20 * rnormal()
49 |
50 | timer clear
51 | timer on 1
52 | givregress y (x1 x2 = x3 x4), absorb(g1 g2 g3) mata(greg)
53 | timer off 1
54 | mata greg.print()
55 | timer on 2
56 | ivreghdfe y (x1 x2 = x3 x4), absorb(g1 g2 g3)
57 | timer off 2
58 |
59 | timer on 3
60 | givregress y (x1 x2 = x3 x4), absorb(g1 g2 g3) cluster(g4) mata(greg)
61 | timer off 3
62 | mata greg.print()
63 | timer on 4
64 | ivreghdfe y (x1 x2 = x3 x4), absorb(g1 g2 g3) cluster(g4)
65 | timer off 4
66 |
67 | timer list
68 |
69 | * 1: 0.89 / 1 = 0.8920
70 | * 2: 17.62 / 1 = 17.6240
71 | * 3: 1.07 / 1 = 1.0670
72 | * 4: 23.17 / 1 = 23.1670
73 |
--------------------------------------------------------------------------------
/docs/examples/glevelsof.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear
2 | glevelsof rep78
3 | qui glevelsof rep78, miss local(mylevs)
4 | display "`mylevs'"
5 | glevelsof rep78, sep(,)
6 |
7 |
8 | ************************************
9 | * De-duplicating a variable list *
10 | ************************************
11 |
12 | * `glevelsof` can store the unique levels of a varlist. This is
13 | * especially useful when the user wants to obtain the unique levels but
14 | * runs up against the stata macro variable limit.
15 |
16 | set seed 42
17 | clear
18 | set obs 100000
19 | gen x = "a long string appeared" + string(mod(_n, 10000))
20 | gen y = int(10 * runiform())
21 | glevelsof x
22 | glevelsof x, gen(uniq_) nolocal
23 | gisid uniq_* in 1 / `r(J)'
24 |
25 | * If the user prefers to work with mata, simply pass the option
26 | * `matasave[(name)]`. With mixed-types, numbers and strings are
27 | * stored in separate matrices as well as a single printed matrix,
28 | * but the latter can be suppressed to save memory.
29 |
30 | glevelsof x y, mata(xy) nolocal
31 | glevelsof x, mata(x) nolocal silent
32 |
33 | mata xy.desc()
34 | mata x.desc()
35 |
36 | * The user can also replace the source variables if need be. This is
37 | * faster and saves memory, but it dispenses with the original variables.
38 |
39 | glevelsof x y, gen(, replace) nolocal
40 | l in `r(J)'
41 | l in `=_N'
42 |
43 |
44 | *******************
45 | * Number format *
46 | *******************
47 |
48 | * `levelsof` by default shows many significant digits for numerical variables.
49 |
50 | sysuse auto, clear
51 | replace headroom = headroom + 0.1
52 | levelsof headroom
53 | glevelsof headroom
54 |
55 | * This is cumbersome. You can specify a number format to compress this:
56 | glevelsof headroom, numfmt(%.3g)
57 |
58 |
59 | ************************
60 | * Multiple variables *
61 | ************************
62 |
63 | * `glevelsof` can parse multiple variables:
64 | local varlist foreign rep78
65 | glevelsof `varlist', sep("|") colsep(", ")
66 |
67 | * If you know a bit of mata, you can parse this string!
68 | mata:
69 | string scalar function unquote_str(string scalar quoted_str) // strip one layer of surrounding quotes, if present
70 | {
71 | if ( substr(quoted_str, 1, 1) == `"""' ) { // starts with a plain double quote
72 | quoted_str = substr(quoted_str, 2, strlen(quoted_str) - 2) // drop the first and last character
73 | }
74 | else if (substr(quoted_str, 1, 2) == "`" + `"""') { // starts with a compound-quote opener (2 characters)
75 | quoted_str = substr(quoted_str, 3, strlen(quoted_str) - 4) // drop the first two and last two characters
76 | }
77 | return (quoted_str); // returned unchanged when neither prefix matched
78 | }
79 |
80 | t = tokeninit(`"`r(sep)'"', (""), (`""""', `"`""'"'), 1)
81 | tokenset(t, `"`r(levels)'"')
82 |
83 | rows = tokengetall(t)
84 | for (i = 1; i <= cols(rows); i++) {
85 | rows[i] = unquote_str(rows[i]);
86 | }
87 |
88 | levels = J(cols(rows), `:list sizeof varlist', "")
89 |
90 | t = tokeninit(`"`r(colsep)'"', (""), (`""""', `"`""'"'), 1)
91 | for (i = 1; i <= cols(rows); i++) {
92 | tokenset(t, rows[i])
93 | levels[i, .] = tokengetall(t)
94 | for (k = 1; k <= `:list sizeof varlist'; k++) {
95 | levels[i, k] = unquote_str(levels[i, k])
96 | }
97 | }
98 | end
99 |
100 | mata: levels
101 |
102 | * While this looks cumbersome, this mechanism is used internally by
103 | * `gtoplevelsof` to display its results.
104 |
--------------------------------------------------------------------------------
/docs/examples/gregress.do:
--------------------------------------------------------------------------------
1 | * NOTE: gregress is in beta. To enable beta features, define
2 | *
3 | * global GTOOLS_BETA = 1
4 | * global GTOOLS_GREGTABLE = 1
5 |
6 | * Showcase
7 | * --------
8 |
9 | sysuse auto, clear
10 | gen _mpg = mpg
11 | qui tab headroom, gen(_h)
12 |
13 | greg price mpg
14 | greg price mpg, by(foreign) robust
15 | mata GtoolsRegress.print()
16 |
17 | greg price mpg _h* [fw = rep78]
18 | mata GtoolsRegress.print()
19 |
20 | greg price mpg _h* [fw = rep78], absorb(headroom)
21 | mata GtoolsRegress.print()
22 |
23 | greg price mpg _mpg, cluster(headroom)
24 | greg price mpg _mpg [aw = rep78], by(foreign) absorb(rep78 headroom) cluster(headroom)
25 | mata GtoolsRegress.print()
26 |
27 | greg price mpg, mata(coefsOnly, nose)
28 | greg price mpg, mata(seOnly, nob)
29 | greg price mpg, mata(nothing, nob nose)
30 |
31 | mata coefsOnly.print()
32 | mata seOnly.print()
33 | mata nothing.print()
34 |
35 | greg price mpg, prefix(b(_b_)) replace
36 | greg price mpg, prefix(se(_se_)) replace
37 | greg price mpg _mpg, absorb(rep78 headroom) prefix(b(_b_) se(_se_) hdfe(_hdfe_)) replace
38 | drop _*
39 |
40 | greg price mpg, gen(b(_b_mpg _b_cons))
41 | greg price mpg, gen(se(_se_mpg _se_cons))
42 | greg price mpg, absorb(rep78 headroom) gen(hdfe(_hdfe_price _hdfe_mpg))
43 |
44 | * Basic Benchmark
45 | * ---------------
46 |
47 | clear
48 | local N 1000000
49 | local G 10000
50 | set obs `N'
51 | gen g1 = int(runiform() * `G')
52 | gen g2 = int(runiform() * `G')
53 | gen g3 = int(runiform() * `G')
54 | gen g4 = int(runiform() * `G')
55 | gen x3 = runiform()
56 | gen x4 = runiform()
57 | gen x1 = x3 + runiform()
58 | gen x2 = x4 + runiform()
59 | gen y = 0.25 * x1 - 0.75 * x2 + g1 + g2 + g3 + g4 + 20 * rnormal()
60 |
61 | timer clear
62 | timer on 1
63 | greg y x1 x2, absorb(g1 g2 g3) mata(greg)
64 | timer off 1
65 | mata greg.print()
66 | timer on 2
67 | reghdfe y x1 x2, absorb(g1 g2 g3)
68 | timer off 2
69 |
70 | timer on 3
71 | greg y x1 x2, absorb(g1 g2 g3) cluster(g4) mata(greg)
72 | timer off 3
73 | mata greg.print()
74 | timer on 4
75 | reghdfe y x1 x2, absorb(g1 g2 g3) vce(cluster g4)
76 | timer off 4
77 |
78 | timer on 5
79 | greg y x1 x2, by(g4) prefix(b(_b_))
80 | timer off 5
81 | drop _*
82 | timer on 6
83 | asreg y x1 x2, by(g4)
84 | timer off 6
85 | drop _*
86 |
87 | timer list
88 |
89 | * 1: 0.64 / 1 = 0.6380
90 | * 2: 11.77 / 1 = 11.7730
91 | * 3: 0.91 / 1 = 0.9140
92 | * 4: 15.74 / 1 = 15.7370
93 | * 5: 0.46 / 1 = 0.4570
94 | * 6: 2.09 / 1 = 2.0890
95 |
--------------------------------------------------------------------------------
/docs/examples/gstats_hdfe.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear
2 | gstats hdfe demean_price = price, absorb(foreign)
3 | gstats hdfe hdfe_price = price, absorb(foreign rep78)
4 | assert mi(hdfe_price) if mi(rep78)
5 | gstats hdfe hdfe_price = price, absorb(foreign rep78) replace absorbmissing
6 | assert !mi(hdfe_price)
7 |
8 | gstats hdfe price mpg [aw = rep78], by(foreign) absorb(rep78 headroom) gen(v1 v2) mata
9 | mata GtoolsByLevels.desc()
10 | mata GtoolsByLevels.nj
11 | mata GtoolsByLevels.njabsorb
12 |
13 | gstats hdfe price mpg, absorb(foreign rep78) prefix(res_)
14 | gstats hdfe price mpg, absorb(foreign rep78) replace
15 | assert price == res_price if !mi(rep78)
16 | assert mpg == res_mpg if !mi(rep78)
17 |
18 | gstats hdfe price mpg, absorb(foreign make) replace
19 | assert abs(price) < 1e-8 if !mi(rep78)
20 | assert abs(mpg) < 1e-8 if !mi(rep78) // check the second residualized variable, not price twice
21 |
22 | * Basic Benchmark
23 | * ---------------
24 |
25 | clear
26 | local N 10000000
27 | set obs `N'
28 | gen g1 = int(runiform() * 10000)
29 | gen g2 = int(runiform() * 100)
30 | gen g3 = int(runiform() * 10)
31 | gen x = rnormal()
32 |
33 | timer clear
34 | timer on 1
35 | gstats hdfe x1 = x, absorb(g1 g2 g3) algorithm(squarem) bench(2)
36 | disp r(feval)
37 | timer off 1
38 |
39 | timer on 2
40 | gstats hdfe x2 = x, absorb(g1 g2 g3) algorithm(cg) bench(2)
41 | disp r(feval)
42 | timer off 2
43 |
44 | timer on 3
45 | gstats hdfe x3 = x, absorb(g1 g2 g3) algorithm(map) bench(2)
46 | disp r(feval)
47 | timer off 3
48 |
49 | timer on 4
50 | gstats hdfe x4 = x, absorb(g1 g2 g3) algorithm(it) bench(2)
51 | disp r(feval)
52 | timer off 4
53 |
54 | timer on 5
55 | * equivalent to cg
56 | qui reghdfe x, absorb(g1 g2 g3) resid(x5) acceleration(cg)
57 | timer off 5
58 |
59 | timer on 6
60 | * equivalent to map
61 | qui reghdfe x, absorb(g1 g2 g3) resid(x6) acceleration(none)
62 | timer off 6
63 |
64 | assert reldif(x1, x2) < 1e-6
65 | assert reldif(x1, x3) < 1e-6
66 | assert reldif(x1, x4) < 1e-6
67 | assert reldif(x1, x5) < 1e-6
68 | assert reldif(x1, x6) < 1e-6
69 |
70 | timer list
71 |
72 | * 1: 2.73 / 1 = 2.7260
73 | * 2: 2.94 / 1 = 2.9430
74 | * 3: 2.46 / 1 = 2.4620
75 | * 4: 2.90 / 1 = 2.8980
76 | * 5: 41.24 / 1 = 41.2390
77 | * 6: 44.05 / 1 = 44.0450
78 |
--------------------------------------------------------------------------------
/docs/examples/gstats_summarize.do:
--------------------------------------------------------------------------------
1 | *************
2 | * Tabstat *
3 | *************
4 |
5 | * Basic usage
6 | sysuse auto, clear
7 | gstats tab price
8 | gstats tab price, s(mean sd min max) by(foreign)
9 | gstats tab price, by(foreign rep78)
10 |
11 | * Custom printing
12 | gstats tab price mpg, s(p5 q p95 select7 select-3 gini) pretty
13 | gstats tab price mpg, s(p5 q p95 select7 select-3 gini) col(var)
14 | gstats tab price mpg, s(p5 q p95 select7 select-3 gini) col(stat)
15 |
16 | * Mata API
17 | gen strvar = "string" + string(rep78)
18 | gstats tab price mpg, by(foreign strvar) matasave
19 |
20 | mata
21 | GstatsOutput.getf(1, 1, .)
22 | GstatsOutput.getnum(., 1)
23 | GstatsOutput.getchar((2, 5, 6), .)
24 |
25 | GstatsOutput.getOutputRow(1)
26 | GstatsOutput.getOutputCol(1)
27 | GstatsOutput.getOutputVar("price")
28 | GstatsOutput.getOutputVar("mpg")
29 | GstatsOutput.getOutputGroup(1)
30 | end
31 |
32 | mata: st_matrix("output", GstatsOutput.output)
33 | matrix list output
34 |
35 | * The mata API allows the user to compute several runs of summary
36 | * statistics and keep them in memory:
37 |
38 | gstats tab price mpg, by(foreign) noprint matasave(StatsByForeign)
39 | gstats tab price mpg, by(rep78) noprint matasave(StatsByRep)
40 |
41 | mata StatsByRep.desc()
42 | mata StatsByForeign.desc()
43 | mata StatsByForeign.printOutput()
44 |
45 | * It is also especially useful for a large number of groups
46 |
47 | clear
48 | set obs 100000
49 | gen g = mod(_n, 10000)
50 | gen x = runiform()
51 | gstats tab x, by(g) noprint matasave
52 | mata GstatsOutput.J
53 | mata GstatsOutput.getOutputGroup(13)
54 |
55 | ***************
56 | * Summarize *
57 | ***************
58 |
59 | * Basic usage
60 | sysuse auto, clear
61 | gstats sum price
62 | gstats sum price [pw = gear_ratio / 5]
63 | gstats sum price mpg, f
64 |
65 | * In the style of tabstat
66 | gstats sum price mpg, tab nod
67 | gstats sum price mpg, tab meanonly
68 | gstats sum price mpg, by(foreign) tab
69 | gstats sum price mpg, by(foreign) nod
70 | gstats sum price mpg, by(foreign) meanonly
71 |
72 | * Pool inputs
73 | gstats sum price *, nod
74 | gstats sum price *, nod pool
75 |
--------------------------------------------------------------------------------
/docs/examples/gstats_winsor.do:
--------------------------------------------------------------------------------
1 | * _Note_: These examples are taken verbatim from `help winsor2`.
2 |
3 | * winsor at (p1 p99), get new variable "wage_w"
4 | sysuse nlsw88, clear
5 | gstats winsor wage
6 |
7 | * winsor 3 variables at 0.5th and 99.5th percentiles, and overwrite the
8 | * old variables
9 |
10 | gstats winsor wage age hours, cuts(0.5 99.5) replace
11 |
12 | * winsor 3 variables at (p1 p99), gen new variables with suffix _win,
13 | * and add variable labels
14 |
15 | gstats winsor wage age hours, suffix(_win) label
16 |
17 | * left-winsorizing only, at 1th percentile
18 |
19 | cap noi gstats winsor wage, cuts(1 100)
20 | gstats winsor wage, cuts(1 100) s(_w2)
21 |
22 | * right-trimming only, at 99th percentile
23 |
24 | gstats winsor wage, cuts(0 99) trim
25 |
26 | * winsor variables at (p1 p99) by (industry), overwrite the old
27 | * variables
28 |
29 | gstats winsor wage hours, replace by(industry)
30 |
--------------------------------------------------------------------------------
/docs/examples/gtoplevelsof.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear
2 |
3 | gtoplevelsof rep78
4 |
5 | gtop rep78 [fw = weight]
6 |
7 | gtop rep78 [w = gear_ratio]
8 |
9 | gtop rep78, missrow
10 |
11 | gtop rep78, colsep(", ")
12 |
13 | gtop rep78, pctfmt(%7.3f)
14 |
15 | gtop mpg, numfmt(%7.3f)
16 |
17 | gtop foreign
18 |
19 | gtop foreign, colmax(3)
20 |
21 | gtop foreign, novaluelab
22 |
23 | gtop foreign rep78, ntop(4) missrow colstrmax(2)
24 |
25 | gtop foreign rep78, ntop(4) missrow groupmiss
26 |
27 | gtop foreign rep78, ntop(4) missrow groupmiss noother
28 |
29 | gtop foreign rep78, cols(<<) missrow("I am missing") matrix(lvl)
30 | matrix list lvl
31 |
32 | gtop foreign rep78, mata(lvl) ntop(3)
33 | mata lvl.desc()
34 | mata lvl.printed
35 | mata lvl.toplevels
36 |
--------------------------------------------------------------------------------
/docs/examples/gunique.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear
2 |
3 | gunique *
4 |
5 | gunique *, miss
6 |
7 | gunique make-headroom
8 |
9 | gunique rep78, d
10 |
11 | gunique rep78, by(foreign)
12 |
--------------------------------------------------------------------------------
/docs/examples/hashsort.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear
2 | hashsort price
3 | hashsort +price
4 | hashsort rep78 -price
5 | hashsort make
6 | hashsort foreign -make
7 |
8 | * One thing that is useful is that hashsort can encode a set of variables and
9 | * set the encoded variable as the sorting variable:
10 |
11 | sysuse auto, clear
12 |
13 | hashsort foreign -rep78, gen(id) sortgen
14 |
15 | disp "`: sortedby'"
16 |
17 | tab id
18 |
--------------------------------------------------------------------------------
/docs/helpers.js:
--------------------------------------------------------------------------------
1 | MathJax.Hub.Config({
2 | tex2jax: {
3 | inlineMath: [ ['$','$'], ["\\(","\\)"] ],
4 | processEscapes: true
5 | }
6 | });
7 |
--------------------------------------------------------------------------------
/docs/stata/gstats.sthlp:
--------------------------------------------------------------------------------
1 | {smcl}
2 | {* *! version 0.4.0 09Jun2019}{...}
3 | {viewerdialog gstats "dialog gstats"}{...}
4 | {vieweralsosee "[R] gstats" "mansection R gstats"}{...}
5 | {viewerjumpto "Syntax" "gstats##syntax"}{...}
6 | {viewerjumpto "Description" "gstats##description"}{...}
7 | {title:Title}
8 |
9 | {p2colset 5 15 23 2}{...}
10 | {p2col :{cmd:gstats} {hline 2}} Various statistical functions and transformations. {p_end}
11 | {p2colreset}{...}
12 |
13 | {pstd}
14 | {it:Important}: Please run {stata gtools, upgrade} to update {cmd:gtools} to
15 | the latest stable version.
16 |
17 | {marker syntax}{...}
18 | {title:Syntax}
19 |
20 | {p 8 17 2}
21 | {cmd:gstats}
22 | {it:subcommand}
23 | {varlist}
24 | {ifin}
25 | [{it:{help gstats##weight:weight}}]
26 | [{cmd:,} {opth by(varlist)} {it:{help gstats##table_options:subcommand_options}}]
27 |
28 | {phang}
29 | {opt gstats} is a wrapper for various statistical functions and
30 | transformations, including:
31 |
32 | {p 8 17 2}
33 | {help gstats hdfe:{bf:hdfe}}
34 | (alias {help gstats hdfe:{bf:residualize}}) is a fast utility for residualizing variables (i.e. HDFE transform; accepts weights). {p_end}
35 |
36 | {p 8 17 2}
37 | {help gstats winsor:{bf:winsor}}
38 | as a fast {opt winsor2} alternative (accepts weights). {p_end}
39 |
40 | {p 8 17 2}
41 | {help gstats summarize:{bf:{ul:sum}marize}} and
42 | {help gstats summarize:{bf:{ul:tab}stat}} are fast,
43 | by-able alternatives to {opt summarize, detail} and {opt tabstat} (accept weights). {p_end}
44 |
45 | {p 8 17 2}
46 | {help gstats transform:{bf:transform}}
47 | to apply various statistical transformations (accepts weights). {p_end}
48 |
49 | {marker description}{...}
50 | {title:Description}
51 |
52 | {pstd}
53 | {opt gstats} is a wrapper for several statistical functions and
54 | transformations. In theory {opt gegen} would be the place to expand
55 | {opt gtools}; however, {opt gegen}'s internally implemented functions
56 | were written with two assumptions: first, the output is unique at the
57 | group level; second, there is always a target variable. {opt gstats}
58 | is written to be more flexible and allow arbitrary functions and
59 | transformations.
60 |
61 | {pstd}
62 | Weights are supported for the following subcommands: {it:winsor}, {it:summarize}, {it:tabstat}, {it:residualize}.
63 |
64 | {marker author}{...}
65 | {title:Author}
66 |
67 | {pstd}Mauricio Caceres{p_end}
68 | {pstd}{browse "mailto:mauricio.caceres.bravo@gmail.com":mauricio.caceres.bravo@gmail.com }{p_end}
69 | {pstd}{browse "https://mcaceresb.github.io":mcaceresb.github.io}{p_end}
70 |
71 | {title:Website}
72 |
73 | {pstd}{cmd:gstats} is maintained as part of the {manhelp gtools R:gtools} project at {browse "https://github.com/mcaceresb/stata-gtools":github.com/mcaceresb/stata-gtools}{p_end}
74 |
75 | {marker acknowledgment}{...}
76 | {title:Acknowledgment}
77 |
78 | {pstd}
79 | {opt gtools} was largely inspired by Sergio Correia's {it:ftools}:
80 | {browse "https://github.com/sergiocorreia/ftools"}.
81 | {p_end}
82 |
83 | {pstd}
84 | The OSX version of gtools was implemented with invaluable help from @fbelotti;
85 | see {browse "https://github.com/mcaceresb/stata-gtools/issues/11"}.
86 | {p_end}
87 |
88 | {title:Also see}
89 |
90 | {p 4 13 2}
91 | help for
92 | {help gtools}
93 |
--------------------------------------------------------------------------------
/docs/usage/gisid.md:
--------------------------------------------------------------------------------
1 | gisid
2 | =====
3 |
4 | Efficiently check for unique identifiers using C plugins. This is a fast
5 | alternative to Stata's isid. It checks whether a set of variables uniquely
6 | identifies observations in a dataset. It can additionally take `if` and
7 | `in` but it cannot check an external data set or sort the data.
8 |
9 | !!! tip "Important"
10 | Run `gtools, upgrade` to update `gtools` to the latest stable version.
11 |
12 | Syntax
13 | ------
14 |
15 | `gisid varlist [if] [in] [, missok ]`
16 |
17 | Options
18 | -------
19 |
20 | missok indicates that missing values are permitted in varlist.
21 |
22 | ### Gtools options
23 |
24 | (Note: These are common to every gtools command.)
25 |
26 | - `compress` Try to compress strL to str#. The Stata Plugin Interface has
27 | only limited support for strL variables. In Stata 13 and
28 | earlier (version 2.0) there is no support, and in Stata 14
29 | and later (version 3.0) there is read-only support. The user
30 | can try to compress strL variables using this option.
31 |
32 | - `forcestrl` Skip binary variable check and force gtools to read strL variables
33 | (14 and above only). __Gtools gives incorrect results when there is
34 | binary data in strL variables__. This option was included because on
35 | some windows systems Stata detects binary data even when there is none.
36 | Only use this option if you are sure you do not have binary data in your
37 | strL variables.
38 |
39 | - `verbose` prints some useful debugging info to the console.
40 |
41 | - `benchmark` or `bench(level)` prints how long in seconds various parts of the
42 | program take to execute. Level 1 is the same as `benchmark`. Levels
43 | 2 and 3 additionally print benchmarks for internal plugin steps.
44 |
45 | - `hashmethod(str)` Hash method to use. `default` automagically chooses the
46 | algorithm. `biject` tries to biject the inputs into the
47 | natural numbers. `spooky` hashes the data and then uses the
48 | hash.
49 |
50 | - `oncollision(str)` How to handle collisions. A collision should never happen
51 | but just in case it does `gtools` will try to use native
52 | commands. The user can specify that it throw an error instead by
53 | passing `oncollision(error)`.
54 |
55 | Examples
56 | --------
57 |
58 | You can download the raw code for the examples below
59 | [here](https://raw.githubusercontent.com/mcaceresb/stata-gtools/master/docs/examples/gisid.do)
60 |
61 | ```stata
62 | . sysuse auto, clear
63 | (1978 Automobile Data)
64 |
65 | . gisid mpg
66 | variable mpg does not uniquely identify the observations
67 | r(459);
68 |
69 | . gisid make
70 |
71 | . replace make = "" in 1
72 | (1 real change made)
73 |
74 | . gisid make
75 | variable make should never be missing
76 | r(459);
77 |
78 | . gisid make, missok
79 | ```
80 |
81 | gisid can also take a range, that is
82 | ```
83 | . gisid mpg in 1
84 | . gisid mpg if _n == 1
85 | ```
86 |
--------------------------------------------------------------------------------
/docs/usage/gtools.md:
--------------------------------------------------------------------------------
1 | gtools
2 | ======
3 |
4 | The gtools command is merely a wrapper for some high-level operations to
5 | do with package maintenance. See any of the commands below for details
6 | on how to use the programs provided by this package, or the
7 | [introduction](index) for an overview of the package and available
8 | commands.
9 |
10 | Succinctly, gtools is a Stata package that provides a fast implementation
11 | of common group commands like collapse, egen, isid, levelsof, contract,
12 | distinct, and so on using C plugins for a massive speed improvement.
13 |
14 | This program helps the user manage their gtools installation.
15 |
16 | !!! tip "Important"
17 | Run `gtools, upgrade` to update `gtools` to the latest stable version.
18 |
19 | Syntax
20 | ------
21 |
22 | ```stata
23 | gtools [, options]
24 | ```
25 |
26 | See `gtools, examples` for examples of how to use available gtools functions.
27 |
28 | Options
29 | -------
30 |
31 | - `upgrade` (alias `install_latest`) Upgrades gtools to the latest github version (default is master).
32 |
33 | - `licenses` Prints the open source projects used in `gtools`
34 |
35 | - `verbose` With `licenses`, prints the licenses of the open source projects used in `gtools`
36 |
37 | - `examples` (alias `showcase`) Print examples of how to use available gtools functions.
38 |
39 | - `test[(str)]` Run unit tests, optionally specifying which tests to run. Tests
40 | available are: `dependencies`, `basic_checks`, `bench_test`,
41 | `comparisons`, `switches`, `bench_full`. A good set of "small" tests
42 | which take 10-20 minutes are `dependencies basic_checks bench_test`. By
43 | default, however, the first 5 tests are run, which take 1-3h. The bulk
44 | of that time is from `comparisons`, which compares the results from
45 | gtools to that of various native counterparts under several different
46 | conditions. `bench_full` is not run by default because this benchmarks
47 | gtools against stata using modestly-sized data (millions). Some stata
48 | commands are very slow under some of the benchmarks, meaning this can
49 | take well over a day.
50 |
51 | - `branch(str)` Github branch to use (default is master).
52 |
--------------------------------------------------------------------------------
/lib/id_rsa_travis.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/id_rsa_travis.enc
--------------------------------------------------------------------------------
/lib/material.json:
--------------------------------------------------------------------------------
1 | ../docs/benchmarks/material.json
--------------------------------------------------------------------------------
/lib/plugin/gtools_macosx_v2.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/gtools_macosx_v2.plugin
--------------------------------------------------------------------------------
/lib/plugin/gtools_macosx_v3.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/gtools_macosx_v3.plugin
--------------------------------------------------------------------------------
/lib/plugin/gtools_unix_v2.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/gtools_unix_v2.plugin
--------------------------------------------------------------------------------
/lib/plugin/gtools_unix_v3.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/gtools_unix_v3.plugin
--------------------------------------------------------------------------------
/lib/plugin/gtools_windows_v2.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/gtools_windows_v2.plugin
--------------------------------------------------------------------------------
/lib/plugin/gtools_windows_v3.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/gtools_windows_v3.plugin
--------------------------------------------------------------------------------
/lib/plugin/lgtools.mlib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/lib/plugin/lgtools.mlib
--------------------------------------------------------------------------------
/lib/spi-2.0/stplugin.c:
--------------------------------------------------------------------------------
1 | /*
2 | stplugin.c, version 2.0
3 | copyright (c) 2003, 2006 StataCorp
4 | */
5 |
6 | #include "stplugin.h"
7 |
8 | ST_plugin *_stata_ ;
9 |
10 | STDLL pginit(ST_plugin *p) /* records the ST_plugin pointer it is handed and reports SD_PLUGINVER */
11 | {
12 | _stata_ = p ; /* keep the pointer in the file-level global _stata_ */
13 | return(SD_PLUGINVER) ; /* SD_PLUGINVER is not defined in this file; presumably from stplugin.h */
14 | }
15 |
--------------------------------------------------------------------------------
/lib/spi-3.0/stplugin.c:
--------------------------------------------------------------------------------
1 | /*
2 | stplugin.c, version 3.0
3 | copyright (c) 2003, 2006, 2015 StataCorp LP
4 | */
5 |
6 | #include "stplugin.h"
7 |
8 | ST_plugin *_stata_ ;
9 |
10 | STDLL pginit(ST_plugin *p) /* records the ST_plugin pointer it is handed and reports SD_PLUGINVER */
11 | {
12 | _stata_ = p ; /* keep the pointer in the file-level global _stata_ */
13 | return(SD_PLUGINVER) ; /* SD_PLUGINVER is not defined in this file; presumably from stplugin.h */
14 | }
15 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | # Project information
2 | site_name: 'Gtools'
3 | site_description: 'Faster Stata for Big Data'
4 |
5 | pages:
6 | - Home: index.md
7 | - FAQs: faqs.md
8 | - Benchmarks: benchmarks.md
9 | - Compiling: compiling.md
10 | - Usage:
11 | - gtools: usage/gtools.md
12 | - Data manipulation:
13 | - gcollapse: usage/gcollapse.md
14 | - greshape: usage/greshape.md
15 | - gcontract: usage/gcontract.md
16 | - gisid: usage/gisid.md
17 | - glevelsof: usage/glevelsof.md
18 | - gtop: usage/gtoplevelsof.md
19 | - hashsort: usage/hashsort.md
20 | - Statistics:
21 | - gegen: usage/gegen.md
22 | - gquantiles: usage/gquantiles.md
23 | - gstats residualize: usage/gstats_hdfe.md
24 | - gstats winsor: usage/gstats_winsor.md
25 | - gstats sum/tab: usage/gstats_summarize.md
26 | - gstats transform: usage/gstats_transform.md
27 | - gdistinct: usage/gdistinct.md
28 | - gunique: usage/gunique.md
29 | - gduplicates: usage/gduplicates.md
30 | - Regression models:
31 | - gregress: usage/gregress.md
32 | - givregress: usage/givregress.md
33 | - gglm: usage/gglm.md
34 |
35 | theme: readthedocs
36 |
37 | # name: 'material'
38 | # primary: 'Teal'
39 | # accent: 'Teal'
40 |
41 | extra_javascript:
42 | - https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML
43 | - helpers.js
44 |
45 | markdown_extensions:
46 | - meta
47 | - toc
48 | - tables
49 | - fenced_code
50 | - admonition
51 | - codehilite:
52 | guess_lang: false
53 |
54 | # use_pygments: True
55 | # noclasses: True
56 | # pygments_style: monokai
57 |
58 | use_directory_urls: false
59 |
60 | extra_css:
61 | - css/extra-rtd.css
62 |
63 | # extra_css:
64 | # - css/extra-material.css
65 | # - css/extra-material-dark.css
66 |
67 | # Repository
68 | # repo_name: 'mcaceresb/stata-gtools'
69 | # repo_url: 'https://github.com/mcaceresb/stata-gtools'
70 | # edit_uri: 'blob/master/docs/'
71 | # site_url: 'https://gtools.readthedocs.io/en/latest/'
72 |
--------------------------------------------------------------------------------
/src/ado/gglm.ado:
--------------------------------------------------------------------------------
1 | *! version 1.11.1 03Apr2023 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! Estimate GLM via IRLS by group and with HDFE
3 |
4 | cap program drop gglm
5 | program gglm, rclass
6 | version 13.1
7 | * Thin wrapper: forwards the caller's arguments to -gregress- with the -glm- option appended.
8 | local 00: copy local 0 // verbatim copy of the arguments; restored into `0' after the call
9 | if ( strpos(`"`0'"', ",") > 0 ) { // NOTE(review): any comma counts, including one inside an if-expression or a quoted string
10 | local comma
11 | }
12 | else {
13 | local comma ,
14 | }
15 | gregress `0' `comma' glm
16 | if ( ${GREG_RC} ) { // presumably set by gregress to request an early exit -- TODO confirm
17 | global GREG_RC
18 | exit 0
19 | }
20 | local 0: copy local 00
21 | * Re-post selected r() results left by the gregress call under this program's own r().
22 | return local levels `"`r(levels)'"'
23 | return scalar N = r(N)
24 | return scalar J = r(J)
25 | return scalar minJ = r(minJ)
26 | return scalar maxJ = r(maxJ)
27 | end
28 |
29 |
--------------------------------------------------------------------------------
/src/ado/gisid.ado:
--------------------------------------------------------------------------------
1 | *! version 1.1.1 23Jan2019 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! -isid- implementation using C for faster processing
3 |
4 | capture program drop gisid
5 | program gisid
6 | version 13.1
7 | * Check that varlist uniquely identifies observations via _gtools_internal with gfunction(isid).
8 | global GTOOLS_CALLER gisid
9 | syntax varlist /// Variables to check
10 | [if] [in] , /// [if condition] [in start / end]
11 | [ ///
12 | Missok /// Missing values in varlist are OK
13 | compress /// Try to compress strL variables
14 | forcestrl /// Force reading strL variables (stata 14 and above only)
15 | Verbose /// Print info during function execution
16 | _keepgreshape /// (Undocumented) Keep greshape scalars
17 | _CTOLerance(passthru) /// (Undocumented) Counting sort tolerance; default is radix
18 | BENCHmark /// Benchmark function
19 | BENCHmarklevel(int 0) /// Benchmark various steps of the plugin
20 | HASHmethod(passthru) /// Hashing method: 0 (default), 1 (biject), 2 (spooky)
21 | oncollision(passthru) /// error|fallback: On collision, use native command or throw error
22 | debug(passthru) /// Print debugging info to console
23 | ///
24 | /// Unsupported isid options
25 | /// ------------------------
26 | Sort ///
27 | ]
28 | * A positive benchmarklevel() turns on -benchmark-; the level is then re-wrapped as an option string.
29 | if ( `benchmarklevel' > 0 ) local benchmark benchmark
30 | local benchmarklevel benchmarklevel(`benchmarklevel')
31 | * -sort- is accepted by the parser only so that it can be rejected with an explicit message.
32 | if ( "`sort'" != "" ) {
33 | di as err "Option -sort- is not implemented"
34 | exit 198
35 | }
36 | * Without -missok- the internal call receives exitmissing; with it, missing.
37 | if ( "`missok'" == "" ) {
38 | local miss exitmissing
39 | }
40 | else {
41 | local miss missing
42 | }
43 | * Gather every option that is passed through to the internal call.
44 | local opts `miss' `compress' `forcestrl' `_ctolerance' `_keepgreshape'
45 | local opts `opts' `verbose' `benchmark' `benchmarklevel'
46 | local opts `opts' `oncollision' `hashmethod' `debug'
47 | cap noi _gtools_internal `varlist' `if' `in', unsorted `opts' gfunction(isid)
48 | local rc = _rc
49 | global GTOOLS_CALLER ""
50 | * rc 17999: fall back to native -isid-; rc 17001: report no observations; any other nonzero rc is returned.
51 | if ( `rc' == 17999 ) {
52 | isid `varlist' `if' `in', `missok'
53 | exit 0
54 | }
55 | else if ( `rc' == 17001 ) {
56 | di as txt "(no observations)"
57 | exit 0
58 | }
59 | else if ( `rc' ) exit `rc'
60 | end
61 |
--------------------------------------------------------------------------------
/src/ado/givregress.ado:
--------------------------------------------------------------------------------
1 | *! version 1.11.1 03Apr2023 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! Estimate IV regression via 2SLS by group and with HDFE
3 |
4 | cap program drop givregress
5 | program givregress, rclass
6 | version 13.1
7 | * Thin wrapper: forwards the caller's arguments to -gregress- with the -ivregress- option appended.
8 | local 00: copy local 0 // verbatim copy of the arguments; restored into `0' after the call
9 | if ( strpos(`"`0'"', ",") > 0 ) { // NOTE(review): any comma counts, including one inside an if-expression or a quoted string
10 | local comma
11 | }
12 | else {
13 | local comma ,
14 | }
15 | gregress `0' `comma' ivregress
16 | if ( ${GREG_RC} ) { // presumably set by gregress to request an early exit -- TODO confirm
17 | global GREG_RC
18 | exit 0
19 | }
20 | local 0: copy local 00
21 | * Re-post selected r() results left by the gregress call under this program's own r().
22 | return local levels `"`r(levels)'"'
23 | return scalar N = r(N)
24 | return scalar J = r(J)
25 | return scalar minJ = r(minJ)
26 | return scalar maxJ = r(maxJ)
27 | end
28 |
--------------------------------------------------------------------------------
/src/ado/gpoisson.ado:
--------------------------------------------------------------------------------
1 | *! version 1.11.1 03Apr2023 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! Estimate poisson regression via IRLS by group and with HDFE
3 |
4 | cap program drop gpoisson
5 | program gpoisson, rclass
6 | version 13.1
7 | * Thin wrapper: forwards the caller's arguments to -gregress- with -glm family(poisson)- appended.
8 | local 00: copy local 0 // verbatim copy of the arguments; restored into `0' after the call
9 | if ( strpos(`"`0'"', ",") > 0 ) { // NOTE(review): any comma counts, including one inside an if-expression or a quoted string
10 | local comma
11 | }
12 | else {
13 | local comma ,
14 | }
15 | gregress `0' `comma' glm family(poisson)
16 | if ( ${GREG_RC} ) { // presumably set by gregress to request an early exit -- TODO confirm
17 | global GREG_RC
18 | exit 0
19 | }
20 | local 0: copy local 00
21 | * Re-post selected r() results left by the gregress call under this program's own r().
22 | return local levels `"`r(levels)'"'
23 | return scalar N = r(N)
24 | return scalar J = r(J)
25 | return scalar minJ = r(minJ)
26 | return scalar maxJ = r(maxJ)
27 | end
28 |
--------------------------------------------------------------------------------
/src/ado/greg.ado:
--------------------------------------------------------------------------------
1 | *! version 1.11.8 28Jun2024 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! Estimate linear regression via OLS by group and with HDFE
3 |
4 | cap program drop greg
5 | program greg, rclass
6 | version 13.1
7 | * Alias for -gregress-: runs it on the caller's arguments unchanged and re-posts its r() results.
8 | local 00: copy local 0 // verbatim copy of the arguments; restored into `0' after the call
9 | gregress `0'
10 | if ( ${GREG_RC} ) { // presumably set by gregress to request an early exit -- TODO confirm
11 | global GREG_RC
12 | exit 0
13 | }
14 | local 0: copy local 00
15 | * Re-post selected r() results left by the gregress call under this program's own r().
16 | return local cmd `"`r(cmd)'"'
17 | return local mata `"`r(mata)'"'
18 | return scalar N = r(N)
19 | return scalar J = r(J)
20 | return scalar minJ = r(minJ)
21 | return scalar maxJ = r(maxJ)
22 | end
23 |
--------------------------------------------------------------------------------
/src/ado/gtop.ado:
--------------------------------------------------------------------------------
1 | *! version 1.2.0 23Mar2019 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! Calculate the top groups by count of a varlist (jointly).
3 |
4 | cap program drop gtop
5 | program gtop, rclass
6 | version 13.1
7 | * Alias for -gtoplevelsof-: runs it on the caller's arguments and re-posts its r() results.
8 | local 00: copy local 0 // verbatim copy; plain -local 00 `0'- re-parses the text (outer quotes, leading =)
9 | gtoplevelsof `0'
10 | if ( ${GTOP_RC} ) { // presumably set by gtoplevelsof to request an early exit -- TODO confirm
11 | global GTOP_RC
12 | exit 0
13 | }
14 | local 0: copy local 00
15 | * Re-parse the arguments only to learn which local()/matrix() names the caller asked for.
16 | qui syntax [anything] [if] [in] [aw fw pw], [LOCal(str) MATrix(str) *]
17 | if ( "`local'" != "" ) c_local `local' `"`r(levels)'"'
18 | if ( "`matrix'" != "" ) matrix `matrix' = r(toplevels)
19 | return local levels `"`r(levels)'"'
20 | return scalar N = r(N)
21 | return scalar J = r(J)
22 | return scalar minJ = r(minJ)
23 | return scalar maxJ = r(maxJ)
24 | return scalar alpha = r(alpha)
25 | return scalar ntop = r(ntop)
26 | return scalar nrows = r(nrows)
27 | * Re-post the matrix unless r(matalevels) is set, in which case only that name is re-posted.
28 | if ( `"`r(matalevels)'"' == "" ) {
29 | tempname gmat
30 | matrix `gmat' = r(toplevels)
31 | return matrix toplevels = `gmat'
32 | }
33 | else {
34 | return local matalevels `"`r(matalevels)'"'
35 | }
36 | end
37 |
--------------------------------------------------------------------------------
/src/ado/hashsort.ado:
--------------------------------------------------------------------------------
1 | *! version 1.0.1 23Jan2019 Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
2 | *! Hash-based implementation of -sort- and -gsort- using C-plugins
3 |
4 | capture program drop hashsort
5 | program define hashsort
6 | version 13.1
7 | * Sort by [+|-]varname ... via _gtools_internal with gfunction(sort); falls back to -gsort-/-sort- on rc 17999.
8 | global GTOOLS_CALLER hashsort
9 | syntax anything, /// Variables to sort by: [+|-]varname [[+|-]varname ...]
10 | [ ///
11 | GENerate(passthru) /// Generate variable with sort order
12 | replace /// Replace generated variable, if it exists
13 | sortgen /// Sort by generated variable, if applicable
14 | skipcheck /// Turn off internal is sorted check
15 | ///
16 | compress /// Try to compress strL variables
17 | forcestrl /// Force reading strL variables (stata 14 and above only)
18 | Verbose /// Print info during function execution
19 | _CTOLerance(passthru) /// (Undocumented) Counting sort tolerance; default is radix
20 | BENCHmark /// Benchmark function
21 | BENCHmarklevel(int 0) /// Benchmark various steps of the plugin
22 | HASHmethod(passthru) /// Hashing method: 0 (default), 1 (biject), 2 (spooky)
23 | oncollision(passthru) /// error|fallback: On collision, use native command or throw error
24 | debug(passthru) /// Print debugging info to console
25 | ///
26 | tag(passthru) ///
27 | counts(passthru) ///
28 | fill(passthru) ///
29 | invertinmata ///
30 | ///
31 | /// Unsupported sort options
32 | /// ------------------------
33 | ///
34 | stable /// Hashsort is always stable
35 | mlast ///
36 | Mfirst ///
37 | ]
38 |
39 | if ( `benchmarklevel' > 0 ) local benchmark benchmark
40 | local benchmarklevel benchmarklevel(`benchmarklevel')
41 |
42 | if ( "`stable'" != "" ) di as txt "hashsort is always -stable-"
43 |
44 | * mfirst and mlast are mutually exclusive: stop here instead of printing the error and sorting anyway
45 | if ( ("`mfirst'" != "") & ("`mlast'" != "") ) {
46 | di as err "Cannot request both {opt mfirst} and {opt mlast}"
47 | global GTOOLS_CALLER ""
48 | exit 198
49 | }
50 |
51 | * mfirst is set by default, unlike gsort
52 | if ( ("`mfirst'" == "") & ("`mlast'" == "") & (strpos("`anything'", "-") > 0) ) {
53 | di as txt "(note: missing values will be sorted first)"
54 | }
55 |
56 | * mfirst is set by default
57 | if ( ("`mfirst'" == "") & ("`mlast'" == "") ) {
58 | local mfirst mfirst
59 | }
60 |
61 | if ( "`generate'" != "" ) local skipcheck skipcheck
62 |
63 | local opts `compress' `forcestrl' nods
64 | local opts `opts' `verbose' `benchmark' `benchmarklevel' `_ctolerance'
65 | local opts `opts' `oncollision' `hashmethod' `debug'
66 | local eopts `invertinmata' `sortgen' `skipcheck'
67 | local gopts `generate' `tag' `counts' `fill' `replace' `mlast'
68 | cap noi _gtools_internal `anything', missing `opts' `gopts' `eopts' gfunction(sort)
69 | local rc = _rc
70 | global GTOOLS_CALLER ""
71 | * rc 17999: fall back to native -gsort-/-sort-; rc 17001: exit quietly; any other nonzero rc is returned.
72 | if ( `rc' == 17999 ) {
73 | if regexm("`anything'", "[\+\-]") {
74 | gsort `anything', `generate' `mfirst'
75 | exit 0
76 | }
77 | else {
78 | sort `anything'
79 | exit 0
80 | }
81 | }
82 | else if ( `rc' == 17001 ) {
83 | exit 0
84 | }
85 | else if ( `rc' ) exit `rc'
86 | end
87 |
--------------------------------------------------------------------------------
/src/github-issues/30b/replicate.do:
--------------------------------------------------------------------------------
1 | Stata's `altdef` formula in `pctile` gives the wrong result for certain numbers in IC and SE (this will also affect `xtile` once the bug with `altdef` there is fixed).
2 |
3 | clear
4 | set obs 89750
5 | gen double x = 7.2439548890446011
6 |
7 | pctile fp = x, nq(500) altdef
8 | pctile double dp = x, nq(500) altdef
9 | _pctile x, nq(500) altdef
10 |
11 | assert (x[1] == fp) | mi(fp)
12 | assert (x[1] == dp) | mi(dp)
13 |
14 | The above assertions should be true, or at least the second one, but both fail. (Note that in Stata/MP, the second assertion goes through; at least that was the case for me in testing). We can see that
15 |
16 | . levelsof fp
17 | 7.243954658508301
18 |
19 | . levelsof dp
20 | 7.2439548890446 7.243954889044601 7.243954889044602
21 |
22 | This happens because `altdef` takes an average. The formula is:
23 |
24 | scalar perc = 100 * 148 / 500
25 | scalar ith = (_N + 1) * perc / 100
26 | scalar i = floor(ith)
27 | scalar h = ith - i
28 | scalar q = (1 - h) * x[i] + h * x[i + 1]
29 |
30 | assert x[i] == x[i - 1]
31 | assert q == dp[148]
32 | assert q == x[i]
33 |
34 | The first two assertions succeed but the third fails. Stata's `pctile` fails to recognize that `x[i]` is equal to `x[i - 1]`.
35 |
36 | (Note: Naturally my actual use case involved a variable that had different values, but one of them was `7.2439548890446011` and that caused the problem.)
37 |
--------------------------------------------------------------------------------
/src/github-issues/35/Makefile:
--------------------------------------------------------------------------------
1 | # GCC = x86_64-w64-mingw32-gcc-5.4.0.exe
2 | GCC = x86_64-w64-mingw32-gcc.exe
3 | FLAGS = -Wall -shared
4 | SPOOKY = -L./ -l:spookyhash.dll
5 | # Default goal: delete any old plugins, then build test1 and test2.
6 | all: clean test1 test2
7 | # test1: stplugin.c + test1.c, no extra libraries.
8 | test1: test1.c stplugin.c
9 | $(GCC) $(FLAGS) -o test1.plugin stplugin.c test1.c
10 | # test2: stplugin.c + test2.c, linked with the SPOOKY flags.
11 | test2: test2.c stplugin.c
12 | $(GCC) $(FLAGS) -o test2.plugin stplugin.c test2.c $(SPOOKY)
13 | # clean is declared phony; it only removes the two built plugins.
14 | .PHONY: clean
15 | clean:
16 | rm -f test1.plugin
17 | rm -f test2.plugin
18 |
--------------------------------------------------------------------------------
/src/github-issues/35/spookyhash.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/35/spookyhash.dll
--------------------------------------------------------------------------------
/src/github-issues/35/stplugin.c:
--------------------------------------------------------------------------------
1 | /*
2 | stplugin.c, version 2.0
3 | copyright (c) 2003, 2006 StataCorp
4 | */
5 |
6 | #include "stplugin.h"
7 |
8 | ST_plugin *_stata_ ;
9 |
10 | STDLL pginit(ST_plugin *p) /* records the ST_plugin pointer it is handed and reports SD_PLUGINVER */
11 | {
12 | _stata_ = p ; /* keep the pointer in the file-level global _stata_ */
13 | return(SD_PLUGINVER) ; /* SD_PLUGINVER is not defined in this file; presumably from stplugin.h */
14 | }
15 |
--------------------------------------------------------------------------------
/src/github-issues/35/test.do:
--------------------------------------------------------------------------------
1 | program test1, plugin using(test1.plugin) // register test1.plugin under the program name test1
2 | plugin call test1 // run it
3 | * Same two steps for the second plugin.
4 | program test2, plugin using(test2.plugin)
5 | plugin call test2
6 |
--------------------------------------------------------------------------------
/src/github-issues/35/test1.c:
--------------------------------------------------------------------------------
1 | #include "stplugin.h"
2 | /* Stub that only returns 0; presumably present to satisfy the toolchain when linking -- TODO confirm. */
3 | int main()
4 | {
5 | return(0);
6 | }
7 | /* Stub that only returns 0; presumably the Windows counterpart of the main() stub -- TODO confirm. */
8 | int WinMain()
9 | {
10 | return(0);
11 | }
12 | /* Plugin entry point: displays "Hello World" through SF_display and returns 0; argc/argv are unused. */
13 | STDLL stata_call(int argc, char *argv[])
14 | {
15 | SF_display("Hello World\n") ;
16 | return(0) ;
17 | }
18 |
19 | /*
20 |
21 | cd /home/mauricio/code/stata-gtools/src/github-issues/35/
22 | !gcc -Wall -shared -fPIC -DSYSTEM=OPUNIX -o test1.plugin stplugin.c test1.c
23 | capture program drop test1
24 | program test1, plugin using(test1.plugin)
25 | plugin call test1
26 |
27 | */
28 |
--------------------------------------------------------------------------------
/src/github-issues/35/test1.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/35/test1.plugin
--------------------------------------------------------------------------------
/src/github-issues/35/test2.c:
--------------------------------------------------------------------------------
1 | #include "stplugin.h"
2 | #include "spookyhash_api.h"
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | /* Stub that only returns 0; presumably present to satisfy the toolchain when linking -- TODO confirm. */
10 | int main()
11 | {
12 | return(0);
13 | }
14 | /* Stub that only returns 0; presumably the Windows counterpart of the main() stub -- TODO confirm. */
15 | int WinMain()
16 | {
17 | return(0);
18 | }
19 | /* Plugin entry point: displays "foo" with a number, then that number's 128-bit spooky hash; returns 0. */
20 | STDLL stata_call(int argc, char *argv[])
21 | {
22 | char * buffer = malloc(1024 * sizeof(char));
23 | char * string = strdup("foo");
24 | /* NOTE(review): allocations are not NULL-checked; fine for a repro, revisit if this code is reused. */
25 | ST_double * number = calloc(1, sizeof(ST_double));
26 | number[0] = 1729.42; /* was number[1]: one element past the single-element allocation */
27 |
28 | sprintf (buffer, "%s: %9.2f\n", string, *number);
29 | SF_display (buffer);
30 | free (string); /* the strdup'd copy is not used past this point; previously leaked */
31 | uint64_t h1, h2;
32 | spookyhash_128(number, sizeof(ST_double), &h1, &h2);
33 |
34 | sprintf (buffer, "hash: %"PRIu64", %"PRIu64"\n", h1, h2);
35 | SF_display (buffer);
36 | free (number); /* previously leaked */
37 | free (buffer);
38 | return(0) ;
39 | }
40 |
41 | // gcc -Wall -O3 -o test2.plugin stplugin.c test2.c
42 |
--------------------------------------------------------------------------------
/src/github-issues/35/test2.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/35/test2.plugin
--------------------------------------------------------------------------------
/src/github-issues/40/gtools-1.3.3.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/40/gtools-1.3.3.zip
--------------------------------------------------------------------------------
/src/github-issues/40/plugin.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/40/plugin.zip
--------------------------------------------------------------------------------
/src/github-issues/40/plugin/Makefile:
--------------------------------------------------------------------------------
1 | GCC = gcc
2 | FLAGS = -Wall -shared -fPIC -DSYSTEM=OPUNIX
3 | SPOOKY = -L./ -l:libspookyhash.a
4 | # Default goal: delete any old plugins, then build test1 and test2.
5 | all: clean test1 test2
6 | # test1: stplugin.c + test1.c, no extra libraries.
7 | test1: test1.c stplugin.c
8 | $(GCC) $(FLAGS) -o test1.plugin stplugin.c test1.c
9 | # test2: stplugin.c + test2.c, linked with the SPOOKY flags.
10 | test2: test2.c stplugin.c
11 | $(GCC) $(FLAGS) -o test2.plugin stplugin.c test2.c $(SPOOKY)
12 | # clean is declared phony; it only removes the two built plugins.
13 | .PHONY: clean
14 | clean:
15 | rm -f test1.plugin
16 | rm -f test2.plugin
17 |
--------------------------------------------------------------------------------
/src/github-issues/40/plugin/libspookyhash.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/40/plugin/libspookyhash.a
--------------------------------------------------------------------------------
/src/github-issues/40/plugin/stplugin.c:
--------------------------------------------------------------------------------
1 | /*
2 | stplugin.c, version 2.0
3 | copyright (c) 2003, 2006 StataCorp
4 | */
5 |
6 | #include "stplugin.h"
7 |
8 | /* Plugin-interface pointer; assigned by pginit() below. */
9 | ST_plugin *_stata_;
10 |
11 | /*
12 |  * Store the interface pointer p in the global _stata_ and hand
13 |  * SD_PLUGINVER back to the caller.
14 |  * NOTE(review): presumably Stata calls this once when it loads the
15 |  * plugin -- confirm against the plugin interface documentation.
16 |  */
17 | STDLL pginit(ST_plugin *p)
18 | {
19 |     _stata_ = p;
20 |     return SD_PLUGINVER;
21 | }
22 |
--------------------------------------------------------------------------------
/src/github-issues/40/plugin/test.do:
--------------------------------------------------------------------------------
1 | clear all // Issue 40 script: after each plugin call, parse an optional cilevel option and display the result
2 | program test1, plugin using(test1.plugin)
3 | plugin call test1
4 | syntax, [foo(cilevel)]
5 | disp "`foo'"
6 | * Repeat the same call / syntax / display sequence with test2.plugin.
7 | program test2, plugin using(test2.plugin)
8 | plugin call test2
9 | syntax, [foo(cilevel)]
10 | disp "`foo'"
11 | * Repeat once more after calling _gtools_internal on one observation with GTOOLS_CALLER set to ghash.
12 | set obs 1
13 | global GTOOLS_CALLER ghash
14 | _gtools_internal
15 | syntax, [foo(cilevel)]
16 | disp "`foo'"
17 |
--------------------------------------------------------------------------------
/src/github-issues/40/plugin/test1.c:
--------------------------------------------------------------------------------
1 | #include "stplugin.h"
2 |
3 | /* Stub entry point; always returns 0. */
4 | int main()
5 | {
6 |     return 0;
7 | }
8 |
9 | /* Stub entry point under the Windows name; always returns 0. */
10 | int WinMain()
11 | {
12 |     return 0;
13 | }
14 |
15 | /*
16 |  * Plugin call handler: sends "Hello World" to SF_display and returns 0.
17 |  * argc and argv are accepted but not used.
18 |  */
19 | STDLL stata_call(int argc, char *argv[])
20 | {
21 |     SF_display("Hello World\n");
22 |     return 0;
23 | }
24 |
25 | /*
26 |  * Build-and-run recipe (Stata commands). The directory is the one
27 |  * recorded in the file and names the issue-35 folder.
28 |
29 | cd /home/mauricio/code/stata-gtools/src/github-issues/35/
30 | !gcc -Wall -shared -fPIC -DSYSTEM=OPUNIX -o test1.plugin stplugin.c test1.c
31 | capture program drop test1
32 | program test1, plugin using(test1.plugin)
33 | plugin call test1
34 |
35 | */
36 |
--------------------------------------------------------------------------------
/src/github-issues/40/plugin/test1.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/40/plugin/test1.plugin
--------------------------------------------------------------------------------
/src/github-issues/40/plugin/test2.c:
--------------------------------------------------------------------------------
1 | #include "stplugin.h"
2 | #include "spookyhash_api.h"
3 |
4 | #include <inttypes.h>
5 | #include <stdint.h>
6 | #include <stdio.h>
7 | #include <stdlib.h>
8 | #include <string.h>
9 |
10 | /* Size of the scratch buffer used to format each output line. */
11 | #define BUFFER_SIZE 1024
12 |
13 | /* Stub entry point; always returns 0. */
14 | int main()
15 | {
16 |     return 0;
17 | }
18 |
19 | /* Stub entry point under the Windows name; always returns 0. */
20 | int WinMain()
21 | {
22 |     return 0;
23 | }
24 |
25 | /*
26 |  * Plugin call handler.
27 |  *
28 |  * Formats a string and a double into a heap buffer and displays the
29 |  * line, then hashes the double with spookyhash_128 and displays both
30 |  * 64-bit halves. argc and argv are accepted but not used.
31 |  *
32 |  * Returns 0 on success, or 909 when an allocation fails.
33 |  * NOTE(review): 909 is believed to be Stata's out-of-memory return
34 |  * code -- confirm.
35 |  */
36 | STDLL stata_call(int argc, char *argv[])
37 | {
38 |     char *buffer = malloc(BUFFER_SIZE * sizeof(char));
39 |     char *string = strdup("foo");
40 |     ST_double *number = calloc(1, sizeof(ST_double));
41 |     /*
42 |      * NOTE(review): per the SpookyHash API these are read as seeds
43 |      * before being overwritten with the hash, so they must start with
44 |      * defined values -- confirm against spookyhash_api.h.
45 |      */
46 |     uint64_t h1 = 0, h2 = 0;
47 |     int rc = 0;
48 |
49 |     if (buffer == NULL || string == NULL || number == NULL) {
50 |         rc = 909;
51 |         goto cleanup;
52 |     }
53 |
54 |     /* The array holds exactly one element, so index 0 is the only valid slot. */
55 |     number[0] = 1729.42;
56 |
57 |     snprintf(buffer, BUFFER_SIZE, "%s: %9.2f\n", string, *number);
58 |     SF_display(buffer);
59 |
60 |     spookyhash_128(number, sizeof(ST_double), &h1, &h2);
61 |
62 |     snprintf(buffer, BUFFER_SIZE, "hash: %"PRIu64", %"PRIu64"\n", h1, h2);
63 |     SF_display(buffer);
64 |
65 | cleanup:
66 |     /* free(NULL) is a no-op, so this is safe on the failure path too. */
67 |     free(number);
68 |     free(string);
69 |     free(buffer);
70 |     return rc;
71 | }
72 |
73 | // Build: see the test2 target of the Makefile in this directory.
74 |
--------------------------------------------------------------------------------
/src/github-issues/40/plugin/test2.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/40/plugin/test2.plugin
--------------------------------------------------------------------------------
/src/github-issues/40/test.do:
--------------------------------------------------------------------------------
1 | ssc install parallel
2 | * Issue 40 script: bootstrap the pargegen program with and without the parallel prefix.
3 | clear all
4 | sysuse auto, clear
5 | * pargegen creates a gegen sum, displays the local macro level (never set inside the program), runs reg on the marked sample, and drops the sum again.
6 | parallel setclusters 2, f
7 | capture program drop pargegen
8 | program pargegen
9 | version 13
10 | syntax varlist [if]
11 | marksample touse
12 | gegen test = sum(price)
13 | disp "`level'"
14 | reg `varlist' if `touse'
15 | drop test
16 | end
17 | * Same bootstrap twice: first through parallel bs, then through plain bs.
18 | parallel bs, reps(50) nodots: pargegen price weight foreign rep78
19 | bs, reps(50) nodots: pargegen price weight foreign rep78
20 |
--------------------------------------------------------------------------------
/src/github-issues/45/test.do:
--------------------------------------------------------------------------------
1 | * clear
2 | * input long id1 int id2
3 | * 1225800 179
4 | * 1226197 162
5 | * 1245415 167
6 | * 1245415 204
7 | * 1249196 158
8 | * 1246805 226
9 | * 1247361 189
10 | * 1248872 203
11 | * 1249196 158
12 | * end
13 | * tostring id1 id2, gen(sid1 sid2)
14 | * cap noi gisid id1 id2, v
15 | * assert _rc == 459
16 | * cap noi gisid sid1 sid2, v
17 | * assert _rc == 459
18 | *
19 | * clear
20 | * input long id1 int id2
21 | * 1 13
22 | * 2 11
23 | * 3 12
24 | * 3 16
25 | * 9 10
26 | * 4 17
27 | * 5 14
28 | * 6 15
29 | * 9 10
30 | * end
31 | * tostring id1 id2, gen(sid1 sid2)
32 | * cap noi gisid id1 id2, v
33 | * assert _rc == 459
34 | * cap noi gisid sid1 sid2, v
35 | * assert _rc == 459
36 | * Active case: five rows in which (9, 1) appears twice, so (id1, id2) is not a unique key; return code 459 is asserted for both the numeric and the string versions.
37 | clear
38 | input long id1 int id2
39 | 3 6
40 | 3 7
41 | 9 1
42 | 4 1
43 | 9 1
44 | end
45 | gen id3 = _n
46 | tostring id1 id2, gen(sid1 sid2)
47 | cap noi gisid id1 id2, v
48 | assert _rc == 459
49 | cap noi gisid sid1 sid2, v
50 | assert _rc == 459
51 | * Same assertions after sorting the data by the key variables.
52 | sort id1 id2
53 | cap noi gisid id1 id2, v
54 | assert _rc == 459
55 | sort sid1 sid2
56 | cap noi gisid sid1 sid2, v
57 | assert _rc == 459
58 | * Adding the row counter (id3 / sid3) to the key makes it unique, so return code 0 is asserted.
59 | gen sid3 = string(_n)
60 | cap noi gisid id1 id2 id3, v
61 | assert _rc == 0
62 | cap noi gisid sid1 sid2 sid3, v
63 | assert _rc == 0
64 | * The block below is a commented-out variant that expands the data to 10,000,000 observations.
65 | /*
66 | set obs 10000000
67 | replace id1 = 10 + mod(_n, 123) in 6 / `=_N'
68 | replace id2 = 10 + mod(_n, 543) in 6 / `=_N'
69 | hashsort id3 id1 id2
70 | gisid id1 id2, v
71 | replace sid1 = string(id1)
72 | replace sid2 = string(id2)
73 | gisid sid1 sid2, v
74 | hashsort id1 id2
75 | gisid id1 id2, v
76 | hashsort sid1 sid2
77 | gisid sid1 sid2, v
78 | */
79 |
--------------------------------------------------------------------------------
/src/github-issues/48/test.do:
--------------------------------------------------------------------------------
1 | clear
2 | set obs 10
3 | gen x = "a"
4 | gen z = 0
5 | gen a = 1
6 | gen b = 1
7 | gen c = 1
8 | * Issue 48 script. Only x, z, a, b and c exist; color, pink and zz are never created, so commands naming them refer to variables that are not in the data.
9 | gcontract color
10 | gcontract color pink
11 | gcontract color _.pink
12 | * gcollapse with by() lists written as single names, ranges (a-c, a - c) and ranges ending in zz, each with no option, with nods and with ds.
13 | preserve
14 | gcollapse x
15 | restore, preserve
16 | gcollapse z, by(a)
17 | restore, preserve
18 | gcollapse z, by(a b c)
19 | restore, preserve
20 | gcollapse z, by(a b c zz)
21 | restore, preserve
22 | gcollapse z, by(a-zz)
23 | restore, preserve
24 | gcollapse z, by(a-zz) nods
25 | restore, preserve
26 | gcollapse z, by(a-zz) ds
27 | restore, preserve
28 | gcollapse z, by(a-c)
29 | restore, preserve
30 | gcollapse z, by(a-c) nods
31 | restore, preserve
32 | gcollapse z, by(a-c) ds
33 | restore, preserve
34 | gcollapse z, by(a - c)
35 | restore, preserve
36 | gcollapse z, by(a - c) nods
37 | restore, preserve
38 | gcollapse z, by(a - c) ds
39 | restore
40 | * Same variable-list forms passed to gcontract.
41 | preserve
42 | gcontract a
43 | restore, preserve
44 | gcontract a b c
45 | restore, preserve
46 | gcontract a b c zz
47 | restore, preserve
48 | gcontract a-zz
49 | restore, preserve
50 | gcontract a-zz, nods
51 | restore, preserve
52 | gcontract a-zz, ds
53 | restore, preserve
54 | gcontract a-c
55 | restore, preserve
56 | gcontract a-c, nods
57 | restore, preserve
58 | gcontract a-c, ds
59 | restore, preserve
60 | gcontract a - c
61 | restore, preserve
62 | gcontract a - c, nods
63 | restore, preserve
64 | gcontract a - c, ds
65 | restore
66 | * Same variable-list forms passed to glevelsof.
67 | glevelsof a
68 | glevelsof a b c
69 | glevelsof a b c zz
70 | glevelsof a-zz
71 | glevelsof a-zz, nods
72 | glevelsof a-zz, ds
73 | glevelsof a-c
74 | glevelsof a-c, nods
75 | glevelsof a-c, ds
76 | glevelsof a - c,
77 | glevelsof a - c, nods
78 | glevelsof a - c, ds
79 | * Same variable-list forms passed to gtop, followed by one call written as a*c.
80 | gtop a
81 | gtop a b c
82 | gtop a b c zz
83 | gtop a-zz
84 | gtop a-zz, nods
85 | gtop a-zz, ds
86 | gtop a-c
87 | gtop a-c, nods
88 | gtop a-c, ds
89 | gtop a - c,
90 | gtop a - c, nods
91 | gtop a - c, ds
92 | gtop a*c
93 |
--------------------------------------------------------------------------------
/src/github-issues/60/plugin.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/60/plugin.zip
--------------------------------------------------------------------------------
/src/github-issues/60/plugin/Makefile:
--------------------------------------------------------------------------------
1 | OSFLAGS = -shared
2 | GCC = x86_64-w64-mingw32-gcc.exe
3 |
4 | TEST1_SRC=stplugin.c test1.c
5 |
6 | TEST2_SRC=stplugin.c test2.c
7 |
8 | # SpookyHash is compiled from source into test2.plugin rather than linked
9 | # as a prebuilt library.
10 | SPOOKYHASH_SRC=lib/spookyhash/src/context.c \
11 | 	lib/spookyhash/src/globals.c \
12 | 	lib/spookyhash/src/spookyhash.c
13 |
14 | SPOOKYHASH_INC=-Ilib/spookyhash/src
15 |
16 | # None of the targets below produces a file named after itself, so all of
17 | # them are phony. Declaring that keeps a stray file called "all", "test1"
18 | # or "test2" from silently disabling the corresponding rule.
19 | .PHONY: all clean test1 test2
20 |
21 | # Rebuild both test plugins from scratch.
22 | all: clean test1 test2
23 |
24 | # Plugin built from the TEST1_SRC files only.
25 | test1: $(TEST1_SRC)
26 | 	$(GCC) $(OSFLAGS) -o test1.plugin $^
27 |
28 | # Plugin built from the TEST2_SRC files together with the SpookyHash sources.
29 | test2: $(TEST2_SRC) $(SPOOKYHASH_SRC)
30 | 	$(GCC) $(OSFLAGS) -o test2.plugin $(SPOOKYHASH_INC) $^
31 |
32 | # Remove both built plugins.
33 | clean:
34 | 	rm -f test1.plugin
35 | 	rm -f test2.plugin
36 |
--------------------------------------------------------------------------------
/src/github-issues/60/plugin/lib/spookyhash/src/context.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Centaurean SpookyHash
3 | *
4 | * Copyright (c) 2015, Guillaume Voirin
5 | * All rights reserved.
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are met:
9 | *
10 | * 1. Redistributions of source code must retain the above copyright notice, this
11 | * list of conditions and the following disclaimer.
12 | *
13 | * 2. Redistributions in binary form must reproduce the above copyright notice,
14 | * this list of conditions and the following disclaimer in the documentation
15 | * and/or other materials provided with the distribution.
16 | *
17 | * 3. Neither the name of the copyright holder nor the names of its
18 | * contributors may be used to endorse or promote products derived from
19 | * this software without specific prior written permission.
20 | *
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | *
32 | * 25/01/15 12:19
33 | *
34 | * ----------
35 | * SpookyHash
36 | * ----------
37 | *
38 | * Author(s)
39 | * Bob Jenkins (http://burtleburtle.net/bob/hash/spooky.html)
40 | *
41 | * Description
42 | * Very fast non cryptographic hash
43 | */
44 |
45 | #include "context.h"
46 |
47 | /*
48 |  * Put a hashing context into its starting state: the two seeds go into
49 |  * m_state[0] and m_state[1], and both m_remainder and m_length are set
50 |  * to zero.
51 |  */
52 | SPOOKYHASH_WINDOWS_EXPORT SPOOKYHASH_FORCE_INLINE void spookyhash_context_init(spookyhash_context *context, uint64_t seed1, uint64_t seed2) {
53 |     context->m_state[0] = seed1;
54 |     context->m_state[1] = seed2;
55 |     context->m_remainder = 0;
56 |     context->m_length = 0;
57 | }
58 |
--------------------------------------------------------------------------------
/src/github-issues/60/plugin/lib/spookyhash/src/context.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Centaurean SpookyHash
3 | *
4 | * Copyright (c) 2015, Guillaume Voirin
5 | * All rights reserved.
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are met:
9 | *
10 | * 1. Redistributions of source code must retain the above copyright notice, this
11 | * list of conditions and the following disclaimer.
12 | *
13 | * 2. Redistributions in binary form must reproduce the above copyright notice,
14 | * this list of conditions and the following disclaimer in the documentation
15 | * and/or other materials provided with the distribution.
16 | *
17 | * 3. Neither the name of the copyright holder nor the names of its
18 | * contributors may be used to endorse or promote products derived from
19 | * this software without specific prior written permission.
20 | *
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | *
32 | * 24/01/15 22:32
33 | *
34 | * ----------
35 | * SpookyHash
36 | * ----------
37 | *
38 | * Author(s)
39 | * Bob Jenkins (http://burtleburtle.net/bob/hash/spooky.html)
40 | *
41 | * Description
42 | * Very fast non cryptographic hash
43 | */
44 |
45 | #ifndef SPOOKYHASH_CONTEXT_H
46 | #define SPOOKYHASH_CONTEXT_H
47 |
48 | #include "globals.h"
49 | /* Sizes derived from SPOOKYHASH_VARIABLES (not defined in this header), plus a fixed 64-bit constant. */
50 | #define SPOOKYHASH_BLOCK_SIZE (SPOOKYHASH_VARIABLES * 8)
51 | #define SPOOKYHASH_BUFFER_SIZE (2 * SPOOKYHASH_BLOCK_SIZE)
52 | #define SPOOKYHASH_CONSTANT (0xdeadbeefdeadbeefLL)
53 | /* Initialise a context from two 64-bit seeds; the definition is in context.c. */
54 | SPOOKYHASH_WINDOWS_EXPORT void spookyhash_context_init(spookyhash_context *, uint64_t, uint64_t);
55 |
56 | #endif
--------------------------------------------------------------------------------
/src/github-issues/60/plugin/lib/spookyhash/src/globals.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Centaurean SpookyHash
3 | *
4 | * Copyright (c) 2015, Guillaume Voirin
5 | * All rights reserved.
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are met:
9 | *
10 | * 1. Redistributions of source code must retain the above copyright notice, this
11 | * list of conditions and the following disclaimer.
12 | *
13 | * 2. Redistributions in binary form must reproduce the above copyright notice,
14 | * this list of conditions and the following disclaimer in the documentation
15 | * and/or other materials provided with the distribution.
16 | *
17 | * 3. Neither the name of the copyright holder nor the names of its
18 | * contributors may be used to endorse or promote products derived from
19 | * this software without specific prior written permission.
20 | *
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | *
32 | * 26/06/15 1:08
33 | *
34 | * ----------
35 | * SpookyHash
36 | * ----------
37 | *
38 | * Author(s)
39 | * Bob Jenkins (http://burtleburtle.net/bob/hash/spooky.html)
40 | *
41 | * Description
42 | * Very fast non cryptographic hash
43 | */
44 |
45 | #include "globals.h"
46 | /* Accessors returning the SPOOKYHASH_MAJOR_VERSION, SPOOKYHASH_MINOR_VERSION and SPOOKYHASH_REVISION macros as uint8_t values. */
47 | SPOOKYHASH_WINDOWS_EXPORT uint8_t spookyhash_version_major() {
48 | return SPOOKYHASH_MAJOR_VERSION;
49 | }
50 |
51 | SPOOKYHASH_WINDOWS_EXPORT uint8_t spookyhash_version_minor() {
52 | return SPOOKYHASH_MINOR_VERSION;
53 | }
54 |
55 | SPOOKYHASH_WINDOWS_EXPORT uint8_t spookyhash_version_revision() {
56 | return SPOOKYHASH_REVISION;
57 | }
--------------------------------------------------------------------------------
/src/github-issues/60/plugin/lib/spookyhash/src/spookyhash.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Centaurean SpookyHash
3 | *
4 | * Copyright (c) 2015, Guillaume Voirin
5 | * All rights reserved.
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are met:
9 | *
10 | * 1. Redistributions of source code must retain the above copyright notice, this
11 | * list of conditions and the following disclaimer.
12 | *
13 | * 2. Redistributions in binary form must reproduce the above copyright notice,
14 | * this list of conditions and the following disclaimer in the documentation
15 | * and/or other materials provided with the distribution.
16 | *
17 | * 3. Neither the name of the copyright holder nor the names of its
18 | * contributors may be used to endorse or promote products derived from
19 | * this software without specific prior written permission.
20 | *
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | *
32 | * 24/01/15 22:32
33 | *
34 | * ----------
35 | * SpookyHash
36 | * ----------
37 | *
38 | * Author(s)
39 | * Bob Jenkins (http://burtleburtle.net/bob/hash/spooky.html)
40 | *
41 | * Description
42 | * Very fast non cryptographic hash
43 | */
44 |
45 | #ifndef SPOOKYHASH_H
46 | #define SPOOKYHASH_H
47 |
48 | #include "context.h"
49 | /* Unaligned reads are switched off (0). SPOOKYHASH_ROTATE(x, k) combines x << k with x >> (64 - k) and expands each argument twice. NOTE(review): k == 0 would shift by 64 -- confirm no caller passes 0. */
50 | #define SPOOKYHASH_ALLOW_UNALIGNED_READS 0
51 | #define SPOOKYHASH_ROTATE(x, k) (((x) << (k)) | (((x) >> (64 - (k)))))
52 |
53 | #endif
54 |
--------------------------------------------------------------------------------
/src/github-issues/60/plugin/stplugin.c:
--------------------------------------------------------------------------------
1 | /*
2 | stplugin.c, version 2.0
3 | copyright (c) 2003, 2006 StataCorp
4 | */
5 |
6 | #include "stplugin.h"
7 |
8 | /* Plugin-interface pointer; assigned by pginit() below. */
9 | ST_plugin *_stata_;
10 |
11 | /*
12 |  * Store the interface pointer p in the global _stata_ and hand
13 |  * SD_PLUGINVER back to the caller.
14 |  * NOTE(review): presumably Stata calls this once when it loads the
15 |  * plugin -- confirm against the plugin interface documentation.
16 |  */
17 | STDLL pginit(ST_plugin *p)
18 | {
19 |     _stata_ = p;
20 |     return SD_PLUGINVER;
21 | }
22 |
--------------------------------------------------------------------------------
/src/github-issues/60/plugin/test.do:
--------------------------------------------------------------------------------
1 | clear all // Issue 60 script: load each plugin as a program and call it once
2 | program test1, plugin using(test1.plugin)
3 | plugin call test1
4 | * Same for test2.plugin.
5 | program test2, plugin using(test2.plugin)
6 | plugin call test2
7 |
--------------------------------------------------------------------------------
/src/github-issues/60/plugin/test1.c:
--------------------------------------------------------------------------------
1 | #include "stplugin.h"
2 |
3 | /* Stub entry point; always returns 0. */
4 | int main()
5 | {
6 |     return 0;
7 | }
8 |
9 | /* Stub entry point under the Windows name; always returns 0. */
10 | int WinMain()
11 | {
12 |     return 0;
13 | }
14 |
15 | /*
16 |  * Plugin call handler: sends "Hello World" to SF_display and returns 0.
17 |  * argc and argv are accepted but not used.
18 |  */
19 | STDLL stata_call(int argc, char *argv[])
20 | {
21 |     SF_display("Hello World\n");
22 |     return 0;
23 | }
24 |
--------------------------------------------------------------------------------
/src/github-issues/60/plugin/test1.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/60/plugin/test1.plugin
--------------------------------------------------------------------------------
/src/github-issues/60/plugin/test2.c:
--------------------------------------------------------------------------------
1 | #include "stplugin.h"
2 | #include "spookyhash_api.h"
3 |
4 | #include <inttypes.h>
5 | #include <stdint.h>
6 | #include <stdio.h>
7 | #include <stdlib.h>
8 | #include <string.h>
9 |
10 | /* Size of the scratch buffer used to format each output line. */
11 | #define BUFFER_SIZE 1024
12 |
13 | /* Stub entry point; always returns 0. */
14 | int main()
15 | {
16 |     return 0;
17 | }
18 |
19 | /* Stub entry point under the Windows name; always returns 0. */
20 | int WinMain()
21 | {
22 |     return 0;
23 | }
24 |
25 | /*
26 |  * Plugin call handler.
27 |  *
28 |  * Formats a string and a double into a heap buffer and displays the
29 |  * line, then hashes the double with spookyhash_128 and displays both
30 |  * 64-bit halves. argc and argv are accepted but not used.
31 |  *
32 |  * Returns 0 on success, or 909 when an allocation fails.
33 |  * NOTE(review): 909 is believed to be Stata's out-of-memory return
34 |  * code -- confirm.
35 |  */
36 | STDLL stata_call(int argc, char *argv[])
37 | {
38 |     char *buffer = malloc(BUFFER_SIZE * sizeof(char));
39 |     char *string = strdup("foo");
40 |     ST_double *number = calloc(1, sizeof(ST_double));
41 |     /*
42 |      * NOTE(review): per the SpookyHash API these are read as seeds
43 |      * before being overwritten with the hash, so they must start with
44 |      * defined values -- confirm against spookyhash_api.h.
45 |      */
46 |     uint64_t h1 = 0, h2 = 0;
47 |     int rc = 0;
48 |
49 |     if (buffer == NULL || string == NULL || number == NULL) {
50 |         rc = 909;
51 |         goto cleanup;
52 |     }
53 |
54 |     /* The array holds exactly one element, so index 0 is the only valid slot. */
55 |     number[0] = 1729.42;
56 |
57 |     snprintf(buffer, BUFFER_SIZE, "%s: %9.2f\n", string, *number);
58 |     SF_display(buffer);
59 |
60 |     spookyhash_128(number, sizeof(ST_double), &h1, &h2);
61 |
62 |     snprintf(buffer, BUFFER_SIZE, "hash: %"PRIu64", %"PRIu64"\n", h1, h2);
63 |     SF_display(buffer);
64 |
65 | cleanup:
66 |     /* free(NULL) is a no-op, so this is safe on the failure path too. */
67 |     free(number);
68 |     free(string);
69 |     free(buffer);
70 |     return rc;
71 | }
72 |
--------------------------------------------------------------------------------
/src/github-issues/60/plugin/test2.plugin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/60/plugin/test2.plugin
--------------------------------------------------------------------------------
/src/github-issues/65/test.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear // Issue 65 script: gtabstat run under estpost, with results shown through esttab
2 | estpost gtabstat price mpg rep78, statistics(mean sd)
3 | esttab ., cells("price mpg rep78")
4 | estpost gtabstat price mpg rep78, statistics(mean sd) columns(statistics)
5 | esttab ., cells("mean(fmt(a3)) sd")
6 | estpost gtabstat price mpg rep78, by(foreign) statistics(mean sd) columns(variables)
7 | estpost gtabstat price mpg rep78, by(foreign) statistics(mean sd) columns(statistics)
8 | esttab ., main(mean) aux(sd) nostar unstack noobs nonote label
9 | * Single-variable calls grouped by foreign, in both column layouts.
10 | estpost gtabstat price, by(foreign) statistics(mean sd) columns(variables)
11 | estpost gtabstat price, by(foreign) statistics(mean sd) columns(statistics)
12 | * Single-variable calls without by(), in both column layouts.
13 | estpost gtabstat price, statistics(mean sd) columns(variables)
14 | estpost gtabstat price, statistics(mean sd) columns(statistics)
15 |
--------------------------------------------------------------------------------
/src/github-issues/67/test-transform.do:
--------------------------------------------------------------------------------
1 | clear all
2 | * bench N: command -- runs command between timer on N and timer off N, then stores the elapsed time in the caller's local rN.
3 | capture program drop bench
4 | program bench
5 | gettoken timer call: 0, p(:)
6 | gettoken colon call: call, p(:)
7 | cap timer clear `timer'
8 | timer on `timer'
9 | `call'
10 | timer off `timer'
11 | qui timer list
12 | c_local r`timer' `=r(t`timer')'
13 | end
14 | * Test data: 10,000,000 observations with a random integer-valued x and a group id g.
15 | clear
16 | set obs 10000000
17 | gen x = ceil(runiform() * 10000)
18 | gen g = round(_n / 100)
19 | * Ungrouped ranks: odd-numbered timers run egen, the following even-numbered timer runs the matching gegen call.
20 | bench 1: egen double rankx_def1 = rank(x)
21 | bench 2: gegen double rankx_def2 = rank(x)
22 |
23 | bench 3: egen rankx_track1 = rank(x), track
24 | bench 4: gegen rankx_track2 = rank(x), ties(track)
25 |
26 | bench 5: egen rankx_field1 = rank(x), field
27 | bench 6: gegen rankx_field2 = rank(x), ties(field)
28 |
29 | bench 7: egen long rankx_uniq1 = rank(x), uniq
30 | bench 8: gegen long rankx_uniq2 = rank(x), ties(uniq)
31 |
32 | gegen rankx_uniq3 = rank(x), ties(stable)
33 | * Grouped ranks (by g), timers 11 to 18, in the same egen / gegen pairing.
34 | bench 11: egen double rankx_group_def1 = rank(x), by(g)
35 | bench 12: gegen double rankx_group_def2 = rank(x), by(g)
36 |
37 | bench 13: egen rankx_group_track1 = rank(x), by(g) track
38 | bench 14: gegen rankx_group_track2 = rank(x), by(g) ties(track)
39 |
40 | bench 15: egen rankx_group_field1 = rank(x), by(g) field
41 | bench 16: gegen rankx_group_field2 = rank(x), by(g) ties(field)
42 |
43 | bench 17: egen long rankx_group_uniq1 = rank(x), by(g) uniq
44 | bench 18: gegen long rankx_group_uniq2 = rank(x), by(g) ties(uniq)
45 |
46 | gegen rankx_group_uniq3 = rank(x), by(g) ties(stable)
47 | * Checks: default, track and field results must match between egen and gegen; the uniq variants are only tested for uniqueness with gisid; ties(stable) must equal the position after a stable sort.
48 | assert (rankx_def1 == rankx_def2)
49 | assert (rankx_track1 == rankx_track2)
50 | assert (rankx_field1 == rankx_field2)
51 |
52 | sort x, stable
53 | assert rankx_uniq3 == _n
54 |
55 | gisid rankx_uniq1
56 | gisid rankx_uniq2
57 |
58 | assert (rankx_group_def1 == rankx_group_def2)
59 | assert (rankx_group_track1 == rankx_group_track2)
60 | assert (rankx_group_field1 == rankx_group_field2)
61 |
62 | cap drop ix
63 | sort g x, stable
64 | by g: gen long ix = _n
65 | assert rankx_group_uniq3 == ix
66 |
67 | gisid g rankx_group_uniq1
68 | gisid g rankx_group_uniq2
69 | * Build and display a timing table; the last column is 100 * (native - gtools) / native.
70 | local bench_table `" Versus | Native | gtools | % faster "'
71 | local bench_table `"`bench_table'"' _n(1) `" ---------- | ------ | ------ | -------- "'
72 |
73 | local commands default track field unique
74 | forvalues i = 1(2)7 {
75 | gettoken cmd commands: commands
76 | local pct "`:disp %7.2f 100 * (`r`i'' - `r`=`i'+1'') / `r`i'''"
77 | local dnative "`:disp %6.2f `r`i'''"
78 | local dgtools "`:disp %6.2f `r`=`i'+1'''"
79 | local cmd `"`:disp %10s "`cmd'"'"'
80 | local bench_table `"`bench_table'"' _n(1) `" `cmd' | `dnative' | `dgtools' | `pct'% "'
81 | }
82 |
83 | local bench_table `"`bench_table'"' _n(1) `" ---------- | ------ | ------ | -------- "'
84 | local bench_table `"`bench_table'"' _n(1) `" by group "'
85 | local bench_table `"`bench_table'"' _n(1) `" ---------- | ------ | ------ | -------- "'
86 |
87 | local commands default track field unique
88 | forvalues i = 11(2)17 {
89 | gettoken cmd commands: commands
90 | local pct "`:disp %7.2f 100 * (`r`i'' - `r`=`i'+1'') / `r`i'''"
91 | local dnative "`:disp %6.2f `r`i'''"
92 | local dgtools "`:disp %6.2f `r`=`i'+1'''"
93 | local cmd `"`:disp %10s "`cmd'"'"'
94 | local bench_table `"`bench_table'"' _n(1) `" `cmd' | `dnative' | `dgtools' | `pct'% "'
95 | }
96 | disp _n(1) `"`bench_table'"'
97 |
--------------------------------------------------------------------------------
/src/github-issues/67/test.do:
--------------------------------------------------------------------------------
1 | * Create simulated data
2 | clear all
3 | * set obs 10000000
4 | set obs 1000000
5 | gen x = ceil(runiform()*10000)
6 | gen g = round(_n / 100)
7 | tempfile data
8 | save `data'
9 | * NOTE: in every section below the egen call and the validation steps are commented out, so timers 1, 3 and 5 wrap no command and only timers 2, 4 and 6 measure work.
10 | *---------------------------------------------
11 | * egen rank
12 | *---------------------------------------------
13 |
14 | * Load simulated data
15 | use `data', clear
16 |
17 | * With egen
18 | timer on 1
19 | * egen rank_x = rank(x)
20 | timer off 1
21 |
22 | * With gtools
23 | timer on 2
24 | tempvar t1 t2
25 | gegen `t1' = group(x), counts(`t2')
26 | gen rank2_x = `t1' + `t2' / 2 - 0.5
27 | timer off 2
28 |
29 | * Validate
30 | * gen same = rank_x==rank2_x
31 | * sum
32 |
33 | *---------------------------------------------
34 | * egen rank, track
35 | *---------------------------------------------
36 |
37 | * Load simulated data
38 | use `data', clear
39 |
40 | * With egen
41 | timer on 3
42 | * egen rank_x = rank(x), track
43 | timer off 3
44 |
45 | * With gtools
46 | timer on 4
47 | tempvar t1 t2
48 | gen `t1' = x
49 | fasterxtile `t2' = x, nq(`=_N')
50 | timer off 4
51 |
52 | * Validate
53 | * gen same = rank_x==rank2_x
54 | * sum
55 |
56 | *---------------------------------------------
57 | * egen rank, field
58 | *---------------------------------------------
59 |
60 | * Load simulated data
61 | use `data', clear
62 |
63 | * With egen
64 | timer on 5
65 | * egen rank_x = rank(x), field
66 | timer off 5
67 |
68 | * With gtools
69 | timer on 6
70 | tempvar t1 t2
71 | gegen `t1' = group(x), counts(`t2')
72 | gen rank2_x = `r(N)' - `t1' - `t2' + 2
73 | timer off 6
74 |
75 | * Validate they produce same results
76 | * gen same = rank_x==rank2_x
77 | * sum
78 |
79 | *---------------------------------------------
80 | * Display relative speeds
81 | *---------------------------------------------
82 |
83 | * Display benchmark speeds
84 | timer list
85 | timer clear
86 |
--------------------------------------------------------------------------------
/src/github-issues/67/test.orig.do:
--------------------------------------------------------------------------------
1 | * Create simulated data
2 | clear all
3 | set obs 10000000
4 | gen x = ceil(runiform()*10000)
5 | tempfile data
6 | save `data'
7 | * Each section reloads the data, times egen rank (odd timer) against a gtools-based construction (even timer), and summarizes an indicator of whether the two results agree.
8 | *---------------------------------------------
9 | * egen rank
10 | *---------------------------------------------
11 |
12 | * Load simulated data
13 | use `data', clear
14 |
15 | * With egen
16 | timer on 1
17 | egen rank_x = rank(x)
18 | timer off 1
19 |
20 | * With gtools
21 | timer on 2
22 | tempvar t1 t2 t3
23 | gen `t1' = x
24 | gdistinct x
25 | fasterxtile `t2' = x, nq(`r(N)')
26 | gegen `t3' = count(x), by(`t1')
27 | gen rank2_x = `t2' + `t3'/2 - 0.5
28 | timer off 2
29 |
30 | * Validate
31 | gen same = rank_x==rank2_x
32 | sum
33 |
34 | *---------------------------------------------
35 | * egen rank, track
36 | *---------------------------------------------
37 |
38 | * Load simulated data
39 | use `data', clear
40 |
41 | * With egen
42 | timer on 3
43 | egen rank_x = rank(x), track
44 | timer off 3
45 |
46 | * With gtools
47 | timer on 4
48 | tempvar t1 t2 t3
49 | gen `t1' = x
50 | gdistinct x
51 | local Nd = r(ndistinct) // not referenced again in this file
52 | fasterxtile `t2' = x, nq(`r(N)')
53 | gen rank2_x = `t2'
54 | timer off 4
55 |
56 | * Validate
57 | gen same = rank_x==rank2_x
58 | sum
59 |
60 | *---------------------------------------------
61 | * egen rank, field
62 | *---------------------------------------------
63 |
64 | * Load simulated data
65 | use `data', clear
66 |
67 | * With egen
68 | timer on 5
69 | egen rank_x = rank(x), field
70 | timer off 5
71 |
72 | * With gtools
73 | timer on 6
74 | tempvar t1 t2 t3
75 | gen `t1' = x
76 | gdistinct x
77 | local N = r(N)
78 | fasterxtile `t2' = x, nq(`N')
79 | gegen `t3' = count(x), by(`t1')
80 | gen rank2_x = `N' - `t2' - `t3' + 2
81 | timer off 6
82 |
83 | * Validate they produce same results
84 | gen same = rank_x==rank2_x
85 | sum
86 |
87 | *---------------------------------------------
88 | * Display relative speeds
89 | *---------------------------------------------
90 |
91 | * Display benchmark speeds
92 | timer list
93 | timer clear
94 |
--------------------------------------------------------------------------------
/src/github-issues/67/test.short.do:
--------------------------------------------------------------------------------
1 | * Create simulated data
2 | clear all
3 | set obs 1000000
4 | gen x = ceil(runiform() * 1000)
5 | qui gunique x
6 | assert r(J) < r(N)
7 | * Build the three rank variants from a per-value count and a fasterxtile call whose nq is the r(N) left behind by gegen.
8 | tempvar N
9 | gegen `N' = count(1), by(x)
10 | local nonmi = `r(N)'
11 | fasterxtile rankTrack = x, nq(`nonmi')
12 | gen rankField = `nonmi' - rankTrack - `N' + 2
13 | gen rankDefault = rankTrack + `N' / 2 - 0.5
14 | * Reference ranks computed with egen.
15 | egen _rankDefault = rank(x)
16 | egen _rankTrack = rank(x), track
17 | egen _rankField = rank(x), field
18 | * The constructed ranks must equal the egen ranks.
19 | assert (_rankDefault == rankDefault)
20 | assert (_rankTrack == rankTrack)
21 | assert (_rankField == rankField)
22 |
--------------------------------------------------------------------------------
/src/github-issues/72/issue.do:
--------------------------------------------------------------------------------
1 | clear
2 | set obs 9
3 | gen id=strofreal(floor((_n+2)/3))
4 |
5 | g cat="none" if id=="1"
6 | replace cat="one" if id!="1"
7 |
8 | gegen gtot=total(cat!=cat[_n-1]), by(id)
9 | egen tot=total(cat!=cat[_n-1]), by(id)
10 |
--------------------------------------------------------------------------------
/src/github-issues/78/ifin.do:
--------------------------------------------------------------------------------
1 | * test: load the auto data and call level1 with an if condition whose string literal contains ")"
2 | capture program drop test
3 | program test
4 | sysuse auto, clear
5 | level1 if !strpos(make, ")")
6 | end
7 | * level1: parse an optional if, list the resulting local, and forward it to level2 with "in 1/10" appended
8 | capture program drop level1
9 | program level1
10 | syntax [if]
11 | macro dir _if
12 | level2 `if' in 1/10
13 | end
14 | * level2: parse if and in, join them into local ifin through Mata, and hand that text to level3 as qualifiers and as two options
15 | capture program drop level2
16 | program level2
17 | syntax [if] [in]
18 | macro dir _if
19 | mata st_local("ifin", st_local("if") + " " + st_local("in"))
20 | macro dir _ifin
21 | local ifin: copy local ifin
22 | level3 `ifin', ifin(`ifin') ifintest(`ifin')
23 | end
24 | * level3: list how the same if/in text arrives as qualifiers, as ifin(str asis), and as ifintest(str)
25 | capture program drop level3
26 | program level3
27 | syntax [if] [in], ifin(str asis) ifintest(str)
28 | macro dir _if
29 | macro dir _in
30 | macro dir _ifin
31 | macro dir _ifintest
32 | end
33 | test
34 |
--------------------------------------------------------------------------------
/src/github-issues/78/test.do:
--------------------------------------------------------------------------------
1 | sysuse auto
2 | gdistinct make if !strpos(make, ")")
3 | gunique make if !strpos(make, ")")
4 | gegen x = group(foreign) if !strpos(make, "x)")
5 |
--------------------------------------------------------------------------------
/src/github-issues/88/test.do:
--------------------------------------------------------------------------------
1 | clear
2 | set obs 5
3 | g units=1
4 | g weight=5
5 |
6 | gegen total =total(units) [w=weight]
7 | gegen totalf =total(units) [fw=weight]
8 | gegen totalp =total(units) [pw=weight]
9 | sum total*
10 |
11 | collapse (sum) units [aw=weight]
12 | disp units
13 |
14 | clear
15 | set obs 5
16 | g units=_n
17 | g weight=_n
18 | gegen total =total(units) [w=weight]
19 | gegen totalu=total(units)
20 | sum total*
21 |
--------------------------------------------------------------------------------
/src/github-issues/debug-1/savehdfe.do:
--------------------------------------------------------------------------------
1 | global GTOOLS_BETA = 1
2 | sysuse auto, clear
3 | * A freshly loaded auto has no _hdfe_* variables, so a bare drop would stop the script with r(111)
4 | cap drop _hdfe_*
5 | gglm foreign price, family(binomial) absorb(rep78) mata(GLM) prefix(hdfe(_hdfe_))
6 |
7 | sysuse auto, clear
8 | gtop rep78 if mi(rep78), by(foreign) gen(a)
--------------------------------------------------------------------------------
/src/github-issues/debug-2/test-median.do:
--------------------------------------------------------------------------------
1 | clear
2 | set obs 1000
3 | gen double xx = int(100 * runiform()) / 100
4 | gegen yy1 = median(xx)
5 | gegen yy2 = pctile(xx), p(50)
6 | gcollapse (median) zz1 = xx, merge
7 | gcollapse (p50) zz2 = xx, merge
8 | tab yy1 yy2
9 | tab zz1 zz2
10 | gquantiles xx, _pctile
11 | disp r(r1)
12 |
--------------------------------------------------------------------------------
/src/github-issues/debug-2/test-noobs.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear
2 | tempvar yy
3 | tempname zz
4 | gen `yy' = .
5 | set tracedepth 1
6 | * set trace on
7 | * gegen `xx' = mean(price) if `yy' == 1, by(foreign)
8 |
9 | * cc: sort and xtset the current data by foreign, then gegen a by-group count of price into a tempvar
10 | capture program drop cc
11 | program cc
12 | sort foreign
13 | xtset foreign
14 | tempvar xx
15 | gegen `xx' = count(price), by(foreign) replace
16 | end
17 | frame put price foreign if `yy' == 1, into(`zz')
18 | frame `zz' {
19 | cc
20 | }
21 |
--------------------------------------------------------------------------------
/src/github-issues/debug-3/test-excludeself.do:
--------------------------------------------------------------------------------
1 | clear
2 | set obs 15
3 | gen x = _n
4 | gen y = mod(_n, 2)
5 | replace y = 2 if _n > 10
6 | replace x = . in 12
7 | gstats transform (range mean . .) z = x, excludeself by(y) replace
8 |
--------------------------------------------------------------------------------
/src/github-issues/debug-4/test_excludeself.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear
2 | gstats transform (moving mean -6 .) x=rep78, excludeself replace
3 | gstats transform (range mean -6 .) x=rep78, excludeself replace
4 | gstats transform (range mean -6 .) x=rep78, replace
5 | gstats transform (moving mean -6 .) x=rep78 (range mean -6 6) y=rep78, excludeself replace
6 |
--------------------------------------------------------------------------------
/src/github-issues/debug-4/test_gcollapse.do:
--------------------------------------------------------------------------------
1 | * ahaRename(i): copy variable number i into a new variable named by local revar (same storage type), then drop variable i
2 | cap mata mata drop ahaRename()
3 | mata
4 | void function ahaRename(real scalar i)
5 | {
6 | (void) st_addvar(st_vartype(i), st_local("revar"))
7 | if ( strpos(st_vartype(i), "str") ) {
8 | st_sstore(., st_local("revar"), st_sdata(., i))
9 | }
10 | else {
11 | st_store(., st_local("revar"), st_data(., i))
12 | }
13 | st_dropvar(i)
14 | }
15 | end
16 | use /home/mauricio/bulk/data/ra/doyle/cms-ambulance/aha/aha-data-120617.dta, clear
17 | local i 0
18 | unab vars: _all
19 | foreach var of local vars {
20 | local ++i
21 | if strpos("`var'", ".") {
22 | disp "`var'"
23 | local revar: subinstr local var "." "_", all
24 | mata: ahaRename(`i')
25 | if (`=`i'-1') {
26 | order `revar', after(`cached')
27 | local cached `revar'
28 | }
29 | else {
30 | order `revar'
31 | }
32 | }
33 | else {
34 | local cached `var'
35 | }
36 | }
37 |
38 | rename abs_hcahps_cmp_yr3 satis
39 | rename abs_proc_cmp_yr3 process
40 | rename abs_mort_cmp_yr3 Hmort30
41 | rename abs_readm_cmp_yr3 Hreadm30
42 | rename mort_30_ami_yr3 AMImort30
43 | rename mort_30_pn_yr3 PNmort30
44 | rename mort_30_hf_yr3 HFmort30
45 | rename readm_30_ami_yr3 AMIreadm30
46 | rename readm_30_pn_yr3 PNreadm30
47 | rename readm_30_hf_yr3 HFreadm30
48 | rename hospbd volume
49 | rename year diag_year
50 | rename low_profit lowpr
51 | rename high_profit hipr
52 | desc teach forpr nonpr gov coth
53 | local keepvars satis process Hmort30 Hreadm30 AMImort30 PNmort30 HFmort30 AMIreadm30 PNreadm30 HFreadm30 volume teach forpr nonpr gov lowpr hipr coth
54 |
55 | rename provider_id prvnumgrp
56 | keep `keepvars' prvnumgrp diag_year
57 | gcollapse (mean) `keepvars', by(prvnumgrp diag_year)
58 |
59 | set varabbrev on
60 | set more off
61 | clear
62 | set obs 10
63 | gen aa = 0
64 | gen bb = runiform()
65 | gen cc = runiform()
66 | gen dd = runiform()
67 | gegen x = mean(b c), by(a d* e)
68 | gen dz = runiform()
69 | gegen x = mean(b c), by(a d e)
70 |
--------------------------------------------------------------------------------
/src/github-issues/debug-4/test_gquantiles.do:
--------------------------------------------------------------------------------
1 | set varabbrev on
2 | set more off
3 | clear
4 | set obs 10
5 | gen aa = 0
6 | gen bb = runiform()
7 | gen cc = runiform()
8 | gen dd = runiform()
9 | gquantiles a b c d, pctile
10 |
--------------------------------------------------------------------------------
/src/github-issues/debug-4/test_greshape.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear
2 | gen i = _n
3 | greshape wide price mpg price, i(i) j(foreign) xi(drop)
4 | greshape wide mpg price, i(i) j(foreign) xi(drop)
5 |
6 | sysuse auto, clear
7 | gen i = _n
8 | gen mp0 = price
9 | gen pr0 = price
10 | rename price pr1
11 | rename mpg mp1
12 | greshape long pr mp pr, i(i) j(j) xi(drop)
13 | greshape long pr mp, i(i) j(j) xi(drop)
14 |
15 | webuse reshape3, clear
16 | greshape long inc([0-9]+).+ (ue)(.+)/2 inc([0-9]+).+, by(id) keys(year) match(regex)
17 | greshape long inc([0-9]+).+ (ue)(.+)/2 inc(.+)r, by(id) keys(year) match(regex)
18 | greshape long inc([0-9]+).+ (ue)(.+)/2 waff, by(id) keys(year) match(regex)
19 | greshape long inc([0-9]+).+ (ue)(.+)/2, by(id) keys(year) match(regex)
20 |
--------------------------------------------------------------------------------
/src/github-issues/debug-4/test_gtop.do:
--------------------------------------------------------------------------------
1 | use /home/mauricio/bulk/data/research/census-correctionalss/raw/ICPSR_07852/DS0001/07852-0001-Data.dta, clear
2 | gtop V1 V2
3 |
4 | clear
5 | set obs 100
6 | gen x = mod(_n, 2)
7 | label define x 1 hi
8 | label values x x
9 | gtoplevelsof x
10 |
11 | use /home/mauricio/bulk/data/ra/doyle/cms-ambulance/cepr_acs_2005.dta, clear
12 | gtop socp05 if inlist(socp05, 292040, 292041, 292042)
13 | desc *soc*
14 |
15 | clear
16 | set obs 1000
17 | gen x = ceil(runiform() * 100)
18 | gtop x
19 | gtop x, missrow
20 | gtop x, ntop(1)
21 | gtop x, ntop(-1)
22 | gtop x, ntop(1000)
23 | gtop x, nooth
24 | replace x = . in 20/43
25 | gtop x
26 | gtop x, missrow
27 | gtop x, nomiss
28 | replace x = .a in 50/100
29 | replace x = .b in 200/300
30 | gtop x
31 | gtop x, nomiss
32 | gunique x if !mi(x)
33 | gtop x, missrow
34 | gtop x, missrow ntop(99)
35 |
36 | clear
37 | set obs 1000000
38 | gen x = ceil(runiform() * 10000)
39 | gtop x
40 |
--------------------------------------------------------------------------------
/src/github-issues/debug-4/test_gunique.do:
--------------------------------------------------------------------------------
1 | clear
2 | set more off
3 | set obs 40
4 | gen g = mod(_n, 5)
5 | gen x = ceil(runiform() * 10)
6 | gunique x, by(g) gen(y)
7 | l
8 | gunique x if inlist(g, 2, 3, 4), by(g) gen(z)
9 | l
10 | gunique x if inlist(g, 2, 3, 4), by(g) gen(z)
11 | gunique x if inlist(g, 1, 2, 3), by(g) gen(z) replace
12 | l
13 |
14 | clear
15 | set obs 10
16 | gen x = 1
17 | gegen y = group(x) if x > 1
18 | gegen z = tag(x) if x > 1
19 | egen _y = group(x) if x > 1
20 | egen _z = tag(x) if x > 1
21 | l
22 |
--------------------------------------------------------------------------------
/src/github-issues/debug-4/test_hash.do:
--------------------------------------------------------------------------------
1 | clear
2 | set obs 100000
3 | gen x = ceil(runiform() * 10)
4 | gen y = char(32 + ceil(runiform() * 96)) + char(32 + ceil(runiform() * 96))
5 | gegen `c(obs_t)' z1 = group(x y), hash(1)
6 | gegen `c(obs_t)' z2 = group(x y), hash(2)
7 | gegen `c(obs_t)' z3 = group(x y), hash(3)
8 | sort x y z1
9 | gen `c(obs_t)' id = (x != x[_n-1]) | (y != y[_n-1])
10 | replace id = sum(id)
11 | assert id == z1
12 | assert z1 == z2
13 | assert z2 == z3
14 |
15 | clear
16 | set obs 100000
17 | gen x = ceil(runiform() * 10)
18 | gen y = char(32 + ceil(runiform() * 96)) + char(32 + ceil(runiform() * 96))
19 | gegen `c(obs_t)' z1 = group(y x), hash(1)
20 | gegen `c(obs_t)' z2 = group(y x), hash(2)
21 | gegen `c(obs_t)' z3 = group(y x), hash(3)
22 | sort y x z1
23 | gen `c(obs_t)' id = (x != x[_n-1]) | (y != y[_n-1])
24 | replace id = sum(id)
25 | assert id == z1
26 | assert z1 == z2
27 | assert z2 == z3
28 |
--------------------------------------------------------------------------------
/src/github-issues/debug-5/test.do:
--------------------------------------------------------------------------------
1 | global GTOOLS_BETA=1
2 | global GTOOLS_GREGTABLE=1
3 | sysuse auto, clear
4 | greg price mpg rep78
5 | matlist e(V)
6 | reg price mpg rep78
7 | matlist e(V)
8 |
--------------------------------------------------------------------------------
/src/github-issues/debug-6/greg-coredump.do:
--------------------------------------------------------------------------------
1 | local nobs 10000000
2 |
3 | clear
4 | set obs `nobs'
5 | gen groups = int(runiform() * 1000)
6 | gen rsort = rnormal()
7 | gen rvar = rnormal()
8 | gen ix = _n
9 | sort rsort
10 | local nprocessors = c(processors)
11 | gen e = rnormal()
12 | gen x = rnormal()
13 | gen y = x + e + groups/100
14 | gen g = mod(groups, 10)
15 |
16 | set rmsg on
17 | global GTOOLS_BETA=1
18 | global GTOOLS_TABLE=1
19 | greg y x, by(g)
20 | mata GtoolsRegress.b
21 | mata GtoolsRegress.se
22 |
--------------------------------------------------------------------------------
/src/github-issues/debug-7/quantiles.do:
--------------------------------------------------------------------------------
1 | clear
2 | set obs 1000
3 | gen x = rnormal()
4 | gen e = rnormal()
5 | gen fe = mod(_n, 10)
6 | gen y = 3 * x^2 - x + fe + e
7 | gquantiles xbins = x, nq(252) xtile replace
8 | count if mi(xbins)
9 | * I'm not sure what happened but this seems fine?
10 |
--------------------------------------------------------------------------------
/src/github-issues/debug-8/test.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear
2 | gegen fe = group(rep78)
3 | l fe rep78
4 | gegen fe = group(rep78), missing replace hash(1)
5 | l fe rep78
6 |
7 | sysuse auto, clear
8 | replace make = "" if mod(_n, 7) == 0
9 | gegen fe = group(make)
10 | l fe make
11 | gegen fe = group(make), missing replace
12 | l fe make
13 |
--------------------------------------------------------------------------------
/src/github-issues/debug-9/test.do:
--------------------------------------------------------------------------------
1 | exit, clear
2 | ./build.py --replace
3 | stata16-mp
4 | global GTOOLS_BETA = 1
5 | global GTOOLS_GREGTABLE = 1
6 | sysuse auto, clear
7 | gen _mpg = mpg
8 |
9 | greg price mpg _mpg, absorb(rep78) savecons
10 | reghdfe price mpg _mpg, absorb(rep78)
11 | mata GtoolsRegress.consest
12 |
13 | greg price mpg _mpg [aw=rep78], absorb(rep78) savecons
14 | reghdfe price mpg _mpg [aw=rep78], absorb(rep78)
15 | mata GtoolsRegress.consest
16 |
17 | greg price mpg _mpg , absorb(rep78 headroom) savecons
18 | reghdfe price mpg _mpg , absorb(rep78 headroom)
19 | mata GtoolsRegress.consest
20 |
21 | * Somehow this fails but generally works OK ):
22 | sysuse auto, clear
23 | foreach var in _a* _b* _c* _d* _e* _h* {
24 | cap drop `var'
25 | }
26 | reghdfe price mpg, absorb(_aa=rep78 _bb=headroom) resid(_hh)
27 | greg price mpg [aw=weight], absorb(rep78 headroom) savecons alphas(_cc _dd) resid(_ee) replace algorithm(it)
28 | greg price mpg [aw=weight], absorb(rep78 headroom) savecons alphas(_cc _dd) resid(_ee) replace algorithm(squarem)
29 | greg price mpg [aw=weight], absorb(rep78 headroom) savecons alphas(_cc _dd) resid(_ee) replace algorithm(cg)
30 | mata GtoolsRegress.consest
31 | mata reldif(12225.5, GtoolsRegress.consest)
32 | gstats tab _*, s(mean)
33 |
34 | cap drop _*
35 | reghdfe price [aw=weight], absorb(rep78 headroom) resid(_hdfe_price)
36 | reghdfe mpg [aw=weight], absorb(rep78 headroom) resid(_hdfe_mpg)
37 | gstats hdfe price mpg [aw=weight], absorb(rep78 headroom) gen(_g_price _g_mpg) replace
38 | reg _hdfe_price _g_price
39 | reg _hdfe_mpg _g_mpg
40 | gstats tab _* [aw=weight], s(mean)
41 | reg _hdfe_price _hdfe_mpg [aw=weight]
42 | predict _zz
43 | reg _g_price _g_mpg [aw=weight]
44 |
45 | * for example all these are fine
46 | clear
47 | set obs 100000
48 | gen group = mod(_n, 2)
49 | gen double f1 = round(12.2 * mod(_n, 3), 0.1) if runiform() > 0.05
50 | gen double f2 = round(20 * c(pi) * mod(_n, 5), 0.1) if runiform() > 0.05
51 | gen double f3 = round(9.72 * mod(_n, 41), 0.1) if runiform() > 0.05
52 | gen double x = round(mod(_n, 100), 0.1) if runiform() > 0.05
53 | gen double y = 123 * x + f1 + f2 + round(10000 * runiform(), 1)
54 | cap drop _*
55 | reghdfe y x if group == 1, absorb(_aa=f1 _bb=f2 _cc=f3) resid(_hh)
56 | reghdfe y x if group == 0, absorb( f1 f2 f3)
57 | greg y x, absorb(f1 f2 f3) savecons alphas(_dd _ee _ff) resid(_gg) replace by(group)
58 | mata GtoolsRegress.consest \ GtoolsRegress.r2
59 | gstats tab _*, s(mean)
60 | reg _aa _dd
61 | reg _bb _ee
62 | reg _cc _ff
63 | reg _hh _gg
64 |
--------------------------------------------------------------------------------
/src/github-issues/email-10/bug.do:
--------------------------------------------------------------------------------
1 | !clear
2 | clear
3 | set more off
4 | graph drop _all
5 |
6 | set seed 1
7 | set obs 20
8 | g x = _n
9 | expand 500
10 |
11 | * Case 1: - collapsed means and SDs from gcollapse (line 36 of gtools_test.do) are zero
12 | * g y = .01*(x)^1.2 + .1*invnorm(uniform())
13 | * Case 2: - Now the collapsed means aren't zero but wrong
14 | g y = .01*(x)^1.2 + invnorm(uniform())
15 |
16 | preserve
17 | gcollapse (count) obsy=y (sd) sdy=y (mean) meany=y, by(x)
18 | l
19 | restore
20 | * replace y = int(y)
21 | gcollapse (sd) sdy=y (mean) meany=y , by(x)
22 | l
23 |
--------------------------------------------------------------------------------
/src/github-issues/email-10/fix.do:
--------------------------------------------------------------------------------
1 | clear
2 | set more off
3 | set seed 1
4 | set obs 2
5 | g y = 1.23
6 | g o = 9
7 | l
8 |
9 | * clear
10 | * set obs 10000000
11 | * gen x = abs(runiform())
12 | * gen y = abs(rnormal())
13 | * set rmsg on
14 | * sum x y, meanonly
15 | * global GTOOLS_CALLER ghash
16 | * _gtools_internal, sumcheck(x y)
17 | * matrix list r(sumcheck)
18 | * sum x y
19 |
20 | preserve
21 | gcollapse (count) cy = y (first) fy = y (mean) o, freq(z)
22 | l
23 | restore, preserve
24 | gcollapse (count) y (first) fy = y (nunique) o, freq(z)
25 | l
26 | restore, preserve
27 | gcollapse (first) fy = y (count) y (mean) o, freq(z)
28 | l
29 | restore, preserve
30 | gcollapse (first) fy = y (count) cy = y (count) o, freq(z)
31 | l
32 | restore
33 |
--------------------------------------------------------------------------------
/src/github-issues/email-10/gtools_test.do:
--------------------------------------------------------------------------------
1 | clear
2 | set more off
3 | graph drop _all
4 |
5 | set seed 1 // the seed doesn't matter as far as I can tell
6 | set obs 20
7 | g x = _n
8 | expand 500
9 |
10 | // Case 1: - collapsed means and SDs from gcollapse in line 36 are zero
11 | g y = .01*(x)^1.2 + .1*invnorm(uniform())
12 | // Case 2: - Now the collapsed means aren't zero but wrong
13 | // g y = .01*(x)^1.2 + invnorm(uniform())
14 |
15 | preserve
16 | collapse (count) obsy=y (sd) sdy=y (mean) meany=y , by(x)
17 | foreach v in obsy sdy meany {
18 | rename `v' `v'_stata_builtin
19 | label var `v' ""
20 | }
21 | tempfile 1
22 | save `1'
23 | restore
24 |
25 | preserve
26 | collapse (count) obsy=y (sd) sdy=y (mean) meany=y , by(x)
27 | foreach v in obsy sdy meany {
28 | rename `v' `v'_ftools
29 | label var `v' ""
30 | }
31 | tempfile 2
32 | save `2'
33 | restore
34 |
35 | preserve
36 | gcollapse (count) obsy=y (sd) sdy=y (mean) meany=y , by(x)
37 | foreach v in obsy sdy meany {
38 | rename `v' `v'_gtools
39 | label var `v' ""
40 | }
41 | tempfile 3
42 | save `3'
43 | restore
44 |
45 |
46 | preserve
47 | gcollapse (sd) sdy=y (mean) meany=y , by(x)
48 | foreach v in sdy meany {
49 | rename `v' `v'_gtools1
50 | label var `v' ""
51 | }
52 | tempfile 4
53 | save `4'
54 | restore
55 |
56 |
57 | use `1', clear
58 | merge 1:1 x using `2'
59 | drop _merge
60 | merge 1:1 x using `3'
61 | drop _merge
62 | merge 1:1 x using `4'
63 | drop _merge
64 | order x meany* sd* obs*
65 | br
66 | local i 100
67 | twoway (line meany_s x) (line meany_f x) ///
68 | (line meany_gtools x) (line meany_gtools1 x) , name(g`i++')
69 |
70 |
71 | foreach var of varlist mean* {
72 | twoway scatter `var' meany_stata, name(g`i++')
73 | }
74 |
--------------------------------------------------------------------------------
/src/github-issues/email-10/mwe.do:
--------------------------------------------------------------------------------
1 | clear
2 | set more off
3 | set seed 1
4 | set obs 2
5 | g y = 1.23
6 | l
7 | preserve
8 | gcollapse (count) cy = y (first) fy = y, freq(z)
9 | l
10 | restore
11 | gcollapse (first) fy = y (count) cy = y, freq(z)
12 | l
13 |
--------------------------------------------------------------------------------
/src/github-issues/email-11/test.do:
--------------------------------------------------------------------------------
1 | sysuse auto, clear
2 | by foreign: egen _mean1 = mean(price - price[1])
3 | by foreign: gegen _mean2 = mean(price - price[1])
4 | gen zz = abs((_mean1 - _mean2) / _mean1)
5 | gstats sum zz
6 | * test: byable(onecall) program that displays _by() and the local _byvars, then describes the data
7 | capture program drop test
8 | program define test, byable(onecall)
9 | disp _by(), "`_byvars'"
10 | desc
11 | end
12 | test
13 | by foreign: test
14 | bysort mpg: test
15 | by foreign (price), sort: test
16 |
17 | clear
18 | set obs 10
19 | gen var = mod(_n, 3)
20 | gen y = _n
21 | gen u = runiform()
22 | cap noi by var: gegen x = mean(max(y, y[1]))
23 | by var (u), sort: gegen x = mean(max(y, y[1]))
24 | sort y
25 | bys var (u): gegen z = mean(max(y, y[1]))
26 | bys var (u): egen w = mean(max(y, y[1]))
27 | assert x == z
28 | assert x == w
29 |
--------------------------------------------------------------------------------
/src/github-issues/email-12/test.do:
--------------------------------------------------------------------------------
1 | clear all
2 | set obs 100000
3 |
4 | gen x = rnormal()
5 | gen n = round(_n/10)
6 |
7 | * pause on
8 | * pause
9 | *
10 | * forvalues x = 1/100000{
11 | * di "`x'"
12 | * gcollapse (max) maxx = x, by(n) merge
13 | * drop maxx
14 | * }
15 |
16 | * forvalues x = 1 / 100{
17 | * di "`x'"
18 | * gcollapse (max) maxx = x, by(n) merge
19 | * drop maxx
20 | * }
21 |
--------------------------------------------------------------------------------
/src/github-issues/email-12/test2.do:
--------------------------------------------------------------------------------
1 | * Notes:
2 | *
3 | * You commented out select chunks until you narrowed down the memory leak
4 | *
5 | * The following are not freed on purpose because they are standing by for strL vars and such
6 | *
7 | * allocated: st_info->strL_bybytes
8 | * allocated: st_info->strL_bytes
9 | *
10 | * This was the issue (they were not being freed):
11 | *
12 | * allocated: st_info->st_by_charx
13 | * allocated: st_info->st_by_numx
14 |
15 | clear all
16 | set obs 1000000
17 |
18 | gen x = rnormal()
19 | gen n = round(_n/10)
20 |
21 | gcollapse (max) maxx = x, by(n) merge forcemem v bench(3)
22 | drop maxx
23 | sleep 100
24 |
25 | forvalues i = 1 / 100 {
26 | di "`i'"
27 | gcollapse (max) maxx = x, by(n) merge forcemem
28 | drop maxx
29 | sleep 100
30 | }
31 |
--------------------------------------------------------------------------------
/src/github-issues/email-13/dec_gtools.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/email-13/dec_gtools.dta
--------------------------------------------------------------------------------
/src/github-issues/email-13/dec_manual.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/email-13/dec_manual.dta
--------------------------------------------------------------------------------
/src/github-issues/email-13/dec_stata.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/email-13/dec_stata.dta
--------------------------------------------------------------------------------
/src/github-issues/email-13/example.do:
--------------------------------------------------------------------------------
1 | * use "test.dta", clear
2 |
3 | clear
4 | set obs 5111
5 | gen group = mod(_n, 26)
6 | gen w = abs(round(rnormal() * 13 + 20, 0.1))
7 | gen inc = abs(round(rnormal() * 7000 + 1200))
8 |
9 | local nq 10
10 | bysort group (inc): gen w_cum=sum(w)
11 | bysort group (inc): egen w_tot=sum(w)
12 | gen cum_share=w_cum/w_tot
13 |
14 | * Percentiles Manual
15 | preserve
16 | gen dec=floor((w_cum/w_tot)*`nq')*(100/`nq')
17 | bysort group dec: egen min_dec=min(w_cum)
18 | gen dec_manual=inc if min_dec==w_cum
19 |
20 | * gen dec=ceil(cum_share * `nq') * (100/`nq')
21 | * bysort group dec (inc): gen dec_manual = inc[_N]
22 |
23 | keep if !missing(dec_manual)
24 | keep group dec dec_manual
25 | duplicates drop
26 | isid group dec
27 | save "dec_manual.dta", replace
28 | restore
29 |
30 | * Percentiles Stata
31 | preserve
32 | levelsof group, local(group)
33 | foreach g of local group{
34 | pctile dec_stata`g'=inc [aw=w] if group==`g', nq(`nq') genp(dec`g')
35 | }
36 | keep if !missing(dec1)
37 | drop group
38 | reshape long dec_stata dec, i(cum_share) j(group)
39 | keep group dec dec_stata
40 | isid group dec
41 | save "dec_stata.dta", replace
42 | restore
43 |
44 | * Percentiles Gtools
45 | preserve
46 | gquantiles dec_gtools=inc [aw=w], pctile cutby strict by(group) nq(`nq') genp(dec)
47 | keep if !missing(dec)
48 | keep group dec dec_gtools
49 | isid group dec
50 | save "dec_gtools.dta", replace
51 | restore
52 |
53 | * Merge for Comparison
54 | use "dec_manual.dta", clear
55 | merge 1:1 group dec using "dec_stata.dta", keepusing(dec_stata) nogen
56 | merge 1:1 group dec using "dec_gtools.dta", keepusing(dec_gtools) nogen
57 |
58 | ***********************************************************************
59 | * Debugging *
60 | ***********************************************************************
61 |
62 | * Narrowed the issue to this:
63 | local nq = 10
64 | sysuse auto, clear
65 | keep if foreign
66 | gen w = 1
67 | gquantiles g1=price [fw = 1], cutby pctile nq(`nq') by(foreign) strict
68 | gquantiles g2=price [fw = 1], cutby pctile nq(`nq') by(foreign) strict xtile(x1)
69 |
70 | * g2 is correct but g1 is not. It turns out there was a bug in the code
71 | * to read in the data with by() and weights when only pctile requested.
72 |
--------------------------------------------------------------------------------
/src/github-issues/email-14/gtools_bug.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/github-issues/email-14/gtools_bug.dta
--------------------------------------------------------------------------------
/src/github-issues/email-14/test.do:
--------------------------------------------------------------------------------
1 | use ./gtools_bug.dta, clear
2 | gegen a=sd(g12) [aw=share], by(time) replace
3 | gegen b=sd(g36) [aw=share], by(time) replace
4 | gegen s=total(share), by(time)
5 |
--------------------------------------------------------------------------------
/src/gtools.pkg:
--------------------------------------------------------------------------------
1 | v 1.11.8
2 | d
3 | d 'GTOOLS': Faster implementation of common Stata commands optimized for large datasets
4 | d
5 | d Faster Stata for big data. Gtools provides a hash-based implementation
6 | d of common Stata commands using C plugins for a massive speed
7 | d improvement. Gtools implements gcollapse, greshape, gquantiles
8 | d (pctile, xtile, and _pctile), gcontract, gegen, gisid, glevelsof,
9 | d gunique, gdistinct, gduplicates, gtop, and gstats (winsor, summarize,
10 | d tabstat). Syntax is largely analogous to their native counterparts
11 | d (see help gtools for details). This package was inspired by Sergio
12 | d Correia's ftools.
13 | d
14 | d KW: plugin
15 | d KW: gtools
16 | d KW: collapse
17 | d KW: reshape
18 | d KW: regress
19 | d KW: ivregress
20 | d KW: glm
21 | d KW: logit
22 | d KW: poisson
23 | d KW: residualize
24 | d KW: hdfe
25 | d KW: contract
26 | d KW: egen
27 | d KW: xtile
28 | d KW: fastxtile
29 | d KW: pctile
30 | d KW: _pctile
31 | d KW: levelsof
32 | d KW: toplevelsof
33 | d KW: isid
34 | d KW: duplicates
35 | d KW: unique
36 | d KW: distinct
37 | d KW: sort
38 | d KW: gsort
39 | d KW: gegen
40 | d KW: fasterxtile
41 | d KW: gquantiles
42 | d KW: gcollapse
43 | d KW: greshape
44 | d KW: gregress
45 | d KW: givregress
46 | d KW: gglm
47 | d KW: gpoisson
48 | d KW: glogit
49 | d KW: gcontract
50 | d KW: gisid
51 | d KW: gduplicates
52 | d KW: glevelsof
53 | d KW: gtoplevelsof
54 | d KW: gunique
55 | d KW: gdistinct
56 | d KW: hashsort
57 | d KW: winsor
58 | d KW: summarize
59 | d KW: tabstat
60 | d
61 | d Requires: Stata version 13.1
62 | d
63 | d Author: Mauricio Caceres Bravo
64 | d Support: email mauricio.caceres.bravo@gmail.com
65 | d
66 | d Distribution-Date: 20240628
67 | d
68 | f _gtools_internal.ado
69 | f _gtools_internal.mata
70 | f gcollapse.ado
71 | f gcontract.ado
72 | f gegen.ado
73 | f gquantiles.ado
74 | f fasterxtile.ado
75 | f gunique.ado
76 | f gdistinct.ado
77 | f glevelsof.ado
78 | f gtop.ado
79 | f gtoplevelsof.ado
80 | f gisid.ado
81 | f gduplicates.ado
82 | f hashsort.ado
83 | f greshape.ado
84 | f greg.ado
85 | f gregress.ado
86 | f givregress.ado
87 | f gglm.ado
88 | f gpoisson.ado
89 | f gstats.ado
90 | f gtools.ado
91 | f gcollapse.sthlp
92 | f gcontract.sthlp
93 | f gegen.sthlp
94 | f gquantiles.sthlp
95 | f fasterxtile.sthlp
96 | f gunique.sthlp
97 | f gdistinct.sthlp
98 | f glevelsof.sthlp
99 | f gtop.sthlp
100 | f gtoplevelsof.sthlp
101 | f gisid.sthlp
102 | f gduplicates.sthlp
103 | f hashsort.sthlp
104 | f greshape.sthlp
105 | f greg.sthlp
106 | f gregress.sthlp
107 | f givregress.sthlp
108 | f gglm.sthlp
109 | f gpoisson.sthlp
110 | f gstats.sthlp
111 | f gstats_residualize.sthlp
112 | f gstats_hdfe.sthlp
113 | f gstats_winsor.sthlp
114 | f gstats_tab.sthlp
115 | f gstats_sum.sthlp
116 | f gstats_summarize.sthlp
117 | f gstats_transform.sthlp
118 | f gstats_range.sthlp
119 | f gstats_moving.sthlp
120 | f gtools.sthlp
121 | f lgtools.mlib
122 | f gtools_windows_v2.plugin
123 | f gtools_unix_v2.plugin
124 | f gtools_macosx_v2.plugin
125 | f gtools_windows_v3.plugin
126 | f gtools_unix_v3.plugin
127 | f gtools_macosx_v3.plugin
128 |
--------------------------------------------------------------------------------
/src/plugin/api/groupby/transforms.c:
--------------------------------------------------------------------------------
1 | /* Scale a vector: target[i] = source[i] * scale for i in [0, N).
2 |  * source and target may be the same buffer, in which case the
3 |  * scaling happens in place. */
4 | void GtoolsTransformScaleVector (
5 |     ST_double *source,
6 |     ST_double *target,
7 |     GT_size N,
8 |     ST_double scale)
9 | {
10 |     GT_size i;
11 |     /* Always read from source. The previous out-of-place branch did
12 |      * target[i] *= scale, which never looked at source at all. */
13 |     for (i = 0; i < N; i++) {
14 |         target[i] = source[i] * scale;
15 |     }
16 | }
17 |
18 | /* Scale K consecutive length-N blocks: block k of source is passed to
19 |  * GtoolsTransformScaleVector with factor scale[k] and block k of target. */
20 | void GtoolsTransformScaleMatrix (
21 |     ST_double *source,
22 |     ST_double *target,
23 |     GT_size K,
24 |     GT_size N,
25 |     ST_double *scale)
26 | {
27 |     GT_size col, offset = 0;
28 |     for (col = 0; col < K; col++, offset += N) {
29 |         GtoolsTransformScaleVector(source + offset, target + offset, N, scale[col]);
30 |     }
31 | }
--------------------------------------------------------------------------------
/src/plugin/api/groupby/transforms_unweighted.c:
--------------------------------------------------------------------------------
1 | /* Dispatch an index-based transform by stat code. Only code -2
2 |  * (de-mean) is handled; any other code leaves target untouched. */
3 | void GtoolsTransformIndex (
4 |     ST_double *source,
5 |     ST_double *target,
6 |     GT_size *index,
7 |     GT_size N,
8 |     ST_double statcode)
9 | {
10 |     if ( statcode != -2 ) return;
11 |     GtoolsTransformDeMeanIndex(source, target, index, N);
12 | }
13 | /* De-mean the N entries of source selected by index using the value returned
14 |  * by GtoolsStatsMeanIndex; results go to the same positions of target. */
15 | void GtoolsTransformDeMeanIndex (
16 |     ST_double *source,
17 |     ST_double *target,
18 |     GT_size *index,
19 |     GT_size N)
20 | {
21 |     GT_size i;
22 |     ST_double mean = GtoolsStatsMeanIndex(source, index, N);
23 |     for (i = 0; i < N; i++, index++)
24 |         target[*index] = source[*index] - mean;
25 | }
--------------------------------------------------------------------------------
/src/plugin/api/groupby/transforms_weights.c:
--------------------------------------------------------------------------------
1 | /* Dispatch a weighted index-based transform by stat code. Only code -2
2 |  * (weighted de-mean) is handled; any other code leaves target untouched. */
3 | void GtoolsTransformIndexWeighted (
4 |     ST_double *source,
5 |     ST_double *weights,
6 |     ST_double *target,
7 |     GT_size *index,
8 |     GT_size N,
9 |     ST_double statcode)
10 | {
11 |     if ( statcode != -2 ) return;
12 |     GtoolsTransformDeMeanIndexWeighted(source, weights, target, index, N);
13 | }
14 | /* De-mean the N entries of source selected by index using the value returned
15 |  * by GtoolsStatsMeanIndexWeighted; results go to the same positions of target. */
16 | void GtoolsTransformDeMeanIndexWeighted (
17 |     ST_double *source,
18 |     ST_double *weights,
19 |     ST_double *target,
20 |     GT_size *index,
21 |     GT_size N)
22 | {
23 |     GT_size i;
24 |     ST_double wmean = GtoolsStatsMeanIndexWeighted(source, weights, index, N);
25 |     for (i = 0; i < N; i++, index++)
26 |         target[*index] = source[*index] - wmean;
27 | }
28 | /**********************************************************************
29 | * Weighted *
30 | **********************************************************************/
31 |
32 | void GtoolsTransformBiasedStandardizeVector (
33 | ST_double *source,
34 | ST_double *target,
35 | ST_double *weights,
36 | GT_size N,
37 | ST_double *sd)
38 | {
39 | GT_size i;
40 | ST_double z = GtoolsStatsBiasedStd(source, N, weights);
41 | if ( source == target ) {
42 | for (i = 0; i < N; i++)
43 | if ( z != 0 ) target[i] = source[i] / z;
44 | }
45 | else {
46 | for (i = 0; i < N; i++)
47 | if ( z != 0 ) target[i] /= z;
48 | }
49 | if ( sd != NULL ) *sd = z;
50 | }
51 |
52 | void GtoolsTransformBiasedStandardizeMatrix (
53 | ST_double *source,
54 | ST_double *target,
55 | ST_double *weights,
56 | GT_size K,
57 | GT_size N,
58 | ST_double *sd)
59 | {
60 | GT_size k;
61 | ST_double *src = source, *trg = target;
62 | for (k = 0; k < K; k++, src += N, trg += N) {
63 | GtoolsTransformBiasedStandardizeVector(src, trg, weights, N, sd == NULL? NULL: sd + k);
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/src/plugin/api/hashing.h:
--------------------------------------------------------------------------------
1 | #ifndef GTOOLSHASHINGAPI
2 | #define GTOOLSHASHINGAPI
3 |
4 | #ifndef GTOOLS_TYPES
5 | #include <../common/gttypes.h>
6 | #endif
7 |
8 | #ifndef GTOOLSOMP
9 | #define GTOOLSOMP 0
10 | #endif
11 |
/*
 * State object shared by the GtoolsHash* routines declared in this
 * header; each of them takes a pointer to one of these.
 *
 * The fields are grouped by the section comments below. The notes
 * marked NOTE(review) are inferred from field names only and should be
 * confirmed against the implementation.
 */
struct GtoolsHash {
    // Pointers to existing objects
    // (x, nobs, kvars, types and invert mirror the GtoolsHashInit arguments)
    void *x;
    void *xptr;
    GT_size offset;
    GT_size nobs;
    GT_size _nobspanel;
    GT_size _nobsinit;
    GT_size kvars;
    GT_int *types;
    GT_bool *invert;
    // Variables to be computed
    GT_bool radixOK;
    GT_bool bijectOK;
    GT_bool sorted;
    GT_bool allNumeric;
    GT_bool allInteger;
    GT_size rowbytes;
    GT_size max1;
    GT_size nlevels;
    // Aux variables to be allocated
    // NOTE(review): each alloc* flag presumably records whether the
    // pointer declared just above it currently owns memory -- confirm
    // against GtoolsHashFree.
    GT_size *sizes;
    GT_bool allocSizes;
    GT_size *positions;
    GT_bool allocPositions;
    GT_size *index;
    GT_bool allocIndex;
    GT_size *indexj;
    GT_bool allocIndexj;
    GT_size *nj;
    GT_bool allocNj;
    GT_size *info;
    GT_bool allocInfo;
    // Hash
    // NOTE(review): the relationship between the hN pointers and the
    // hashN buffers is not visible from this header -- TODO confirm.
    uint64_t *h1ptr;
    uint64_t *h2ptr;
    uint64_t *h3ptr;
    uint64_t *hash1;
    uint64_t *hash2;
    uint64_t *hash3;
    GT_bool allocHash1;
    GT_bool allocHash2;
    GT_bool allocHash3;
    // Misc
    // NOTE(review): the hdfe* fields look like buffers, options and
    // counters for the HDFE (absorb) routines -- confirm against callers.
    ST_double *hdfeMeanBuffer;
    ST_double *hdfeBuffer;
    ST_double *hdfeGammaSource;
    ST_double *hdfeGammaTarget;
    GT_bool hdfeMeanBufferAlloc;
    GT_bool hdfeBufferAlloc;
    GT_bool hdfeFallback;
    GT_bool hdfeTraceIter;
    GT_bool hdfeStandardize;
    GT_size hdfeIter;
    GT_size hdfeFeval;
    GT_size hdfeMaxIter;
    GT_size hdfeRc;
};
70 |
71 | void GtoolsHashInit (
72 | struct GtoolsHash *GtoolsHashInfo,
73 | void *x,
74 | GT_size nobs,
75 | GT_size kvars,
76 | GT_int *types,
77 | GT_bool *invert
78 | );
79 |
80 | void GtoolsHashAbsorbByLoop (struct GtoolsHash *GtoolsHashInfo, GT_size K);
81 | GT_int GtoolsHashPanelAbsorb (struct GtoolsHash *GtoolsHashInfo, GT_size K, GT_size N);
82 | GT_int GtoolsHashSetupAbsorb (
83 | void *FE,
84 | struct GtoolsHash *GtoolsHashInfo,
85 | GT_size N,
86 | GT_size K,
87 | GT_int *types,
88 | GT_size *offsets
89 | );
90 |
91 | GT_int GtoolsMapIndex (struct GtoolsHash *GtoolsHashInfo);
92 | GT_int GtoolsHashSetup (struct GtoolsHash *GtoolsHashInfo);
93 | GT_int GtoolsHashSort (struct GtoolsHash *GtoolsHashInfo);
94 | GT_int GtoolsHashPanel (struct GtoolsHash *GtoolsHashInfo);
95 | GT_int GtoolsHashPanelBijection (struct GtoolsHash *GtoolsHashInfo);
96 | GT_int GtoolsHashPanel128 (struct GtoolsHash *GtoolsHashInfo);
97 | GT_int GtoolsHashPanelSorted (struct GtoolsHash *GtoolsHashInfo);
98 |
99 | void GtoolsHashCheckNumeric (struct GtoolsHash *GtoolsHashInfo);
100 | void GtoolsHashCheckInteger (struct GtoolsHash *GtoolsHashInfo);
101 | void GtoolsHashCheckSorted (struct GtoolsHash *GtoolsHashInfo);
102 | void GtoolsHashFree (struct GtoolsHash *GtoolsHashInfo);
103 |
104 | void GtoolsHashCheckBijection (
105 | struct GtoolsHash *GtoolsHashInfo,
106 | GT_int *maxs,
107 | GT_int *mins,
108 | GT_bool *allMiss,
109 | GT_bool *anyMiss
110 | );
111 |
112 | void GtoolsHashBijection (
113 | struct GtoolsHash *GtoolsHashInfo,
114 | GT_int *maxs,
115 | GT_int *mins,
116 | GT_bool *allMiss,
117 | GT_bool *anyMiss
118 | );
119 |
120 | #endif
121 |
--------------------------------------------------------------------------------
/src/plugin/collapse/gtools_math.h:
--------------------------------------------------------------------------------
1 | #ifndef GTOOLS_MATH
2 | #define GTOOLS_MATH
3 |
4 | ST_double gf_switch_fun (char *fname, ST_double v[], const GT_size start, const GT_size end);
5 | ST_double gf_switch_fun_code (ST_double fcode, ST_double v[], const GT_size start, const GT_size end);
6 | ST_double gf_code_fun (char * fname);
7 |
8 | ST_double gf_array_dquantile_range (
9 | ST_double v[],
10 | const GT_size start,
11 | const GT_size end,
12 | const ST_double quantile
13 | );
14 |
15 | ST_double gf_array_dsum_range (const ST_double v[], const GT_size start, const GT_size end);
16 | ST_double gf_array_dmean_range (const ST_double v[], const GT_size start, const GT_size end);
17 | ST_double gf_array_dgeomean_range (const ST_double v[], const GT_size start, const GT_size end);
18 | ST_double gf_array_dsd_range (const ST_double v[], const GT_size start, const GT_size end);
19 | ST_double gf_array_dvar_range (const ST_double v[], const GT_size start, const GT_size end);
20 | ST_double gf_array_dcv_range (const ST_double v[], const GT_size start, const GT_size end);
21 | ST_double gf_array_dmax_range (const ST_double v[], const GT_size start, const GT_size end);
22 | ST_double gf_array_dmin_range (const ST_double v[], const GT_size start, const GT_size end);
23 | ST_double gf_array_drange_range (const ST_double v[], const GT_size start, const GT_size end);
24 |
25 | ST_double gf_array_dsemean_range (const ST_double v[], const GT_size start, const GT_size end);
26 | ST_double gf_array_dsebinom_range (const ST_double v[], const GT_size start, const GT_size end);
27 | ST_double gf_array_dsepois_range (const ST_double v[], const GT_size start, const GT_size end);
28 |
29 | ST_double gf_array_dskew_range (const ST_double v[], const GT_size start, const GT_size end);
30 | ST_double gf_array_dkurt_range (const ST_double v[], const GT_size start, const GT_size end);
31 |
32 | ST_double gf_array_dmedian_range (ST_double v[], const GT_size start, const GT_size end);
33 | ST_double gf_array_diqr_range (ST_double v[], const GT_size start, const GT_size end);
34 |
35 | ST_double gf_array_dgini_range (ST_double v[], const GT_size start, const GT_size end);
36 | ST_double gf_array_dginidrop_range (ST_double v[], const GT_size start, const GT_size end);
37 | ST_double gf_array_dginikeep_range (ST_double v[], const GT_size start, const GT_size end);
38 |
39 | int gf_qsort_compare (const void * a, const void * b);
40 | GT_bool gf_array_dsorted_range (const ST_double v[], const GT_size start, const GT_size end);
41 | GT_bool gf_array_dsame (const ST_double *v, const GT_size N);
42 |
43 | #endif
44 |
45 | // -23 // variance
46 | // -24 // cv
47 | // -25 // range
48 | // 1000 + # // #th smallest
49 | // -1000 - # // #th largest
50 | // 1000.5 + # // raw #th smallest
51 | // -1000.5 - # // raw #th largest
52 |
--------------------------------------------------------------------------------
/src/plugin/collapse/gtools_math_unw.h:
--------------------------------------------------------------------------------
1 | #ifndef GTOOLS_MATH_UNW
2 | #define GTOOLS_MATH_UNW
3 |
4 | ST_double gf_switch_fun_code_unw (
5 | ST_double fcode,
6 | ST_double *v,
7 | GT_size N,
8 | GT_size vcount,
9 | ST_double *p_buffer
10 | );
11 |
12 | ST_double gf_array_dquantile_unweighted (
13 | ST_double *v,
14 | GT_size N,
15 | ST_double quantile,
16 | ST_double *p_buffer
17 | );
18 |
19 | ST_double gf_array_dselect_unweighted (
20 | ST_double *v,
21 | GT_size N,
22 | GT_int sth,
23 | GT_size end,
24 | ST_double *p_buffer
25 | );
26 |
27 | ST_double gf_array_diqr_unweighted (
28 | ST_double *v,
29 | GT_size N,
30 | ST_double *p_buffer
31 | );
32 |
33 | ST_double gf_array_dmean_unweighted (
34 | ST_double *v,
35 | GT_size N
36 | );
37 |
38 | ST_double gf_array_dgeomean_unweighted (
39 | ST_double *v,
40 | GT_size N
41 | );
42 |
43 | ST_double gf_array_dsd_unweighted (
44 | ST_double *v,
45 | GT_size N
46 | );
47 |
48 | ST_double gf_array_dvar_unweighted (
49 | ST_double *v,
50 | GT_size N
51 | );
52 |
53 | ST_double gf_array_dcv_unweighted (
54 | ST_double *v,
55 | GT_size N
56 | );
57 |
58 | ST_double gf_array_dsemean_unweighted (
59 | ST_double *v,
60 | GT_size N
61 | );
62 |
63 | ST_double gf_array_dsebinom_unweighted (
64 | ST_double *v,
65 | GT_size N
66 | );
67 |
68 | ST_double gf_array_dsepois_unweighted (
69 | ST_double *v,
70 | GT_size N
71 | );
72 |
73 | ST_double gf_array_dkurt_unweighted (
74 | ST_double *v,
75 | GT_size N
76 | );
77 |
78 | ST_double gf_array_dskew_unweighted (
79 | ST_double *v,
80 | GT_size N
81 | );
82 |
83 | ST_double gf_array_dgini_unweighted (
84 | ST_double *v,
85 | GT_size N,
86 | ST_double *p_buffer
87 | );
88 |
89 | ST_double gf_array_dginidrop_unweighted (
90 | ST_double *v,
91 | GT_size N,
92 | ST_double *p_buffer
93 | );
94 |
95 | ST_double gf_array_dginikeep_unweighted (
96 | ST_double *v,
97 | GT_size N,
98 | ST_double *p_buffer
99 | );
100 |
101 | #endif
102 |
--------------------------------------------------------------------------------
/src/plugin/collapse/gtools_utils.h:
--------------------------------------------------------------------------------
1 | #ifndef GTOOLS_UTILS
2 | #define GTOOLS_UTILS
3 |
4 | ST_double gf_benchmark (char *fname);
5 | ST_double gf_query_free_space (char *fname);
6 | ST_double gf_dipow(ST_double base, GT_size exp);
7 | GT_size gf_iipow(GT_size base, GT_size exp);
8 | void gf_split_path_file(char** p, char** f, char *pf);
9 |
10 | void gf_write_collapsed(
11 | char *collapsed_file,
12 | ST_double *collapsed_data,
13 | GT_size kstart,
14 | GT_size kend,
15 | GT_size J
16 | );
17 |
18 | void gf_read_collapsed(
19 | char *collapsed_file,
20 | ST_double *collapsed_data,
21 | GT_size knum,
22 | GT_size J
23 | );
24 |
25 | #endif
26 |
--------------------------------------------------------------------------------
/src/plugin/common/RadixSortTesting:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/plugin/common/RadixSortTesting
--------------------------------------------------------------------------------
/src/plugin/common/RadixSortTypedIndex.c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcaceresb/stata-gtools/f8e303d90be1ac7fb469b9ed7caf202957139b69/src/plugin/common/RadixSortTypedIndex.c
--------------------------------------------------------------------------------
/src/plugin/common/fixes.c:
--------------------------------------------------------------------------------
#ifdef __APPLE__
#else
void * memcpy (void *dest, const void *src, size_t n);

/**
 * @brief Implement memcpy as a thin wrapper around memmove (not on OSX)
 *
 * Stata requires plugins to be compiled as shared executables. Since
 * this is being compiled on a relatively new linux system (by 2017
 * standards), some of the dependencies set in this way cannot be
 * fulfilled by older Linux systems. In particular, using memcpy as
 * provided by my system creates a dependency to glibc 2.14, which cannot
 * be fulfilled on some older systems (notably the servers where I
 * intend to use the plugin; hence I implement memcpy and get rid of
 * that particular dependency).
 *
 * @param dest pointer to place in memory to copy @src
 * @param src pointer to place in memory that is source of data
 * @param n how many bytes to copy
 * @return @dest, as returned by memmove after copying @n bytes from @src
 */
void * memcpy (void *dest, const void *src, size_t n)
{
    return memmove(dest, src, n);
}
#endif
27 |
28 | // TODO: nice platform-specific way to profile time; the below is hack-ish
29 |
30 | #if defined(_WIN64) || defined(_WIN64) || defined(__MINGW32__) || defined(__MINGW64__)
31 |
32 | #define GTOOLS_TIMER(GtoolsTimerVariable) clock_t (GtoolsTimerVariable) = clock();
33 | #define GTOOLS_RUNNING_TIMER(GtoolsTimerVariable, msg) sf_running_timer(&GtoolsTimerVariable, msg)
34 | #define GTOOLS_UPDATE_TIMER(GtoolsTimerVariable) GtoolsTimerVariable = clock()
35 |
36 | #elif defined(__APPLE__)
37 |
38 | #define GTOOLS_TIMER(GtoolsTimerVariable) clock_t (GtoolsTimerVariable) = clock();
39 | #define GTOOLS_RUNNING_TIMER(GtoolsTimerVariable, msg) sf_running_timer(&GtoolsTimerVariable, msg)
40 | #define GTOOLS_UPDATE_TIMER(GtoolsTimerVariable) GtoolsTimerVariable = clock()
41 |
42 | #else
43 |
44 | void sf_running_timespec (struct timespec *timer, const char *msg);
45 | void sf_running_timespec (struct timespec *timer, const char *msg)
46 | {
47 | struct timespec update; clock_gettime(CLOCK_REALTIME, &update);
48 | double diff = (double) (update.tv_nsec - timer->tv_nsec) / 1e9 +
49 | (double) (update.tv_sec - timer->tv_sec);
50 |
51 | sf_printf (msg);
52 | sf_printf (" (%.3f seconds).\n", diff);
53 | *timer = update;
54 | }
55 |
56 | #define GTOOLS_TIMER(GtoolsTimerVariable) \
57 | struct timespec (GtoolsTimerVariable); \
58 | clock_gettime(CLOCK_REALTIME, &GtoolsTimerVariable)
59 |
60 | #define GTOOLS_RUNNING_TIMER(GtoolsTimerVariable, msg) \
61 | sf_running_timespec(&GtoolsTimerVariable, msg)
62 |
63 | #define GTOOLS_UPDATE_TIMER(GtoolsTimerVariable) \
64 | clock_gettime(CLOCK_REALTIME, &GtoolsTimerVariable)
65 |
66 | #endif
67 |
--------------------------------------------------------------------------------
/src/plugin/common/gttypes.h:
--------------------------------------------------------------------------------
1 | #ifndef GTOOLS_TYPES
2 | #define GTOOLS_TYPES
3 |
// The two header names were lost from the #include lines. The typedefs
// below need the fixed-width integer types from <stdint.h>, and the
// GT_*_cfmt / GT_*_sfmt macros further down need the PRIu64 / PRId64
// format macros from <inttypes.h>.
#include <stdint.h>
#include <inttypes.h>

// Fixed-width integer aliases used throughout the plugin
typedef uint8_t  GT_bool ;
typedef uint64_t GT_size ;
typedef int64_t  GT_int ;
typedef uint16_t GT_smallsize ;
typedef int16_t  GT_smallint ;
12 |
13 | // #if defined(_WIN64) || defined(_WIN32)
14 | // # define GT_size_cfmt "%lu"
15 | // # define GT_size_sfmt "lu"
16 | // # define GT_int_cfmt "%ld"
17 | // # define GT_int_sfmt "ld"
18 | // #else
19 | // # define GT_size_cfmt "%'lu"
20 | // # define GT_size_sfmt "lu"
21 | // # define GT_int_cfmt "%'ld"
22 | // # define GT_int_sfmt "ld"
23 | // #endif
24 |
25 | // #if defined(_WIN64) || defined(_WIN32)
26 | // # define GT_size_cfmt "%I64u"
27 | // # define GT_size_sfmt "I64u"
28 | // # define GT_int_cfmt "%I64d"
29 | // # define GT_int_sfmt "I64d"
30 | // #else
31 | // # define GT_size_cfmt "%'I64u"
32 | // # define GT_size_sfmt "I64u"
33 | // # define GT_int_cfmt "%'I64d"
34 | // # define GT_int_sfmt "I64d"
35 | // #endif
36 |
37 | // NOTE: Comma-printing via locale messes up on some systems, presumably
38 | // because their locale is reset in a way that makes Stata give errors.
39 | // Best to not rely on locale at all.
40 |
41 | /*
42 | * #if defined(_WIN64) || defined(_WIN32)
43 | *
44 | * #define COMMA_PRINTING \
45 | * setlocale(LC_NUMERIC, ""); \
46 | * struct lconv *ptrLocale = localeconv(); \
47 | * strcpy(ptrLocale->thousands_sep, ",");
48 | * #else
49 | * #define COMMA_PRINTING setlocale (LC_ALL, "");
50 | * #endif
51 | *
52 | */
53 |
54 | // #if defined(_WIN64) || defined(_WIN32)
55 | // # define GT_size_cfmt "%" PRIu64
56 | // # define GT_size_sfmt PRIu64
57 | // # define GT_int_cfmt "%" PRId64
58 | // # define GT_int_sfmt PRId64
59 | // #else
60 | // # define GT_size_cfmt "%'" PRIu64
61 | // # define GT_size_sfmt PRIu64
62 | // # define GT_int_cfmt "%'" PRId64
63 | // # define GT_int_sfmt PRId64
64 | // #endif
65 |
66 | #define GT_size_cfmt "%" PRIu64
67 | #define GT_size_sfmt PRIu64
68 | #define GT_int_cfmt "%" PRId64
69 | #define GT_int_sfmt PRId64
70 |
71 | #endif
72 |
--------------------------------------------------------------------------------
/src/plugin/common/sf_printf.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "sf_printf.h"
5 |
6 | #define BUF_MAX 4096
7 |
8 | void sf_printf_debug (const char *fmt, ...)
9 | {
10 | va_list args;
11 | va_start (args, fmt);
12 | char buf[BUF_MAX];
13 | vsprintf (buf, fmt, args);
14 | printf ("%s", buf);
15 | SF_display (buf);
16 | va_end (args);
17 | }
18 |
19 | /**
20 | * @brief Short wrapper to print to Stata
21 | *
22 | * Basic wrapper to print formatted strings to Stata
23 | *
24 | * @param *fmt a string to format
25 | * @param ... Arguments to pass to pritnf
26 | * @return Prints to Stata's console
27 | */
28 | void sf_printf (const char *fmt, ...)
29 | {
30 | va_list args;
31 | va_start (args, fmt);
32 | char buf[BUF_MAX];
33 | vsprintf (buf, fmt, args);
34 | SF_display (buf);
35 | // printf (buf);
36 | va_end (args);
37 | }
38 |
39 | /**
40 | * @brief Short wrapper to print error to Stata
41 | *
42 | * Basic wrapper to print formatted error strings to Stata
43 | *
44 | * @param *fmt a string to format
45 | * @param ... Arguments to pass to pritnf
46 | * @return Prints to Stata's console
47 | */
48 | void sf_errprintf (const char *fmt, ...)
49 | {
50 | va_list args;
51 | va_start (args, fmt);
52 | char buf[BUF_MAX];
53 | vsprintf (buf, fmt, args);
54 | SF_error (buf);
55 | va_end (args);
56 | }
57 |
--------------------------------------------------------------------------------
/src/plugin/common/sf_printf.h:
--------------------------------------------------------------------------------
#ifndef SF_PRINTF
#define SF_PRINTF

// printf-style wrappers defined in sf_printf.c. sf_printf_debug was
// defined there and called from other sources but was never declared.
void sf_printf_debug (const char *fmt, ...);
void sf_printf (const char *fmt, ...);
void sf_errprintf (const char *fmt, ...);

#endif
8 |
--------------------------------------------------------------------------------
/src/plugin/common/sf_wrappers.h:
--------------------------------------------------------------------------------
1 | #ifndef SF_WRAPPERS
2 | #define SF_WRAPPERS
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include "gttypes.h"
11 | #include "../spi/stplugin.h"
12 |
13 | ST_retcode sf_empty_varlist(GT_size *pos, GT_size start, GT_size K);
14 | GT_size sf_anyobs_sel ();
15 |
16 | void sf_running_timer (clock_t *timer, const char *msg);
17 |
18 | ST_retcode sf_oom_error (char * step_desc, char * obj_desc);
19 |
20 | GT_int sf_get_vector_length (char *st_matrix);
21 | ST_retcode sf_get_vector (char *st_matrix, ST_double *v);
22 | ST_retcode sf_get_vector_int (char *st_matrix, GT_int *v);
23 | ST_retcode sf_get_vector_size (char *st_matrix, GT_size *v);
24 | ST_retcode sf_get_vector_bool (char *st_matrix, GT_bool *v);
25 | ST_retcode sf_byx_save (struct StataInfo *st_info);
26 | ST_retcode sf_byx_save_top (struct StataInfo *st_info, GT_size ntop, GT_size *topix);
27 |
28 | void sf_format_size (GT_size n, char *out);
29 |
30 | ST_retcode sf_scalar_int (char *st_scalar, GT_int *sval);
31 | ST_retcode sf_scalar_size (char *st_scalar, GT_size *sval);
32 |
33 | #endif
34 |
--------------------------------------------------------------------------------
/src/plugin/hash/gtools_hash.h:
--------------------------------------------------------------------------------
1 | #ifndef GTOOLS_HASH
2 | #define GTOOLS_HASH
3 |
4 | #define RADIX_SHIFT 24
5 |
6 | int gf_hash (
7 | uint64_t *h1,
8 | uint64_t *h2,
9 | struct StataInfo *st_info,
10 | GT_size *ix
11 | );
12 |
13 | int gf_biject_varlist (uint64_t *h1, struct StataInfo *st_info);
14 |
15 | int gf_panelsetup (
16 | uint64_t *h1,
17 | uint64_t *h2,
18 | struct StataInfo *st_info,
19 | GT_size *ix,
20 | const GT_bool hash_level
21 | );
22 |
23 | int gf_check_allequal (uint64_t *hash, GT_size start, GT_size end);
24 | int gf_panelsetup_bijection (uint64_t *h1, struct StataInfo *st_info);
25 |
26 | #endif
27 |
--------------------------------------------------------------------------------
/src/plugin/hash/gtools_sort.h:
--------------------------------------------------------------------------------
1 | #ifndef GTOOLS_SORT
2 | #define GTOOLS_SORT
3 |
/*
 * Eight tables of 256 32-bit counters that can be addressed either by
 * name (c8 ... c1) or as one flat array of 256 * 8 entries (counts).
 * Members are laid out in declaration order, so c8 overlays the first
 * 256 entries of counts and c1 the last 256.
 * NOTE(review): presumably one table per 8-bit digit of a 64-bit key in
 * gf_radix_sort8 -- confirm against the implementation.
 */
typedef union {
    struct {
        uint32_t c8[256];
        uint32_t c7[256];
        uint32_t c6[256];
        uint32_t c5[256];
        uint32_t c4[256];
        uint32_t c3[256];
        uint32_t c2[256];
        uint32_t c1[256];
    };
    uint32_t counts[256 * 8];
} radixCounts8;
17 |
/*
 * Pointer-based counter tables. Unlike radixCounts8 these structs only
 * hold pointers; the storage behind them is not declared here.
 * NOTE(review): each struct presumably belongs to the gf_radix_sort*
 * routine with the matching suffix, with table sizes implied by the
 * digit widths in the name -- confirm against the implementation.
 */
struct radixCounts16 {
    uint32_t *c4;
    uint32_t *c3;
    uint32_t *c2;
    uint32_t *c1;
};

struct radixCounts16_32 {
    uint32_t *c2;
    uint32_t *c1;
};

struct radixCounts12_24 {
    uint32_t *c2;
    uint32_t *c1;
};

struct radixCounts8_16 {
    uint32_t *c2;
    uint32_t *c1;
};
39 |
40 | ST_retcode gf_sort_hash (uint64_t *hash, GT_size *index, GT_size N, GT_bool verbose, GT_size ctol);
41 | ST_retcode gf_radix_sort8 (uint64_t *hash, GT_size *index, GT_size N);
42 | ST_retcode gf_radix_sort16 (uint64_t *hash, GT_size *index, GT_size N);
43 | ST_retcode gf_radix_sort16_32 (uint64_t *hash, GT_size *index, GT_size N);
44 | ST_retcode gf_radix_sort12_24 (uint64_t *hash, GT_size *index, GT_size N);
45 | ST_retcode gf_radix_sort8_16 (uint64_t *hash, GT_size *index, GT_size N);
46 | ST_retcode gf_counting_sort (uint64_t *hash, GT_size *index, GT_size N, uint64_t min, uint64_t max);
47 |
48 | #endif
49 |
--------------------------------------------------------------------------------
/src/plugin/lib:
--------------------------------------------------------------------------------
1 | ../../lib
--------------------------------------------------------------------------------
/src/plugin/quantiles/gquantiles_math.h:
--------------------------------------------------------------------------------
1 | #ifndef GQUANTILES_MATH
2 | #define GQUANTILES_MATH
3 |
4 | void gf_quantiles_nq (
5 | ST_double *qout,
6 | ST_double *x,
7 | GT_size nquants,
8 | GT_size N,
9 | GT_size kx
10 | );
11 |
12 | void gf_quantiles (
13 | ST_double *qout,
14 | ST_double *x,
15 | ST_double *quants,
16 | GT_size nquants,
17 | GT_size N,
18 | GT_size kx
19 | );
20 |
21 | void gf_quantiles_nq_altdef (
22 | ST_double *qout,
23 | ST_double *x,
24 | GT_size nquants,
25 | GT_size N,
26 | GT_size kx
27 | );
28 |
29 | void gf_quantiles_altdef (
30 | ST_double *qout,
31 | ST_double *x,
32 | ST_double *quants,
33 | GT_size nquants,
34 | GT_size N,
35 | GT_size kx
36 | );
37 |
38 | void gf_quantiles_nq_qselect (
39 | ST_double *qout,
40 | ST_double *x,
41 | GT_size nquants,
42 | GT_size N
43 | );
44 |
45 | void gf_quantiles_qselect (
46 | ST_double *qout,
47 | ST_double *x,
48 | ST_double *quants,
49 | GT_size nquants,
50 | GT_size N
51 | );
52 |
53 | void gf_quantiles_nq_qselect_altdef (
54 | ST_double *qout,
55 | ST_double *x,
56 | GT_size nquants,
57 | GT_size N
58 | );
59 |
60 | void gf_quantiles_qselect_altdef (
61 | ST_double *qout,
62 | ST_double *x,
63 | ST_double *quants,
64 | GT_size nquants,
65 | GT_size N
66 | );
67 |
68 | GT_size gf_quantiles_gcd (
69 | GT_size a,
70 | GT_size b
71 | );
72 |
73 | #endif
74 |
--------------------------------------------------------------------------------
/src/plugin/quantiles/gquantiles_math_w.h:
--------------------------------------------------------------------------------
1 | #ifndef GQUANTILES_MATH_W
2 | #define GQUANTILES_MATH_W
3 |
4 | void gf_quantiles_nq_w (
5 | ST_double *qout,
6 | ST_double *x,
7 | GT_size nquants,
8 | GT_size N,
9 | GT_size kx
10 | );
11 |
12 | void gf_quantiles_w (
13 | ST_double *qout,
14 | ST_double *x,
15 | ST_double *quants,
16 | GT_size nquants,
17 | GT_size N,
18 | GT_size kx
19 | );
20 |
21 | #endif
22 |
--------------------------------------------------------------------------------
/src/plugin/quantiles/gquantiles_utils.c:
--------------------------------------------------------------------------------
1 | GT_size gf_xtile_clean (
2 | ST_double *x,
3 | GT_size lsize,
4 | GT_bool dropmiss,
5 | GT_bool dedup)
6 | {
7 | GT_size i, _lsize;
8 | GT_bool sortme, dedupcheck;
9 |
10 | if ( lsize > 1 ) {
11 | _lsize = lsize;
12 | sortme = 0;
13 |
14 | for (i = 1; i < lsize; i++) {
15 | if ( x[i] < x[i - 1] ) {
16 | sortme = 1;
17 | break;
18 | }
19 | else if ( x[i] == x[i - 1] ) {
20 | dedupcheck = 1;
21 | }
22 | }
23 |
24 | if ( sortme ) {
25 | quicksort_bsd (
26 | x,
27 | lsize,
28 | sizeof *x,
29 | xtileCompare,
30 | NULL
31 | );
32 | dedupcheck = 1;
33 | sortme = 0;
34 | }
35 |
36 | if ( dedup & dedupcheck ) {
37 | _lsize = 0;
38 | if ( dropmiss ) {
39 | if ( SF_is_missing(x[0]) ) return (0);
40 | for (i = 1; i < lsize; i++) {
41 | if ( SF_is_missing(x[i]) ) break;
42 | else if ( x[_lsize] == x[i] ) continue;
43 | x[++_lsize] = x[i];
44 | }
45 | }
46 | else {
47 | for (i = 1; i < lsize; i++) {
48 | if ( x[_lsize] == x[i] ) continue;
49 | x[++_lsize] = x[i];
50 | }
51 | }
52 | _lsize++;
53 | }
54 | else if ( dropmiss ) {
55 | for (i = 0; i < lsize; i++) {
56 | if ( SF_is_missing(x[i]) ) return (i);
57 | }
58 | }
59 |
60 | return (_lsize);
61 | }
62 | else if ( (lsize == 1) & dropmiss ) {
63 | if ( SF_is_missing(x[0]) ) return (0);
64 | return (lsize);
65 | }
66 | else {
67 | return (lsize);
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/src/plugin/quantiles/gquantiles_utils.h:
--------------------------------------------------------------------------------
1 | #ifndef GTOOLS_GQUANTILES_UTILS
2 | #define GTOOLS_GQUANTILES_UTILS
3 |
4 | GT_size gf_xtile_clean (
5 | ST_double *x,
6 | GT_size lsize,
7 | GT_bool dropmiss,
8 | GT_bool dedup
9 | );
10 |
11 | #endif
12 |
--------------------------------------------------------------------------------
/src/plugin/regress/gregress.h:
--------------------------------------------------------------------------------
1 | #ifndef GREGRESS
2 | #define GREGRESS
3 |
4 | ST_retcode sf_regress (struct StataInfo *st_info, int level, char *fname);
5 |
6 | ST_retcode sf_regress_read_colmajor (
7 | struct StataInfo *st_info,
8 | ST_double *y,
9 | ST_double *X,
10 | ST_double *w,
11 | void *G,
12 | void *FE,
13 | ST_double *I,
14 | GT_size *nj
15 | );
16 |
// Function pointer with the same parameter list as
// sf_regress_read_colmajor (declared above).
//
// NOTE(review): this is a variable, not a prototype, and it is not
// declared extern. Every translation unit that includes this header
// therefore gets its own tentative definition, which fails to link
// under -fno-common if the header is included from more than one
// source file. If that ever applies, make this `extern` and define the
// pointer in exactly one .c file.
ST_retcode (*sf_regress_read)(
    struct StataInfo *,
    ST_double *,
    ST_double *,
    ST_double *,
    void *,
    void *,
    ST_double *,
    GT_size *
);
27 |
28 | ST_retcode gf_regress_absorb (
29 | struct GtoolsHash *AbsorbHashes,
30 | GtoolsAlgorithmHDFE GtoolsAlgorithmHDFE,
31 | ST_double *stats,
32 | GT_size *maps,
33 | GT_size nj,
34 | GT_size kabs,
35 | GT_size kx,
36 | GT_size *kmodel,
37 | ST_double **njabsptr,
38 | ST_double *xptr,
39 | ST_double *yptr,
40 | ST_double *wptr,
41 | ST_double *xtarget,
42 | ST_double *ytarget,
43 | GT_bool setup,
44 | ST_double hdfetol
45 | );
46 |
47 | ST_retcode gf_regress_absorb_iter(
48 | struct GtoolsHash *AbsorbHashes,
49 | GtoolsAlgorithmHDFE GtoolsAlgorithmHDFE,
50 | ST_double *stats,
51 | GT_size *maps,
52 | GT_size J,
53 | GT_size *nj,
54 | GT_size kabs,
55 | GT_size kx,
56 | ST_double **njabsptr,
57 | ST_double *xptr,
58 | ST_double *yptr,
59 | ST_double *wptr,
60 | ST_double hdfetol
61 | );
62 |
63 | ST_retcode gf_regress_iv_notidentified (
64 | GT_size nj,
65 | GT_size kabs,
66 | GT_size ivkendog,
67 | GT_size ivkexog,
68 | GT_size ivkz,
69 | GT_size kmodel,
70 | char *buf1,
71 | char *buf2,
72 | char *buf3
73 | );
74 |
75 | ST_retcode gf_regress_notidentified (
76 | GT_size nj,
77 | GT_size kabs,
78 | GT_size kx,
79 | GT_size kmodel,
80 | char *buf1,
81 | char *buf2,
82 | char *buf3
83 | );
84 |
85 | void gf_regress_warnings (
86 | GT_size J,
87 | GT_size warncollinear,
88 | GT_size warnsingular,
89 | GT_size warnivnotiden,
90 | GT_size warnnocols,
91 | GT_size warnalpha,
92 | char *buf1,
93 | char *buf2,
94 | char *buf3,
95 | char *buf4,
96 | char *buf5
97 | );
98 |
99 |
100 | void gf_regress_adjust_collinear_b (
101 | ST_double *b,
102 | ST_double *buffer,
103 | GT_size *colix,
104 | GT_size k1,
105 | GT_size k2
106 | );
107 |
108 | void gf_regress_adjust_collinear_se (
109 | ST_double *se,
110 | ST_double *buffer,
111 | GT_size *colix,
112 | GT_size k1,
113 | GT_size k2
114 | );
115 |
116 | void gf_regress_adjust_collinear_V (
117 | ST_double *V,
118 | ST_double *buffer,
119 | GT_size *colix,
120 | GT_size k1,
121 | GT_size k2
122 | );
123 |
124 | #endif
125 |
--------------------------------------------------------------------------------
/src/plugin/regress/linalg/common.c:
--------------------------------------------------------------------------------
1 | /**
2 | * @brief Computes the transpose of a matrix B = A'
3 | *
4 | * @A k1 x k2 matrix to be transposed
5 | * @B k2 x k1 transpose of A
6 | * @k1 Number of rows in A
7 | * @k2 Number of columns in A
8 | * @return Store A' in @B
9 | */
10 | void gf_regress_linalg_dtrans_colmajor (ST_double *A, ST_double *B, GT_size k1, GT_size k2)
11 | {
12 | GT_size i, j;
13 | for (j = 0; j < k2; j++) {
14 | for (i = 0; i < k1; i++) {
15 | B[j * k2 + i] = A[i * k2 + j];
16 | }
17 | }
18 | }
19 |
20 | /**
21 | * @brief Print matrix A
22 | *
23 | * @A k1 x k2 matrix to be printed
24 | * @k1 Number of rows in A
25 | * @k2 Number of columns in A
26 | * @return Prints entries of matrix A
27 | */
28 | void gf_regress_dprintf_colmajor (
29 | ST_double *matrix,
30 | GT_size k1,
31 | GT_size k2,
32 | char *name)
33 | {
34 | GT_size i, j;
35 | sf_printf_debug("%s\n", name);
36 | for (i = 0; i < k1; i++) {
37 | for (j = 0; j < k2; j++) {
38 | sf_printf_debug("%.8g\t", matrix[i + k1 * j]);
39 | }
40 | sf_printf_debug("\n");
41 | }
42 | sf_printf_debug("\n");
43 | }
44 |
45 | /**
46 | * @brief Print matrix A
47 | *
48 | * @A k1 x k2 matrix to be printed
49 | * @k1 Number of rows in A
50 | * @k2 Number of columns in A
51 | * @return Prints entries of matrix A
52 | */
53 | void gf_regress_lprintf_colmajor (
54 | GT_size *matrix,
55 | GT_size k1,
56 | GT_size k2,
57 | char *name)
58 | {
59 | GT_size i, j;
60 | sf_printf_debug("%s\n", name);
61 | for (i = 0; i < k1; i++) {
62 | for (j = 0; j < k2; j++) {
63 | sf_printf_debug("%lu\t", matrix[i + k1 * j]);
64 | }
65 | sf_printf_debug("\n");
66 | }
67 | sf_printf_debug("\n");
68 | }
69 |
--------------------------------------------------------------------------------
/src/plugin/regress/linalg/rowmajor.c:
--------------------------------------------------------------------------------
1 | /**
2 | * @brief Compute C = AB assuming that either both A and B are symmetric or that A = B
3 | *
4 | * @A N x K matrix (symmetric, or A = B)
5 | * @B N x K matrix (symmetric, or A = B)
6 | * @C K x K array where to store AB
7 | * @N Number of rows in A, B
8 | * @K Number of columns in A, B
9 | * @return Store AB in @C
10 | */
11 | void gf_regress_linalg_dsymm_rowmajor(
12 | ST_double *A,
13 | ST_double *B,
14 | ST_double *C,
15 | GT_size N,
16 | GT_size K)
17 | {
18 | GT_size i, j, l;
19 | ST_double *aptr, *bptr;
20 |
21 | for (i = 0; i < K; i++) {
22 | for (j = 0; j < K; j++) {
23 | C[i * K + j] = 0;
24 | }
25 | }
26 |
27 | bptr = B;
28 | for (i = 0; i < N; i++) {
29 | for (j = 0; j < K; j++, bptr++) {
30 | aptr = A + i * K + j;
31 | for (l = j; l < K; l++, aptr++) {
32 | C[j * K + l] += (*aptr) * (*bptr);
33 | }
34 | }
35 | }
36 |
37 | // Since C is symmetric, we only compute the upper triangle and then
38 | // copy it back into the lower triangle
39 |
40 | for (i = 0; i < K; i++) {
41 | for (j = i + 1; j < K; j++) {
42 | C[j * K + i] = C[i * K + j];
43 | }
44 | }
45 | }
46 |
47 | void gf_regress_linalg_dsymm_ixrowmajor(
48 | ST_double *A,
49 | ST_double *B,
50 | ST_double *C,
51 | GT_size *ix,
52 | GT_size N,
53 | GT_size K)
54 | {
55 | GT_size i, j, l, m;
56 | ST_double *aptr, *bptr;
57 |
58 | for (i = 0; i < K; i++) {
59 | for (j = 0; j < K; j++) {
60 | C[i * K + j] = 0;
61 | }
62 | }
63 |
64 | for (i = 0; i < N; i++) {
65 | m = ix[i];
66 | bptr = B + m * K;
67 | for (j = 0; j < K; j++, bptr++) {
68 | aptr = A + m * K + j;
69 | for (l = j; l < K; l++, aptr++) {
70 | C[j * K + l] += (*aptr) * (*bptr);
71 | }
72 | }
73 | }
74 |
75 | // Since C is symmetric, we only compute the upper triangle and then
76 | // copy it back into the lower triangle
77 |
78 | for (i = 0; i < K; i++) {
79 | for (j = i + 1; j < K; j++) {
80 | C[j * K + i] = C[i * K + j];
81 | }
82 | }
83 | }
84 |
85 | void gf_regress_linalg_dsymm_wrowmajor(
86 | ST_double *A,
87 | ST_double *B,
88 | ST_double *C,
89 | ST_double *w,
90 | GT_size N,
91 | GT_size K)
92 | {
93 | GT_size i, j, l;
94 | ST_double *aptr, *bptr, *wptr;
95 |
96 | for (i = 0; i < K; i++) {
97 | for (j = 0; j < K; j++) {
98 | C[i * K + j] = 0;
99 | }
100 | }
101 |
102 | bptr = B;
103 | wptr = w;
104 | for (i = 0; i < N; i++, wptr++) {
105 | for (j = 0; j < K; j++, bptr++) {
106 | aptr = A + i * K + j;
107 | for (l = j; l < K; l++, aptr++) {
108 | C[j * K + l] += (*aptr) * (*bptr) * (*wptr);
109 | }
110 | }
111 | }
112 |
113 | // Since C is symmetric, we only compute the upper triangle and then
114 | // copy it back into the lower triangle
115 |
116 | for (i = 0; i < K; i++) {
117 | for (j = i + 1; j < K; j++) {
118 | C[j * K + i] = C[i * K + j];
119 | }
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/src/plugin/regress/models/glm.c:
--------------------------------------------------------------------------------
1 | ST_retcode gf_regress_glm_post(
2 | GT_bool wcode,
3 | ST_double *wptr,
4 | ST_double *e,
5 | ST_double *wgt,
6 | GT_size nj,
7 | ST_double diff,
8 | ST_double poistol,
9 | GT_size poisiter,
10 | char *buf1)
11 | {
12 | GT_size i;
13 | if ( diff < poistol ) {
14 | if ( wcode == 2 ) {
15 | for (i = 0; i < nj; i++) {
16 | e[i] *= wgt[i] / wptr[i];
17 | }
18 | memcpy(wgt, wptr, nj * sizeof(ST_double));
19 | }
20 | return(0);
21 | }
22 | else {
23 | sf_format_size(poisiter, buf1);
24 | sf_errprintf("max iter (%s) reached; tolerance not achieved (%15.9g > %15.9g)\n",
25 | buf1, diff, poistol);
26 | return(198);
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/src/plugin/regress/models/glm.h:
--------------------------------------------------------------------------------
1 | #ifndef GREGRESS_GLM
2 | #define GREGRESS_GLM
3 |
4 | // GLM
5 | // ---
6 |
7 | void (*gf_regress_glm_init)(
8 | ST_double *,
9 | ST_double *,
10 | ST_double *,
11 | ST_double *,
12 | ST_double *,
13 | ST_double *,
14 | ST_double *,
15 | GT_size
16 | );
17 |
18 | ST_double (*gf_regress_glm_iter)(
19 | ST_double *,
20 | ST_double *,
21 | ST_double *,
22 | ST_double *,
23 | ST_double *,
24 | ST_double *,
25 | ST_double *,
26 | ST_double *,
27 | ST_double *,
28 | GT_size
29 | );
30 |
31 | ST_retcode gf_regress_glm_post(
32 | GT_bool wcode,
33 | ST_double *wptr,
34 | ST_double *e,
35 | ST_double *wgt,
36 | GT_size nj,
37 | ST_double diff,
38 | ST_double glmtol,
39 | GT_size glmiter,
40 | char *buf1
41 | );
42 |
43 | // Logit
44 | // -----
45 |
46 | void gf_regress_logit_init_w(
47 | ST_double *yptr,
48 | ST_double *wptr,
49 | ST_double *mu,
50 | ST_double *wgt,
51 | ST_double *eta,
52 | ST_double *dev,
53 | ST_double *lhs,
54 | GT_size nj
55 | );
56 |
57 | void gf_regress_logit_init_unw(
58 | ST_double *yptr,
59 | ST_double *wptr,
60 | ST_double *mu,
61 | ST_double *wgt,
62 | ST_double *eta,
63 | ST_double *dev,
64 | ST_double *lhs,
65 | GT_size nj
66 | );
67 |
68 | ST_double gf_regress_logit_iter_unw(
69 | ST_double *yptr,
70 | ST_double *wptr,
71 | ST_double *e,
72 | ST_double *mu,
73 | ST_double *wgt,
74 | ST_double *eta,
75 | ST_double *dev,
76 | ST_double *dev0,
77 | ST_double *lhs,
78 | GT_size nj
79 | );
80 |
81 | ST_double gf_regress_logit_iter_w(
82 | ST_double *yptr,
83 | ST_double *wptr,
84 | ST_double *e,
85 | ST_double *mu,
86 | ST_double *wgt,
87 | ST_double *eta,
88 | ST_double *dev,
89 | ST_double *dev0,
90 | ST_double *lhs,
91 | GT_size nj
92 | );
93 |
94 | // Poisson
95 | // -------
96 |
97 | void gf_regress_poisson_init_w(
98 | ST_double *yptr,
99 | ST_double *wptr,
100 | ST_double *mu,
101 | ST_double *wgt,
102 | ST_double *eta,
103 | ST_double *dev,
104 | ST_double *lhs,
105 | GT_size nj
106 | );
107 |
108 | void gf_regress_poisson_init_unw(
109 | ST_double *yptr,
110 | ST_double *wptr,
111 | ST_double *mu,
112 | ST_double *wgt,
113 | ST_double *eta,
114 | ST_double *dev,
115 | ST_double *lhs,
116 | GT_size nj
117 | );
118 |
119 | ST_double gf_regress_poisson_iter_unw(
120 | ST_double *yptr,
121 | ST_double *wptr,
122 | ST_double *e,
123 | ST_double *mu,
124 | ST_double *wgt,
125 | ST_double *eta,
126 | ST_double *dev,
127 | ST_double *dev0,
128 | ST_double *lhs,
129 | GT_size nj
130 | );
131 |
132 | ST_double gf_regress_poisson_iter_w(
133 | ST_double *yptr,
134 | ST_double *wptr,
135 | ST_double *e,
136 | ST_double *mu,
137 | ST_double *wgt,
138 | ST_double *eta,
139 | ST_double *dev,
140 | ST_double *dev0,
141 | ST_double *lhs,
142 | GT_size nj
143 | );
144 |
145 | #endif
146 |
--------------------------------------------------------------------------------
/src/plugin/regress/models/logit.c:
--------------------------------------------------------------------------------
1 | void gf_regress_logit_init_unw(
2 | ST_double *yptr,
3 | ST_double *wptr,
4 | ST_double *mu,
5 | ST_double *wgt,
6 | ST_double *eta,
7 | ST_double *dev,
8 | ST_double *lhs,
9 | GT_size nj)
10 | {
11 | GT_size i;
12 | ST_double mean = 0;
13 | for (i = 0; i < nj; i++) {
14 | mean += yptr[i];
15 | }
16 | mean /= (ST_double) nj;
17 |
18 | for (i = 0; i < nj; i++) {
19 | mu[i] = (yptr[i] + mean) / 2;
20 | eta[i] = log(mu[i] / (1 - mu[i]));
21 | wgt[i] = mu[i] * (1 - mu[i]);
22 | dev[i] = 0;
23 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i];
24 | }
25 | }
26 |
27 | void gf_regress_logit_init_w(
28 | ST_double *yptr,
29 | ST_double *wptr,
30 | ST_double *mu,
31 | ST_double *wgt,
32 | ST_double *eta,
33 | ST_double *dev,
34 | ST_double *lhs,
35 | GT_size nj)
36 | {
37 | GT_size i;
38 | ST_double mean = 0;
39 | ST_double W = 0;
40 | for (i = 0; i < nj; i++) {
41 | mean += yptr[i] * wptr[i];
42 | W += wptr[i];
43 | }
44 | mean /= (ST_double) W;
45 | for (i = 0; i < nj; i++) {
46 | mu[i] = (yptr[i] + mean) / 2;
47 | eta[i] = log(mu[i] / (1 - mu[i]));
48 | wgt[i] = mu[i] * (1 - mu[i]);
49 | dev[i] = 0;
50 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i];
51 | wgt[i] *= wptr[i];
52 | }
53 | }
54 |
55 | ST_double gf_regress_logit_iter_unw(
56 | ST_double *yptr,
57 | ST_double *wptr,
58 | ST_double *e,
59 | ST_double *mu,
60 | ST_double *wgt,
61 | ST_double *eta,
62 | ST_double *dev,
63 | ST_double *dev0,
64 | ST_double *lhs,
65 | GT_size nj)
66 | {
67 | GT_size i;
68 | ST_double diff = 0;
69 |
70 | for (i = 0; i < nj; i++) {
71 | eta[i] = lhs[i] - e[i];
72 | mu[i] = 1 / (1 + exp(-eta[i]));
73 | wgt[i] = mu[i] * (1 - mu[i]);
74 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i];
75 | dev0[i] = dev[i];
76 | dev[i] = - 2 * (yptr[i] * log(mu[i]) + (1 - yptr[i]) * log(1 - mu[i]));
77 | diff = GTOOLS_PWMAX(diff, fabs(dev[i] - dev0[i]) / (fabs(dev0[i]) + 1));
78 | }
79 | return (diff);
80 | }
81 |
82 | ST_double gf_regress_logit_iter_w(
83 | ST_double *yptr,
84 | ST_double *wptr,
85 | ST_double *e,
86 | ST_double *mu,
87 | ST_double *wgt,
88 | ST_double *eta,
89 | ST_double *dev,
90 | ST_double *dev0,
91 | ST_double *lhs,
92 | GT_size nj)
93 | {
94 | GT_size i;
95 | ST_double diff = 0;
96 | for (i = 0; i < nj; i++) {
97 | eta[i] = lhs[i] - e[i];
98 | mu[i] = 1 / (1 + exp(-eta[i]));
99 | wgt[i] = mu[i] * (1 - mu[i]);
100 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i];
101 | dev0[i] = dev[i];
102 | dev[i] = - 2 * (yptr[i] * log(mu[i]) + (1 - yptr[i]) * log(1 - mu[i]));
103 | diff = GTOOLS_PWMAX(diff, fabs(dev[i] - dev0[i]) / (fabs(dev0[i]) + 1));
104 | wgt[i] *= wptr[i];
105 | }
106 | return (diff);
107 | }
108 |
--------------------------------------------------------------------------------
/src/plugin/regress/models/models.h:
--------------------------------------------------------------------------------
1 | #ifndef GREGRESS_MODELS
2 | #define GREGRESS_MODELS
3 |
4 | // OLS
5 | // ---
6 |
7 | GT_bool (*gf_regress_ols) (
8 | ST_double *,
9 | ST_double *,
10 | ST_double *,
11 | ST_double *,
12 | ST_double *,
13 | ST_double *,
14 | ST_double *,
15 | GT_size *,
16 | GT_size,
17 | GT_size
18 | );
19 |
20 | GT_bool gf_regress_ols_colmajor(
21 | ST_double *X,
22 | ST_double *y,
23 | ST_double *w,
24 | ST_double *XX,
25 | ST_double *Xy,
26 | ST_double *e,
27 | ST_double *b,
28 | GT_size *colix,
29 | GT_size N,
30 | GT_size kx
31 | );
32 |
33 | GT_bool gf_regress_ols_wcolmajor(
34 | ST_double *X,
35 | ST_double *y,
36 | ST_double *w,
37 | ST_double *XX,
38 | ST_double *Xy,
39 | ST_double *e,
40 | ST_double *b,
41 | GT_size *colix,
42 | GT_size N,
43 | GT_size kx
44 | );
45 |
46 | // IV regression
47 | // -------------
48 |
49 | GT_bool (*gf_regress_iv) (
50 | ST_double *,
51 | ST_double *,
52 | ST_double *,
53 | ST_double *,
54 | ST_double *,
55 | ST_double *,
56 | ST_double *,
57 | ST_double *,
58 | ST_double *,
59 | ST_double *,
60 | GT_size *,
61 | GT_size,
62 | GT_size,
63 | GT_size,
64 | GT_size
65 | );
66 |
67 | GT_bool gf_regress_iv_unw(
68 | ST_double *Xendog,
69 | ST_double *Xexog,
70 | ST_double *Z,
71 | ST_double *y,
72 | ST_double *w,
73 | ST_double *XX,
74 | ST_double *XZ,
75 | ST_double *BZ,
76 | ST_double *e,
77 | ST_double *b,
78 | GT_size *colix,
79 | GT_size N,
80 | GT_size kendog,
81 | GT_size kexog,
82 | GT_size kz
83 | );
84 |
85 | GT_bool gf_regress_iv_w(
86 | ST_double *Xendog,
87 | ST_double *Xexog,
88 | ST_double *Z,
89 | ST_double *y,
90 | ST_double *w,
91 | ST_double *XX,
92 | ST_double *XZ,
93 | ST_double *BZ,
94 | ST_double *e,
95 | ST_double *b,
96 | GT_size *colix,
97 | GT_size N,
98 | GT_size kendog,
99 | GT_size kexog,
100 | GT_size kz
101 | );
102 |
103 | void gf_regress_linalg_iverror(
104 | ST_double *y,
105 | ST_double *A1,
106 | ST_double *A2,
107 | ST_double *b,
108 | ST_double *c,
109 | GT_size N,
110 | GT_size k1,
111 | GT_size k2
112 | );
113 |
114 | void gf_regress_linalg_iverror_ix(
115 | ST_double *y,
116 | ST_double *A1,
117 | ST_double *A2,
118 | ST_double *b,
119 | ST_double *c,
120 | GT_size *colix,
121 | GT_size N,
122 | GT_size koffset,
123 | GT_size k1,
124 | GT_size k2
125 | );
126 |
127 | void gf_regress_linalg_ivcollinear_ix(
128 | GT_size *colix,
129 | GT_size kendog,
130 | GT_size kexog,
131 | GT_size kz
132 | );
133 |
134 | #endif
135 |
--------------------------------------------------------------------------------
/src/plugin/regress/models/ols.c:
--------------------------------------------------------------------------------
1 | /**
2 | * @brief Run basic OLS
3 | *
4 | * @X Independent variables; array of length N x kx
5 | * @y Dependent variable; array of length N
6 | * @XX Array of length kx x kx where to store X' X and (X' X)^-1
7 | * @Xy Array of length kx where to store X y
8 | * @b Array of length kx where to store the coefficients
9 | * @N Number of observations
10 | * @kx Number of X variables
11 | * @return Store OLS coefficients in @b
12 | */
13 | GT_bool gf_regress_ols_colmajor(
14 | ST_double *X,
15 | ST_double *y,
16 | ST_double *w,
17 | ST_double *XX,
18 | ST_double *Xy,
19 | ST_double *e,
20 | ST_double *b,
21 | GT_size *colix,
22 | GT_size N,
23 | GT_size kx)
24 | {
25 | GT_size kindep;
26 | GT_bool singular = 0;
27 |
28 | gf_regress_linalg_dsymm_colmajor (X, X, XX, N, kx);
29 | gf_regress_linalg_dsyldu (XX, kx, XX + kx * kx, colix, &singular);
30 |
31 | // gf_regress_dprintf_colmajor (XX, kx, kx, "XX");
32 | // gf_regress_linalg_dsysv (XX, kx, &singular);
33 | // gf_regress_dprintf_colmajor (XX, kindep, kindep, "XX^-1");
34 |
35 | kindep = colix[kx];
36 | if ( kindep > 0 ) {
37 | if ( kindep < kx ) {
38 | gf_regress_linalg_dgemTv_colmajor_ix1 (X, y, Xy, colix, N, kindep);
39 | gf_regress_linalg_dgemTv_colmajor (XX, Xy, b, kindep, kindep);
40 | gf_regress_linalg_error_colmajor_ix1 (y, X, b, e, colix, N, kindep);
41 | }
42 | else {
43 | gf_regress_linalg_dgemTv_colmajor (X, y, Xy, N, kx);
44 | gf_regress_linalg_dgemTv_colmajor (XX, Xy, b, kx, kx);
45 | gf_regress_linalg_error_colmajor (y, X, b, e, N, kx);
46 | }
47 | }
48 |
49 | // gf_regress_dprintf_colmajor (Xy, 1, kindep, "Xy");
50 | // gf_regress_dprintf_colmajor (b, 1, kindep, "b");
51 |
52 | return(singular);
53 | }
54 |
55 | GT_bool gf_regress_ols_wcolmajor(
56 | ST_double *X,
57 | ST_double *y,
58 | ST_double *w,
59 | ST_double *XX,
60 | ST_double *Xy,
61 | ST_double *e,
62 | ST_double *b,
63 | GT_size *colix,
64 | GT_size N,
65 | GT_size kx)
66 | {
67 | GT_size kindep;
68 | GT_bool singular = 0;
69 | gf_regress_linalg_dsymm_wcolmajor (X, X, XX, w, N, kx);
70 | gf_regress_linalg_dsyldu (XX, kx, XX + kx * kx, colix, &singular);
71 |
72 | kindep = colix[kx];
73 | if ( kindep > 0 ) {
74 | if ( kindep < kx ) {
75 | gf_regress_linalg_dgemTv_wcolmajor_ix1 (X, y, Xy, w, colix, N, kindep);
76 | gf_regress_linalg_dgemTv_colmajor (XX, Xy, b, kindep, kindep);
77 | gf_regress_linalg_error_colmajor_ix1 (y, X, b, e, colix, N, kindep);
78 | }
79 | else {
80 | gf_regress_linalg_dgemTv_wcolmajor (X, y, Xy, w, N, kx);
81 | gf_regress_linalg_dgemTv_colmajor (XX, Xy, b, kx, kx);
82 | gf_regress_linalg_error_colmajor (y, X, b, e, N, kx);
83 | }
84 | }
85 |
86 | return (singular);
87 | }
88 |
--------------------------------------------------------------------------------
/src/plugin/regress/models/poisson.c:
--------------------------------------------------------------------------------
1 | void gf_regress_poisson_init_unw(
2 | ST_double *yptr,
3 | ST_double *wptr,
4 | ST_double *mu,
5 | ST_double *wgt,
6 | ST_double *eta,
7 | ST_double *dev,
8 | ST_double *lhs,
9 | GT_size nj)
10 | {
11 | GT_size i;
12 | ST_double mean = 0;
13 | for (i = 0; i < nj; i++) {
14 | mean += yptr[i];
15 | }
16 | mean /= (ST_double) nj;
17 |
18 | for (i = 0; i < nj; i++) {
19 | mu[i] = (yptr[i] + mean) / 2;
20 | eta[i] = log(mu[i]);
21 | wgt[i] = mu[i];
22 | dev[i] = 0;
23 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i];
24 | }
25 | }
26 |
27 | void gf_regress_poisson_init_w(
28 | ST_double *yptr,
29 | ST_double *wptr,
30 | ST_double *mu,
31 | ST_double *wgt,
32 | ST_double *eta,
33 | ST_double *dev,
34 | ST_double *lhs,
35 | GT_size nj)
36 | {
37 | GT_size i;
38 | ST_double mean = 0;
39 | ST_double W = 0;
40 | for (i = 0; i < nj; i++) {
41 | mean += yptr[i] * wptr[i];
42 | W += wptr[i];
43 | }
44 | mean /= (ST_double) W;
45 | for (i = 0; i < nj; i++) {
46 | mu[i] = (yptr[i] + mean) / 2;
47 | eta[i] = log(mu[i]);
48 | wgt[i] = mu[i];
49 | dev[i] = 0;
50 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i];
51 | wgt[i] *= wptr[i];
52 | }
53 | }
54 |
55 | ST_double gf_regress_poisson_iter_unw(
56 | ST_double *yptr,
57 | ST_double *wptr,
58 | ST_double *e,
59 | ST_double *mu,
60 | ST_double *wgt,
61 | ST_double *eta,
62 | ST_double *dev,
63 | ST_double *dev0,
64 | ST_double *lhs,
65 | GT_size nj)
66 | {
67 | GT_size i;
68 | ST_double diff = 0;
69 |
70 | for (i = 0; i < nj; i++) {
71 | eta[i] = lhs[i] - e[i];
72 | mu[i] = exp(eta[i]);
73 | wgt[i] = mu[i];
74 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i];
75 | dev0[i] = dev[i];
76 | // is dropping these OK?
77 | dev[i] = yptr[i] > 0? 2 * (yptr[i] * log(yptr[i] / mu[i]) - (yptr[i] - mu[i])): 0;
78 | diff = GTOOLS_PWMAX(diff, fabs(dev[i] - dev0[i]) / (fabs(dev0[i]) + 1));
79 | }
80 | return (diff);
81 | }
82 |
83 | ST_double gf_regress_poisson_iter_w(
84 | ST_double *yptr,
85 | ST_double *wptr,
86 | ST_double *e,
87 | ST_double *mu,
88 | ST_double *wgt,
89 | ST_double *eta,
90 | ST_double *dev,
91 | ST_double *dev0,
92 | ST_double *lhs,
93 | GT_size nj)
94 | {
95 | GT_size i;
96 | ST_double diff = 0;
97 | for (i = 0; i < nj; i++) {
98 | eta[i] = lhs[i] - e[i];
99 | mu[i] = exp(eta[i]);
100 | wgt[i] = mu[i];
101 | lhs[i] = eta[i] + (yptr[i] - mu[i]) / wgt[i];
102 | dev0[i] = dev[i];
103 | // is dropping these OK?
104 | dev[i] = yptr[i] > 0? 2 * (yptr[i] * log(yptr[i] / mu[i]) - (yptr[i] - mu[i])): 0;
105 | diff = GTOOLS_PWMAX(diff, fabs(dev[i] - dev0[i]) / (fabs(dev0[i]) + 1));
106 | wgt[i] *= wptr[i];
107 | }
108 | return (diff);
109 | }
110 |
--------------------------------------------------------------------------------
/src/plugin/regress/vce/cluster.c:
--------------------------------------------------------------------------------
1 | void gf_regress_ols_cluster_colmajor(
2 | ST_double *e,
3 | ST_double *w,
4 | GT_size *info,
5 | GT_size *index,
6 | GT_size J,
7 | ST_double *U,
8 | GT_size *ux,
9 | ST_double *V,
10 | ST_double *VV,
11 | ST_double *X,
12 | ST_double *XX,
13 | ST_double *se,
14 | GT_size *colix,
15 | GT_size N,
16 | GT_size kx,
17 | GT_size kmodel,
18 | gf_regress_vceadj vceadj)
19 | {
20 | GT_size i, j, k, start, end, kindep;
21 | ST_double qc, *aptr, *bptr;
22 |
23 | kindep = colix[kx];
24 | memset(U, '\0', J * kindep * sizeof(ST_double));
25 | for (j = 0; j < J; j++) {
26 | start = info[j];
27 | end = info[j + 1];
28 | for (i = start; i < end; i++) {
29 | ux[index[i]] = j;
30 | }
31 | }
32 |
33 | if ( kindep < kx ) {
34 | for (k = 0; k < kindep; k++) {
35 | aptr = X + colix[k] * N;
36 | bptr = e;
37 | for (i = 0; i < N; i++, aptr++, bptr++) {
38 | U[ux[i] * kindep + k] += (*aptr) * (*bptr);
39 | }
40 | }
41 | }
42 | else {
43 | aptr = X;
44 | for (k = 0; k < kindep; k++) {
45 | bptr = e;
46 | for (i = 0; i < N; i++, aptr++, bptr++) {
47 | U[ux[i] * kindep + k] += (*aptr) * (*bptr);
48 | }
49 | }
50 | }
51 |
52 | gf_regress_linalg_dsymm_rowmajor (U, U, V, J, kindep);
53 | gf_regress_linalg_dgemm_colmajor (XX, V, VV, kindep, kindep, kindep);
54 | gf_regress_linalg_dgemm_colmajor (VV, XX, V, kindep, kindep, kindep);
55 |
56 | qc = vceadj(N, kmodel, J, w);
57 | for (i = 0; i < kindep; i++) {
58 | se[i] = sqrt(V[i * kindep + i] * qc);
59 | }
60 | }
61 |
62 | void gf_regress_ols_cluster_wcolmajor(
63 | ST_double *e,
64 | ST_double *w,
65 | GT_size *info,
66 | GT_size *index,
67 | GT_size J,
68 | ST_double *U,
69 | GT_size *ux,
70 | ST_double *V,
71 | ST_double *VV,
72 | ST_double *X,
73 | ST_double *XX,
74 | ST_double *se,
75 | GT_size *colix,
76 | GT_size N,
77 | GT_size kx,
78 | GT_size kmodel,
79 | gf_regress_vceadj vceadj)
80 | {
81 | GT_size i, j, k, start, end, kindep;
82 | ST_double qc, *aptr, *bptr, *wptr;
83 |
84 | kindep = colix[kx];
85 | memset(U, '\0', J * kindep * sizeof(ST_double));
86 | for (j = 0; j < J; j++) {
87 | start = info[j];
88 | end = info[j + 1];
89 | for (i = start; i < end; i++) {
90 | ux[index[i]] = j;
91 | }
92 | }
93 |
94 | if ( kindep < kx ) {
95 | for (k = 0; k < kindep; k++) {
96 | aptr = X + colix[k] * N;
97 | bptr = e;
98 | wptr = w;
99 | for (i = 0; i < N; i++, aptr++, bptr++, wptr++) {
100 | U[ux[i] * kindep + k] += (*aptr) * (*bptr) * (*wptr);
101 | }
102 | }
103 | }
104 | else {
105 | aptr = X;
106 | for (k = 0; k < kindep; k++) {
107 | bptr = e;
108 | wptr = w;
109 | for (i = 0; i < N; i++, aptr++, bptr++, wptr++) {
110 | U[ux[i] * kindep + k] += (*aptr) * (*bptr) * (*wptr);
111 | }
112 | }
113 | }
114 |
115 | gf_regress_linalg_dsymm_rowmajor (U, U, V, J, kindep);
116 | gf_regress_linalg_dgemm_colmajor (XX, V, VV, kindep, kindep, kindep);
117 | gf_regress_linalg_dgemm_colmajor (VV, XX, V, kindep, kindep, kindep);
118 |
119 | qc = vceadj(N, kmodel, J, w);
120 | for (i = 0; i < kindep; i++) {
121 | se[i] = sqrt(V[i * kindep + i] * qc);
122 | }
123 | }
124 |
--------------------------------------------------------------------------------
/src/plugin/regress/vce/heteroskedastic.c:
--------------------------------------------------------------------------------
1 | void gf_regress_ols_robust_colmajor(
2 | ST_double *e,
3 | ST_double *w,
4 | ST_double *V,
5 | ST_double *VV,
6 | ST_double *X,
7 | ST_double *XX,
8 | ST_double *se,
9 | GT_size *colix,
10 | GT_size N,
11 | GT_size kx,
12 | GT_size kmodel,
13 | gf_regress_vceadj vceadj)
14 | {
15 | GT_size i;
16 | ST_double qc = vceadj(N, kmodel, 0, w);
17 | GT_size kindep = colix[kx];
18 |
19 | // Compute D = X' diag(e) X
20 | if ( kindep < kx ) {
21 | gf_regress_linalg_dsymm_w2colmajor_ix (X, X, V, e, colix, N, kindep);
22 | }
23 | else {
24 | gf_regress_linalg_dsymm_w2colmajor (X, X, V, e, N, kx);
25 | }
26 |
27 | // Compute V = (X' X)^-1 D (X' X)^-1
28 | gf_regress_linalg_dgemm_colmajor (XX, V, VV, kindep, kindep, kindep);
29 | gf_regress_linalg_dgemm_colmajor (VV, XX, V, kindep, kindep, kindep);
30 |
31 | // Extract standard errors from diag(V)
32 | for (i = 0; i < kindep; i++) {
33 | se[i] = sqrt(V[i * kindep + i] * qc);
34 | }
35 | }
36 |
37 | void gf_regress_ols_robust_wcolmajor(
38 | ST_double *e,
39 | ST_double *w,
40 | ST_double *V,
41 | ST_double *VV,
42 | ST_double *X,
43 | ST_double *XX,
44 | ST_double *se,
45 | GT_size *colix,
46 | GT_size N,
47 | GT_size kx,
48 | GT_size kmodel,
49 | gf_regress_vceadj vceadj)
50 | {
51 | GT_size i;
52 | ST_double qc = vceadj(N, kmodel, 0, w);
53 | GT_size kindep = colix[kx];
54 |
55 | if ( kindep < kx ) {
56 | gf_regress_linalg_dsymm_we2colmajor_ix (X, X, V, e, w, colix, N, kindep);
57 | }
58 | else {
59 | gf_regress_linalg_dsymm_we2colmajor (X, X, V, e, w, N, kx);
60 | }
61 |
62 | gf_regress_linalg_dgemm_colmajor (XX, V, VV, kindep, kindep, kindep);
63 | gf_regress_linalg_dgemm_colmajor (VV, XX, V, kindep, kindep, kindep);
64 |
65 | for (i = 0; i < kindep; i++) {
66 | se[i] = sqrt(V[i * kindep + i] * qc);
67 | }
68 | }
69 |
70 | void gf_regress_ols_robust_fwcolmajor(
71 | ST_double *e,
72 | ST_double *w,
73 | ST_double *V,
74 | ST_double *VV,
75 | ST_double *X,
76 | ST_double *XX,
77 | ST_double *se,
78 | GT_size *colix,
79 | GT_size N,
80 | GT_size kx,
81 | GT_size kmodel,
82 | gf_regress_vceadj vceadj)
83 | {
84 | GT_size i;
85 | ST_double qc = vceadj(N, kmodel, 0, w);
86 | GT_size kindep = colix[kx];
87 |
88 | if ( kindep < kx ) {
89 | gf_regress_linalg_dsymm_fwe2colmajor_ix (X, X, V, e, w, colix, N, kindep);
90 | }
91 | else {
92 | gf_regress_linalg_dsymm_fwe2colmajor (X, X, V, e, w, N, kx);
93 | }
94 |
95 | gf_regress_linalg_dgemm_colmajor (XX, V, VV, kindep, kindep, kindep);
96 | gf_regress_linalg_dgemm_colmajor (VV, XX, V, kindep, kindep, kindep);
97 |
98 | for (i = 0; i < kindep; i++) {
99 | se[i] = sqrt(V[i * kindep + i] * qc);
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
/src/plugin/regress/vce/homoskedastic.c:
--------------------------------------------------------------------------------
1 | /**
2 | * @brief Compute homo SE for OLS
3 | *
4 | * @e N length array of error terms
5 | * @V kx by kx matrix with (X' X)^-1
6 | * @se Array where to store SE
7 | * @N Number of obs
8 | * @kx Number of columns in A
9 | * @return Store sqrt(diag(sum(@e^2 / (@N - @kx)) * @V)) in @se
10 | */
11 | void gf_regress_ols_seunw (
12 | ST_double *e,
13 | ST_double *w,
14 | ST_double *V,
15 | ST_double *se,
16 | GT_size *colix,
17 | GT_size N,
18 | GT_size kx,
19 | GT_size kmodel)
20 | {
21 | GT_size i;
22 | ST_double z, *eptr;
23 | GT_size kindep = colix[kx];
24 |
25 | z = 0;
26 | for (eptr = e; eptr < e + N; eptr++) {
27 | z += (*eptr) * (*eptr);
28 | }
29 | z /= ((ST_double) (N - kmodel));
30 |
31 | for (i = 0; i < kindep; i++) {
32 | se[i] = sqrt(V[i * kindep + i] * z);
33 | }
34 | }
35 |
36 | void gf_regress_ols_sew (
37 | ST_double *e,
38 | ST_double *w,
39 | ST_double *V,
40 | ST_double *se,
41 | GT_size *colix,
42 | GT_size N,
43 | GT_size kx,
44 | GT_size kmodel)
45 | {
46 | GT_size i;
47 | ST_double *eptr;
48 | ST_double z = 0;
49 | ST_double *wptr = w;
50 | GT_size kindep = colix[kx];
51 |
52 | for (eptr = e; eptr < e + N; eptr++, wptr++) {
53 | z += (*eptr) * (*eptr) * (*wptr);
54 | }
55 | z /= ((ST_double) (N - kmodel));
56 |
57 | for (i = 0; i < kindep; i++) {
58 | se[i] = sqrt(V[i * kindep + i] * z);
59 | }
60 | }
61 |
62 | void gf_regress_ols_sefw (
63 | ST_double *e,
64 | ST_double *w,
65 | ST_double *V,
66 | ST_double *se,
67 | GT_size *colix,
68 | GT_size N,
69 | GT_size kx,
70 | GT_size kmodel)
71 | {
72 | GT_size i;
73 | ST_double *eptr;
74 | ST_double z = 0;
75 | ST_double Ndbl = 0;
76 | ST_double *wptr = w;
77 | GT_size kindep = colix[kx];
78 |
79 | for (eptr = e; eptr < e + N; eptr++, wptr++) {
80 | z += (*eptr) * (*eptr) * (*wptr);
81 | Ndbl += *wptr;
82 | }
83 | z /= (Ndbl - kmodel);
84 |
85 | for (i = 0; i < kindep; i++) {
86 | se[i] = sqrt(V[i * kindep + i] * z);
87 | }
88 | }
89 |
90 | void gf_regress_ols_copyvcov (
91 | ST_double *V,
92 | ST_double *XX,
93 | GT_size kx,
94 | GT_size *colix)
95 | {
96 | GT_size i, j, kindep = colix[kx];
97 | for (i = 0; i < kindep; i++) {
98 | for (j = 0; j < kindep; j++) {
99 | V[i * kindep + j] = XX[i * kindep + j];
100 | }
101 | }
102 | }
103 |
--------------------------------------------------------------------------------
/src/plugin/regress/vce/vceadj.c:
--------------------------------------------------------------------------------
1 | ST_double gf_regress_vceadj_ols_robust(
2 | GT_size N,
3 | GT_size kmodel,
4 | GT_size J,
5 | ST_double *w)
6 | {
7 | ST_double Ndbl = N;
8 | return(Ndbl / (Ndbl - kmodel));
9 | }
10 |
11 | ST_double gf_regress_vceadj_ols_cluster(
12 | GT_size N,
13 | GT_size kmodel,
14 | GT_size J,
15 | ST_double *w)
16 | {
17 | ST_double Ndbl = N;
18 | ST_double Jdbl = J;
19 | return(((Ndbl - 1) / (Ndbl - kmodel)) * (Jdbl / (Jdbl - 1)));
20 | }
21 |
22 | ST_double gf_regress_vceadj_mle_robust(
23 | GT_size N,
24 | GT_size kmodel,
25 | GT_size J,
26 | ST_double *w)
27 | {
28 | ST_double Ndbl = N;
29 | return(Ndbl / (Ndbl - 1));
30 | }
31 |
32 | ST_double gf_regress_vceadj_mle_cluster(
33 | GT_size N,
34 | GT_size kmodel,
35 | GT_size J,
36 | ST_double *w)
37 | {
38 | ST_double Jdbl = J;
39 | return(Jdbl / (Jdbl - 1));
40 | }
41 |
42 | ST_double gf_regress_vceadj_ols_robust_fw(
43 | GT_size N,
44 | GT_size kmodel,
45 | GT_size J,
46 | ST_double *w)
47 | {
48 | GT_size i;
49 | ST_double Ndbl = 0;
50 | for (i = 0; i < N; i++) {
51 | Ndbl += w[i];
52 | }
53 | return(Ndbl / (Ndbl - kmodel));
54 | }
55 |
56 | ST_double gf_regress_vceadj_ols_cluster_fw(
57 | GT_size N,
58 | GT_size kmodel,
59 | GT_size J,
60 | ST_double *w)
61 | {
62 | GT_size i;
63 | ST_double Ndbl = 0;
64 | ST_double Jdbl = J;
65 | for (i = 0; i < N; i++) {
66 | Ndbl += w[i];
67 | }
68 | return(((Ndbl - 1) / (Ndbl - kmodel)) * (Jdbl / (Jdbl - 1)));
69 | }
70 |
71 | ST_double gf_regress_vceadj_mle_robust_fw(
72 | GT_size N,
73 | GT_size kmodel,
74 | GT_size J,
75 | ST_double *w)
76 | {
77 | GT_size i;
78 | ST_double Ndbl = 0;
79 | for (i = 0; i < N; i++) {
80 | Ndbl += w[i];
81 | }
82 | return(Ndbl / (Ndbl - 1));
83 | }
84 |
85 | ST_double gf_regress_vceadj_mle_cluster_fw(
86 | GT_size N,
87 | GT_size kmodel,
88 | GT_size J,
89 | ST_double *w)
90 | {
91 | ST_double Jdbl = J;
92 | return(Jdbl / (Jdbl - 1));
93 | }
94 |
--------------------------------------------------------------------------------
/src/plugin/spi:
--------------------------------------------------------------------------------
1 | lib/spi-3.0
--------------------------------------------------------------------------------
/src/plugin/stats/gstats.c:
--------------------------------------------------------------------------------
1 | #include "gstats.h"
2 | #include "hdfe.c"
3 | #include "winsor.c"
4 | #include "summarize.c"
5 | #include "transform.c"
6 |
7 | ST_retcode sf_stats (struct StataInfo *st_info, int level, char *fname)
8 | {
9 |
10 | if ( st_info->gstats_code == 1 ) {
11 | return (sf_stats_winsor(st_info, level));
12 | }
13 | else if ( st_info->gstats_code == 2 ) {
14 | return (sf_stats_summarize(st_info, level, fname));
15 | }
16 | else if ( st_info->gstats_code == 3 ) {
17 | return (sf_stats_transform(st_info, level));
18 | }
19 | else if ( st_info->gstats_code == 4 ) {
20 | return (sf_stats_hdfe(st_info, level));
21 | }
22 | else {
23 | sf_errprintf("Unknown gstats code; error in sf_stats.");
24 | return (198);
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src/plugin/stats/gstats.h:
--------------------------------------------------------------------------------
1 | #ifndef GSTATS
2 | #define GSTATS
3 |
4 | ST_retcode sf_stats (struct StataInfo *st_info, int level, char *fname);
5 | ST_retcode sf_stats_winsor (struct StataInfo *st_info, int level);
6 | ST_retcode sf_stats_summarize (struct StataInfo *st_info, int level, char *fname);
7 | ST_retcode sf_stats_summarize_p (struct StataInfo *st_info, int level, char *fname);
8 | ST_retcode sf_stats_summarize_w (struct StataInfo *st_info, int level, char *fname);
9 | ST_retcode sf_stats_transform (struct StataInfo *st_info, int level);
10 | ST_retcode sf_stats_hdfe (struct StataInfo *st_info, int level);
11 |
12 | void sf_stats_hdfe_index (
13 | struct StataInfo *st_info,
14 | GT_size *index_st);
15 |
16 | ST_retcode sf_stats_hdfe_read (
17 | struct StataInfo *st_info,
18 | ST_double *X,
19 | ST_double *w,
20 | void *FE,
21 | GT_size *nj,
22 | GT_size *index_st);
23 |
24 | ST_retcode sf_stats_hdfe_write (
25 | struct StataInfo *st_info,
26 | ST_double *X,
27 | GT_size *nj,
28 | GT_size *index_st);
29 |
30 | ST_retcode sf_stats_hdfe_absorb(
31 | struct GtoolsHash *AbsorbHashes,
32 | GtoolsAlgorithmHDFE AlgorithmHDFE,
33 | ST_double *stats,
34 | GT_size *maps,
35 | GT_size J,
36 | GT_size kabs,
37 | GT_size kx,
38 | GT_size *nj,
39 | GT_size *njptr,
40 | ST_double *xptr,
41 | ST_double *wptr,
42 | ST_double hdfetol,
43 | GT_size benchmark);
44 |
45 | #endif
46 |
--------------------------------------------------------------------------------
/src/stata.toc:
--------------------------------------------------------------------------------
1 | v 1.11.8
2 | d Mauricio Caceres Bravo, mauricio.caceres.bravo@gmail.com
3 | p 'GTOOLS': Faster implementation of common Stata commands for big data
4 |
--------------------------------------------------------------------------------
/src/test/bench_v2/glevelsof:
--------------------------------------------------------------------------------
1 | version| id| N| J| stata| gtools| ratio
2 | 2| 1| 100000| 10| .087| .133| .65413533834586
3 | 2| 2| 100000| 10| .023| .018| 1.2777777777778
4 | 2| 3| 100000| 10| .025| .024| 1.0416666666667
5 | 2| 4| 100000| 10| .024| .024| 1
6 | 2| 5| 100000| 10| .171| .022| 7.7727272727273
7 | 2| 6| 100000| 10| .153| .021| 7.2857142857143
8 | 2| 7| 1000000| 10| .208| .106| 1.9622641509434
9 | 2| 8| 1000000| 10| .174| .103| 1.6893203883495
10 | 2| 9| 1000000| 10| .206| .149| 1.3825503355705
11 | 2| 10| 1000000| 10| .206| .144| 1.4305555555556
12 | 2| 11| 1000000| 10| 2.848| .141| 20.198581560284
13 | 2| 12| 1000000| 10| 3.063| .141| 21.723404255319
14 | 2| 13| 10000000| 10| 2.066| .964| 2.143153526971
15 | 2| 14| 10000000| 10| 1.868| 1.085| 1.7216589861751
16 | 2| 15| 10000000| 10| 2.242| 1.617| 1.3865182436611
17 | 2| 16| 10000000| 10| 2.058| 1.275| 1.6141176470588
18 | 2| 17| 10000000| 10| 52.763| 1.461| 36.114305270363
19 | 2| 18| 10000000| 10| 52.36| 1.387| 37.7505407354
20 |
--------------------------------------------------------------------------------
/src/test/bench_v2/gquantiles_by:
--------------------------------------------------------------------------------
1 | version| id| N| J| stata| gtools| ratio
2 | 2| 1| 100000| 10| .242| .116| 2.0862068965517
3 | 2| 2| 100000| 10| .191| .053| 3.6037735849057
4 | 2| 3| 100000| 10| .164| .057| 2.8771929824561
5 | 2| 4| 100000| 10| .173| .059| 2.9322033898305
6 | 2| 5| 100000| 10| .176| .058| 3.0344827586207
7 | 2| 6| 100000| 10| .195| .061| 3.1967213114754
8 | 2| 7| 100000| 10| .19| .061| 3.1147540983607
9 | 2| 8| 1000000| 10| 2.109| .486| 4.3395061728395
10 | 2| 9| 1000000| 10| 2.118| .516| 4.1046511627907
11 | 2| 10| 1000000| 10| 2.063| .51| 4.0450980392157
12 | 2| 11| 1000000| 10| 2.132| .579| 3.6822107081174
13 | 2| 12| 1000000| 10| 2.207| .521| 4.236084452975
14 | 2| 13| 1000000| 10| 2.412| .543| 4.4419889502762
15 | 2| 14| 1000000| 10| 2.44| .544| 4.4852941176471
16 | 2| 15| 10000000| 10| 28.178| 5.258| 5.3590718904526
17 | 2| 16| 10000000| 10| 28.556| 5.396| 5.2920681986657
18 | 2| 17| 10000000| 10| 28.04| 5.297| 5.2935623938078
19 | 2| 18| 10000000| 10| 29.124| 5.883| 4.9505354411015
20 | 2| 19| 10000000| 10| 30.825| 5.443| 5.6632371853757
21 | 2| 20| 10000000| 10| 32.669| 5.638| 5.7944306491664
22 | 2| 21| 10000000| 10| 31.434| 5.63| 5.5833037300178
23 |
--------------------------------------------------------------------------------
/src/test/bench_v2/gquantiles_pctile:
--------------------------------------------------------------------------------
1 | version| id| N| J| stata| gtools| ratio
2 | 2| 1| 100000| 10| .079| .024| 3.2916666666667
3 | 2| 2| 1000000| 10| .871| .154| 5.6558441558442
4 | 2| 3| 10000000| 10| 14.8| 1.605| 9.2211838006231
5 |
--------------------------------------------------------------------------------
/src/test/bench_v2/gquantiles_xtile:
--------------------------------------------------------------------------------
1 | version| id| N| J| stata| gtools| ratio
2 | 2| 1| 100000| 10| .351| .098| 3.5816326530612
3 | 2| 2| 1000000| 10| 3.566| .288| 12.381944444444
4 | 2| 3| 10000000| 10| 50.886| 2.856| 17.817226890756
5 |
--------------------------------------------------------------------------------
/src/test/bench_v2/gstats_sum:
--------------------------------------------------------------------------------
1 | version| id| N| J| stata| gtools| ratio
2 | 2| 1| 100000| 10| .09| .154| .58441558441558
3 | 2| 2| 1000000| 10| 1.219| .336| 3.6279761904762
4 | 2| 3| 10000000| 10| 19.135| 3.844| 4.9778876170656
5 |
--------------------------------------------------------------------------------
/src/test/bench_v2/gstats_tab:
--------------------------------------------------------------------------------
1 | version| id| N| J| stata| gtools| ratio
2 | 2| 1| 100000| 10| .249| .292| .8527397260274
3 | 2| 2| 100000| 10| .235| .035| 6.7142857142857
4 | 2| 3| 100000| 10| .252| .038| 6.6315789473684
5 | 2| 4| 100000| 10| .245| .039| 6.2820512820513
6 | 2| 5| 100000| 10| .242| .033| 7.3333333333333
7 | 2| 6| 100000| 10| .257| .035| 7.3428571428571
8 | 2| 7| 1000000| 10| 2.254| .224| 10.0625
9 | 2| 8| 1000000| 10| 1.708| .217| 7.8709677419355
10 | 2| 9| 1000000| 10| 2.1| .246| 8.5365853658537
11 | 2| 10| 1000000| 10| 2.008| .259| 7.7528957528958
12 | 2| 11| 1000000| 10| 2.382| .248| 9.6048387096774
13 | 2| 12| 1000000| 10| 2.792| .24| 11.633333333333
14 | 2| 13| 10000000| 10| 28.533| 2.405| 11.864033264033
15 | 2| 14| 10000000| 10| 23.831| 2.252| 10.58214920071
16 | 2| 15| 10000000| 10| 28.836| 2.711| 10.636665437108
17 | 2| 16| 10000000| 10| 27.756| 2.613| 10.622273249139
18 | 2| 17| 10000000| 10| 27.384| 2.492| 10.988764044944
19 | 2| 18| 10000000| 10| 33.357| 2.627| 12.69775409212
20 |
--------------------------------------------------------------------------------
/src/test/bench_v2/gstats_winsor:
--------------------------------------------------------------------------------
1 | version| id| N| J| stata| gtools| ratio
2 | 2| 1| 100000| 10| .164| .027| 6.0740740740741
3 | 2| 2| 1000000| 10| 1.875| .628| 2.9856687898089
4 | 2| 3| 10000000| 10| 22.507| 4.983| 4.5167569737106
5 |
--------------------------------------------------------------------------------
/src/test/bench_v2/gstats_winsor_by:
--------------------------------------------------------------------------------
1 | version| id| N| J| stata| gtools| ratio
2 | 2| 1| 100000| 10| .566| .059| 9.5932203389831
3 | 2| 2| 100000| 10| .547| .033| 16.575757575758
4 | 2| 3| 100000| 10| .479| .034| 14.088235294118
5 | 2| 4| 100000| 10| .517| .04| 12.925
6 | 2| 5| 100000| 10| .516| .034| 15.176470588235
7 | 2| 6| 100000| 10| .664| .037| 17.945945945946
8 | 2| 7| 100000| 10| .689| .041| 16.80487804878
9 | 2| 8| 1000000| 10| 7.242| .29| 24.972413793103
10 | 2| 9| 1000000| 10| 7.772| .343| 22.65889212828
11 | 2| 10| 1000000| 10| 6.87| .313| 21.948881789137
12 | 2| 11| 1000000| 10| 7.721| .389| 19.848329048843
13 | 2| 12| 1000000| 10| 7.561| .44| 17.184090909091
14 | 2| 13| 1000000| 10| 9.694| .353| 27.461756373938
15 | 2| 14| 1000000| 10| 9.235| .346| 26.690751445087
16 | 2| 15| 10000000| 10| 97.966| 2.855| 34.313835376532
17 | 2| 16| 10000000| 10| 106.955| 3.063| 34.918380672543
18 | 2| 17| 10000000| 10| 91.639| 3.014| 30.404445919044
19 | 2| 18| 10000000| 10| 103.061| 3.555| 28.990436005626
20 | 2| 19| 10000000| 10| 97.148| 3.108| 31.2574002574
21 | 2| 20| 10000000| 10| 120.579| 3.317| 36.351823937293
22 | 2| 21| 10000000| 10| 122.325| 3.357| 36.438784629133
23 |
--------------------------------------------------------------------------------
/src/test/bench_v2/material.json:
--------------------------------------------------------------------------------
1 | /home/mauricio/code/stata-gtools/docs/benchmarks/material.json
--------------------------------------------------------------------------------
/src/test/test_benchmarks.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | # import matplotlib.pyplot as plt
5 | import pandas as pd
6 | # import numpy as np
7 | import json
8 |
9 | palette = json.loads(open('bench_v2/material.json').read())
10 | df = pd.read_csv('bench_v2/gisid', delimiter = '|')
11 |
12 | # df['ix'] = np.arange(df.shape[0])
13 | # df[' '] = df[' '].astype('category')
14 |
15 | # int1
16 | # int1 int2
17 | # double1
18 | # double1 double2
19 | # str_short
20 | # str_short str_long
21 | # int1 double1 str_mid
22 |
--------------------------------------------------------------------------------
/src/test/test_pthreads.do:
--------------------------------------------------------------------------------
1 | * Runs gunique on a 20,000,000-row data set with GTOOLS_FORCE_PARALLEL set,
2 | * logging the output to gtools_pthreads_<os>.log in the working directory.
3 | version 13
4 | clear all
5 | set more off
6 | set varabbrev off
7 | set linesize 255
8 | set seed 1729
9 |
10 | * Log-name suffix: the lower-cased OS name, with any Mac mapped to macosx.
11 | local c_os_ = lower("`c(os)'")
12 | if ( ("`c(os)'" == "MacOSX") | (strpos("`c(machine_type)'", "Mac") > 0) ) {
13 | local c_os_ macosx
14 | }
15 | log using gtools_pthreads_`c_os_'.log, name(gtools_pthreads) replace text
16 |
17 | * 1,000 uniform draws, each replicated 20,000 times.
18 | set obs 1000
19 | generate rand = runiform()
20 | expand 20000
21 |
22 | * NOTE(review): presumably makes the plugin take its parallel code path -- confirm.
23 | global GTOOLS_FORCE_PARALLEL 1
24 | gunique rand, b
25 | log close gtools_pthreads
26 |
--------------------------------------------------------------------------------