├── .gitignore ├── img ├── gather.jpg └── spread.jpg ├── tidy.pkg ├── .sublime2Stata.do ├── stata.toc ├── test.do ├── unite.ado ├── license.txt ├── README.md ├── unite.sthlp ├── spread.sthlp ├── gather.sthlp ├── gather.ado └── spread.ado /.gitignore: -------------------------------------------------------------------------------- 1 | .sublime* 2 | data/.sublime* -------------------------------------------------------------------------------- /img/gather.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matthieugomez/tidy/HEAD/img/gather.jpg -------------------------------------------------------------------------------- /img/spread.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matthieugomez/tidy/HEAD/img/spread.jpg -------------------------------------------------------------------------------- /tidy.pkg: -------------------------------------------------------------------------------- 1 | d tidy 2 | d {bf: M. 
Gomez} 3 | d Distribution-Date: 20170710 4 | f gather.ado 5 | f spread.ado 6 | f unite.ado 7 | f gather.sthlp 8 | f spread.sthlp 9 | f unite.sthlp 10 | 11 | -------------------------------------------------------------------------------- /.sublime2Stata.do: -------------------------------------------------------------------------------- 1 | discard 2 | clear all 3 | set obs 100 4 | gen id = _n 5 | gen a = 3 6 | gen b = 4 7 | label variable a "price" 8 | label variable b "quantity" 9 | gather a b, label("label") 10 | spread variable value, label(label) 11 | -------------------------------------------------------------------------------- /stata.toc: -------------------------------------------------------------------------------- 1 | v 3 2 | d Matthieu Gomez, Princeton University (mattg@princeton.edu) 3 | p tidy TIDY - Stata implementation of the R package tidyr 4 | 5 | * l word-to-show path-or-url [description] 6 | * t: Directories within site: 7 | * t path [description] 8 | * Packages: 9 | * p pkgname [description] 10 | -------------------------------------------------------------------------------- /test.do: -------------------------------------------------------------------------------- 1 | discard 2 | clear all 3 | set obs 100 4 | gen id = _n 5 | gen ___1 = 3 6 | gen ___2 = 4 7 | label variable ___1 "price" 8 | label variable ___2 "quantity" 9 | fastreshape long ___, i(id) j(new) 10 | 11 | 12 | gather a b 13 | spread variable value 14 | 15 | 16 | 17 | discard 18 | clear all 19 | set obs 100 20 | gen id = _n 21 | gen a = 3 22 | gen b = 4 23 | label variable a "price" 24 | label variable b "quantity" 25 | gather a b, label("label") 26 | spread variable value, label(label) -------------------------------------------------------------------------------- /unite.ado: --------------------------------------------------------------------------------
*! unite: paste several variables together into one new variable
* Syntax:  unite varlist, gen(newvar) [sep(string)]
*   varlist  variables to concatenate, in the order given; variables that are
*            not strings are converted with string() first
*   gen()    name of the variable to create; it must not exist yet
*   sep()    text inserted between consecutive values, "_" when omitted
program define unite
	version 12.1
	syntax varlist, gen(string) [sep(string)]
	confirm new variable `gen'

	if `"`sep'"' == "" {
		local sep "_"
	}

	* Build a single expression of the form  v1 + "sep" + string(v2) + ...
	* The macro is defined without outer quotes and the separator is wrapped
	* in compound quotes, so no plain double quotes end up nested.
	local script
	local n = 0
	foreach v of local varlist {
		local n = `n' + 1
		if `n' > 1 {
			local script `script' + `"`sep'"' +
		}
		capture confirm string variable `v'
		if _rc {
			local script `script' string(`v')
		}
		else {
			local script `script' `v'
		}
	}
	gen `gen' = `script'
end
-------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Matthieu Gomez 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | stata-tidy 2 | =========== 3 | 4 | This is a basic implementation of the [tidyr package](https://github.com/hadley/tidyr) from R. 5 | ## gather 6 | gather transforms a wide dataset into a long dataset (i.e. reshape long). The command takes a list of variables as argument. This list corresponds to variables to gather. 7 | 8 | Use the option `label` to save the variable labels as a new variable 9 | 10 | ![](img/gather.jpg) 11 | 12 | 13 | ## spread 14 | spread transforms a long dataset into a wide dataset (i.e. reshape wide). The command takes two variable names as argument. The first variable contains the new variable names. The second variable contains the new variable values. 15 | 16 | ![](img/spread.jpg) 17 | 18 | 19 | ## Installation 20 | ``` 21 | net install tidy, from("https://raw.githubusercontent.com/matthieugomez/tidy.ado/master/") 22 | ``` 23 | 24 | If you have a version of Stata < 13, you need to install it manually: 25 | 1. Click the "Download ZIP" button in the right column to download a zipfile. Extract it into a folder (e.g. ~/SOMEFOLDER) 26 | 2. Run: (changing SOMEFOLDER with whatever you picked) 27 | ``` 28 | cap ado uninstall tidy 29 | net install tidy, from("~/SOMEFOLDER") 30 | ``` -------------------------------------------------------------------------------- /unite.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *! 
version 1.0 10jul2017}{...} 3 | {viewerjumpto "Syntax" "unite##syntax"}{...} 4 | {viewerjumpto "Description" "unite##description"}{...} 5 | {viewerjumpto "Options" "unite##options"}{...} 6 | {viewerjumpto "Examples" "unite##examples"}{...} 7 | 8 | {title:Title} 9 | {bf:unite} {hline 2} Paste together multiple variables into one 10 | 11 | {marker syntax}{...} 12 | {title:Syntax} 13 | {p 8 15 2}{cmd:unite} {varlist} {cmd:,} {opt gen(newvar)} {cmd:[}{opt sep(string)}{cmd:]} 14 | 15 | {marker description}{...} 16 | {title:Description} 17 | {pstd} 18 | {cmd:unite} pastes together a list of variables {varlist} into one. Its goal is similar to the homonym function in the R package tidyr. 19 | 20 | {marker options}{...} 21 | {title:Options} 22 | {synoptset 30 tabbed}{...} 23 | {synopthdr} 24 | {synoptline} 25 | {synopt :{opt gen(newvar)}} Name of the new variable {p_end} 26 | {synopt :{opt sep(string)}} Separator between variables. Defaults to _ {p_end} 27 | {synoptline} 28 | {p2colreset}{...} 29 | 30 | {marker examples}{...} 31 | {title:Examples} 32 | {phang2}{cmd:. set obs 100}{p_end} 33 | {phang2}{cmd:. gen var1 = "a"}{p_end} 34 | {phang2}{cmd:. gen var2 = "b"}{p_end} 35 | {phang2}{cmd:. unite var1 var2, gen(var3)}{p_end} 36 | 37 | {marker contact}{...} 38 | {title:Author} 39 | 40 | {phang} 41 | Matthieu Gomez 42 | 43 | {phang} 44 | Department of Economics, Princeton University 45 | 46 | {phang} 47 | Please report issues on Github 48 | {browse "https://github.com/matthieugomez/stata-tidy":https://github.com/matthieugomez/stata-tidy} 49 | {p_end} -------------------------------------------------------------------------------- /spread.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *!
version 1.0 10jul2017}{...} 3 | {vieweralsosee "reshape" "help reshape"}{...} 4 | {viewerjumpto "Syntax" "spread##syntax"}{...} 5 | {viewerjumpto "Description" "spread##description"}{...} 6 | {viewerjumpto "Options" "spread##options"}{...} 7 | {viewerjumpto "Examples" "spread##examples"}{...} 8 | 9 | {title:Title} 10 | {bf:spread} {hline 2} An easier way to reshape wide 11 | 12 | {marker syntax}{...} 13 | {title:Syntax} 14 | {p 8 15 2}{cmd:spread} {it:variable} {it:value} {cmd:, [}{opt label(varname)}{cmd:]} 15 | 16 | {marker description}{...} 17 | {title:Description} 18 | {pstd} 19 | {cmd:spread} spreads a variable value pair across multiple columns. It is a simpler version of reshape wide. Its goal is similar to the homonym function in the R package tidyr. 20 | 21 | {marker options}{...} 22 | {title:Options} 23 | {synoptset 30 tabbed}{...} 24 | {synopthdr} 25 | {synoptline} 26 | {synopt :{opt label(varname)}} uses the string variable varname to construct variable labels for new variables {p_end} 27 | {synoptline} 28 | {p2colreset}{...} 29 | 30 | {marker examples}{...} 31 | {title:Examples} 32 | {phang2}{cmd:. sysuse educ99gdp.dta, clear}{p_end} 33 | {phang2}{cmd:. gather public private}{p_end} 34 | {phang2}{cmd:. spread variable value}{p_end} 35 | 36 | {marker contact}{...} 37 | {title:Author} 38 | 39 | {phang} 40 | Matthieu Gomez 41 | 42 | {phang} 43 | Department of Economics, Princeton University 44 | 45 | {phang} 46 | Please report issues on Github 47 | {browse "https://github.com/matthieugomez/stata-tidy":https://github.com/matthieugomez/stata-tidy} 48 | {p_end} 49 | -------------------------------------------------------------------------------- /gather.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *! 
version 1.0 10jul2017}{...} 3 | {vieweralsosee "reshape" "help reshape"}{...} 4 | {viewerjumpto "Syntax" "gather##syntax"}{...} 5 | {viewerjumpto "Description" "gather##description"}{...} 6 | {viewerjumpto "Options" "gather##options"}{...} 7 | {viewerjumpto "Examples" "gather##examples"}{...} 8 | 9 | {title:Title} 10 | {bf:gather} {hline 2} An easier way to reshape long 11 | 12 | {marker syntax}{...} 13 | {title:Syntax} 14 | {p 8 15 2}{cmd:gather} {varlist} {cmd:, [}{opt variable(newvar)} {opt value(newvar)} {opt label(newvar)}{cmd:]} 15 | 16 | {marker description}{...} 17 | {title:Description} 18 | {pstd} 19 | {cmd:gather} takes a list of variables {varlist} and collapses them into variable-value pairs. It is a simpler version of reshape long. Its goal is similar to the homonym function in the R package tidyr. 20 | 21 | {marker options}{...} 22 | {title:Options} 23 | {synoptset 30 tabbed}{...} 24 | {synopthdr} 25 | {synoptline} 26 | {synopt :{opt variable(newvar)}} name of new variable corresponding to variable names. Defaults to "variable" {p_end} 27 | {synopt :{opt value(newvar)}} name of new variable corresponding to variable values. Defaults to "value" {p_end} 28 | {synopt :{opt label(newvar)}} creates a new variable to store the variable labels of {it:varlist} {p_end} 29 | {synoptline} 30 | {p2colreset}{...} 31 | 32 | {marker examples}{...} 33 | {title:Examples} 34 | {phang2}{cmd:. sysuse educ99gdp.dta, clear}{p_end} 35 | {phang2}{cmd:.
gather public private}{p_end} 36 | 37 | 38 | {marker contact}{...} 39 | {title:Author} 40 | 41 | {phang} 42 | Matthieu Gomez 43 | 44 | {phang} 45 | Department of Economics, Princeton University 46 | 47 | {phang} 48 | Please report issues on Github 49 | {browse "https://github.com/matthieugomez/stata-tidy":https://github.com/matthieugomez/stata-tidy} 50 | {p_end} -------------------------------------------------------------------------------- /gather.ado: --------------------------------------------------------------------------------
*! gather: stack a list of variables into variable/value pairs (reshape long)
* Syntax:  gather varlist [, variable(newvar) value(newvar) label(newvar) fast]
*   varlist     variables to stack; they must be all string or all numeric
*   variable()  new variable receiving the original variable names
*               (named "variable" when the option is omitted)
*   value()     new variable receiving the stacked values
*               (named "value" when the option is omitted)
*   label()     optional new string variable receiving the original variable labels
*   fast        skip the preserve/restore guard around the operation
* All variables outside varlist act as identifiers and must uniquely identify
* the observations. Variables whose names start with ____ must not exist,
* because that prefix is used as the temporary reshape stub.
program define gather
	version 12.1
	syntax varlist[, variable(string) value(string) label(string) fast]

	if ("`fast'" == "") preserve

	if "`variable'" == "" {
		local variable variable
	}
	if "`value'" == "" {
		local value value
	}

	* Exit with a non-zero code (110, "already defined"): a bare -exit- would
	* report success to the caller even though nothing was done.
	cap confirm new variable `variable'
	if _rc {
		di as error "variable `variable' already exists. Change default name of new variable with option variable()"
		exit 110
	}
	cap confirm new variable `value'
	if _rc {
		di as error "variable `value' already exists. Change default name of new variable with option value()"
		exit 110
	}

	qui ds `varlist'
	local varlist `r(varlist)'

	/* check same type */
	local type ""
	foreach v of local varlist {
		cap confirm string var `v'
		local vtype = cond(_rc == 0, "string", "numeric")
		if "`type'" != "" & "`type'" != "`vtype'" {
			di as error "Variables to gather do not have the same type"
			exit 198
		}
		local type "`vtype'"
	}

	* the ____ prefix is reserved for the temporary stub used below
	cap ds ____*
	if _rc == 0 {
		display as error "Please rename variables staring with ____ first"
		exit 4
	}

	* every variable that is not gathered is an identifier
	qui ds `varlist', not
	local ivar `r(varlist)'
	if "`ivar'" ~= "" {
		cap bys `ivar': assert _N == 1
		if _rc {
			display as error "key variables do not uniquely identify the observations"
			exit 4
		}
	}
	else {
		* No identifier left: use the observation number so that i() is unique.
		* A constant would make reshape fail as soon as there are two rows.
		tempvar ivar
		gen long `ivar' = _n
	}

	* move the variables under a common numbered stub, remembering their labels
	local i = 0
	foreach v of local varlist {
		local i = `i' + 1
		local l`i' : variable label `v'
		rename `v' ____`i'
	}

	* Use greshape when it is installed, official reshape otherwise.
	* -fast- is forwarded to greshape only (assumed to accept it, as the
	* original call did); official reshape does not document such an option.
	cap which greshape
	if _rc == 0 {
		local reshape greshape
		local fastopt `fast'
	}
	else {
		local reshape reshape
		local fastopt
	}

	* capture the reshape itself so that the return code tested below is its own
	cap `reshape' long ____, i(`ivar') j(`variable') string `fastopt'
	local rc = _rc
	if `rc' {
		if `rc' == 103 {
			display as error "too many variables specified"
		}
		else {
			display as error "reshape terminated with error"
		}
		* give the variables their names back; this matters when -fast-
		* skipped the preserve that would otherwise undo the renames
		local i = 0
		foreach v of local varlist {
			local i = `i' + 1
			cap rename ____`i' `v'
		}
		exit `rc'
	}

	rename ____ `value'
	tokenize `varlist'
	if "`label'" ~= "" {
		gen `label' = ""
		order `ivar' `variable' `label' `value'
		local i = 0
		foreach name of local varlist {
			local i = `i' + 1
			* compound quotes: a label may itself contain double quotes
			qui replace `label' = `"`l`i''"' if `variable' == "`i'"
		}
	}
	* j() holds the stub suffixes "1", "2", ...: map them back to the names
	local i = 0
	foreach name of local varlist {
		local i = `i' + 1
		qui replace `variable' = "``i''" if `variable' == "`i'"
	}

	if ("`fast'" == "") cap restore, not
end
-------------------------------------------------------------------------------- /spread.ado: --------------------------------------------------------------------------------
*! spread: spread a variable/value pair across several columns (reshape wide)
* Syntax:  spread namevariable valuevariable [, label(varname) fast]
*   namevariable   variable whose levels become the new variable names
*   valuevariable  variable whose values fill the new variables
*   label()        string variable used to label the new variables; when it is
*                  omitted and namevariable has a value label, the decoded
*                  value label is used instead
*   fast           skip the preserve/restore guard around the operation
* All remaining variables act as identifiers of the wide rows.
program define spread
	version 12.1
	syntax varlist, [variable(varname) value(varname) label(string) fast]

	if ("`fast'" == "") preserve

	tokenize `varlist'
	local variable `1'
	local value `2'
	if "`value'" == "" {
		di as error `"The correct syntax is "spread namevariable valuevariable". The valuevariable is missing."'
		exit 4
	}
	qui {

		/* take care of label */
		if "`label'" == "" & "`:value label `variable''" ~= "" {
			tempvar label
			decode `variable', gen(`label')
		}

		/* create variable`i' and label`i' */
		* walk through the sorted data one group at a time and record the
		* level (and its label) found in the first row of each group
		sort `variable'
		tempvar bylength
		bys `variable' : gen double `bylength' = _N
		local start = 1
		local i = 0
		while `start' <= _N {
			local i = `i' + 1
			local end = `start' + `=`bylength'[`start']' - 1
			local variable_levels `"`variable_levels' `"`=`variable'[`start']'"'"'
			if "`label'" ~= "" {
				local label_levels `"`label_levels' `"`=`label'[`start']'"'"'
			}
			local start = `end' + 1
		}
		local n = `i'

		* everything that is neither name, value nor label is an identifier
		ds `variable' `value' `label' `bylength' , not
		local ivar `r(varlist)'
		if "`ivar'" == "" {
			tempvar newivar
			gen `newivar' = _N
			local ivar `newivar'
		}

		cap confirm string variable `variable'
		if _rc {
			local string ""
		}
		else {
			local string string
			tempvar temp
			gen `temp' = !regexm(`variable',"^[a-zA-Z_]*[a-zA-Z\_0-9]*$")
			count if `temp' == 1
			if `r(N)' > 0 {
				levelsof `variable' if `temp' == 1
				display as error `"Some observations for `variable' don't have valid variable names: `=r(levels)'"'
				exit 4
			}
			foreach v in `ivar' {
				local i : list posof "`v'" in variable_levels
				if `i' ~= 0 {
					display as error `"The value `v' for the variable "`variable'" conflicts with existing variables"'
					exit 4
				}
			}
		}

		/* manage when more than 10 id variables */
		local ni `:word count `ivar''
		if `ni' > 10 {
			* One identifier per distinct combination of the id variables:
			* flag the first row of each group, then take the running sum.
			* (Flagging every row would number the rows, not the groups.)
			tempvar id
			bys `ivar': gen long `id' = (_n == 1)
			qui replace `id' = sum(`id')
			local i `id'
		}
		else {
			local i `ivar'
		}

		/* reshape */
		drop `bylength' `label'
		* -fast- is forwarded to greshape only (assumed to accept it);
		* official reshape does not document such an option
		cap which greshape
		if _rc == 0 {
			local reshape greshape
			local fastopt `fast'
		}
		else {
			local reshape reshape
			local fastopt
		}
		qui `reshape' wide `value', i(`i') j(`variable') `string' `fastopt'

		/* check all new variable names are valid new variable name */
		if "`string'" == "" {
			local change "no"
		}
		else {
			forval i = 1/`n' {
				local v : word `i' of `variable_levels'
				cap confirm new variable `v'
				if _rc {
					local change "no"
				}
			}
		}

		* drop the value prefix from the new names when every bare name is free,
		* and attach the recorded labels
		forval i = 1/`n' {
			local v : word `i' of `variable_levels'
			if "`change'" != "no" {
				rename `value'`v' `v'
				local names `names' `v'
				if "`label'" ~= "" {
					local l : word `i' of `label_levels'
					label variable `v' `"`l'"'
				}
			}
			else {
				local names `names' `value'`v'
				if "`label'" ~= "" {
					local l : word `i' of `label_levels'
					label variable `value'`v' `"`l'"'
				}
			}
		}
		* -noisily- is required: this line sits inside the quietly block
		noisily di as result "new variables created: " as text "`=subinstr("`names'", " ", ", ", .)'"
	}

	if ("`fast'" == "") cap restore, not
end
--------------------------------------------------------------------------------