├── .gitignore ├── numplusone ├── numplusone.sthlp ├── stata.toc ├── numplusone.pkg └── numplusone.ado ├── docs ├── educationdata.zip ├── stata.toc ├── educationdata.pkg └── educationdata.ado ├── educationdata ├── stata.toc └── educationdata.pkg ├── license.txt ├── helper-programs ├── stata_code_example.csv ├── api-to-sthlp.py ├── sthlp_table.txt └── api-to-stata-examples.py ├── awards-by-major-example.do ├── README.md └── changelog.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store -------------------------------------------------------------------------------- /numplusone/numplusone.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {right:version 0.0.1} 3 | -------------------------------------------------------------------------------- /docs/educationdata.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UrbanInstitute/education-data-package-stata/HEAD/docs/educationdata.zip -------------------------------------------------------------------------------- /numplusone/stata.toc: -------------------------------------------------------------------------------- 1 | v 0.0.1 2 | d 'NUMPLUSONE': a function that adds one number to an input number and prints 3 | d the result. 
4 | p numplusone -------------------------------------------------------------------------------- /docs/stata.toc: -------------------------------------------------------------------------------- 1 | v 0.4.3 2 | d 'EDUCATIONDATA': The package for accessing the Education Data Portal https://educationdata.urban.org/ 3 | p educationdata -------------------------------------------------------------------------------- /educationdata/stata.toc: -------------------------------------------------------------------------------- 1 | v 0.4.3 2 | d 'EDUCATIONDATA': The package for accessing the Education Data Portal https://educationdata.urban.org/ 3 | p educationdata -------------------------------------------------------------------------------- /numplusone/numplusone.pkg: -------------------------------------------------------------------------------- 1 | d 'NUMPLUSONE': a function that adds one number to an input number and prints {break} 2 | d the result. 3 | d 4 | d Distribution-Date: 20180305 5 | d 6 | F numplusone.ado 7 | F numplusone.sthlp -------------------------------------------------------------------------------- /docs/educationdata.pkg: -------------------------------------------------------------------------------- 1 | d 'EDUCATIONDATA': The package for accessing the Education Data Portal 2 | d https://educationdata.urban.org/ 3 | d 4 | d Distribution-Date: 20220926 5 | d 6 | F educationdata.ado 7 | F educationdata.sthlp -------------------------------------------------------------------------------- /educationdata/educationdata.pkg: -------------------------------------------------------------------------------- 1 | d 'EDUCATIONDATA': The package for accessing the Education Data Portal 2 | d https://educationdata.urban.org/ 3 | d 4 | d Distribution-Date: 20220926 5 | d 6 | F educationdata.ado 7 | F educationdata.sthlp -------------------------------------------------------------------------------- /numplusone/numplusone.ado: 
-------------------------------------------------------------------------------- 1 | /*** DO NOT EDIT THIS LINE ----------------------------------------------------- 2 | Version: 0.0.1 3 | Title: numplusone 4 | Description: input a number, it will add one to it and print the output 5 | [Education Data Package](http://www.github.com/UI-Research/education-data-package-stata) website 6 | ----------------------------------------------------- DO NOT EDIT THIS LINE ***/ 7 | 8 | prog define numplusone 9 | 10 | args num 11 | 12 | display `num' + 1 13 | 14 | end 15 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 The Urban Institute 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /helper-programs/stata_code_example.csv: -------------------------------------------------------------------------------- 1 | endpoint_id,stata_code 2 | 1,"educationdata using \""college ipeds directory\"", sub(year=2003)" 3 | 2,"educationdata using \""college ipeds institutional-characteristics\"", sub(year=1990)" 4 | 3,"educationdata using \""college ipeds admissions-enrollment\"", sub(year=2001)" 5 | 4,"educationdata using \""college ipeds admissions-requirements\"", sub(year=2016)" 6 | 5,"educationdata using \""college ipeds academic-charges-professional\"", sub(year=2006)" 7 | 6,"educationdata using \""college ipeds academic-charges-general\"", sub(year=2009)" 8 | 7,"educationdata using \""college ipeds program-year-charges\"", sub(year=2001)" 9 | 8,"educationdata using \""college ipeds program-year-charges-cip\"", sub(year=2013)" 10 | 9,"educationdata using \""college ipeds enrollment-full-time-equivalent\"", sub(year=2004 level_of_study=2)" 11 | 10,"educationdata using \""college ipeds fall-enrollment\"", sub(year=2000 level_of_study=undergraduate)" 12 | 11,"educationdata using \""college ipeds fall-enrollment race\"", sub(year=2007 level_of_study=graduate)" 13 | 12,"educationdata using \""college ipeds fall-enrollment sex\"", sub(year=2001 level_of_study=undergraduate)" 14 | 13,"educationdata using \""college ipeds fall-enrollment race sex\"", sub(year=2015 level_of_study=undergraduate)" 15 | 14,"educationdata using \""college ipeds fall-enrollment age\"", sub(year=2011 level_of_study=graduate)" 16 | 15,"educationdata using \""college ipeds fall-enrollment age sex\"", sub(year=2008 level_of_study=undergraduate)" 17 | 16,"educationdata using \""college ipeds fall-retention\"", sub(year=2003)" 18 | 17,"educationdata using \""college ipeds student-faculty-ratio\"", sub(year=2014)" 19 | 18,"educationdata using \""college ipeds student-financial-aid\"", sub(year=2002 
level_of_study=1)" 20 | 19,"educationdata using \""college ipeds grad-rates-200pct\"", sub(year=2007)" 21 | 20,"educationdata using \""college ipeds grad-rates\"", sub(year=2002)" 22 | 21,"educationdata using \""college ipeds grad-rates-pell\"", sub(year=2015)" 23 | 22,"educationdata using \""college ipeds outcome-measures\"", sub(year=2015)" 24 | 23,"educationdata using \""college ipeds completions-cip\"", sub(year=2009)" 25 | 24,"educationdata using \""school ccd directory\"", sub(year=1988)" 26 | 25,"educationdata using \""school ccd enrollment\"", sub(year=2014 grade=8)" 27 | 26,"educationdata using \""school ccd enrollment race\"", sub(year=2013 grade=3)" 28 | 27,"educationdata using \""school ccd enrollment sex\"", sub(year=2012 grade=5)" 29 | 28,"educationdata using \""school ccd enrollment race sex\"", sub(year=2007 grade=6)" 30 | 29,"educationdata using \""district ccd finance\"", sub(year=2013)" 31 | 30,"educationdata using \""district saipe\"", sub(year=2008)" 32 | 31,"educationdata using \""college ipeds completers\"", sub(year=2012)" 33 | 32,"educationdata using \""college ipeds enrollment-headcount\"", sub(year=2011 level_of_study=1)" 34 | -------------------------------------------------------------------------------- /awards-by-major-example.do: -------------------------------------------------------------------------------- 1 | *Use Urban Institute's Education Data Portal to pull number of graduates by race*gender in a specified major*college, 1994-2016 2 | *Source: IPEDS awards by 6-digit CIP code (Documentation: https://educationdata.urban.org/documentation/colleges.html#ipeds-awards-by-6-digit-cip-code) 3 | 4 | *Install the educationdata Stata package if you do not already have it installed 5 | *More information and troubleshooting is available here: https://github.com/UrbanInstitute/education-data-package-stata 6 | ssc install libjson 7 | net install educationdata, replace from("https://urbaninstitute.github.io/education-data-package-stata/") 
8 | 9 | *Choose a college, degree level, and major 10 | global college 243744 // College: example is Stanford (look up "IPEDS ID" at https://nces.ed.gov/collegenavigator) 11 | global level 7 // Level of degree: example is Bachelor's (see description of award_level at https://educationdata.urban.org/documentation/colleges.html#ipeds-awards-by-6-digit-cip-code for more) 12 | global major 450601 // Major: example is "Economics, general" (see https://nces.ed.gov/ipeds/cipcode/Default.aspx?y=56 for others) 13 | 14 | educationdata using "college ipeds completions-cip-6", sub(unitid=$college award_level=$level cipcode_6digit=$major year=1994:2016) clear // this gets degrees in chosen major by race and sex 15 | save temp, replace 16 | educationdata using "college ipeds completions-cip-6", sub(unitid=$college award_level=$level cipcode_6digit=99 year=1994:2016) clear // this gets all degrees by race and sex 17 | append using temp 18 | save awards, replace 19 | erase temp.dta 20 | 21 | *Build a chart showing a race*gender group as a percent of all students vs. 
of students in the specified major over time (modeled on chart in Grondin and Queiroz 2020: https://www.stanforddaily.com/2020/02/10/me-al-why-are-there-so-few-black-and-latinx-students-in-our-econ-classes/) 22 | 23 | use awards, clear 24 | drop if majornum==2 // drop data on second majors (only available 2000 onward) 25 | drop unitid fips award_level majornum // drop extraneous variables 26 | replace year=year+1 // define year as spring (when graduation generally occurs) instead of fall 27 | lab var year "Academic year (spring)" 28 | 29 | *Select race*gender group of interest 30 | global sex 2 // example is female (1=Male 2=Female) 31 | global race 2 // example is Black (1=White 2=Black 3=Hispanic 4=Asian 5=American Indian or Alaska Native 6=Native Hawaiian or other Pacific Islander 7=Two or more races 8=Nonresident alien 9=Unknown 99=Total) 32 | 33 | gen group=1 if sex==$sex & race==$race 34 | replace group=2 if sex==99 & race==99 35 | drop if group==. 36 | 37 | drop sex race 38 | reshape wide awards, i(year cipcode_6digit) j(group) 39 | gen percent = awards1/awards2 40 | drop awards1 awards2 41 | 42 | reshape wide percent, i(year) j(cipcode_6digit) 43 | lab var percent99 "Share of all students" 44 | lab var percent450601 "Share of economics majors" // Change variable label if you select a difference major 45 | 46 | twoway connected percent99 year || connected percent450601 year, ytitle("Percent who are Black Women") // Change axis title if you select a different race*gender group 47 | -------------------------------------------------------------------------------- /helper-programs/api-to-sthlp.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import re 4 | from operator import itemgetter 5 | 6 | # Python program to convert API endpoint to Stata help document 7 | url = 'https://educationdata-stg.urban.org/api/v1/api-endpoints/' 8 | res = requests.get(url).text 9 | data = json.loads(res) 10 
| keep_str = [] 11 | 12 | # Temporarily hide endpoints 13 | hide = ["college-university ipeds academic-charges-professional","college-university ipeds academic-charges-general","college-university ipeds program-year-charges","college-university ipeds program-year-charges-cip","college-university ipeds student-financial-aid"] 14 | 15 | # Translate from long name to short name 16 | translate = {"college-university":"college", "schools":"school", "school-districts":"district"} 17 | 18 | # Break description up for Stata to read 19 | def break_desc(t1, t2): 20 | max_len = 80 21 | lineone = len(t1) 22 | temp_str = "" 23 | tsplit = re.sub(' +', ' ', t2) 24 | tsplit = tsplit.split() 25 | cur_line = " " + t1 26 | for t in tsplit: 27 | test_str = cur_line + " " + t 28 | if len(test_str) > max_len: 29 | temp_str = temp_str + cur_line[1:] + '\n' 30 | cur_line = " " + t 31 | else: 32 | cur_line = cur_line + " " + t 33 | temp_str = temp_str + cur_line[1:] + '\n' 34 | return temp_str.replace(t1 + " ", '') 35 | 36 | for i in data["results"]: 37 | res_str = "" 38 | ep = i["endpoint_url"].replace('/api/v1/','').split('/') 39 | for j in ep: 40 | if len(j) > 0: 41 | if j[0] != '{': 42 | res_str = res_str + " " + j 43 | res_str = res_str[1:] 44 | if i["description"] == "": i["description"] = "No description at this time." 
45 | i["description"] = break_desc(res_str,i["description"]) 46 | rsplit = res_str.split() 47 | term = translate[rsplit[0]] 48 | for j in range(1,len(rsplit)): 49 | term += " " + rsplit[j] 50 | if res_str not in hide and i["hide"] == 0: 51 | keep_str.append(['{bf:"' + term + '"}: ' + i["description"] + '\n'.replace('\n\n\n','\n\n').replace('\n\n\n','\n\n'),res_str.split()[0],int(i["order"])]) 52 | 53 | # To do - sort and convert to string, and convert long names to short 54 | keep_str = sorted(keep_str, key=itemgetter(1,2)) 55 | prev_cat = "" 56 | perm_str = "" 57 | sover = 0 58 | dsname = {"college":"colleges","district":"school-districts","school":"schools"} 59 | for i in range(len(keep_str)): 60 | if keep_str[i][1] != prev_cat: 61 | sover = 0 62 | prev_cat = keep_str[i][1] 63 | if sover == 0: 64 | perm_str += '{bf:' + translate[keep_str[i][1]].title() + '} - {browse "https://educationdata.urban.org/documentation/' + dsname[translate[keep_str[i][1]]] + '.html":Read Complete Documentation}\n\n' 65 | sover = 1 66 | perm_str += keep_str[i][0] 67 | 68 | # replace url from HTML to STATA helper function language 69 | urls = {"https://developers.arcgis.com/rest/geocode/api-reference/geocoding-service-output.htm": "here", 70 | "https://www.urban.org/research/publication/ipeds-finance-user-guide": "here", 71 | "http://www.doe.virginia.gov/statistics_reports/index.shtml": "website"} 72 | 73 | for url in urls: 74 | url_html = fr'{urls[url]}' 75 | url_sthlp = f'{{browse "{url}":{urls[url]}}}' 76 | perm_str = re.sub(url_html, url_sthlp, perm_str) 77 | 78 | perm_str = re.sub("–", "–", perm_str) 79 | 80 | if ('href' in perm_str) or ("–" in perm_str): 81 | raise ValueError('Please fix the HTML URLs in the text.') 82 | 83 | with open('sthlp_table.txt', 'w') as f: 84 | f.write(perm_str) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Education Data 
Portal - Package for Stata 2 | 3 | Welcome to the [Education Data Portal](https://educationdata.urban.org) Stata Package repository. You'll need an internet connection to install and use the package. 4 | 5 | **NOTE**: By downloading and using this programming package, you agree to abide by the [Data Policy and Terms of Use of the Education Data Portal](https://educationdata.urban.org/documentation/#terms). 6 | 7 | ### Quickstart Example 8 | 9 | [This example](https://github.com/UrbanInstitute/education-data-package-stata/blob/master/awards-by-major-example.do) walks you through installing the package and using the Education Data Portal to pull data for a specific college and major by race and gender for all years. 10 | 11 | ### Install or Update the Package 12 | 13 | *Before you install the package, run the following:* 14 | 15 | ```stata 16 | ssc install libjson 17 | ``` 18 | 19 | Then, the following command will install, or if the package is already installed, update the education data package. 20 | 21 | ```stata 22 | ssc install educationdata, replace 23 | ``` 24 | 25 | If you are having trouble, first try running the following: 26 | 27 | ```stata 28 | adoupdate 29 | ``` 30 | 31 | Note that it can sometimes take a small amount of time for the package to be updated on SSC, as it involves a manual step to submit, per the maintainer's instructions. To get the latest version from Github, you can run: 32 | 33 | ```stata 34 | net install educationdata, replace from("https://urbaninstitute.github.io/education-data-package-stata/") 35 | ``` 36 | 37 | If you're still having trouble with this command, uninstall any existing installations using the instructions below, close and re-open Stata, then re-run the commands above. 
38 | 39 | #### Error r(672) - Server refused to send file OR similar errors in installation 40 | 41 | If you run into the error `server refused to send file` or similar errors, your firewall may be blocking Stata's ability to download content, or Stata may have a conflict with other programs installed on your operating system or in your organization. Note that this may also block your ability to download data via an API, so this may not solve all concerns, but in most tested cases, this method should solve the problem. First, download the zip file of the package, by copying the following to your browser: 42 | 43 | ``` 44 | https://urbaninstitute.github.io/education-data-package-stata/educationdata.zip 45 | ``` 46 | 47 | Unzip the file, and do one of two things: 48 | 49 | 1) Type `. personal` in Stata, and note the location of your personal ado directory, e.g., `C:\ado\personal\` 50 | 2) Move all the extracted files from the zip file to that personal directory 51 | 52 | OR 53 | 54 | 1) Note the filepath of the extracted files, e.g., `D:/Users/Username/Downloads/educationdata/` 55 | 2) Type `net install educationdata, force from("D:/Users/Username/Downloads/educationdata/")` in Stata, replacing the path in the `from()` command with the filepath you noted in step 1 56 | 57 | ### Testing the package 58 | 59 | First, ensure you are connected to the internet. 
Then run one of the simpler examples, such as getting the metadata for a dataset: 60 | 61 | ```stata 62 | educationdata using "college ipeds directory", meta 63 | ``` 64 | 65 | Next, try downloading a subset of the dataset: 66 | 67 | ``` 68 | educationdata using "college ipeds directory", sub(year=2011 fips=12) 69 | ``` 70 | 71 | Use the `help` command to read about all of the commands and dataset options: 72 | 73 | ``` 74 | help educationdata 75 | ``` 76 | 77 | ### Uninstall the Package 78 | 79 | ```stata 80 | ado uninstall educationdata 81 | ``` 82 | 83 | If you receive the error `criterion matches more than one package` and you're running Stata 14 or newer, run the following: 84 | 85 | ```stata 86 | adoupdate 87 | ado uninstall educationdata 88 | ``` 89 | 90 | ### Changes 91 | 92 | To view the changes made to the package over time, see the [changelog](https://github.com/UrbanInstitute/education-data-package-stata/blob/master/changelog.md). 93 | 94 | ### License 95 | 96 | This software is licensed under the [MIT License](https://github.com/UrbanInstitute/education-data-package-stata/blob/master/license.txt). 
97 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog for educationdata Stata Package 2 | 3 | #### 0.4.3 (2022-09-26) 4 | 5 | - Update message when using `csv` option to say: "Note that this function **temporarily** writes data to the current working directory" 6 | 7 | #### 0.4.2 (2022-08-02) 8 | 9 | - Report package version 10 | - Add example using multiple filters in summary endpoints to documentation 11 | - Fix error from aggregating by only year in summary endpoints 12 | - Update package to incorporate new endpoints and add new years to existing endpoints from latest release 13 | 14 | #### 0.4.1 (2021-04-20) 15 | 16 | - Update package to incorporate new endpoints and add new years to existing endpoints from latest release 17 | - Fix value labeling issue by adding replace option 18 | 19 | #### 0.4.0 20 | 21 | - Allow multiple grouping variables in summary endpoints 22 | - Fix missing value labels in summary endpoints 23 | - Add error handling in summary endpoints 24 | 25 | #### 0.3.9 26 | 27 | - Update package to incorporate new endpoints from latest release 28 | - Add summaries option to allow access to the new summary endpoint functionality 29 | 30 | #### 0.3.8 31 | 32 | - Fix precision issue for long integers by converting data type to double 33 | 34 | #### 0.3.7 35 | 36 | - Update package to incorporate IPEDS endpoint updates from latest release 37 | 38 | #### 0.3.6 39 | 40 | - Updated package help to include new endpoints from latest release 41 | - Fix widened formats for float variables by adding 'nofix' option when labelling values 42 | - Fix display issue for very long integers by converting data type from long to float 43 | 44 | #### 0.3.5 45 | 46 | - Update package help to include new endpoints from latest release 47 | 48 | #### 0.3.4 49 | 50 | - Fix error caused by lowercase fix introduced in 0.3.1 that causes 
columns to only show missing data if they had uppercase characters, instead of showing the actual data values 51 | 52 | #### 0.3.3 53 | 54 | - Updated package help to include new endpoints from latest release 55 | 56 | #### 0.3.2 57 | 58 | - Fix variable labels and value labels that displayed HTML codes 59 | 60 | #### 0.3.1 61 | 62 | - Fix flaw where Stata reads in variables automatically as lowercase, conflicting with metadata. Converts all variables automatically to full lowercase 63 | 64 | #### 0.3.0 65 | 66 | - Add additional parser for grade_edfacts for 7/31/2019 release, requiring major code update 67 | - Manually update valid grades for CCD and EDFacts 68 | 69 | #### 0.2.6 70 | 71 | - Fix bug that added redundant "mode=stata" additions to URL 72 | - Change the data options to force all commands to lowercase 73 | 74 | #### 0.2.5 75 | 76 | - Updated package help to include new endpoints from latest release 77 | - Updated educationdata command to allow debug option for error reporting 78 | 79 | #### 0.2.4 80 | 81 | - Remove instructions to quote strings in subset, clarify the no space requirement between = in documentation. 
82 | 83 | #### 0.2.3 84 | 85 | - Add multiple year examples, move examples to the top of the documentation, per user feedback 86 | 87 | #### 0.2.2 88 | 89 | - Add option to allow users to clear cache, for recent data updates when the technical team has not yet cleared the cache on the server 90 | 91 | #### 0.2.1 92 | 93 | - More clear instructions in the documentation and program for the csv option, specifically setting the working directory 94 | - Clear documentation on additional resources available in the Stata help file 95 | - Harmonize errors for invalid option selection so they are consistent, display correct list of options on error to help user 96 | - Fix error subsetting grade options for csv 97 | - Add mode=stata argument to all URLs to track usage of API from Stata 98 | 99 | #### 0.2.0 100 | 101 | - Add csv option to allow users of larger extracts to download from the CSV 102 | - Improve error messages, fix error message errors for fields in which the user types invalid values 103 | - Clear up documentation to add description of clear, csv options 104 | 105 | #### 0.1.8 106 | 107 | - Fix year parsing error in api-endpoints, that changed normal dash to ndash, to correctly parse years 108 | 109 | #### 0.1.7 110 | 111 | - Fix year parsing error in api-endpoints, that changed mdash to normal dash, to correctly parse years 112 | 113 | #### 0.1.6 114 | 115 | - Small wording improvements to error messages 116 | - Add staging option to give advanced users the option to test against the staging server, which may contain errors, bugs, or issues (do not use for normal purposes) 117 | 118 | #### 0.1.5 119 | 120 | - Functional change to underlying code to make it easy to change the base URL for launch 121 | - Correct allowed values for grade and level_of_study in help file 122 | - Add error messages when a list of multiple filtered values is not valid 123 | - Update float values to double, so that short decimals are stored efficiently, accurately and are easier to 
view 124 | 125 | #### 0.1.4 126 | 127 | - Adds option to specify pre-k and k grades as "-1" and "0", as specified in the documentation, adds this to Stata help 128 | - Fixes error in time estimate that produced blanks if the first API call had no records 129 | - Adds the ability to get the variable names, labels, and values only via "meta" or "metadata" option 130 | 131 | #### 0.1.3 132 | 133 | - Adds totals example to documentation, clarifies values for grade and level_of_study, and explains the use of 99 for totals only queries 134 | - Fixes year parsing error that did not correctly give all years of data for endpoints that skip years, so all data should now be returned for these calls 135 | - More helpful error message telling the user the specific option value that caused the error so they can correct it 136 | - Literal strings showing up as "" in Stata datasets should now all be blank 137 | 138 | #### 0.1.2 139 | 140 | - Fixes to documentation to ensure line-wraps are included 141 | - Fixes to documentation to include working FIPS example 142 | - Removes debugging statement from function that printed out the URL of each endpoint 143 | 144 | #### 0.1.1 145 | 146 | - Update documentation to reflect that you do not need to use quotes around the `subset()` and `columns()` arguments 147 | - Adds functionality that checks whether libjson is installed and installs it on first command if it is not 148 | - Adds a "clear" option to the command to clear your current data in memory before adding additional data 149 | - Properly formats variables whose names are not the same as their format names, such as Yes/No variables and Grade Offered 150 | - Fixes a bug that didn't allow grade selection for grades 1-12 and total (99), now short grade=1 selection should work properly 151 | 152 | #### 0.1.0 153 | 154 | - Skips validity checks for variable names if the endpoint returns no formatted variables, so no error is thrown that stops the program 155 | - Removes API endpoints 
that are hidden because they are not yet available, from the Stata help file 156 | - Fixes an error thrown when the length of the variable value definition is longer than 32 characters 157 | - Removes the Stata "clear" command and returns an error if the user already has a dataset in memory 158 | - Adds compression to the dataset after it is fully downloaded 159 | - Modifies time estimate language to be more sensible English 160 | - Adds a col() option to only keep certain variables once downloaded 161 | - Uses hard coded "short names" for college-university, school-districts, and schools 162 | - Orders the datasets in the Stata help file in the order specified by the API 163 | - Throws a helpful error when the user does not select a valid dataset name 164 | - Labels no longer have spaces before and after hyphens 165 | - Add feature that validates required options, and allows for shortened grade numbers (e.g., pk OR grade-pk) for grade inputs 166 | 167 | #### 0.0.3 168 | 169 | - Fixes the error that stopped the program when an API call returns no results 170 | 171 | #### 0.0.2 172 | 173 | - Adding formats "-1", "-2", and "-3" to all numeric variables without a format value 174 | - Providing a more useful time range estimate, given Stata can only measure the time when the program starts 175 | - Printing more detailed progress to the console for the user when the data are downloading 176 | 177 | #### 0.0.1 178 | 179 | Hello world! 180 | -------------------------------------------------------------------------------- /helper-programs/sthlp_table.txt: -------------------------------------------------------------------------------- 1 | {bf:College} - {browse "https://educationdata.urban.org/documentation/colleges.html":Read Complete Documentation} 2 | 3 | {bf:"college ipeds directory"}: This file contains directory information for 4 | every institution in the IPEDS universe. 
This includes name, address, city, 5 | state, zip code, and various URL links to the institution's home page, 6 | admissions, financial aid offices, and the net price calculator. Identifies 7 | institutions as currently active and institutions that participate in Title IV 8 | federal financial aid programs for which IPEDS is mandatory. It also includes 9 | variables derived from the Institutional Characteristics survey, such as 10 | control and level of institution, highest level and highest degree offered and 11 | Carnegie classifications. 12 | 13 | {bf:"college ipeds institutional-characteristics"}: This endpoint contains 14 | data on program and award level offerings, control and affiliation of 15 | institution. It also contains information on special learning opportunities, 16 | student services, disability services, tuition plans and athletic conference 17 | associations. Services and programs for service members and veterans are also 18 | included. 19 | 20 | {bf:"college ipeds admissions-enrollment"}: This endpoint contains data on 21 | applicants and admissions by sex and enrollees by sex and full-time/part-time 22 | status. These data are limited to undergraduate first-time, degree- or 23 | certificate-seeking students. 24 | 25 | {bf:"college ipeds admissions-requirements"}: This endpoint contains data on 26 | admissions considerations for the undergraduate selection process. SAT and ACT 27 | test scores are included for institutions that require test scores for 28 | admission. These data are applicable for institutions that do not have an open 29 | admissions policy for entering first-time students. Writing scores for both SAT 30 | and ACT are no longer collected. The possible values for the admission 31 | consideration variables have changed from prior data. "Do not know" is no 32 | longer an option and "considered but not required" was added. 
33 | 34 | {bf:"college ipeds academic-year-tuition"}: This endpoint contains data on 35 | tuition and fees for institutions that offer primarily academic programs. 36 | 37 | {bf:"college ipeds academic-year-tuition-prof-program"}: This endpoint 38 | contains data on tuition and fees for professional degree programs at 39 | institutions that offer primarily academic programs. 40 | 41 | {bf:"college ipeds academic-year-room-board-other"}: This endpoint contains 42 | data on room, board, and other expenses for institutions that offer primarily 43 | academic programs. 44 | 45 | {bf:"college ipeds program-year-tuition-cip"}: This endpoint contains data 46 | on tuition fees for the largest programs at institutions that offer primarily 47 | occupational programs. 48 | 49 | {bf:"college ipeds program-year-room-board-other"}: This endpoint contains 50 | data on room, board, and other expenses for institutions that offer primarily 51 | occupational programs. 52 | 53 | {bf:"college ipeds enrollment-full-time-equivalent"}: This endpoint contains 54 | data on instructional activity measured in total credit and/or contact hours 55 | delivered by institutions during a 12-month period. The credit hour and contact 56 | hour activity data are used to derive 12-month full-time equivalent (FTE) 57 | enrollments for both undergraduate and graduate levels. The graduate level does 58 | not include credit hours for doctoral professional practice students. 59 | Institutions can choose to accept the derived FTE or report their own FTE. Both 60 | reported and estimated/derived FTE are available in this data table for 2003 61 | and later. In addition, the reported FTE of doctoral professional practice 62 | students are also included. These data are only available at the undergraduate, 63 | graduate, and first professional level. 
64 | 65 | {bf:"college ipeds fall-enrollment race sex"}: This endpoint contains the 66 | number of students enrolled in the fall by race, sex, full-time/part-time 67 | status, and level of study. For undergraduates, this is further broken down by 68 | degree-seeking/non–degree-seeking, and degree-seeking is broken down by 69 | class level. Institutions with traditional academic year calendar systems 70 | (semester, quarter, trimester, or 4-1-4) report their enrollment as of October 71 | 15 or the official fall reporting date of the institution. Institutions with 72 | calendar systems that differ by program or allow continuous enrollment report 73 | students who are enrolled at any time between August 1 and October 31. 74 | Available levels of study are undergraduate, graduate, first-professional 75 | (through 2008 only), and postbaccalaureate (through 1998 only). 76 | 77 | {bf:"college ipeds fall-enrollment age sex"}: This endpoint contains the 78 | number of students enrolled in the fall by age categories, sex, 79 | full-time/part-time status, and level of study. Institutions with traditional 80 | academic year calendar systems (semester, quarter, trimester, or 4-1-4) report 81 | their enrollment as of October 15 or the official fall reporting date of the 82 | institution. Institutions with calendar systems that differ by program or allow 83 | continuous enrollment report students who are enrolled at any time between 84 | August 1 and October 31. Submission of enrollment by age categories is optional 85 | in even-numbered years. Available levels of study are undergraduate, graduate, 86 | and first-professional (through 2008 only); in 2000, only undergraduate data 87 | are available. 88 | 89 | {bf:"college ipeds fall-enrollment residence"}: This endpoint contains the 90 | number of first-time freshmen by state of residence, along with data on the 91 | number who graduated from high school the previous year. 
Institutions with 92 | traditional academic year calendar systems (i.e., semester, quarter, trimester, 93 | or 4-1-4) report their enrollment as of October 15 or the institution's 94 | official fall reporting date. Institutions with calendar systems that differ by 95 | program or allow continuous enrollment report students that are enrolled at any 96 | time between August 1 and October 31. Submission of enrollment of first-time 97 | undergraduate students by residency is mandatory in even-numbered years and 98 | optional in odd-numbered years. 99 | 100 | {bf:"college ipeds enrollment-headcount"}: This endpoint contains the 101 | unduplicated head count of students enrolled over a 12-month period for both 102 | undergraduate and graduate levels. These enrollment data are particularly 103 | valuable for institutions that use non-traditional calendar systems and offer 104 | short-term programs. Because this enrollment measure encompasses an entire 105 | year, it provides a more complete picture of the number of students these 106 | schools serve. Counts are available by level of study, sex, and race/ethnicity. 107 | 108 | {bf:"college ipeds fall-retention"}: The first-year retention rate data 109 | measures the percentage of first-year students who persisted in or completed 110 | their educational program a year later. This is provided for full-time and 111 | part-time students. 112 | 113 | {bf:"college ipeds finance"}: This endpoint contains institutional finance 114 | data, including institutional revenue by source, scholarships, expenditures by 115 | functional and natural classifications, endowments, assets and liabilities, and 116 | pensions. A user guide to these data can be found {browse "https://www.urban.org/research/publication/ipeds-finance-user-guide":here}. 
117 | 118 | {bf:"college ipeds student-faculty-ratio"}: Student-to-faculty ratio is 119 | defined as total FTE students not in graduate or professional programs divided 120 | by total FTE instructional staff not teaching in graduate or professional 121 | programs. All data on this file is applicable only to institutions with 122 | undergraduate students. 123 | 124 | {bf:"college ipeds sfa-grants-and-net-price"}: This endpoint contains data 125 | on net price, grant amounts, and total students receiving grant aid for 126 | first-time, full-time degree-seeking students receiving Title IV aid or any 127 | grant aid. 128 | 129 | {bf:"college ipeds sfa-by-living-arrangement"}: This endpoint contains data 130 | on total first-time, full-time degree-seeking students receiving Title IV aid 131 | or any grant aid, by living arrangement. 132 | 133 | {bf:"college ipeds sfa-by-tuition-type"}: This endpoint contains data on 134 | total first-time, full-time degree-seeking students paying in-district, 135 | in-state, or out-of-state tuition. 136 | 137 | {bf:"college ipeds sfa-all-undergraduates"}: This endpoint contains data on 138 | total undergraduate students receiving different types of aid. 139 | 140 | {bf:"college ipeds sfa-ftft"}: This endpoint contains data on total 141 | first-time, full-time degree-seeking students receiving different types of aid. 142 | 143 | {bf:"college ipeds grad-rates"}: This endpoint contains the graduation rate 144 | status as of August 31, at the end of the academic year, for the cohort of 145 | full-time, first-time degree- or certificate-seeking undergraduates, by race 146 | and sex. 
Data for four-year institutions include the number of bachelor's 147 | degree–seeking students who enrolled six academic years earlier, the 148 | number of bachelor's degree–seeking students who completed any degree or 149 | certificate within 150 percent of normal time, the number who completed a 150 | bachelor's degree within 100, 125 or 150 percent of normal time, and the number 151 | of bachelor's degree–seeking students who transferred out. Data for 152 | students seeking a degree or certificate other than a bachelor's degree are 153 | also included for four-year institutions. Data for two-year institutions 154 | include the number of full-time, first-time students who enrolled three 155 | academic years earlier, the number of students who completed any degree or 156 | certificate within 100 or 150 percent of normal time and the number of students 157 | who transferred out. The number of students who completed a degree or 158 | certificate within 100 percent of normal time is not available by race and sex. 159 | 160 | {bf:"college ipeds grad-rates-200pct"}: This endpoint contains the 161 | graduation rate status as of August 31, at the end of the academic year, for 162 | the cohort of full-time, first-time degree- or certificate-seeking 163 | undergraduates. Data for four-year institutions include the number of 164 | bachelor's degree–seeking students who enrolled eight academic years 165 | earlier, the number of bachelor's degree–seeking students who completed a 166 | bachelor's degree within 100, 150 or 200 percent of normal time. Data for less 167 | than four-year institutions include the number of full-time, first-time 168 | students who enrolled four academic years earlier, the number of students who 169 | completed any degree or certificate within 100, 150, or 200 percent of normal 170 | time.
171 | 172 | {bf:"college ipeds grad-rates-pell"}: This endpoint contains the graduation 173 | rate status as of August 31, at the end of the academic year, for three 174 | subcohorts of full-time, first-time degree- or certificate-seeking 175 | undergraduates. The three subcohorts are students who received a Pell grant, 176 | students who received a subsidized Stafford loan and did not receive a Pell 177 | grant, and students who did not receive either a Pell grant or Stafford loan. 178 | In four-year institutions each of the subcohorts will include the number of 179 | bachelor's degree–seeking students who enrolled six academic years 180 | earlier, the number of bachelor's degree–seeking students who completed 181 | any degree or certificate within 150 percent of normal time, and the number who 182 | completed a bachelor's degree within 150 percent of normal time. Data for 183 | students seeking a degree or certificate other than a bachelor's degree are 184 | also included for four-year institutions. Data for two-year and less-than 185 | two-year institutions include the number of full-time, first-time students who 186 | enrolled three academic years earlier and the number of students who completed 187 | any degree or certificate within 150 percent of normal time. 188 | 189 | {bf:"college ipeds outcome-measures"}: This endpoint contains award and 190 | enrollment data from degree-granting institutions on undergraduate cohorts that 191 | entered an institution eight academic years ago. The two years of data 192 | currently available have slightly different definitions for cohorts and 193 | information available. In 2015, there is information on four cohorts collected 194 | at two points in time: six academic years and eight academic years after entry. 195 | The four cohorts of degree- or certificate-seeking undergraduates are 196 | full-time, first-time entering; part-time, first-time entering; full-time, 197 | non-first-time entering; and part-time, non-first-time entering.
In 2016, there 198 | is information on eight cohorts collected at three points in time: four 199 | academic years, six academic years, and eight academic years after entry. The 200 | eight cohorts are the four cohorts described for 2015, but for each, they 201 | further disaggregate by Pell recipients and non-Pell recipients. Additionally, 202 | in 2016, they collect more detailed information about the types of awards 203 | completed, such as a certificate, Associate's degree, or Bachelor's degree. 204 | Finally, the cohort definitions between the two years change. In 2015, the 205 | completion rate at 6 and 8 years after entry uses the adjusted cohort from the 206 | corresponding year after entry as the denominator, whereas in 2016, one 207 | adjusted cohort is used as the denominator in all rates, regardless of years 208 | after entry. 209 | 210 | {bf:"college ipeds completers"}: This endpoint contains the number of 211 | students who completed any degree or certificate by race and ethnicity and 212 | gender. 213 | 214 | {bf:"college ipeds completions-cip-2"}: This endpoint contains the number of 215 | awards by type of program, level of award (certificate or degree), first or 216 | second major, and by race and ethnicity and gender. Type of program is 217 | categorized according to the 2-digit Classification of Instructional Programs 218 | (CIP), a detailed coding system for postsecondary instructional programs, which 219 | groups the 6-digit CIPs into their families. There are some exceptions, such as 220 | law and medical fields, which were reported as 6-digits by the Integrated 221 | Postsecondary Education Data System. IPEDS reported data at the 2-digit CIP 222 | level until 2001; after that, the 6-digit CIP data were collapsed to the 223 | 2-digit level. 
224 | 225 | {bf:"college ipeds completions-cip-6"}: This endpoint contains the number of 226 | awards by type of program, level of award (certificate or degree), first or 227 | second major, and by race and ethnicity and gender. Type of program is 228 | categorized according to the 6-digit Classification of Instructional Programs 229 | (CIP), a detailed coding system for postsecondary instructional programs, which 230 | changes over time. 231 | 232 | {bf:"college ipeds academic-libraries"}: This endpoint contains information 233 | on the academic institution's electronic and physical library, collections, 234 | expenditures, and services. These data are available only for degree-granting 235 | institutions, and expenditure data are available only for institutions with 236 | total expenditures above $100,000. 237 | 238 | {bf:"college ipeds salaries-instructional-staff"}: This endpoint contains 239 | the number of staff, total salary outlays and average salaries of full-time, 240 | nonmedical, instructional staff by academic rank, contract length, and sex. 241 | 242 | {bf:"college ipeds salaries-noninstructional-staff"}: This endpoint contains 243 | the number and salary outlays for full-time, nonmedical, noninstructional staff 244 | by occupational category. 245 | 246 | {bf:"college scorecard institutional-characteristics"}: This endpoint 247 | contains institutional characteristics for each college or university, 248 | primarily including flags for minority-serving institutions. To avoid 249 | duplication, we exclude data that College Scorecard sourced from IPEDS. For 250 | these data, you can see the other Institutional Characteristics and Directory 251 | endpoints. Notably, most of the information contained in this endpoint can be 252 | found in 2016 and aside from identification and year, only predominant degree 253 | awarded is available in other years. 
254 | 255 | {bf:"college scorecard student-characteristics aid-applicants"}: This 256 | endpoint contains detailed data on student aid applicants in each institution, 257 | including income level, dependency status, the number of colleges to which students sent Free 258 | Application for Federal Student Aid (FAFSA) forms, and other student 259 | demographics. These data are produced for rolling two-year pooled entry cohorts 260 | by the National Student Loan Data System (e.g., the 1997 data represent 261 | information from two cohorts, assessment year 1996–97 and assessment year 262 | 1997–98). 263 | 264 | {bf:"college scorecard student-characteristics home-neighborhood"}: This 265 | endpoint contains detailed demographic information on cohorts of students, 266 | based on when they enroll in college. These data describe the population 267 | residing in the students' home zip codes, including information about race, 268 | education level, nationality, poverty status, household income, and employment 269 | status. The US Treasury Department calculated these data elements using census 270 | data for two-year pooled cohorts at each institution (e.g., the 2005 file 271 | includes the 2004–05 and 2005–06 earnings cohorts). Home zip codes 272 | are determined using information from when the student first applied for 273 | financial aid. 274 | 275 | {bf:"college scorecard earnings"}: This endpoint contains information on 276 | earnings for former students, by their pooled entry cohort and institution. 277 | This information may be available 6, 7, 8, 9, and 10 years after the pooled 278 | cohort entered college, but availability varies by cohort.
For example, the 279 | assessment year (AY) 1996–97 and AY 1997–98 pooled cohort has 280 | earnings data available 6, 7, 8, 9, and 10 years after entry, but the AY 281 | 2001–02 and AY 2002–03 pooled cohort only has earnings data 10 years 282 | after entry and the AY 2003–04 and AY 2004–05 pooled cohort only 283 | has earnings data 8 years after entry. 284 | 285 | {bf:"college scorecard default"}: This endpoint contains information on the 286 | default rates by cohort for two or three years after students entered 287 | repayment. The two-year default rate is available from 1996 to 2012, and the 288 | three-year default rate is available starting in 2011. The cohorts are those 289 | that entered repayment two or three years before the year of measurement. For 290 | example, those who entered repayment between October 1, 2012, and September 30, 291 | 2013 (which we identify as cohort 2012, but corresponds with fiscal year 2013), 292 | have their three-year default rate measured as of September 30, 2015 (year 293 | 2015). 294 | 295 | {bf:"college scorecard repayment"}: This endpoint contains detailed 296 | repayment data by pooled cohort and institution. These data are also available 297 | by subgroup, including dependency status, Pell recipiency, first-generation 298 | status, sex, and income level. These repayment rates are measured one, three, 299 | five, and seven years after the cohort enters repayment. For example, the 300 | one-year repayment rates for fiscal year (FY) 2008 and FY 2009 cohorts 301 | (cohort_year 2008) are measured in FY 2009 and FY 2010 (year 2009), 302 | respectively. 303 | 304 | {bf:"college nhgis census-2010"}: This endpoint contains geographic 305 | variables corresponding to 2010 Census geographies for each IPEDS institution. 306 | Geographies are merged on by latitude and longitude when available; when 307 | unavailable, latitudes and longitudes were obtained from address information 308 | using Urban's geocoder.
The geocoder uses StreetMap Premium from Esri to 309 | perform accurate offline geocoding. Geocode accuracy variables indicate the 310 | degree of precision of this geocoding. Additional information on the match 311 | accuracy can be found {browse "https://developers.arcgis.com/rest/geocode/api-reference/geocoding-service-output.htm":here}. 312 | Geographies for older years of data or low-accuracy geocode matches should be 313 | used with caution. 314 | 315 | {bf:"college nhgis census-2000"}: This endpoint contains geographic 316 | variables corresponding to 2000 Census geographies for each IPEDS institution. 317 | Geographies are merged on by latitude and longitude when available; when 318 | unavailable, latitudes and longitudes were obtained from address information 319 | using Urban's geocoder. The geocoder uses StreetMap Premium from Esri to 320 | perform accurate offline geocoding. Geocode accuracy variables indicate the 321 | degree of precision of this geocoding. Additional information on the match 322 | accuracy can be found {browse "https://developers.arcgis.com/rest/geocode/api-reference/geocoding-service-output.htm":here}. 323 | Geographies for older years of data or low-accuracy geocode matches should be 324 | used with caution. 325 | 326 | {bf:"college nhgis census-1990"}: This endpoint contains geographic 327 | variables corresponding to 1990 Census geographies for each IPEDS institution. 328 | Geographies are merged on by latitude and longitude when available; when 329 | unavailable, latitudes and longitudes were obtained from address information 330 | using Urban's geocoder. The geocoder uses StreetMap Premium from Esri to 331 | perform accurate offline geocoding. Geocode accuracy variables indicate the 332 | degree of precision of this geocoding. Additional information on the match 333 | accuracy can be found {browse "https://developers.arcgis.com/rest/geocode/api-reference/geocoding-service-output.htm":here}. 
334 | Geographies for older years of data or low-accuracy geocode matches should be 335 | used with caution. 336 | 337 | {bf:"college fsa financial-responsibility"}: This endpoint contains 338 | financial responsibility composite scores for for-profit and nonprofit 339 | institutions. These scores gauge an institution's financial health and help the 340 | Department of Education assess an institution's financial responsibility 341 | compliance. The scores are based on a primary reserve ratio, an equity ratio, 342 | and a net income ratio. They are collected at the OPEID level, but we report 343 | them at the UNITID level in order to facilitate merging with IPEDS data. 344 | 345 | {bf:"college fsa grants"}: This endpoint contains data on Title IV grant 346 | volume and total recipients by institution and year. These data are collected 347 | by the Office of Federal Student Aid at the OPEID level, and we have provided 348 | them at the unit ID level. Where these differ, amounts have been allocated to 349 | each institution based on full-time equivalent students. To avoid 350 | double-counting, users should use the _unitid variables; _opeid variables are 351 | the original amounts and will lead to double-counting unless only one record 352 | per OPEID year is kept. 353 | 354 | {bf:"college fsa loans"}: This endpoint contains data on Title IV loan 355 | volume and total recipients by institution and year. These data are collected 356 | by the Office of Federal Student Aid at the OPEID level, and we have provided 357 | them at the unit ID level. Where these differ, amounts have been allocated to 358 | each institution based on full-time equivalent students. To avoid 359 | double-counting, users should use the _unitid variables; _opeid variables are 360 | the original amounts and will lead to double-counting unless only one record 361 | per OPEID year is kept.
362 | 363 | {bf:"college fsa campus-based-volume"}: This endpoint contains data on Title 364 | IV campus-based programs volume and total recipients by institution and year. 365 | These data are collected by the Office of Federal Student Aid at the OPEID 366 | level, and we have provided them at the unit ID level. Where these differ, 367 | amounts have been allocated to each institution based on full-time equivalent 368 | students. To avoid double-counting, users should use the _unitid variables; 369 | _opeid variables are the original amounts and will lead to double-counting 370 | unless only one record per OPEID year is kept. 371 | 372 | {bf:"college fsa 90-10-revenue-percentages"}: This endpoint contains the 373 | amount and percentage of each proprietary institution's revenues from Title IV 374 | sources and non–Title IV sources as provided by the institution in its 375 | audited financial statements. 376 | 377 | {bf:"college nacubo endowments"}: This endpoint contains data on annual 378 | endowments. 379 | 380 | {bf:"college nccs 990-forms"}: This endpoint contains data from 990 tax 381 | forms filed annually by nonprofit organizations. 382 | 383 | {bf:District} - {browse "https://educationdata.urban.org/documentation/school-districts.html":Read Complete Documentation} 384 | 385 | {bf:"district ccd directory"}: This endpoint contains school district (local 386 | education agency identification)-level geographic and mailing information, 387 | agency type, highest and lowest grades offered, special education students and 388 | English language learners, and full-time equivalent teachers and other staff. 389 | 390 | {bf:"district ccd enrollment"}: This endpoint contains student membership data 391 | for each school district by grade. 392 | 393 | {bf:"district ccd enrollment race"}: This endpoint contains student membership 394 | data for each school district by grade and race. 
395 | 396 | {bf:"district ccd enrollment sex"}: This endpoint contains student membership 397 | data for each school district by grade and sex. 398 | 399 | {bf:"district ccd enrollment race sex"}: This endpoint contains student 400 | membership data for each school district by grade, race, and sex. 401 | 402 | {bf:"district ccd finance"}: This endpoint contains district level finance data 403 | including revenues from federal, state, and local governments and expenditures. 404 | 405 | {bf:"district saipe"}: This endpoint contains district level data on the size 406 | of the population, the size of the school age population, and the size of the 407 | school age population that is in poverty. 408 | 409 | {bf:"district edfacts assessments"}: This endpoint contains district-level 410 | achievement results for state assessments in mathematics and reading or 411 | language arts, by grade. It includes the number of students who completed each 412 | assessment for whom a proficiency level was assigned and the proficiency share. 413 | The proficiency share is reported as a range, unless there are more than 300 414 | students in the subgroup, with the magnitude of the range decreasing as the 415 | number of students reported increases. States can change their statewide 416 | assessments, academic standards, or thresholds for proficiency levels, leading 417 | to changes in the proficiency share from year to year. The proficiency shares 418 | for Virginia's 2016–17 grade 5–8 math assessments are too low. 419 | Users should instead refer to the Virginia Department of Education's Statistics 420 | and Reports {browse "http://www.doe.virginia.gov/statistics_reports/index.shtml":website} 421 | for accurate data. 422 | 423 | {bf:"district edfacts assessments race"}: This endpoint contains district-level 424 | achievement results for state assessments in mathematics and reading or 425 | language arts, by grade and race or ethnicity. 
It includes the number of 426 | students who completed each assessment for whom a proficiency level was 427 | assigned and the proficiency share. The proficiency share is reported as a 428 | range, unless there are more than 300 students in the subgroup, with the 429 | magnitude of the range decreasing as the number of students reported increases. 430 | States can change their statewide assessments, academic standards, or 431 | thresholds for proficiency levels, leading to changes in the proficiency share 432 | from year to year. The proficiency shares for 433 | Virginia's 2016–17 grade 5–8 math assessments are too low. 434 | Users should instead refer to the Virginia Department 435 | of Education's Statistics and Reports {browse "http://www.doe.virginia.gov/statistics_reports/index.shtml":website} 436 | for accurate data. 437 | 438 | {bf:"district edfacts assessments sex"}: This endpoint contains district-level 439 | achievement results for state assessments in mathematics and reading or 440 | language arts, by grade and sex. It includes the number of students who 441 | completed each assessment for whom a proficiency level was assigned and the 442 | proficiency share. The proficiency share is reported as a range, unless there 443 | are more than 300 students in the subgroup, with the magnitude of the range 444 | decreasing as the number of students reported increases. States can change 445 | their statewide assessments, academic standards, or thresholds for proficiency 446 | levels, leading to changes in the proficiency share from year to year. The 447 | proficiency shares for Virginia's 2016–17 grade 5–8 math 448 | assessments are too low. Users should instead refer to the Virginia Department 449 | of Education's Statistics and Reports {browse "http://www.doe.virginia.gov/statistics_reports/index.shtml":website} 450 | for accurate data.
452 | 453 | {bf:"district edfacts assessments special-populations"}: This endpoint contains 454 | district-level achievement results for state assessments in mathematics and 455 | reading or language arts, by grade and special population subgroups. It 456 | includes the number of students who completed each assessment for whom a 457 | proficiency level was assigned and the proficiency share. The proficiency share 458 | is reported as a range, unless there are more than 300 students in the 459 | subgroup, with the magnitude of the range decreasing as the number of students 460 | reported increases. Special population subgroups include children with one or 461 | more disabilities, economically disadvantaged students, students who are 462 | homeless, migrant students, and students with limited English proficiency. 463 | Beginning in 2017, special population subgroups also include students who are 464 | in foster care and students who are military connected. States can change their 465 | statewide assessments, academic standards, or thresholds for proficiency 466 | levels, leading to changes in the proficiency share from year to year. The 467 | proficiency shares for Virginia's 2016–17 grade 5–8 math 468 | assessments are too low. Users should instead refer to the Virginia Department 469 | of Education's Statistics and Reports {browse "http://www.doe.virginia.gov/statistics_reports/index.shtml":website} 470 | for accurate data. 471 | 472 | {bf:"district edfacts grad-rates"}: This endpoint contains district-level 473 | adjusted cohort graduation rates. The graduation rate is reported as a range, 474 | with the magnitude of the range decreasing as the number of students reported 475 | increases. Graduation rates are provided by race and special populations. 
476 | 477 | {bf:School} - {browse "https://educationdata.urban.org/documentation/schools.html":Read Complete Documentation} 478 | 479 | {bf:"school ccd directory"}: This endpoint contains school-level information on 480 | location, mailing addresses, school types, highest and lowest grades offered, 481 | and free and reduced-price lunch. This endpoint also contains the school-level 482 | data on the number of full-time equivalent teachers. 483 | 484 | {bf:"school ccd enrollment"}: This endpoint contains student membership data for each 485 | school by grade. Only operational schools serving one or more grades are 486 | required to report membership and only these are included in this endpoint. 487 | 488 | {bf:"school ccd enrollment race"}: This endpoint contains student membership data for 489 | each school by grade and race. Only operational schools serving one or more 490 | grades are required to report membership and only these are included in this 491 | endpoint. 492 | 493 | {bf:"school ccd enrollment sex"}: This endpoint contains student membership data for 494 | each school by grade and sex. Only operational schools serving one or more 495 | grades are required to report membership and only these are included in this 496 | endpoint. 497 | 498 | {bf:"school ccd enrollment race sex"}: This endpoint contains student membership data 499 | for each school by grade, race, and sex. Only operational schools serving one 500 | or more grades are required to report membership and only these are included in 501 | this endpoint. 502 | 503 | {bf:"school crdc directory"}: This endpoint contains school-level geographic 504 | information, grades offered, and information on school type (including charter, 505 | magnet, and alternative schools). 506 | 507 | {bf:"school crdc enrollment race sex"}: This endpoint contains student enrollment for 508 | each school by students' race and sex.
509 | 510 | {bf:"school crdc enrollment disability sex"}: This endpoint contains student 511 | enrollment for each school by students' disability status and sex. 512 | 513 | {bf:"school crdc enrollment lep sex"}: This endpoint contains student enrollment for 514 | each school by students' Limited English Proficiency status and sex. 515 | 516 | {bf:"school crdc discipline-instances"}: This endpoint contains the number of 517 | discipline instances in schools. 518 | 519 | {bf:"school crdc discipline disability sex"}: This endpoint contains student 520 | discipline information for each school, including suspensions, expulsions, 521 | arrests, referrals, and corporal punishment by students' disability status and sex. 522 | 523 | {bf:"school crdc discipline disability race sex"}: This endpoint contains student 524 | discipline information for each school, including suspensions, expulsions, 525 | arrests, referrals, and corporal punishment by students' disability status, race, and 526 | sex. 527 | 528 | {bf:"school crdc discipline disability lep sex"}: This endpoint contains student 529 | discipline information for each school, including suspensions, expulsions, 530 | arrests, referrals, and corporal punishment by students' disability status, limited English 531 | proficiency status, and sex. 532 | 533 | {bf:"school crdc harassment-or-bullying allegations"}: This endpoint contains the 534 | number of allegations of harassment or bullying on the basis of sex; on the 535 | basis of race, color, or national origin; or on the basis of disability. This 536 | only includes students in grades K-12 and comparable ungraded levels. 537 | 538 | {bf:"school crdc harassment-or-bullying race sex"}: This endpoint contains the number 539 | of students who reported being harassed or bullied and the number of students 540 | who were disciplined for harassment or bullying, by students' race and sex.
541 | These reports and disciplines could be on the basis of sex; on the basis of 542 | race, color, or national origin; or on the basis of disability. This includes 543 | only students in grades K–12 and comparable ungraded levels. 544 | 545 | {bf:"school crdc harassment-or-bullying disability sex"}: This endpoint contains the 546 | number of students who reported being harassed or bullied and the number of 547 | students who were disciplined for harassment or bullying, by students' 548 | disability status and sex. These reports and disciplines could be on the basis 549 | of sex, on the basis of race, color, or national origin, or on the basis of 550 | disability. This only includes students in grades K-12 and comparable ungraded 551 | levels. 552 | 553 | {bf:"school crdc harassment-or-bullying lep sex"}: This endpoint contains the number 554 | of students who reported being harassed or bullied and the number of students 555 | who were disciplined for harassment or bullying, by students' limited English 556 | proficiency status and sex. These reports and disciplines could be on the basis 557 | of sex, on the basis of race, color, or national origin, or on the basis of 558 | disability. This only includes students in grades K-12 and comparable ungraded 559 | levels. 560 | 561 | {bf:"school crdc chronic-absenteeism race sex"}: This endpoint contains the number of 562 | students who were chronically absent, by race and sex. Chronic absenteeism is 563 | defined as being absent 15 or more school days during the school year. A 564 | student is absent if he or she is not physically on school grounds and is not 565 | participating in instruction or instruction-related activities at an approved 566 | off-grounds location for the school day. Chronically absent students include 567 | students who are absent for any reason (e.g., illness, suspension, the need to 568 | care for a family member), regardless of whether absences are excused or 569 | unexcused. 
570 | 571 | {bf:"school crdc chronic-absenteeism disability sex"}: This endpoint contains the 572 | number of students who were chronically absent, by disability status and sex. 573 | Chronic absenteeism is defined as being absent 15 or more school days during 574 | the school year. A student is absent if he or she is not physically on school 575 | grounds and is not participating in instruction or instruction-related 576 | activities at an approved off-grounds location for the school day. Chronically 577 | absent students include students who are absent for any reason (e.g., illness, 578 | suspension, the need to care for a family member), regardless of whether 579 | absences are excused or unexcused. 580 | 581 | {bf:"school crdc chronic-absenteeism lep sex"}: This endpoint contains the number of 582 | students who were chronically absent, by limited English proficiency status and 583 | sex. Chronic absenteeism is defined as being absent 15 or more school days 584 | during the school year. A student is absent if he or she is not physically on 585 | school grounds and is not participating in instruction or instruction-related 586 | activities at an approved off-grounds location for the school day. Chronically 587 | absent students include students who are absent for any reason (e.g., illness, 588 | suspension, the need to care for a family member), regardless of whether 589 | absences are excused or unexcused. 590 | 591 | {bf:"school crdc restraint-and-seclusion instances"}: This endpoint contains the 592 | number of instances of restraint or seclusion, by student's disability status. 593 | This includes only students in grades K–12 and comparable ungraded 594 | levels. 595 | 596 | {bf:"school crdc restraint-and-seclusion disability sex"}: This endpoint contains the 597 | number of students who were subjected to restraint or seclusion, by disability 598 | status and sex. This includes only students in grades K–12 and comparable 599 | ungraded levels. 
600 | 601 | {bf:"school crdc restraint-and-seclusion disability race sex"}: This endpoint contains 602 | the number of students who were subjected to restraint or seclusion, by 603 | disability status, race, and sex. This includes only students in grades 604 | K–12 and comparable ungraded levels. 605 | 606 | {bf:"school crdc restraint-and-seclusion disability lep sex"}: This endpoint contains 607 | the number of students who were subjected to restraint or seclusion, by 608 | disability status, limited English proficiency status, and sex. This includes 609 | only students in grades K–12 and comparable ungraded levels. 610 | 611 | {bf:"school crdc ap-ib-enrollment race sex"}: This endpoint contains the number of 612 | students enrolled in Advanced Placement (AP) courses, the International 613 | Baccalaureate (IB) Diploma Programme, and gifted and talented (GT) programs, by 614 | race and sex. 615 | 616 | {bf:"school crdc ap-ib-enrollment disability sex"}: This endpoint contains the number 617 | of students enrolled in Advanced Placement (AP) courses, the International 618 | Baccalaureate (IB) Diploma Programme, and gifted and talented (GT) programs, by 619 | disability and sex. 620 | 621 | {bf:"school crdc ap-ib-enrollment lep sex"}: This endpoint contains the number of 622 | students enrolled in Advanced Placement (AP) courses, the International 623 | Baccalaureate (IB) Diploma Programme, and gifted and talented (GT) programs, by 624 | limited English proficiency status and sex. 625 | 626 | {bf:"school crdc ap-exams race sex"}: This endpoint contains the number of students 627 | taking AP exams, and the number of students passing AP exams, by students' race 628 | and sex. 629 | 630 | {bf:"school crdc ap-exams disability sex"}: This endpoint contains the number of 631 | students taking AP exams, and the number of students passing AP exams, by 632 | students' disability status and sex. 
633 | 634 | {bf:"school crdc ap-exams lep sex"}: This endpoint contains the number of students 635 | taking AP exams, and the number of students passing AP exams, by students' 636 | Limited English Proficiency status and sex. 637 | 638 | {bf:"school crdc sat-act-participation race sex"}: This endpoint contains the number 639 | of students taking the SAT or ACT, by race and sex. 640 | 641 | {bf:"school crdc sat-act-participation disability sex"}: This endpoint contains the 642 | number of students taking the SAT or ACT, by disability status and sex. 643 | 644 | {bf:"school crdc sat-act-participation lep sex"}: This endpoint contains the number of 645 | students taking the SAT or ACT, by limited English proficiency status and sex. 646 | 647 | {bf:"school crdc teachers-staff"}: This endpoint contains data on the number of FTE 648 | teachers and staff at each school. 649 | 650 | {bf:"school crdc math-and-science race sex"}: This endpoint contains data on 651 | enrollment in Biology, Chemistry, Advanced Math, Calculus, Algebra II, Physics, 652 | and Geometry courses by race and sex. 653 | 654 | {bf:"school crdc math-and-science disability sex"}: This endpoint contains data on 655 | enrollment in Biology, Chemistry, Advanced Math, Calculus, Algebra II, Physics, 656 | and Geometry courses by disability and sex. 657 | 658 | {bf:"school crdc math-and-science lep sex"}: This endpoint contains data on enrollment 659 | in Biology, Chemistry, Advanced Math, Calculus, Algebra II, Physics, and 660 | Geometry courses by limited English proficiency status and sex. 661 | 662 | {bf:"school crdc algebra1 race sex"}: This endpoint contains data on the number of 663 | students enrolled in and passing Algebra I by race and sex. 664 | 665 | {bf:"school crdc algebra1 disability sex"}: This endpoint contains data on the number 666 | of students enrolled in and passing Algebra I by disability status and sex. 
667 | 668 | {bf:"school crdc algebra1 lep sex"}: This endpoint contains data on the number of 669 | students enrolled in and passing Algebra I by Limited English Proficiency 670 | status and sex. 671 | 672 | {bf:"school crdc offenses"}: This endpoint contains data on the number of criminal 673 | incidents in schools. 674 | 675 | {bf:"school crdc dual-enrollment race sex"}: This endpoint contains data that 676 | indicates whether the school has any students enrolled in a dual 677 | enrollment/dual credit program by race and sex. 678 | 679 | {bf:"school crdc dual-enrollment disability sex"}: This endpoint contains data that 680 | indicates whether the school has any students enrolled in a dual 681 | enrollment/dual credit program by disability and sex. 682 | 683 | {bf:"school crdc dual-enrollment lep sex"}: This endpoint contains data that indicates 684 | whether the school has any students enrolled in a dual enrollment/dual credit 685 | program by limited English proficiency status and sex. 686 | 687 | {bf:"school crdc credit-recovery"}: This endpoint contains data on student enrollment 688 | in credit recovery. 689 | 690 | {bf:"school crdc suspensions-days race sex"}: This endpoint contains the number of 691 | days students missed due to suspensions by race and sex. 692 | 693 | {bf:"school crdc suspensions-days disability sex"}: This endpoint contains the number 694 | of days students missed due to suspensions by disability and sex. 695 | 696 | {bf:"school crdc suspensions-days lep sex"}: This endpoint contains the number of days 697 | students missed due to suspensions by limited English proficiency status and 698 | sex. 699 | 700 | {bf:"school crdc offerings"}: This endpoint contains details on the number and types 701 | of classes offered in schools. 702 | 703 | {bf:"school crdc school-finance"}: This endpoint contains school finance data. 
704 | 705 | {bf:"school crdc retention race sex"}: This endpoint contains data on the number of 706 | students retained in a school. 707 | 708 | {bf:"school crdc retention disability sex"}: This endpoint contains data on the number 709 | of students retained in a school. 710 | 711 | {bf:"school crdc retention lep sex"}: This endpoint contains data on the number of 712 | students retained in a school. 713 | 714 | {bf:"school edfacts assessments"}: This endpoint contains school-level achievement 715 | results for state assessments in mathematics and reading or language arts, by 716 | grade. It includes the number of students who completed each assessment for 717 | whom a proficiency level was assigned and the proficiency share. The 718 | proficiency share is reported as a range, unless there are more than 300 719 | students in the subgroup, with the magnitude of the range decreasing as the 720 | number of students reported increases. States can change their statewide 721 | assessments, academic standards, or thresholds for proficiency levels, leading 722 | to changes in the proficiency share from year to year. The proficiency shares 723 | for Virginia's 2016–17 grade 5–8 math assessments are too low. 724 | Users should instead refer to the Virginia Department of Education's Statistics 725 | and Reports {browse "http://www.doe.virginia.gov/statistics_reports/index.shtml":website} 726 | for accurate data. 727 | 728 | {bf:"school edfacts assessments race"}: This endpoint contains school-level 729 | achievement results for state assessments in mathematics and reading or 730 | language arts, by grade and race or ethnicity. It includes the number of 731 | students who completed each assessment for whom a proficiency level was 732 | assigned and the proficiency share. The proficiency share is reported as a 733 | range, unless there are more than 300 students in the subgroup, with the 734 | magnitude of the range decreasing as the number of students reported increases. 
735 | States can change their statewide assessments, academic standards, or 736 | thresholds for proficiency levels, leading to changes in the proficiency share 737 | from year to year. The proficiency shares for Virginia's 2016–17 grade 738 | 5–8 math assessments are too low. Users should instead refer to the 739 | Virginia Department of Education's Statistics and Reports {browse "http://www.doe.virginia.gov/statistics_reports/index.shtml":website} 740 | for accurate data. 741 | 742 | {bf:"school edfacts assessments sex"}: This endpoint contains school-level achievement 743 | results for state assessments in mathematics and reading or language arts, by 744 | grade and sex. It includes the number of students who completed each assessment 745 | for whom a proficiency level was assigned and the proficiency share. The 746 | proficiency share is reported as a range, unless there are more than 300 747 | students in the subgroup, with the magnitude of the range decreasing as the 748 | number of students reported increases. States can change their statewide 749 | assessments, academic standards, or thresholds for proficiency levels, leading 750 | to changes in the proficiency share from year to year. The proficiency shares 751 | for Virginia's 2016–17 grade 5–8 math assessments are too low. 752 | Users should instead refer to the Virginia Department of Education's Statistics 753 | and Reports {browse "http://www.doe.virginia.gov/statistics_reports/index.shtml":website} 754 | for accurate data. 755 | 756 | {bf:"school edfacts assessments special-populations"}: This endpoint contains 757 | school-level achievement results for state assessments in mathematics and 758 | reading or language arts, by grade and special population subgroups. It 759 | includes the number of students who completed each assessment for whom a 760 | proficiency level was assigned and the proficiency share. 
The proficiency share 761 | is reported as a range, unless there are more than 300 students in the 762 | subgroup, with the magnitude of the range decreasing as the number of students 763 | reported increases. Special population subgroups include children with one or 764 | more disabilities, economically disadvantaged students, students who are 765 | homeless, migrant students, and students with limited English proficiency. 766 | Beginning in 2017, special population subgroups also include students who are 767 | in foster care and students who are military connected. States can change their 768 | statewide assessments, academic standards, or thresholds for proficiency 769 | levels, leading to changes in the proficiency share from year to year. The 770 | proficiency shares for Virginia's 2016–17 grade 5–8 math 771 | assessments are too low. Users should instead refer to the Virginia Department 772 | of Education's Statistics and Reports {browse "http://www.doe.virginia.gov/statistics_reports/index.shtml":website} 773 | for accurate data. 774 | 775 | {bf:"school edfacts grad-rates"}: This endpoint contains school-level adjusted cohort 776 | graduation rates. The graduation rate is reported as a range, with the 777 | magnitude of the range decreasing as the number of students reported increases. 778 | Graduation rates are provided by race and special populations. 779 | 780 | {bf:"school nhgis census-2010"}: This endpoint contains geographic variables 781 | corresponding to 2010 Census geographies for each school in the CCD directory. 782 | Geographies are merged on by latitude and longitude when available; when 783 | unavailable, latitudes and longitudes were obtained from address information 784 | using Urban's geocoder. The geocoder uses StreetMap Premium from Esri to 785 | perform accurate offline geocoding. Geocode accuracy variables indicate the 786 | degree of precision of this geocoding. 
Additional information on the match 787 | accuracy can be found {browse "https://developers.arcgis.com/rest/geocode/api-reference/geocoding-service-output.htm":here}. 788 | Geographies for older years of data or low-accuracy geocode matches should be 789 | used with caution. In addition, we link schools' geographic locations to the 790 | geographic boundaries of school districts. 791 | 792 | {bf:"school nhgis census-2000"}: This endpoint contains geographic variables 793 | corresponding to 2000 Census geographies for each school in the CCD directory. 794 | Geographies are merged on by latitude and longitude when available; when 795 | unavailable, latitudes and longitudes were obtained from address information 796 | using Urban's geocoder. The geocoder uses StreetMap Premium from Esri to 797 | perform accurate offline geocoding. Geocode accuracy variables indicate the 798 | degree of precision of this geocoding. Additional information on the match 799 | accuracy can be found {browse "https://developers.arcgis.com/rest/geocode/api-reference/geocoding-service-output.htm":here}. 800 | Geographies for older years of data or low-accuracy geocode matches should be 801 | used with caution. In addition, we link schools' geographic locations to the 802 | geographic boundaries of school districts. 803 | 804 | {bf:"school nhgis census-1990"}: This endpoint contains geographic variables 805 | corresponding to 1990 Census geographies for each school in the CCD directory. 806 | Geographies are merged on by latitude and longitude when available; when 807 | unavailable, latitudes and longitudes were obtained from address information 808 | using Urban's geocoder. The geocoder uses StreetMap Premium from Esri to 809 | perform accurate offline geocoding. Geocode accuracy variables indicate the 810 | degree of precision of this geocoding. 
Additional information on the match 811 | accuracy can be found {browse "https://developers.arcgis.com/rest/geocode/api-reference/geocoding-service-output.htm":here}. 812 | Geographies for older years of data or low-accuracy geocode matches should be 813 | used with caution. In addition, we link schools' geographic locations to the 814 | geographic boundaries of school districts. 815 | 816 | {bf:"school meps"}: This endpoint contains school-level poverty information including 817 | original and modified MEPS, standard errors, and annual weighted percentiles. 818 | 819 | -------------------------------------------------------------------------------- /helper-programs/api-to-stata-examples.py: -------------------------------------------------------------------------------- 1 | program educationdata 2 | version 11.0 3 | mata: if (findfile("libjson.mlib") != "") {} else stata("ssc install libjson"); 4 | mata: if (libjson::checkVersion((1,0,2))) {} else printf("{err: The JSON library version is not compatible with this command and so will likely fail. 
Please update libjson by running the following: ado uninstall libjson, then run: ssc install libjson}\n"); 5 | syntax using/ , [SUBset(string)] [COLumns(string)] [SUMMARIES(string)] [CLEAR] [METAdata] [STAGING] [CSV] [CACHE] [DEBUG] 6 | mata: dummy=getalldata("`using'", "`columns'", "`subset'", "`summaries'", strlen("`clear'"),strlen("`metadata'"),strlen("`staging'"),strlen("`csv'"),strlen("`cache'"),strlen("`debug'")); 7 | end 8 | 9 | mata 10 | 11 | // Beginning section above and some structure borrowed from insheetjson - thanks!; 12 | // Helper function that requests url (after passing it through urlmode()) and returns the "results" node of the JSON response; the request URL is printed first when the debug_ind global is "1" 13 | pointer (class libjson scalar) scalar getresults(string scalar url){ 14 | pointer (class libjson scalar) scalar root 15 | pointer (class libjson scalar) scalar result 16 | if (st_global("debug_ind") == "1") printf(urlmode(url) + "\n") 17 | root = libjson::webcall(urlmode(url) ,""); 18 | result = root->getNode("results") 19 | return(result) 20 | } 21 | 22 | // Helper function that returns a 6-row string matrix of variable information from the API, one column per variable: row 1 = variable name, row 2 = label, row 3 = Stata storage type ("double", or "str" + string_length), row 4 = "1" if the api-values lookup for the variable's format returned any rows and "0" otherwise, row 5 = format, row 6 = is_filter 23 | string matrix getvarinfo(string scalar url){ 24 | pointer (class libjson scalar) scalar res 25 | pointer (class libjson scalar) scalar trow 26 | pointer (class libjson scalar) scalar result 27 | string scalar tempvar 28 | string scalar tempind 29 | real scalar numrows 30 | real scalar numrowscheck 31 | res = getresults(url) 32 | numrows = res->arrayLength() 33 | varinfo = J(6,numrows,"") 34 | for (r=1; r<=numrows; r++) { 35 | trow = res->getArrayValue(r) 36 | varinfo[1,r] = trow->getString("variable", "") 37 | varinfo[2,r] = trow->getString("label", "") 38 | tempvar = trow->getString("data_type", "") 39 | if (tempvar == "integer" || tempvar == "float") varinfo[3,r] = "double" 40 | else if (tempvar == "string"){ 41 | varinfo[3,r] = "str" + trow->getString("string_length", "") 42 | } 43 | varinfo[5,r] = trow->getString("format", "") 44 | if (varinfo[5,r] != varinfo[1,r] && varinfo[5,r] != "string" && varinfo[5,r] != "numeric"){ 45 | result = 
getresults(st_global("base_url") + "/api/v1/api-values/?format_name=" + varinfo[5,r]) 46 | } 47 | else result = getresults(st_global("base_url") + "/api/v1/api-values/?format_name=" + varinfo[1,r]) 48 | numrowscheck = result->arrayLength() 49 | if (numrowscheck == 0) varinfo[4,r] = "0" 50 | else varinfo[4,r] = "1" 51 | varinfo[6,r] = trow->getString("is_filter", "") 52 | } 53 | return(varinfo) 54 | } 55 | 56 | 57 | // Fetch the api-endpoints metadata and return a 3-row string matrix, one column per endpoint: row 1 = endpoint_id, row 2 = endpoint_url (parsed elsewhere for required and optional selectors), row 3 = years_available 58 | string matrix endpointstrings(){ 59 | pointer (class libjson scalar) scalar res1 60 | pointer (class libjson scalar) scalar trow 61 | string matrix endpointdata 62 | res1 = getresults(st_global("base_url") + "/api/v1/api-endpoints/") 63 | numrows = res1->arrayLength() 64 | endpointdata = J(3,numrows,"") 65 | for (r=1; r<=numrows; r++){ 66 | trow = res1->getArrayValue(r) 67 | endpointdata[1,r] = trow->getString("endpoint_id", "") 68 | endpointdata[2,r] = trow->getString("endpoint_url", "") 69 | endpointdata[3,r] = trow->getString("years_available", "") 70 | } 71 | return(endpointdata) 72 | } 73 | 74 | // Helper function to split an endpoint URL (with "/api/v1/" removed) on "/" and return part of its segments as a row vector: when typevar is "optional", the names of the segments written in {braces} (braces stripped); for any other typevar, the literal segments that contain no braces 75 | string rowvector parseurls(string scalar url, string scalar typevar){ 76 | string rowvector splits 77 | string scalar splitr 78 | string scalar keepvars 79 | real scalar stopme 80 | url = subinstr(url, "/api/v1/", "") 81 | t = tokeninit("/") 82 | s = tokenset(t, url) 83 | splits = tokengetall(t) 84 | keepvars = "" 85 | if (typevar == "optional"){ 86 | for (r=1; r<=length(splits); r++){ 87 | splitr = subinstr(subinstr(splits[r], "{", ""), "}", "") 88 | if (splitr != splits[r]){ 89 | if (keepvars == "") keepvars = keepvars + splitr 90 | else keepvars = keepvars + "," + splitr 91 | } 92 | } 93 | } 94 | else{ 95 | for (r=1; r<=length(splits); r++){ 96 | splitr = subinstr(subinstr(splits[r], "{", ""), "}", "") 97 | if (splitr == splits[r]){ 98 | if (keepvars == "") keepvars = keepvars + 
splits[r] 99 | else keepvars = keepvars + "," + splits[r] 100 | } 101 | } 102 | } 103 | t = tokeninit(",") 104 | s = tokenset(t, keepvars) 105 | return(tokengetall(t)) 106 | } 107 | 108 | // Helper function to parse required data as inputs, check validity, and return endpoint chosen: eps is split into words and compared, position by position, with the required (non-braced) URL segments of every endpoint; returns the column index of the last endpoint that matches exactly, or 0 when none does 109 | real scalar validendpoints(string scalar eps){ 110 | string matrix endpoints 111 | string rowvector epsind 112 | string rowvector parsedurls 113 | real scalar check 114 | real scalar permcheck 115 | endpoints = endpointstrings() 116 | epsind = tokens(eps) 117 | permcheck = 0 118 | for (c=1; c<=length(endpoints[2,.]); c++){ 119 | parsedurls = parseurls(endpoints[2,c], "required") 120 | if (length(parsedurls) == length(epsind)){ 121 | check = 1 122 | for (r=1; r<=length(epsind); r++){ 123 | if (epsind[r] == parsedurls[r]) check = check * 1 124 | else check = check * 0 125 | } 126 | if (check == 1) permcheck = c 127 | } 128 | } 129 | return(permcheck) 130 | } 131 | 132 | // Helper function to parse years available for endpoint: expands the years_available string stored in column matid of the endpoint metadata (a single value, a comma-separated list, a list containing "and", and/or en-dash ranges) into a row vector with one year per element 133 | string rowvector parseyears(real scalar matid){ 134 | string matrix endpoints 135 | string rowvector getit 136 | string rowvector getit2 137 | string rowvector returnyears 138 | string scalar yrs 139 | string scalar yrstring 140 | string scalar yrstring2 141 | endpoints = endpointstrings() 142 | yrs = endpoints[3,matid] 143 | if (subinstr(subinstr(yrs, ",", ""), "–", "") == yrs){ 144 | returnyears = (yrs) 145 | } 146 | else if (subinstr(yrs, "and", "") != yrs){ 147 | yrs = subinstr(subinstr(yrs, " ", ""), "and", "") 148 | t = tokeninit(",") 149 | s = tokenset(t, yrs) 150 | getit = tokengetall(t) 151 | yrstring = subinstr(yrs, "," + getit[length(getit)], "") 152 | t = tokeninit("–") 153 | s = tokenset(t, getit[length(getit)]) 154 | getit = tokengetall(t) 155 | for (y=strtoreal(getit[1]); y<=strtoreal(getit[2]); y++){ 156 | yrstring = yrstring + "," + strofreal(y) 157 | } 158 | t = tokeninit(",") 159 | s = tokenset(t, yrstring) 160 | returnyears = 
tokengetall(t) 161 | } 162 | else if (subinstr(yrs, ",", "") != yrs){ 163 | t = tokeninit(", ") 164 | s = tokenset(t, yrs) 165 | getit = tokengetall(t) 166 | yrstring = "" 167 | for (c=1; c<=length(getit); c++){ 168 | if (subinstr(getit[c], "–", "") != getit[c]){ 169 | t = tokeninit("–") 170 | s = tokenset(t, getit[c]) 171 | getit2 = tokengetall(t) 172 | if (c == 1) yrstring = getit2[c] 173 | else yrstring = yrstring + "," + getit2[1] 174 | for (y=strtoreal(getit2[1])+1; y<=strtoreal(getit2[2]); y++){ 175 | yrstring = yrstring + "," + strofreal(y) 176 | } 177 | } 178 | else{ 179 | if (c == 1) yrstring = getit[c] 180 | else yrstring = yrstring + "," + getit[c] 181 | } 182 | } 183 | t = tokeninit(",") 184 | s = tokenset(t, yrstring) 185 | returnyears = tokengetall(t) 186 | } 187 | else { 188 | t = tokeninit("–") 189 | s = tokenset(t, yrs) 190 | getit = tokengetall(t) 191 | yrstring = getit[1] 192 | for (y=strtoreal(getit[1])+1; y<=strtoreal(getit[2]); y++){ 193 | yrstring = yrstring + "," + strofreal(y) 194 | } 195 | t = tokeninit(",") 196 | s = tokenset(t, yrstring) 197 | returnyears = tokengetall(t) 198 | } 199 | return(returnyears) 200 | } 201 | 202 | // Helper function to validate a single option against the list of valid options: returns 1 as soon as an element of vopts equals test, otherwise 0 (the local isopt is set but never read) 203 | real scalar isvalid(string scalar test, string rowvector vopts){ 204 | real scalar isopt 205 | isopt = 0 206 | for (c = 1; c<=length(vopts); c++){ 207 | if (vopts[c] == test) return(1) 208 | } 209 | return(0) 210 | } 211 | 212 | // Helper function to get the position of a string in a list: returns the index of the first element of tlist equal to test, or 0 when there is none 213 | real scalar stringpos(string scalar test, string rowvector tlist){ 214 | for (r = 1; r<=length(tlist); r++){ 215 | if (test == tlist[r]) return(r) 216 | } 217 | return(0) 218 | } 219 | 220 | // Helper function to check if item is in a list: returns 1 when any element of tlist equals i, otherwise 0 (same answer as isvalid, but always scans the whole list) 221 | real scalar iteminlist(string scalar i, string rowvector tlist){ 222 | real scalar isinlist 223 | isinlist = 0 224 | for (r=1; r<=length(tlist); r++){ 225 | if (i == tlist[r]) isinlist = 1 226 | } 
227 | return(isinlist) 228 | } 229 | 230 | // Helper function to check number of item in list: returns the index of the last element of tlist equal to i, or 0 when there is none 231 | real scalar iteminlistnum(string scalar i, string rowvector tlist){ 232 | real scalar isinlist 233 | isinlist = 0 234 | for (r=1; r<=length(tlist); r++){ 235 | if (i == tlist[r]) isinlist = r 236 | } 237 | return(isinlist) 238 | } 239 | 240 | // Helper function to add mode logging to URLs for API tracking: appends mode=stata (preceded by "?" when the URL has no "?" yet, otherwise by "&") unless the URL already contains it; when the cc global is "1" it also appends "&a=" plus a random integer (presumably to defeat caching - confirm against where cc is set) 241 | string scalar urlmode(string scalar url3){ 242 | string scalar strnum 243 | if (strpos(url3, "mode=stata") == 0){ 244 | if (subinstr(url3, "?", "") == url3) url3 = url3 + "?mode=stata" 245 | else url3 = url3 + "&mode=stata" 246 | } 247 | strnum = strofreal(round(runiform(1,1)*100000)) 248 | if (st_global("cc") == "1") url3 = url3 + "&a=" + strnum 249 | return(url3) 250 | } 251 | 252 | // Helper function to validate against list: checks every element of alist against the values allowed for option tocheck (yearlist supplies the allowed values when tocheck is "year"). For the two grade options, "-1" and "0" become "grade-pk" and "grade-k" and other bare grade values get the "grade-" prefix. Returns the (possibly rewritten) alist, or the 3-element vector ("Error", offending value, comma-separated valid values) at the first invalid element. Any other tocheck returns alist unchecked 253 | string rowvector checkinglist(string rowvector alist, string scalar tocheck, string rowvector yearlist){ 254 | string rowvector tochecklist 255 | string rowvector toaddlist 256 | string rowvector validlist 257 | string scalar returnlist 258 | if (tocheck == "grade") { 259 | tochecklist = ("grade-pk","grade-k","grade-1","grade-2","grade-3","grade-4","grade-5","grade-6","grade-7","grade-8","grade-9","grade-10","grade-11","grade-12","grade-13","grade-14","grade-15","grade-99","grade-999") 260 | toaddlist = ("pk","k","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","99","999") 261 | } 262 | else if (tocheck == "grade_edfacts") { 263 | tochecklist = ("grade-3","grade-4","grade-5","grade-6","grade-7","grade-8","grade-9","grade-99") 264 | toaddlist = ("3","4","5","6","7","8","9","99") 265 | } 266 | else if (tocheck == "level_of_study") tochecklist = ("undergraduate","graduate","first-professional","post-baccalaureate","1","2","3","4") 267 | else if (tocheck == "fed_aid_type") tochecklist = ("fed","sub-stafford","no-pell-stafford","1","2","3") 268 | else if (tocheck == "year") tochecklist = yearlist 269 | else 
return(alist) 270 | for (c=1; c<=length(alist); c++){ 271 | if (iteminlist(alist[c],tochecklist) == 0) { 272 | if ((tocheck == "grade" || tocheck == "grade_edfacts") && (alist[c] == "-1" || alist[c] == "0")){ 273 | if (alist[c] == "-1") alist[c] = "grade-pk" 274 | else alist[c] = "grade-k" 275 | } 276 | else if ((tocheck != "grade" && tocheck != "grade_edfacts") || iteminlist(alist[c],toaddlist) == 0){ 277 | if (tocheck == "grade" || tocheck == "grade_edfacts") validlist = toaddlist 278 | else validlist = tochecklist 279 | for (r=1; r<=length(validlist); r++){ 280 | if (r == 1) returnlist = validlist[r] 281 | else returnlist = returnlist + ", " + validlist[r] 282 | } 283 | return(("Error",alist[c],returnlist)) 284 | } 285 | else alist[c] = "grade-" + alist[c] 286 | } 287 | } 288 | return(alist) 289 | } 290 | 291 | // Helper function to parse optional data as inputs, taking a single optional data argument, check validity, and return all chosen options. subset1 is split on "=" into an option name and a value; the value may be a single value, a comma-separated list, a first:last range, or alldata. The option name must be one of the {braced} selectors in the URL of endpoint column epid. On failure the first element of the returned vector starts with "Invalid Option" 292 | string rowvector validoptions(string scalar subset1, real scalar epid){ 293 | string matrix endpoints 294 | string rowvector grades 295 | string rowvector levels 296 | string rowvector fedaids 297 | string rowvector vopts 298 | string rowvector getit 299 | string rowvector tlev 300 | string rowvector years 301 | string rowvector checklist 302 | string scalar getstring 303 | string scalar tempadd 304 | string scalar keepg1 305 | real scalar isopt1 306 | real scalar spos1 307 | real scalar spos2 308 | real scalar isgrade 309 | endpoints = endpointstrings() 310 | t = tokeninit("=") 311 | s = tokenset(t, subset1) 312 | getit = tokengetall(t) 313 | vopts = parseurls(endpoints[2,epid], "optional") 314 | isopt1 = isvalid(getit[1], vopts) 315 | if (isopt1 == 1){ 316 | grades = ("pk","k","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","99","999") 317 | grades_alt = ("-1","0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","99","999") 318 | gradesed = ("3","4","5","6","7","8","9","99") 
319 | gradesed_alt = ("3","4","5","6","7","8","9","99") 320 | levels = ("undergraduate","graduate","first-professional","post-baccalaureate") 321 | fedaids = ("fed","sub-stafford","no-pell-stafford") 322 | if (getit[1] == "year") years = parseyears(epid) 323 | else years = ("fake","data") 324 | if (getit[2] != "alldata"){ 325 | if (subinstr(subinstr(getit[2], ",", ""), ":", "") == getit[2]){ 326 | checklist = checkinglist((getit[2]), getit[1], years) 327 | if (checklist[1] == "Error") return(("Invalid Option: " + checklist[2] + " in " + getit[1] + "\nValid options are: " + checklist[3], "")) 328 | else return(checklist) 329 | } 330 | else if (subinstr(getit[2], ",", "") != getit[2]){ 331 | t = tokeninit(",") 332 | s = tokenset(t, getit[2]) 333 | checklist = checkinglist(tokengetall(t), getit[1], years) 334 | if (checklist[1] == "Error") return(("Invalid Option: " + checklist[2] + " in " + getit[1] + "\nValid options are: " + checklist[3], "")) 335 | else return(checklist) 336 | } 337 | else{ 338 | tempadd = "" 339 | isgrade = 0 340 | if (getit[1] == "year") tlev = years 341 | else if (getit[1] == "grade"){ 342 | tlev = grades 343 | tempadd = "grade-" 344 | isgrade = 1 345 | } 346 | else if (getit[1] == "grade_edfacts"){ 347 | tlev = gradesed 348 | tempadd = "grade-" 349 | isgrade = 2 350 | } 351 | else if (getit[1] == "level_of_study") tlev = levels 352 | else if (getit[1] == "fed_aid_type") tlev = fedaids 353 | keepg1 = getit[1] // remember the option name: getit is overwritten with the two range endpoints just below, so keepg1 must be used from here on 354 | t = tokeninit(":") 355 | s = tokenset(t, getit[2]) 356 | getit = tokengetall(t) 357 | if (isvalid(getit[1], tlev) == 1 && isvalid(getit[2], tlev) == 1){ 358 | spos1 = stringpos(getit[1], tlev) 359 | spos2 = stringpos(getit[2], tlev) 360 | getstring = tempadd + tlev[spos1] 361 | for (c=spos1 + 1; c<=spos2; c++){ 362 | getstring = getstring + "," + tempadd + tlev[c] 363 | } 364 | t = tokeninit(",") 365 | s = tokenset(t, getstring) 366 | checklist = checkinglist(tokengetall(t), keepg1, years) 367 | if (checklist[1] == "Error") 
return(("Invalid Option: " + checklist[2] + " in " + keepg1 + "\nValid options are: " + checklist[3], "")) 368 | else return(checklist) 369 | } 370 | else if (isgrade == 1 && (isvalid(getit[1], grades_alt) == 1 && isvalid(getit[2], grades_alt) == 1)){ 371 | spos1 = stringpos(getit[1], grades_alt) 372 | spos2 = stringpos(getit[2], grades_alt) 373 | getstring = tempadd + tlev[spos1] 374 | for (c=spos1 + 1; c<=spos2; c++){ 375 | getstring = getstring + "," + tempadd + tlev[c] 376 | } 377 | t = tokeninit(",") 378 | s = tokenset(t, getstring) 379 | checklist = checkinglist(tokengetall(t), keepg1, years) 380 | if (checklist[1] == "Error") return(("Invalid Option: " + checklist[2] + " in " + keepg1 + "\nValid options are: " + checklist[3], "")) 381 | else return(checklist) 382 | } 383 | else if (isgrade == 2 && (isvalid(getit[1], gradesed_alt) == 1 && isvalid(getit[2], gradesed_alt) == 1)){ 384 | spos1 = stringpos(getit[1], gradesed_alt) 385 | spos2 = stringpos(getit[2], gradesed_alt) 386 | getstring = tempadd + tlev[spos1] 387 | for (c=spos1 + 1; c<=spos2; c++){ 388 | getstring = getstring + "," + tempadd + tlev[c] 389 | } 390 | t = tokeninit(",") 391 | s = tokenset(t, getstring) 392 | checklist = checkinglist(tokengetall(t), keepg1, years) 393 | if (checklist[1] == "Error") return(("Invalid Option: " + checklist[2] + " in " + keepg1 + "\nValid options are: " + checklist[3], "")) 394 | else return(checklist) 395 | } 396 | else { 397 | if (isvalid(getit[1], tlev) == 0){ 398 | checklist = checkinglist((getit[1]),keepg1, years) // yields the 3-element ("Error", value, valid list) only when the value itself is rejected; otherwise there is no checklist[3] to read 399 | if (checklist[1] != "Error") return(("Invalid Option selection: " + getit[1] + ":" + getit[2] + " in " + keepg1, "")); return(("Invalid Option selection: " + getit[1] + " in " + keepg1 + "\nValid options are: " + checklist[3], "")) 400 | } 401 | else{ 402 | checklist = checkinglist((getit[2]),keepg1, years) // same guard as above: e.g. grade=pk:0 or level_of_study=undergraduate:2 pass checkinglist individually 403 | if (checklist[1] != "Error") return(("Invalid Option selection: " + getit[1] + ":" + getit[2] + " in " + keepg1, "")); return(("Invalid Option selection: " + getit[2] + " in " + keepg1 + "\nValid options are: " + checklist[3], "")) 404 | } 405 | } 406 | } 407 | } 408 | else{ 409 | tempadd = "" 410 | if (getit[1] == "year") tlev = years 411 | else if (getit[1] 
== "grade"){
				tlev = grades
				tempadd = "grade-"
			}
			else if (getit[1] == "grade_edfacts"){
				tlev = gradesed
				tempadd = "grade-"
			}
			else if (getit[1] == "level_of_study") tlev = levels
			else if (getit[1] == "fed_aid_type") tlev = fedaids
			getstring = tempadd + tlev[1]
			for (c=2; c<=length(tlev); c++){
				getstring = getstring + "," + tempadd + tlev[c]
			}
			t = tokeninit(",")
			s = tokenset(t, getstring)
			return(tokengetall(t))
		}
	}
	else return(("Invalid Option: " + getit[1]))
}

// Helper function that returns string and real/integer variable names
//   typ     - "string" selects the string variables; any other value selects
//             the non-string variables
//   varinfo - one column per variable; row 1 holds the variable name and row 3
//             the storage type (a type containing "str" counts as string)
// Returns the selected names as a rowvector (1 x 0 when nothing matches), in
// the same order as the columns of varinfo.
string rowvector getvartypes(string scalar typ, string matrix varinfo){
	string rowvector varnametypes
	real scalar wantstr
	real scalar isstr
	real scalar c
	wantstr = (typ == "string")
	varnametypes = J(1,0,"")
	for (c=1; c<=cols(varinfo); c++){
		isstr = (strpos(varinfo[3,c], "str") > 0)
		if (isstr == wantstr) varnametypes = varnametypes, varinfo[1,c]
	}
	return(varnametypes)
}

// Helper function to get variable value definitions
//   var1    - variable name
//   format1 - format name; it is used for the lookup unless it equals var1,
//             "string" or "numeric", in which case var1 is used instead
// Returns a 2 x n matrix: row 1 holds each "code", row 2 the cleaned label
// text taken from "code_label".
string matrix getvardefs(string scalar var1, string scalar format1){
	pointer (class libjson scalar) scalar result
	pointer (class libjson scalar) scalar trow
	string matrix vardefs
	string rowvector tokenstemp
	string scalar tempvar
	string scalar tempstring
	real scalar numrows
	real scalar startvar
	if (format1 != var1 && format1 != "string" && format1 != "numeric"){
		result = getresults(st_global("base_url") + "/api/v1/api-values/?format_name=" + format1)
	}
	else result = getresults(st_global("base_url") + "/api/v1/api-values/?format_name=" + var1)
	numrows = result->arrayLength()
	vardefs = J(2,numrows,"")
	for (r=1; r<=numrows; r++){
		trow = result->getArrayValue(r)
		vardefs[1,r] = trow->getString("code", "")
		tempvar = trow->getString("code_label", "")
		// Split on "-" (the dashes themselves are returned as tokens). The
		// label text starts at token 3, or at token 4 when the first token
		// is a dash (presumably a negative code -- confirm against the API).
		tokenstemp = tokens(tempvar, "-")
		if (tokenstemp[1] == "-") startvar = 4
		else startvar = 3
		tempstring = ""
		for (i=startvar; i<=length(tokenstemp); i++){
			tempstring = tempstring + tokenstemp[i]
			if (i != length(tokenstemp)) tempstring = tempstring + " "
		}
		// NOTE(review): as the file reads here, the next two replacements map
		// a dash to the identical dash, i.e. they are no-ops. Presumably the
		// search strings were mis-encoded dash sequences originally -- confirm
		// against the upstream source before changing them.
		vardefs[2,r] = subinstr(tempstring, "–", "–")
		vardefs[2,r] = subinstr(vardefs[2,r], "—", "—")
		// Remove the padding that the re-joining loop put around dashes.
		vardefs[2,r] = subinstr(vardefs[2,r], " - ", "-")
		vardefs[2,r] = subinstr(vardefs[2,r], " – ", "–")
		vardefs[2,r] = subinstr(vardefs[2,r], " — ", "—")
	}
	return(vardefs)
}

// Private worker shared by gettable() and gettable_summaries(): fetches one
// page of JSON and writes its "results" rows into the Stata dataset.
//   url      - page URL
//   usemode  - nonzero: fetch urlmode(url); zero: fetch url unchanged
//   startpos - observation number at which the first fetched row is stored
//   varinfo  - variable metadata (row 1 names, row 3 storage types)
// Returns the page's "next" value, or "null" when the page has no rows.
string scalar gettable_load(string scalar url, real scalar usemode, real scalar startpos, string matrix varinfo){
	pointer (class libjson scalar) scalar root
	pointer (class libjson scalar) scalar result
	pointer (class libjson scalar) scalar trow
	string matrix sdata
	string matrix varinfotemp
	string rowvector svarnames
	string rowvector rvarnames
	string rowvector svarnamestemp
	string rowvector rvarnamestemp
	string scalar tval
	real matrix rdata
	real scalar numrows
	real scalar endpos
	real scalar r
	real scalar c
	// NOTE(review): the debug line always shows urlmode(url), even when the
	// plain url is the one fetched (usemode == 0); kept as in the original.
	if (st_global("debug_ind") == "1") printf(urlmode(url) + "\n")
	if (usemode) root = libjson::webcall(urlmode(url) ,"");
	else root = libjson::webcall(url ,"");
	result = root->getNode("results")
	numrows = result->arrayLength()
	if (numrows <= 0) return("null")
	// Lower-cased copy of the names: JSON keys are read with the names as
	// given in varinfo, Stata variables are addressed by the lower-cased names.
	varinfotemp = J(6,cols(varinfo),"")
	for (c=1; c<=cols(varinfo); c++){
		varinfotemp[1,c] = strlower(varinfo[1,c])
		varinfotemp[3,c] = varinfo[3,c]
	}
	st_addobs(numrows)
	endpos = startpos + numrows - 1
	svarnames = getvartypes("string", varinfo)
	rvarnames = getvartypes("other", varinfo)
	svarnamestemp = getvartypes("string", varinfotemp)
	rvarnamestemp = getvartypes("other", varinfotemp)
	sdata = J(numrows,length(svarnames),"")
	rdata = J(numrows,length(rvarnames),.)
	for (r=1; r<=numrows; r++) {
		trow = result->getArrayValue(r);
		for(c=1; c<=length(svarnames); c++) {
			tval = trow->getString(svarnames[c],"");
			// "null" and a literal pair of double quotes both mean empty
			if (tval == "null") tval = ""
			if (tval == `"""' + `"""') tval = ""
			sdata[r,c] = tval
		}
		for(c=1; c<=length(rvarnames); c++) {
			tval = trow->getString(rvarnames[c],"");
			if (tval == "null") rdata[r,c] = .
			else rdata[r,c] = strtoreal(tval)
		}
	}
	// Write through views so the values land directly in the dataset.
	if (length(svarnames) > 0){
		st_sview(SV,(startpos..endpos)',svarnamestemp)
		SV[.,.] = sdata[.,.]
	}
	if (length(rvarnames) > 0){
		st_view(V,(startpos..endpos)',rvarnamestemp)
		V[.,.] = rdata[.,.]
	}
	return(root->getString("next", ""))
}

// Get table just gets data we need for one table, this appends results to the stata dataset
// The page is requested through urlmode(url). Returns the "next" page value,
// or "null" when the page contains no rows.
string scalar gettable(string scalar url, real scalar startpos, string matrix varinfo){
	return(gettable_load(url, 1, startpos, varinfo))
}

// Same as gettable(), but the page is requested with url exactly as given
// (urlmode() is not applied to the request).
string scalar gettable_summaries(string scalar url, real scalar startpos, string matrix varinfo){
	return(gettable_load(url, 0, startpos, varinfo))
}


// Helper function to create query strings ?var=x for all potential subset combinations
//   additions - filters separated by ";", each "name=value"; a value of the
//               form "a:b" is expanded to the integer list "a,a+1,...,b"
// Returns "" for empty input, otherwise "?" followed by the filters joined
// with "&". Example: "year=2015;grade=1:3" -> "?year=2015&grade=1,2,3".
string scalar getquerystrings(string scalar additions){
	string rowvector result1
	string rowvector result2
	string rowvector result3
	string scalar staticstring
	string scalar dynamicstring
	string scalar piece
	real scalar countstatic
	real scalar c
	real scalar r
	if (additions == "") return("")
	t = tokeninit(";")
	s = tokenset(t, additions)
	result1 = tokengetall(t)
	countstatic = 1
	staticstring = ""
	for (c=1; c<=length(result1); c++){
		t = tokeninit("=")
		s = tokenset(t, result1[c])
		result2 = tokengetall(t)
		if (subinstr(result2[2], ":", "") == result2[2]){
			// no range: pass the filter through unchanged
			piece = result1[c]
		}
		else{
			t = tokeninit(":")
			s = tokenset(t, result2[2])
			result3 = tokengetall(t)
			dynamicstring = ""
			for (r=strtoreal(result3[1]); r<=strtoreal(result3[2]); r++){
				if (r == strtoreal(result3[1])) dynamicstring = dynamicstring + result2[1] + "=" + strofreal(r)
				else dynamicstring = dynamicstring + "," + strofreal(r)
			}
			piece = dynamicstring
		}
		// Fix: the expanded form is appended for every position. Previously a
		// range that was not the first filter was appended in its raw "a:b"
		// form.
		if (countstatic == 1) staticstring = staticstring + piece
		else staticstring = staticstring + "&" + piece
		countstatic = countstatic + 1
	}
	return("?" + staticstring)
}

// Helper function to create dataset
// Fetches the variable list of endpoint eid and creates/labels the variables
// through createdataset_summaries_ep(), whose body was a line-for-line copy
// of the code that used to live here. Returns 1.
real scalar createdataset(string scalar eid){
	string matrix varinfo
	varinfo = getvarinfo(st_global("base_url") + "/api/v1/api-endpoint-varlist/?endpoint_id=" + eid)
	return(createdataset_summaries_ep(varinfo))
}


// Helper function to create dataset
// Creates one Stata variable per column of varinfo and attaches labels.
//   varinfo - row 1 names, row 2 variable labels, row 3 storage types,
//             row 4 "1" when value labels must be fetched, row 5 format name
//             passed to getvardefs()
// Names are lower-cased in place. Returns 1.
real scalar createdataset_summaries_ep(string matrix varinfo){
	string matrix vardef
	string scalar labeldef
	string scalar labelshort
	for (c=1; c<=length(varinfo[1,.]); c++){
		varinfo[1,c] = strlower(varinfo[1,c])
	}
	temp1 = st_addvar(varinfo[3,.],varinfo[1,.])
	for (c=1; c<=length(varinfo[1,.]); c++){
		// NOTE(review): as the file reads here these two replacements map a
		// dash to the identical dash (no-ops); presumably the search strings
		// were mis-encoded dash sequences originally -- confirm upstream.
		varinfo[2,c] = subinstr(varinfo[2,c], "—", "—")
		varinfo[2,c] = subinstr(varinfo[2,c], "–", "–")
		stata("qui label var " + varinfo[1,c] + " " + `"""' + varinfo[2,c] + `"""')
		// Value-label name: variable name cut to 30 characters plus "df"
		if (strlen(varinfo[1,c]) > 30) labelshort = substr(varinfo[1,c], 1, 30) + "df"
		else labelshort = varinfo[1,c] + "df"
		if (varinfo[4,c] == "1"){
			vardef = getvardefs(varinfo[1,c], varinfo[5,c])
			labeldef = "qui label define " + labelshort + " "
			for (r=1; r<=length(vardef[1,.]); r++){
				labeldef = labeldef + vardef[1,r] + " " + `"""' + vardef[2,r] + `"""'
				if (r != length(vardef[1,.])) labeldef = labeldef + " "
			}
			stata(labeldef)
			stata("qui label values " + varinfo[1,c] + " " + labelshort)
		}
		else if (varinfo[3,c] == "float"){
			labeldef = "qui label define " + labelshort + " -1 " + `"""' + "Missing/Not reported" + `"""' + " -2 " + `"""' + "Not applicable" + `"""' + " -3 " + `"""' + "Suppressed data" + `"""'
			stata(labeldef)
			stata("qui label values " + varinfo[1,c] + " " + labelshort)
		}
		else if (varinfo[3,c] == "double"){
			labeldef = "qui label define " + labelshort + " -1 " + `"""' + "Missing/Not reported" + `"""' + " -2 " + `"""' + "Not applicable" + `"""' + " -3 " + `"""' + "Suppressed data" + `"""'
			stata(labeldef)
			stata("qui label values " + varinfo[1,c] + " " + labelshort + ", nofix")
		}
	}
	return(1)
}

// Helper function to translate short dataset name to full name
// The first word is mapped school -> schools, district -> school-districts,
// college -> college-university; the remaining words are kept as they are.
// Returns "Error1" when fewer than two words are given and "Error2" when the
// first word is not one of the three categories. eps is not used.
string scalar shorttolongname(string scalar shortname, string matrix eps){
	string rowvector voptions
	string rowvector result1
	string scalar toreturn
	result1 = tokens(shortname)
	if (length(result1) < 2) return("Error1")
	if (result1[1] == "school") st1 = "schools"
	else if (result1[1] == "district") st1 = "school-districts"
	else if (result1[1] == "college") st1 = "college-university"
	else return("Error2")
	result1[1] = st1
	toreturn = ""
	for (r=1; r<=length(result1); r++){
		if (r == 1) toreturn = toreturn + result1[r]
		else toreturn = toreturn + " " + result1[r]
	}
	return(toreturn)
}

// Helper function to reformat summaries subcommand to endpoint URLs
//   dataoptions   - space-separated dataset name; each word becomes one URL
//                   path segment
//   summaries_cmd - "<stat> <variable> <word> <group variable>"; words 1, 2
//                   and 4 are used (the third word is ignored; presumably it
//                   is "by")
// Returns "/api/v1/<segments>/summaries/?stat=..&by=..&var=..". Aborts with
// error 198 when summaries_cmd has fewer than four words.
string scalar getsummariesurl(string scalar dataoptions, string scalar summaries_cmd){
	string scalar ep_url
	string scalar agg_method
	string scalar var_to_agg
	string scalar agg_by
	string rowvector token_cmd
	string rowvector segments
	real scalar c

	segments = tokens(dataoptions)
	ep_url = "/api/v1/"
	for (c=1; c<=length(segments); c++){
		ep_url = ep_url + segments[c] + "/"
	}
	ep_url = ep_url + "summaries/"

	token_cmd = tokens(summaries_cmd)
	// Explicit message instead of a bare subscript error on short input
	if (length(token_cmd) < 4) _error(198, "summaries() must have the form: <statistic> <variable> by <group variable>")
	agg_method = token_cmd[1]
	var_to_agg = token_cmd[2]
	agg_by = token_cmd[4]
	ep_url = ep_url + "?stat=" + agg_method + "&by=" + agg_by + "&var=" + var_to_agg
	return(ep_url)
}

// Helper function for time taken
// timeper is a duration in milliseconds (it is passed to hhC()/mmC()).
// Returns a phrase such as "less than one minute", "5 minute(s)" or
// "1 hour(s) and 5 minute(s)".
string scalar timeit(real scalar timeper){
	string scalar timetaken
	if (hhC(timeper) == 0 && mmC(timeper) == 0) timetaken = "less than one minute"
	else if (hhC(timeper) == 0) timetaken = strofreal(mmC(timeper)) + " minute(s)"
	else timetaken = strofreal(hhC(timeper)) + " hour(s) and " + strofreal(mmC(timeper)) + " minute(s)"
	return(timetaken)
}

// Provide CSV download with numbered list of columns that should be strings
// For every column of varinfo2 whose row 5 equals "string", looks the name up
// among the variables currently in memory with iteminlistnum() and returns
// the resulting numbers separated by spaces ("" when there are none).
string scalar numliststr(string matrix varinfo2){
	string rowvector varnames
	string scalar nliststr
	real scalar listnum
	varnames = st_varname((1..st_nvar()))
	nliststr = ""
	for (c=1; c<=length(varinfo2[1,.]); c++){
		if (varinfo2[5,c] == "string"){
			listnum = iteminlistnum(varinfo2[1,c], varnames)
			if (nliststr == "") nliststr = strofreal(listnum)
			else nliststr = nliststr + " " + strofreal(listnum)
		}
	}
	return(nliststr)
}

// Label CSV dataset appropriately when it is loaded in
//   varinfo2 - same layout as used by createdataset_summaries_ep()
//   init1    - 1: define the value labels and attach them; otherwise only
//              attach labels (they are expected to exist already)
// Returns 1.
real scalar labelcsv(string matrix varinfo2, real scalar init1){
	string matrix vardef
	string scalar labeldef
	string scalar labelshort
	for (c=1; c<=length(varinfo2[1,.]); c++){
		stata("qui label var " + varinfo2[1,c] + " " + `"""' + varinfo2[2,c] + `"""')
		if (strlen(varinfo2[1,c]) > 30) labelshort = substr(varinfo2[1,c], 1, 30) + "df"
		else labelshort = varinfo2[1,c] + "df"
		if (varinfo2[4,c] == "1"){
			if (init1 == 1){
				vardef = getvardefs(varinfo2[1,c], varinfo2[5,c])
				labeldef = "qui label define " + labelshort + " "
				for (r=1; r<=length(vardef[1,.]); r++){
					labeldef = labeldef + vardef[1,r] + " " + `"""' + vardef[2,r] + `"""'
					if (r != length(vardef[1,.])) labeldef = labeldef + " "
				}
				stata(labeldef)
			}
			stata("qui label values " + varinfo2[1,c] + " " + labelshort)
		}
		else if (varinfo2[3,c] == "float"){
			if (init1 == 1){
				labeldef = "qui label define " + labelshort + " -1 " + `"""' + "Missing/Not reported" + `"""' + " -2 " + `"""' + "Not applicable" + `"""' + " -3 " + `"""' + "Suppressed data" + `"""'
				stata(labeldef)
			}
			stata("qui label values " + varinfo2[1,c] + " " + labelshort)
		}
		else if (varinfo2[3,c] == "double"){
			if (init1 == 1){
				labeldef = "qui label define " + labelshort + " -1 " + `"""' + "Missing/Not reported" + `"""' + " -2 " + `"""' + "Not applicable" + `"""' + " -3 " + `"""' + "Suppressed data" + `"""'
				stata(labeldef)
			}
			stata("qui label values " + varinfo2[1,c] + " " + labelshort + ", nofix")
		}
	}
	return(1)
}

// Correct grade list for subsetting CSV files
// "grade-pk"/"pk" -> "-1", "grade-k"/"k" -> "0"; anything else is returned
// with every "grade-" removed.
string scalar correctgrade(string scalar vopt1){
	if (vopt1 == "grade-pk" || vopt1 == "pk") return("-1")
	else if (vopt1 == "grade-k" || vopt1 == "k") return("0")
	else return(subinstr(vopt1, "grade-", ""))
}

// Subset and keep relevant variables - keep if inlist(varname,val1,val2,etc.)
//   spops2       - row 1 option names, row 2 "name=value" selections; a value
//                  of "alldata" means no subsetting on that option
//   querystring2 - "?a=1&b=2,3" style filters; the "?" is stripped in place,
//                  so the caller's variable is modified as well
//   epid2        - endpoint index handed to validoptions()
//   vlist2       - variables to keep ("" keeps all)
// Returns 1.
real scalar subsetkeep(string matrix spops2, string scalar querystring2, real scalar epid2, string scalar vlist2){
	string rowvector spopsres
	string rowvector voptions
	string rowvector queryparams
	string rowvector queryparamvals
	string rowvector queryparamlist
	string scalar keepstate
	string scalar keepbase
	keepbase = "qui keep if inlist("
	for (r=1; r<=length(spops2[1,.]); r++){
		t = tokeninit("=")
		s = tokenset(t, spops2[2,r])
		spopsres = tokengetall(t)
		keepstate = keepbase + spops2[1,r]
		if (spopsres[2] != "alldata"){
			voptions = validoptions(spops2[2,r], epid2)
			for (c=1; c<=length(voptions); c++){
				keepstate = keepstate + "," + correctgrade(voptions[c])
			}
			keepstate = keepstate + ")"
			stata(keepstate)
		}
	}
	querystring2 = subinstr(querystring2, "?", "")
	t = tokeninit("&")
	s = tokenset(t, querystring2)
	queryparams = tokengetall(t)
	for (r=1; r<=length(queryparams); r++){
		t = tokeninit("=")
		s = tokenset(t, queryparams[r])
		queryparamvals = tokengetall(t)
		keepstate = keepbase + queryparamvals[1] + "," + queryparamvals[2] + ")"
		stata(keepstate)
	}
	if (vlist2 != "") stata("keep " + vlist2)
	return(1)
}

// Download Local CSV to parse column order, keep it
// Copies <tbaseurl1><tval1 without spaces> to a temporary CSV in the current
// working directory and imports only its first row. Returns 1.
real scalar copycsv(string scalar tval1, string scalar tbaseurl1){
	stata("qui copy " + tbaseurl1 + subinstr(tval1, " ", "") + " temp_eddata_file_gen_012345.csv, replace")
	stata("qui import delimited temp_eddata_file_gen_012345.csv, clear rowrange(1:1)")
	return(1)
}

// Download from CSV instead
// Looks up the downloadable files for endpoint eid1, imports the relevant
// CSV file(s), labels and subsets them, and leaves the result in memory.
// Returns 0 when nothing can be downloaded, 1 otherwise.
real scalar downloadcsv(string scalar eid1, string matrix spops1, string scalar ds1, real scalar epid1, string matrix varinfo1, string scalar querystring1, string scalar vlist1){
	pointer (class libjson scalar) scalar results1
	pointer (class libjson scalar) scalar trow
	string rowvector yearslist
	string rowvector relfiles
	string scalar tval
	string scalar tbaseurl
	string scalar addstrings
	string scalar liststrings
	string scalar relfilesstr
	real scalar dlyear
	real scalar numresults
	real scalar temp1
	real scalar temp2
	real scalar temp3
	real scalar countfiles
	tbaseurl = st_global("base_url") + "/csv/" + ds1 + "/"
	results1 = getresults(st_global("base_url") + "/api/v1/api-downloads/?endpoint_id=" + eid1)
	yearslist = validoptions(spops1[2,1], epid1)
	numresults = results1->arrayLength()
	temp1 = 0
	// A single entry is treated as "no data file" (presumably the one entry
	// is not a data CSV -- confirm against the api-downloads response).
	if (numresults == 1){
		return(0)
	}
	if (numresults == 2){
		// Exactly two entries: the second one is the data file.
		printf("Downloading file, please wait...\n")
		tval = results1->getArrayValue(2)->getString("file_name", "")
		temp3 = copycsv(tval, tbaseurl)
		liststrings = numliststr(varinfo1)
		if (liststrings == "") addstrings = ""
		else addstrings = " stringcols(" + liststrings + ")"
		stata("clear")
		stata("qui import delimited temp_eddata_file_gen_012345.csv, clear" + addstrings)
		stata("qui rm temp_eddata_file_gen_012345.csv")
		temp1 = labelcsv(varinfo1, 1)
		temp2 = subsetkeep(spops1, querystring1, epid1, vlist1)
	}
	else{
		printf("Progress for each CSV file will print to your screen. Please wait...\n\n")
		// Keep files whose name contains ".csv" and exactly one of the
		// requested values in yearslist.
		relfilesstr = ""
		for (r=1; r<=numresults; r++){
			trow = results1->getArrayValue(r);
			tval = trow->getString("file_name","");
			dlyear = 0
			if (subinstr(tval, ".csv", "") != tval) dlyear = dlyear + 1
			else dlyear = -10
			for (c=1; c<=length(yearslist); c++){
				if (subinstr(tval, yearslist[c], "") != tval) dlyear = dlyear + 1
			}
			if (dlyear == 2){
				if (relfilesstr == "") relfilesstr = tval
				else relfilesstr = relfilesstr + ";" + tval
			}
		}
		t = tokeninit(";")
		s = tokenset(t, relfilesstr)
		relfiles = tokengetall(t)
		// Nothing matched: report "no CSV available" to the caller instead of
		// failing below while removing a temporary file that was never made.
		if (length(relfiles) == 0) return(0)
		for (r=1; r<=length(relfiles); r++){
			if (r == 1){
				// The first file is downloaded locally so that the column
				// positions of the string variables can be determined.
				temp3 = copycsv(relfiles[r], tbaseurl)
				liststrings = numliststr(varinfo1)
				if (liststrings == "") addstrings = ""
				else addstrings = " stringcols(" + liststrings + ")"
				stata("clear")
			}
			printf("Processing file " + strofreal(r) + " of " + strofreal(length(relfiles)) + "\n")
			// preserve the data accumulated so far, process the next file,
			// save it, restore the accumulated data and append the new file
			stata("qui preserve")
			if (r == 1) {
				stata("qui import delimited temp_eddata_file_gen_012345.csv, clear" + addstrings)
				stata("qui rm temp_eddata_file_gen_012345.csv")
			}
			else stata("qui import delimited " + tbaseurl + subinstr(relfiles[r], " ", "") + ", clear" + addstrings)
			if (temp1 == 0) temp1 = labelcsv(varinfo1, 1)
			else temp1 = labelcsv(varinfo1, 0)
			temp2 = subsetkeep(spops1, querystring1, epid1, vlist1)
			stata("qui save temp_eddata_file_gen_012345, replace")
			stata("qui restore")
			stata("qui append using temp_eddata_file_gen_012345")
		}
		stata("qui rm temp_eddata_file_gen_012345.dta")
	}
	stata("qui compress")
	return(1)
}

// Gets all tables, using API to get the varlist and vartypes, and looping through all "nexts", calling gettable
//   eid       - endpoint id used to fetch the variable list
//   url2      - endpoint path (appended to the base_url global)
//   totallen1 - total number of endpoint calls in this request
//   epcount1  - 1-based index of this call; the time estimate is printed
//               only for the first call
// Returns 1.
real scalar getalltables(string scalar eid, string scalar url2, real scalar totallen1, real scalar epcount1){
	pointer (class libjson scalar) scalar root
	pointer (class libjson scalar) scalar results1
	string matrix varinfo
	string scalar nextpage
	string scalar timea
	string scalar timetaken1
	string scalar timetaken2
	real scalar pagesize
	real scalar totalpages
	real scalar countpage
	real scalar timeper1
	real scalar timeper2
	real scalar spos
	varinfo = getvarinfo(st_global("base_url") + "/api/v1/api-endpoint-varlist/?endpoint_id=" + eid)
	if (st_global("debug_ind") == "1") printf(urlmode(st_global("base_url") + url2) + "\n")
	// This first request is only used for the record count and page size;
	// gettable() below requests the same page again to load it.
	root = libjson::webcall(urlmode(st_global("base_url") + url2),"");
	results1 = root->getNode("results")
	pagesize = results1->arrayLength()
	// ceil() rather than floor()+1: the old form reported one page too many
	// whenever count was an exact multiple of the page size (including the
	// common single-page case). An empty result (0/0 -> missing) counts as
	// one page.
	totalpages = ceil((strtoreal(root->getString("count", ""))) / pagesize)
	if (totalpages == . | totalpages < 1) totalpages = 1
	spos = 1
	if (st_nobs() > 0) spos = st_nobs() + 1
	countpage = 1
	if (epcount1 == 1){
		// Estimate range: 1.5 to 10 seconds per page, in milliseconds
		timeper1 = 1500 * totalpages * totallen1
		timeper2 = 10000 * totalpages * totallen1
		timetaken1 = timeit(timeper1)
		timetaken2 = timeit(timeper2)
		timea = "\nI estimate that the download for the entire file you requested will take "
		if (timetaken1 == "less than one minute" && timetaken2 == "less than one minute") printf(timea + "less than one minute.\n")
		else if (timetaken1 == "less than one minute" && timetaken2 != "less than one minute") printf(timea + "less than " + timetaken2 + ".\n")
		else printf(timea + "between %s and %s.\n", timetaken1, timetaken2)
		printf("This is only an estimate, so actual time may vary due to internet speed and file size differences.\n\n")
		printf("Progress for each endpoint and call to the API will print to your screen. Please wait...\n")
		printf("If this time is too long for you to wait, try adding the " + `"""' + "csv" + `"""' + " option to the end of your command to download the full csv directly.\n")
	}
	printf("\nGetting data from %s, endpoint %s of %s (%s records).\n", url2, strofreal(epcount1), strofreal(totallen1), root->getString("count", ""))
	nextpage = gettable(st_global("base_url") + url2, spos, varinfo)
	if (nextpage!="null"){
		do {
			spos = spos + pagesize
			countpage = countpage + 1
			printf("Endpoint %s of %s: On page %s of %s\n", strofreal(epcount1), strofreal(totallen1), strofreal(countpage), strofreal(totalpages))
			nextpage = gettable(nextpage, spos, varinfo)
		} while (nextpage!="null")
	}
	return(1)
}

// Loads one summary endpoint into the dataset by calling gettable_summaries
// once for base_url + url2, appending after any existing observations.
// Only that single page is loaded; "next" pages are not followed.
// totallen1 and epcount1 are accepted for signature parity with
// getalltables() and are not used. Returns 1.
real scalar getalltables_summaries(string matrix varinfo, string scalar url2, real scalar totallen1, real scalar epcount1){
	string scalar nextpage
	real scalar spos
	if (st_global("debug_ind") == "1") printf(st_global("base_url") + url2 + "\n")
	// The extra web request that used to be made here only fed variables
	// that were never read, so it has been dropped; gettable_summaries()
	// performs the one request that is needed.
	spos = 1
	if (st_nobs() > 0) spos = st_nobs() + 1
	nextpage = gettable_summaries(st_global("base_url") + url2, spos, varinfo)
	return(1)
}

// helper function to get data from summary 
// Helper function to get data from summary endpoints
//   dataoptions - long dataset name (space-separated URL path segments)
//   summaries   - "<stat> <variable> <word> <group variable>"
//   opts        - space-separated extra filters, each appended as "&<filter>"
//   vlist       - variables to keep ("" keeps all)
// Creates the variables year, <group variable> and <variable>, loads the
// summary data into them and returns "".
string scalar getsummarydata(string scalar dataoptions, string scalar summaries, string scalar opts, string scalar vlist){
	string matrix varinfo
	real scalar spos
	string matrix summary_ep_url
	string rowvector allopts
	string rowvector token_cmd
	string matrix varinfo1
	string matrix varinfo2
	string matrix varinfo3
	real scalar tempdata
	real scalar totallen
	real scalar epcount
	summary_ep_url = getsummariesurl(dataoptions, summaries)
	printf("\nGetting data from: " + st_global("base_url") + summary_ep_url)
	allopts = tokens(opts)
	for (i=1; i<=length(allopts); i++){
		summary_ep_url = summary_ep_url + "&" + allopts[i]
	}
	token_cmd = tokens(summaries)
	var_to_agg = token_cmd[2]
	agg_by = token_cmd[4]
	varinfo1 = getvarinfo(st_global("base_url") + "/api/v1/api-variables/?variable=year")
	varinfo2 = getvarinfo(st_global("base_url") + "/api/v1/api-variables/?variable=" + agg_by)
	varinfo3 = getvarinfo(st_global("base_url") + "/api/v1/api-variables/?variable=" + var_to_agg)
	// varinfo has 6 attribute rows and 3 columns (year, group variable,
	// aggregated variable).
	// NOTE(review): the loop below copies only attribute rows 1-3 (numrows is
	// reused as the row bound), so rows 4-6 stay empty and no value labels
	// are fetched for these variables -- confirm whether that is intended.
	numrows = 3
	var_attr = 6
	varinfo = J(var_attr, numrows, "")
	for (r=1; r<=numrows; r++){
		varinfo[r, 1] = varinfo1[r, 1]
		varinfo[r, 2] = varinfo2[r, 1]
		varinfo[r, 3] = varinfo3[r, 1]
	}
	spos = 1
	epcount = 0
	totallen = 1
	tempdata = createdataset_summaries_ep(varinfo)
	for (i=1; i<=totallen; i++){
		epcount = epcount + 1
		alldata = getalltables_summaries(varinfo, summary_ep_url, totallen, epcount)
	}
	stata("qui compress")
	if (vlist != "") stata("keep " + vlist)
	else printf("\n\nData successfully loaded into Stata and ready to use.")
	return("")
}

// Main function to get data based on Stata request - calls other helper functions
//   dataoptions  - dataset name as typed after "using", e.g. "school ccd directory"
//   vlist        - variables to keep ("" keeps all)
//   opts         - space-separated "name=value" selections and filters
//   summaries    - summaries sub-command; non-empty switches to the summary
//                  endpoints
//   clearme      - > 0: clear data in memory first; otherwise refuse to run
//                  when data are loaded
//   metadataonly - > 0: create and label the variables but load no data
//   staging      - > 0: use the staging server
//   csv          - > 0: download the CSV files instead of paging the API
//   clearcache   - > 0: sets the "cc" global to "1"
//   debugind     - > 0: sets the "debug_ind" global to "1" (URLs are printed)
// Returns "" in most cases; two validation failures return a
// "Download failed" message string instead.
string scalar getalldata(string scalar dataoptions, string scalar vlist, string scalar opts, string scalar summaries, real scalar clearme, real scalar metadataonly, real scalar staging, real scalar csv, real scalar clearcache, real scalar debugind){
	string matrix endpoints
	string matrix spops
	string matrix varinfo
	string rowvector allopts
	string rowvector validopts
	string rowvector res2
	string rowvector respre
	string rowvector temp1
	string rowvector temp2
	string scalar eid
	string scalar urltemp
	string scalar urladds
	string scalar querystring
	string scalar dataoptions1
	string scalar validfilters
	string scalar ds
	real scalar epid
	real scalar spos
	real scalar spos1
	real scalar hidereturn
	real scalar totallen
	real scalar epcount
	real scalar tempdata
	real scalar temp3
	st_global("base_url","https://educationdata.urban.org")
	st_global("staging_url","https://educationdata-stg.urban.org")
	if (staging > 0) st_global("base_url", st_global("staging_url"))
	st_global("cc","0")
	if (clearcache > 0) st_global("cc","1")
	st_global("debug_ind", "0")
	if (debugind > 0) st_global("debug_ind", "1")
	if (clearme > 0) stata("clear")
	else{
		// Data are "loaded" when there is at least one observation and one
		// variable. Checked from the counts; the dataset itself is not copied.
		if (st_nobs() > 0 & st_nvar() > 0) {
			printf("Error: You currently have data loaded in Stata. Please add " + `"""' + "clear" + `"""' + " to the end of this command if you wish to remove your current data.")
			return("")
		}
		else stata("clear")
	}
	endpoints = endpointstrings()
	dataoptions1 = shorttolongname(strlower(dataoptions), endpoints)

	// Validate the dataset name before it is used by either path, so that a
	// bad name is reported instead of being turned into a summaries URL.
	if (dataoptions1 == "Error1"){
		printf("Error: You must enter the complete name of a dataset in the 'using' statement. The first is the 'short' name for the data category, and the remaining words are the unique name of the dataset. E.g., using " + `"""' + "school directory" + `"""' + ". Type " + `"""' + "help educationdata" + `"""' + " to learn more.")
		return("")
	}
	else if (dataoptions1 == "Error2"){
		printf("Error: The option you selected was invalid. The three options are: " + `"""' + "school" + `"""' + ", " + `"""' + "district" + `"""' + ", and " + `"""' + "college" + `"""' + ". Type " + `"""' + "help educationdata" + `"""' + " to learn more.")
		return("")
	}

	if (strlen(summaries) > 0){
		getsummarydata(dataoptions1, summaries, opts, vlist)
		return("")
	}

	epid = validendpoints(dataoptions1)
	if (epid == 0 || dataoptions1 == "Error3"){
		printf("Error: The name of the category ('school', 'district', or 'college') is correct, but the name of the dataset you chose is not. Please verify the list of allowed options by typing " + `"""' + "help educationdata" + `"""' + ".")
		return("")
	}
	eid = endpoints[1,epid]
	varinfo = getvarinfo(st_global("base_url") + "/api/v1/api-endpoint-varlist/?endpoint_id=" + eid)
	for (c=1; c<=length(varinfo[1,.]); c++){
		varinfo[1,c] = strlower(varinfo[1,c])
	}
	validfilters = ""
	for (c=1; c<=length(varinfo[6,.]); c++){ /* varinfo[6,c] indicates is_filter */
		if (varinfo[6,c] == "1" && varinfo[3,c] == "double"){ /* note that no float variables are filters per metadata */
			if (validfilters == "") validfilters = varinfo[1,c]
			else validfilters = validfilters + ", " + varinfo[1,c]
		}
	}
	t = tokeninit(", ")
	s = tokenset(t, validfilters)
	respre = tokengetall(t)
	allopts = tokens(opts)
	// spops: row 1 = names of the endpoint's URL options, row 2 = the user's
	// "name=value" selection for each (filled in below)
	validopts = parseurls(endpoints[2,epid], "optional")
	spops = J(2,length(validopts),"")
	spops[1,.] = validopts[1,.]
	urladds = ""
	if (length(varinfo[1,.]) > 0){
		for (i=1; i<=length(allopts); i++){
			t = tokeninit("=")
			s = tokenset(t, allopts[i])
			res2 = tokengetall(t)
			spos = stringpos(res2[1], validopts)
			if (spos > 0) spops[2,spos] = allopts[i]
			else{
				spos1 = stringpos(res2[1], varinfo[1,.])
				if (spos1 > 0){
					// a value containing ":" or "," selects several values,
					// which only the variables listed in respre support
					if (res2[2] != subinstr(subinstr(res2[2], ":", ""), ",", "")){
						if (iteminlist(res2[1], respre) == 0){
							printf("Error, option " + allopts[i] + " is not valid, because it may only be filtered on a single value, not multiple values.\n")
							printf("Decimal variables may not be filtered at all. The variables that can be filtered on multiple values in this dataset are as follows:\n\n")
							printf(validfilters)
							return("\n\nDownload failed. Please try again.")
						}
					}
					if (urladds == "") {
						urladds = urladds + allopts[i]
					}
					else {
						urladds = urladds + ";" + allopts[i]
					}
				}
				else {
					printf("Error, option " + allopts[i] + " is not valid. Valid variable selections are as follows:\n")
					// List the variable names five per line.
					urladds = ""
					for (c=1; c<=length(varinfo[1,.]); c++){
						if (mod(c, 5) == 1) urladds = urladds + varinfo[1,c]
						else urladds = urladds + ", " + varinfo[1,c]
						if (mod(c, 5) == 0) urladds = urladds + "\n"
					}
					// Fix: the list used to be assembled but never displayed.
					printf(urladds)
					return("\n\nDownload failed. Please try again.")
				}
			}
		}
	}
	querystring = getquerystrings(urladds)
	for (i=1; i<=length(spops[1,.]); i++){
		if (spops[2,i] == "") spops[2,i] = spops[1,i] + "=alldata"
	}
	temp1 = validoptions(spops[2,1], epid)
	if (tokens(temp1[1])[1] == "Invalid"){
		printf(temp1[1])
		return("")
	}
	epcount = 0
	if (metadataonly <= 0) printf("Please be patient - downloading data.\n")
	if (csv > 0 && metadataonly <= 0){
		printf("\nNote that this function temporarily writes data to the current working directory.\n")
		printf("If you do not have read and write privileges to the current directory, please change your working directory.\n")
		printf("For example, you can enter " + `"""' + "cd D:/Users/[Your username here]/Documents" + `"""' + ".\n\n")
		ds = tokens(dataoptions)[2]
		temp3 = downloadcsv(eid,spops,ds,epid,varinfo,querystring,vlist)
		if (temp3 == 0){
			printf("Error: Sorry, there is no CSV file available for download for this dataset at this time.")
			// Fix: stop here. Previously execution fell through to the
			// "keep"/success message below although nothing was loaded.
			return("")
		}
	}
	else{
		tempdata = createdataset(eid)
		if (metadataonly <= 0){
			if (length(spops[1,.]) == 1){
				// one URL option: one API call per selected value
				totallen = length(temp1)
				for (i=1; i<=length(temp1); i++){
					epcount = epcount + 1
					urltemp = subinstr(endpoints[2,epid], "{" + spops[1,1] + "}", temp1[i]) + querystring
					hidereturn = getalltables(eid, urltemp, totallen, epcount)
				}
			}
			else{
				// two URL options: one API call per combination of values
				temp2 = validoptions(spops[2,2], epid)
				if (tokens(temp2[1])[1] == "Invalid"){
					printf(temp2[1])
					return("")
				}
				totallen = length(temp1) * length(temp2)
				for (i=1; i<=length(temp1); i++){
					for (j=1; j<=length(temp2); j++){
						epcount = epcount + 1
						urltemp = subinstr(subinstr(endpoints[2,epid], "{" + spops[1,1] + "}", temp1[i]), "{" + spops[1,2] + "}", temp2[j]) + querystring
						hidereturn = getalltables(eid, urltemp, totallen, epcount)
					}
				}
			}
			stata("qui compress")
		}
		if (metadataonly > 0) {
			printf("Metadata successfully loaded into Stata and ready to view. Remove the " + `"""' + "metadata" + `"""' + " argument if you want to load the data itself.\n\n")
			printf("Note: You may filter this dataset on any variable (as long as it does not have a decimal value) using a single value (e.g. grade=1), however only the following variables allow filtering on multiple values (e.g.grade=1:3 or grade=1,2):\n\n")
			printf(validfilters)
		}
	}
	if (vlist != "") stata("keep " + vlist)
	else printf("\nData successfully loaded into Stata and ready to use.")
	return("")
}

end
--------------------------------------------------------------------------------
/docs/educationdata.ado:
--------------------------------------------------------------------------------
   1 | *! 
*! version 0.4.3
// Stata entry point: makes sure libjson is available, parses the command
// syntax, and hands every option to the Mata driver getalldata().
program educationdata
    version 11.0
    // Install libjson from SSC when the compiled library cannot be found.
    mata: if (findfile("libjson.mlib") != "") {} else stata("ssc install libjson");
    mata: if (libjson::checkVersion((1,0,2))) {} else printf("{err: The JSON library version is not compatible with this command and so will likely fail. Please update libjson by running the following: ado uninstall libjson, then run: ssc install libjson}\n");
    syntax using/ , [SUBset(string)] [COLumns(string)] [SUMMARIES(string)] [CLEAR] [METAdata] [STAGING] [CSV] [CACHE] [DEBUG]
    // Flag options are passed as strlen(): 0 when absent, > 0 when specified.
    mata: dummy=getalldata("`using'", "`columns'", "`subset'", "`summaries'", strlen("`clear'"),strlen("`metadata'"),strlen("`staging'"),strlen("`csv'"),strlen("`cache'"),strlen("`debug'"));
end

mata

// Beginning section above and some structure borrowed from insheetjson - thanks!;

// Helper function that returns the "results" node of the JSON document at url.
// The tracking URL is built once so that the URL printed in debug mode is
// exactly the URL that is requested.
pointer (class libjson scalar) scalar getresults(string scalar url){
    pointer (class libjson scalar) scalar root
    pointer (class libjson scalar) scalar result
    string scalar requrl
    requrl = urlmode(url)
    if (st_global("debug_ind") == "1") printf(requrl + "\n")
    root = libjson::webcall(requrl ,"");
    result = root->getNode("results")
    return(result)
}

// Helper function that returns a 6 x k string matrix of variable information
// from the API, one column per variable:
//   row 1 variable name        row 4 "1" if value definitions exist, else "0"
//   row 2 variable label       row 5 format name
//   row 3 Stata storage type   row 6 is_filter flag
string matrix getvarinfo(string scalar url){
    pointer (class libjson scalar) scalar res
    pointer (class libjson scalar) scalar trow
    pointer (class libjson scalar) scalar result
    string scalar tempvar
    real scalar numrows
    real scalar numrowscheck
    res = getresults(url)
    numrows = res->arrayLength()
    if (numrows == 0){
        // NOTE(review): raise_error() is not defined in the visible part of
        // this file; presumably it aborts execution - confirm.
        temp = raise_error()
        return("")
    }
    varinfo = J(6,numrows,"")
    for (r=1; r<=numrows; r++) {
        trow = res->getArrayValue(r)
        varinfo[1,r] = trow->getString("variable", "")
        varinfo[2,r] = trow->getString("label", "")
        tempvar = trow->getString("data_type", "")
        // Both numeric API types are stored as double.
        if (tempvar == "integer" || tempvar == "float") varinfo[3,r] = "double"
        else if (tempvar == "string"){
            varinfo[3,r] = "str" + trow->getString("string_length", "")
        }
        varinfo[5,r] = trow->getString("format", "")
        // Value definitions are looked up by format name when the format is a
        // real, distinct name; otherwise by the variable name itself.
        if (varinfo[5,r] != varinfo[1,r] && varinfo[5,r] != "string" && varinfo[5,r] != "numeric"){
            result = getresults(st_global("base_url") + "/api/v1/api-values/?format_name=" + varinfo[5,r])
        }
        else result = getresults(st_global("base_url") + "/api/v1/api-values/?format_name=" + varinfo[1,r])
        numrowscheck = result->arrayLength()
        if (numrowscheck == 0) varinfo[4,r] = "0"
        else varinfo[4,r] = "1"
        varinfo[6,r] = trow->getString("is_filter", "")
    }
    return(varinfo)
}


// Parse metadata to get api endpoint ids, endpoint URLs, and years available.
// Returns a 3 x k string matrix (row 1 id, row 2 URL, row 3 years).
string matrix endpointstrings(){
    pointer (class libjson scalar) scalar res1
    pointer (class libjson scalar) scalar trow
    string matrix endpointdata
    res1 = getresults(st_global("base_url") + "/api/v1/api-endpoints/")
    numrows = res1->arrayLength()
    endpointdata = J(3,numrows,"")
    for (r=1; r<=numrows; r++){
        trow = res1->getArrayValue(r)
        endpointdata[1,r] = trow->getString("endpoint_id", "")
        endpointdata[2,r] = trow->getString("endpoint_url", "")
        endpointdata[3,r] = trow->getString("years_available", "")
    }
    return(endpointdata)
}

// Helper function to parse url endpoint strings into their path segments.
// typevar == "optional" returns the names inside {braces}; any other value
// returns the literal (required) segments.
// The url argument is copied before editing: Mata passes arguments by
// address, so assigning to url would change the caller's variable.
string rowvector parseurls(string scalar url, string scalar typevar){
    string rowvector splits
    string scalar path
    string scalar splitr
    string scalar keepvars
    path = subinstr(url, "/api/v1/", "")
    t = tokeninit("/")
    s = tokenset(t, path)
    splits = tokengetall(t)
    keepvars = ""
    if (typevar == "optional"){
        for (r=1; r<=length(splits); r++){
            splitr = subinstr(subinstr(splits[r], "{", ""), "}", "")
            // Removing braces changed the segment, so it was a {placeholder}.
            if (splitr != splits[r]){
                if (keepvars == "") keepvars = keepvars + splitr
                else keepvars = keepvars + "," + splitr
            }
        }
    }
    else{
        for (r=1; r<=length(splits); r++){
            splitr = subinstr(subinstr(splits[r], "{", ""), "}", "")
            if (splitr == splits[r]){
                if (keepvars == "") keepvars = keepvars + splits[r]
                else keepvars = keepvars + "," + splits[r]
            }
        }
    }
    t = tokeninit(",")
    s = tokenset(t, keepvars)
    return(tokengetall(t))
}

// Helper function to parse required data as inputs, check validity, and return
// the column index of the chosen endpoint (0 when nothing matches). When more
// than one endpoint matches, the last match is returned.
real scalar validendpoints(string scalar eps){
    string matrix endpoints
    string rowvector epsind
    string rowvector parsedurls
    real scalar check
    real scalar permcheck
    endpoints = endpointstrings()
    epsind = tokens(eps)
    permcheck = 0
    for (c=1; c<=length(endpoints[2,.]); c++){
        parsedurls = parseurls(endpoints[2,c], "required")
        if (length(parsedurls) == length(epsind)){
            check = 1
            for (r=1; r<=length(epsind); r++){
                if (epsind[r] != parsedurls[r]) check = 0
            }
            if (check == 1) permcheck = c
        }
    }
    return(permcheck)
}

// Helper function to parse years available for endpoint column matid.
// Handles a single year, "a, b and c–d", comma lists that may contain
// ranges, and a single "a–b" range; ranges are expanded year by year.
string rowvector parseyears(real scalar matid){
    string matrix endpoints
    string rowvector getit
    string rowvector getit2
    string rowvector returnyears
    string scalar yrs
    string scalar yrstring
    endpoints = endpointstrings()
    yrs = endpoints[3,matid]
    if (subinstr(subinstr(yrs, ",", ""), "–", "") == yrs){
        // Neither a comma nor a dash: a single year.
        returnyears = (yrs)
    }
    else if (subinstr(yrs, "and", "") != yrs){
        yrs = subinstr(subinstr(yrs, " ", ""), "and", "")
        t = tokeninit(",")
        s = tokenset(t, yrs)
        getit = tokengetall(t)
        // Drop the trailing range item, then re-add it expanded.
        yrstring = subinstr(yrs, "," + getit[length(getit)], "")
        t = tokeninit("–")
        s = tokenset(t, getit[length(getit)])
        getit = tokengetall(t)
        for (y=strtoreal(getit[1]); y<=strtoreal(getit[2]); y++){
            yrstring = yrstring + "," + strofreal(y)
        }
        t = tokeninit(",")
        s = tokenset(t, yrstring)
        returnyears = tokengetall(t)
    }
    else if (subinstr(yrs, ",", "") != yrs){
        t = tokeninit(", ")
        s = tokenset(t, yrs)
        getit = tokengetall(t)
        yrstring = ""
        for (c=1; c<=length(getit); c++){
            if (subinstr(getit[c], "–", "") != getit[c]){
                t = tokeninit("–")
                s = tokenset(t, getit[c])
                getit2 = tokengetall(t)
                if (c == 1) yrstring = getit2[1]
                else yrstring = yrstring + "," + getit2[1]
                for (y=strtoreal(getit2[1])+1; y<=strtoreal(getit2[2]); y++){
                    yrstring = yrstring + "," + strofreal(y)
                }
            }
            else{
                if (c == 1) yrstring = getit[c]
                else yrstring = yrstring + "," + getit[c]
            }
        }
        t = tokeninit(",")
        s = tokenset(t, yrstring)
        returnyears = tokengetall(t)
    }
    else {
        t = tokeninit("–")
        s = tokenset(t, yrs)
        getit = tokengetall(t)
        yrstring = getit[1]
        for (y=strtoreal(getit[1])+1; y<=strtoreal(getit[2]); y++){
            yrstring = yrstring + "," + strofreal(y)
        }
        t = tokeninit(",")
        s = tokenset(t, yrstring)
        returnyears = tokengetall(t)
    }
    return(returnyears)
}

// Helper function to validate a single option against the list of valid
// options. Returns 1 when test is in vopts, otherwise 0.
real scalar isvalid(string scalar test, string rowvector vopts){
    for (c = 1; c<=length(vopts); c++){
        if (vopts[c] == test) return(1)
    }
    return(0)
}

// Helper function to get the position of the first occurrence of a string in
// a list (0 when absent).
real scalar stringpos(string scalar test, string rowvector tlist){
    for (r = 1; r<=length(tlist); r++){
        if (test == tlist[r]) return(r)
    }
    return(0)
}

// Helper function to check if item is in a list (1 = yes, 0 = no).
real scalar iteminlist(string scalar i, string rowvector tlist){
    for (r=1; r<=length(tlist); r++){
        if (i == tlist[r]) return(1)
    }
    return(0)
}

// Helper function returning the position of the LAST occurrence of an item in
// a list (0 when absent); compare stringpos(), which returns the first.
real scalar iteminlistnum(string scalar i, string rowvector tlist){
    real scalar isinlist
    isinlist = 0
    for (r=1; r<=length(tlist); r++){
        if (i == tlist[r]) isinlist = r
    }
    return(isinlist)
}

// Helper function to add mode logging to URLs for API tracking. When the
// global "cc" is "1" a random "&a=" parameter is also appended.
// The result is built in a local copy: Mata passes arguments by address, so
// assigning to url3 would silently rewrite the caller's URL variable.
string scalar urlmode(string scalar url3){
    string scalar strnum
    string scalar tagged
    tagged = url3
    if (strpos(tagged, "mode=stata") == 0){
        if (subinstr(tagged, "?", "") == tagged) tagged = tagged + "?mode=stata"
        else tagged = tagged + "&mode=stata"
    }
    strnum = strofreal(round(runiform(1,1)*100000))
    if (st_global("cc") == "1") tagged = tagged + "&a=" + strnum
    return(tagged)
}

// Helper function to validate a list of option values against the values
// allowed for option tocheck. Bare grade values are normalised to their
// "grade-" form. On the first invalid value the return is
// ("Error", offending value, comma-separated valid values).
// Options other than grade, grade_edfacts, level_of_study, fed_aid_type, and
// year are returned unchanged.
string rowvector checkinglist(string rowvector alist, string scalar tocheck, string rowvector yearlist){
    string rowvector tochecklist
    string rowvector toaddlist
    string rowvector validlist
    string scalar returnlist
    if (tocheck == "grade") {
        tochecklist = ("grade-pk","grade-k","grade-1","grade-2","grade-3","grade-4","grade-5","grade-6","grade-7","grade-8","grade-9","grade-10","grade-11","grade-12","grade-13","grade-14","grade-15","grade-99","grade-999")
        toaddlist = ("pk","k","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","99","999")
    }
    else if (tocheck == "grade_edfacts") {
        tochecklist = ("grade-3","grade-4","grade-5","grade-6","grade-7","grade-8","grade-9","grade-99")
        toaddlist = ("3","4","5","6","7","8","9","99")
    }
    else if (tocheck == "level_of_study") tochecklist = ("undergraduate","graduate","first-professional","post-baccalaureate","1","2","3","4")
    else if (tocheck == "fed_aid_type") tochecklist = ("fed","sub-stafford","no-pell-stafford","1","2","3")
    else if (tocheck == "year") tochecklist = yearlist
    else return(alist)
    for (c=1; c<=length(alist); c++){
        if (iteminlist(alist[c],tochecklist) == 0) {
            if ((tocheck == "grade" || tocheck == "grade_edfacts") && (alist[c] == "-1" || alist[c] == "0")){
                // Numeric aliases: -1 is pre-kindergarten, 0 is kindergarten.
                if (alist[c] == "-1") alist[c] = "grade-pk"
                else alist[c] = "grade-k"
            }
            else if ((tocheck != "grade" && tocheck != "grade_edfacts") || iteminlist(alist[c],toaddlist) == 0){
                if (tocheck == "grade" || tocheck == "grade_edfacts") validlist = toaddlist
                else validlist = tochecklist
                for (r=1; r<=length(validlist); r++){
                    if (r == 1) returnlist = validlist[r]
                    else returnlist = returnlist + ", " + validlist[r]
                }
                return(("Error",alist[c],returnlist))
            }
            else alist[c] = "grade-" + alist[c]
        }
    }
    return(alist)
}

// Helper function to parse optional data as inputs, taking a single optional
// data argument of the form "name=value", check validity, and return all
// chosen options. value may be a single item, a comma list, an "a:b" range,
// or "alldata". On failure the first element of the result starts with
// "Invalid".
string rowvector validoptions(string scalar subset1, real scalar epid){
    string matrix endpoints
    string rowvector grades
    string rowvector levels
    string rowvector fedaids
    string rowvector vopts
    string rowvector getit
    string rowvector tlev
    string rowvector years
    string rowvector checklist
    string rowvector lookup
    string scalar getstring
    string scalar tempadd
    string scalar keepg1
    real scalar isopt1
    real scalar spos1
    real scalar spos2
    real scalar isgrade
    endpoints = endpointstrings()
    t = tokeninit("=")
    s = tokenset(t, subset1)
    getit = tokengetall(t)
    vopts = parseurls(endpoints[2,epid], "optional")
    isopt1 = isvalid(getit[1], vopts)
    if (isopt1 == 1){
        grades = ("pk","k","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","99","999")
        grades_alt = ("-1","0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","99","999")
        gradesed = ("3","4","5","6","7","8","9","99")
        gradesed_alt = ("3","4","5","6","7","8","9","99")
        levels = ("undergraduate","graduate","first-professional","post-baccalaureate")
        fedaids = ("fed","sub-stafford","no-pell-stafford")
        if (getit[1] == "year") years = parseyears(epid)
        else years = ("fake","data")   // placeholder; only read when the option is year
        if (getit[2] != "alldata"){
            if (subinstr(subinstr(getit[2], ",", ""), ":", "") == getit[2]){
                // Single value.
                checklist = checkinglist((getit[2]), getit[1], years)
                if (checklist[1] == "Error") return(("Invalid Option: " + checklist[2] + " in " + getit[1] + "\nValid options are: " + checklist[3], ""))
                else return(checklist)
            }
            else if (subinstr(getit[2], ",", "") != getit[2]){
                // Comma-separated list.
                t = tokeninit(",")
                s = tokenset(t, getit[2])
                checklist = checkinglist(tokengetall(t), getit[1], years)
                if (checklist[1] == "Error") return(("Invalid Option: " + checklist[2] + " in " + getit[1] + "\nValid options are: " + checklist[3], ""))
                else return(checklist)
            }
            else{
                // Range "a:b".
                tempadd = ""
                isgrade = 0
                if (getit[1] == "year") tlev = years
                else if (getit[1] == "grade"){
                    tlev = grades
                    tempadd = "grade-"
                    isgrade = 1
                }
                else if (getit[1] == "grade_edfacts"){
                    tlev = gradesed
                    tempadd = "grade-"
                    isgrade = 2
                }
                else if (getit[1] == "level_of_study") tlev = levels
                else if (getit[1] == "fed_aid_type") tlev = fedaids
                // getit is reused for the range endpoints below, so the option
                // name is kept in keepg1 and used from here on.
                keepg1 = getit[1]
                t = tokeninit(":")
                s = tokenset(t, getit[2])
                getit = tokengetall(t)
                // Choose the list in which both endpoints are found: the
                // canonical levels first, then the numeric grade aliases.
                lookup = J(1,0,"")
                if (isvalid(getit[1], tlev) == 1 && isvalid(getit[2], tlev) == 1) lookup = tlev
                else if (isgrade == 1 && (isvalid(getit[1], grades_alt) == 1 && isvalid(getit[2], grades_alt) == 1)) lookup = grades_alt
                else if (isgrade == 2 && (isvalid(getit[1], gradesed_alt) == 1 && isvalid(getit[2], gradesed_alt) == 1)) lookup = gradesed_alt
                if (length(lookup) > 0){
                    spos1 = stringpos(getit[1], lookup)
                    spos2 = stringpos(getit[2], lookup)
                    // Positions come from lookup, but the values emitted are
                    // always the canonical tlev entries.
                    getstring = tempadd + tlev[spos1]
                    for (c=spos1 + 1; c<=spos2; c++){
                        getstring = getstring + "," + tempadd + tlev[c]
                    }
                    t = tokeninit(",")
                    s = tokenset(t, getstring)
                    checklist = checkinglist(tokengetall(t), keepg1, years)
                    if (checklist[1] == "Error") return(("Invalid Option: " + checklist[2] + " in " + keepg1 + "\nValid options are: " + checklist[3], ""))
                    else return(checklist)
                }
                else {
                    // Report whichever endpoint is not a valid level.
                    if (isvalid(getit[1], tlev) == 0){
                        checklist = checkinglist((getit[1]),keepg1, years)
                        return(("Invalid Option selection: " + getit[1] + " in " + keepg1 + "\nValid options are: " + checklist[3], ""))
                    }
                    else{
                        checklist = checkinglist((getit[2]),keepg1, years)
                        return(("Invalid Option selection: " + getit[2] + " in " + keepg1 + "\nValid options are: " + checklist[3], ""))
                    }
                }
            }
        }
        else{
            // "alldata": return every level for the option.
            tempadd = ""
            if (getit[1] == "year") tlev = years
            else if (getit[1] == "grade"){
                tlev = grades
                tempadd = "grade-"
            }
            else if (getit[1] == "grade_edfacts"){
                tlev = gradesed
                tempadd = "grade-"
            }
            else if (getit[1] == "level_of_study") tlev = levels
            else if (getit[1] == "fed_aid_type") tlev = fedaids
            getstring = tempadd + tlev[1]
            for (c=2; c<=length(tlev); c++){
                getstring = getstring + "," + tempadd + tlev[c]
            }
            t = tokeninit(",")
            s = tokenset(t, getstring)
            return(tokengetall(t))
        }
    }
    else return(("Invalid Option: " + getit[1]))
}

// Helper function that returns the names (row 1 of varinfo) of either the
// string variables (typ == "string") or all other variables, decided by
// whether the storage type in row 3 contains "str".
string rowvector getvartypes(string scalar typ, string matrix varinfo){
    real scalar counting
    real scalar counter1
    string rowvector varnametypes
    real scalar numvars
    numvars = length(varinfo[1,.])
    // First pass: count matches so the result can be sized up front.
    counting = 0
    for (c=1; c<=numvars; c++){
        if (subinstr(varinfo[3,c], "str", "") != varinfo[3,c]){
            if (typ == "string") counting = counting + 1
        }
        else {
            if (typ != "string") counting = counting + 1
        }
    }
    varnametypes = J(1,counting,"")
    counter1 = 1
    for (c=1; c<=numvars; c++){
        if (subinstr(varinfo[3,c], "str", "") != varinfo[3,c]){
            if (typ == "string") {
                varnametypes[1,counter1] = varinfo[1,c]
                counter1 = counter1 + 1
            }
        }
        else {
            if (typ != "string") {
                varnametypes[1,counter1] = varinfo[1,c]
                counter1 = counter1 + 1
            }
        }
    }
    return(varnametypes)
}

// Helper
// function to get variable value definitions.
// Returns a 2 x k string matrix: row 1 the codes, row 2 the labels with the
// leading "code - " prefix removed. Definitions are looked up by format1 when
// it is a real, distinct format name, otherwise by var1.
string matrix getvardefs(string scalar var1, string scalar format1){
    pointer (class libjson scalar) scalar result
    pointer (class libjson scalar) scalar trow
    string matrix vardefs
    string rowvector tokenstemp
    string scalar tempvar
    string scalar tempstring
    real scalar numrows
    real scalar startvar
    if (format1 != var1 && format1 != "string" && format1 != "numeric"){
        result = getresults(st_global("base_url") + "/api/v1/api-values/?format_name=" + format1)
    }
    else result = getresults(st_global("base_url") + "/api/v1/api-values/?format_name=" + var1)
    numrows = result->arrayLength()
    vardefs = J(2,numrows,"")
    for (r=1; r<=numrows; r++){
        trow = result->getArrayValue(r)
        vardefs[1,r] = trow->getString("code", "")
        tempvar = trow->getString("code_label", "")
        tokenstemp = tokens(tempvar, "-")
        // Skip the "code -" prefix: 2 tokens for a non-negative code, 3 when
        // the code starts with a minus sign. The length guard keeps an empty
        // label from raising a subscript error.
        if (length(tokenstemp) > 0 && tokenstemp[1] == "-") startvar = 4
        else startvar = 3
        tempstring = ""
        for (i=startvar; i<=length(tokenstemp); i++){
            tempstring = tempstring + tokenstemp[i]
            if (i != length(tokenstemp)) tempstring = tempstring + " "
        }
        // NOTE(review): the search and replacement text of the next two calls
        // look identical here; the search text may originally have been a
        // different encoding of the dash - confirm against the repository.
        vardefs[2,r] = subinstr(tempstring, "–", "–")
        vardefs[2,r] = subinstr(vardefs[2,r], "—", "—")
        // Re-joining tokens put spaces around dashes inside words; undo that.
        vardefs[2,r] = subinstr(vardefs[2,r], " - ", "-")
        vardefs[2,r] = subinstr(vardefs[2,r], " – ", "–")
        vardefs[2,r] = subinstr(vardefs[2,r], " — ", "—")
    }
    return(vardefs)
}

// Get table just gets data we need for one table (one page of results); this
// appends results to the stata dataset starting at observation startpos.
// Returns the "next" page URL from the response, or "null" when the page has
// no rows.
string scalar gettable(string scalar url, real scalar startpos, string matrix varinfo){
    pointer (class libjson scalar) scalar root
    pointer (class libjson scalar) scalar result
    pointer (class libjson scalar) scalar trow
    string matrix sdata
    string scalar nextpage
    string scalar requrl
    string scalar tval
    real matrix rdata
    real scalar numrows
    real scalar endpos
    // Build the tracking URL once so the debug output matches the request.
    requrl = urlmode(url)
    if (st_global("debug_ind") == "1") printf(requrl + "\n")
    root = libjson::webcall(requrl ,"");
    result = root->getNode("results")
    numrows = result->arrayLength()
    // Lower-cased copy of the names: JSON keys use the API's spelling, the
    // Stata variables were created in lower case.
    varinfotemp = J(6,length(varinfo[1,.]),"")
    for (c=1; c<=length(varinfo[1,.]); c++){
        varinfotemp[1,c] = strlower(varinfo[1,c])
        varinfotemp[3,c] = varinfo[3,c]
    }
    if (numrows > 0){
        st_addobs(numrows)
        endpos = startpos + numrows - 1
        svarnames = getvartypes("string", varinfo)
        rvarnames = getvartypes("other", varinfo)
        svarnamestemp = getvartypes("string", varinfotemp)
        rvarnamestemp = getvartypes("other", varinfotemp)
        sdata = J(numrows,length(svarnames),"")
        rdata = J(numrows,length(rvarnames),.)
        for (r=1; r<=numrows; r++) {
            trow = result->getArrayValue(r);
            for(c=1; c<=length(svarnames); c++) {
                tval = trow->getString(svarnames[c],"");
                // JSON null and an empty quoted string both become "".
                if (tval == "null") tval = ""
                if (tval == `"""' + `"""') tval = ""
                sdata[r,c] = tval
            }
            for(c=1; c<=length(rvarnames); c++) {
                tval = trow->getString(rvarnames[c],"");
                if (tval == "null") rdata[r,c] = .
                else rdata[r,c] = strtoreal(tval)
            }
        }
        // Write through views onto the newly added observations.
        if (length(svarnames) > 0){
            st_sview(SV,(startpos..endpos)',svarnamestemp)
            SV[.,.] = sdata[.,.]
        }
        if (length(rvarnames) > 0){
            st_view(V,(startpos..endpos)',rvarnamestemp)
            V[.,.] = rdata[.,.]
        }
        nextpage = root->getString("next", "")
        return(nextpage)
    }
    else return("null")
}


// Same as gettable(), for summary endpoints.
// NOTE(review): unlike gettable(), the request below is made with url rather
// than urlmode(url), while debug mode still prints urlmode(url) - confirm
// whether that difference is intentional.
string scalar gettable_summaries(string scalar url, real scalar startpos, string matrix varinfo){
    pointer (class libjson scalar) scalar root
    pointer (class libjson scalar) scalar result
    pointer (class libjson scalar) scalar trow
    string matrix sdata
    string scalar nextpage
    string scalar tval
    real matrix rdata
    real scalar numrows
    real scalar endpos
    if (st_global("debug_ind") == "1") printf(urlmode(url) + "\n")
    root = libjson::webcall(url ,"");
    result = root->getNode("results")
    numrows = result->arrayLength()
    varinfotemp = J(6,length(varinfo[1,.]),"")
    for (c=1; c<=length(varinfo[1,.]); c++){
        varinfotemp[1,c] = strlower(varinfo[1,c])
        varinfotemp[3,c] = varinfo[3,c]
    }
    if (numrows > 0){
        st_addobs(numrows)
        endpos = startpos + numrows - 1
        svarnames = getvartypes("string", varinfo)
        rvarnames = getvartypes("other", varinfo)
        svarnamestemp = getvartypes("string", varinfotemp)
        rvarnamestemp = getvartypes("other", varinfotemp)
        sdata = J(numrows,length(svarnames),"")
        rdata = J(numrows,length(rvarnames),.)
        for (r=1; r<=numrows; r++) {
            trow = result->getArrayValue(r);
            for(c=1; c<=length(svarnames); c++) {
                tval = trow->getString(svarnames[c],"");
                if (tval == "null") tval = ""
                if (tval == `"""' + `"""') tval = ""
                sdata[r,c] = tval
            }
            for(c=1; c<=length(rvarnames); c++) {
                tval = trow->getString(rvarnames[c],"");
                if (tval == "null") rdata[r,c] = .
                else rdata[r,c] = strtoreal(tval)
            }
        }
        if (length(svarnames) > 0){
            st_sview(SV,(startpos..endpos)',svarnamestemp)
            SV[.,.] = sdata[.,.]
        }
        if (length(rvarnames) > 0){
            st_view(V,(startpos..endpos)',rvarnamestemp)
            V[.,.] = rdata[.,.]
        }
        nextpage = root->getString("next", "")
        return(nextpage)
    }
    else return("null")
}


// Helper function to create the query string "?var=x&..." from a
// ";"-separated list of "var=value" filters. A range value "a:b" is expanded
// to "a,a+1,...,b". Returns "" when there are no filters.
string scalar getquerystrings(string scalar additions){
    string rowvector result1
    string rowvector result2
    string rowvector result3
    string scalar staticstring
    string scalar dynamicstring
    real scalar countstatic
    if (additions == "") return("")
    t = tokeninit(";")
    s = tokenset(t, additions)
    result1 = tokengetall(t)
    countstatic = 1
    staticstring = ""
    for (c=1; c<=length(result1); c++){
        t = tokeninit("=")
        s = tokenset(t, result1[c])
        result2 = tokengetall(t)
        if (subinstr(result2[2], ":", "") == result2[2]){
            // No range: the filter is used as written.
            if (countstatic == 1) staticstring = staticstring + result1[c]
            else staticstring = staticstring + "&" + result1[c]
            countstatic = countstatic + 1
        }
        else{
            t = tokeninit(":")
            s = tokenset(t, result2[2])
            result3 = tokengetall(t)
            dynamicstring = ""
            for (r=strtoreal(result3[1]); r<=strtoreal(result3[2]); r++){
                if (r == strtoreal(result3[1])) dynamicstring = dynamicstring + result2[1] + "=" + strofreal(r)
                else dynamicstring = dynamicstring + "," + strofreal(r)
            }
            // The expanded list is appended in every position; previously a
            // range that was not the first filter was appended unexpanded.
            if (countstatic == 1) staticstring = staticstring + dynamicstring
            else staticstring = staticstring + "&" + dynamicstring
            countstatic = countstatic + 1
        }
    }
    return("?" + staticstring)
}

// Helper function to create dataset: adds one Stata variable per API variable
// of endpoint eid, then attaches variable labels and value labels.
// Always returns 1.
real scalar createdataset(string scalar eid){
    string matrix varinfo
    string matrix vardef
    string scalar labeldef
    string scalar labelshort
    varinfo = getvarinfo(st_global("base_url") + "/api/v1/api-endpoint-varlist/?endpoint_id=" + eid)
    for (c=1; c<=length(varinfo[1,.]); c++){
        varinfo[1,c] = strlower(varinfo[1,c])
    }
    temp1 = st_addvar(varinfo[3,.],varinfo[1,.])
    for (c=1; c<=length(varinfo[1,.]); c++){
        // NOTE(review): search and replacement text look identical in the
        // next two calls - possibly an encoding artifact; confirm.
        varinfo[2,c] = subinstr(varinfo[2,c], "—", "—")
        varinfo[2,c] = subinstr(varinfo[2,c], "–", "–")
        stata("qui label var " + varinfo[1,c] + " " + `"""' + varinfo[2,c] + `"""')
        // Value-label name is the variable name (cut to 30 characters) + "df".
        if (strlen(varinfo[1,c]) > 30) labelshort = substr(varinfo[1,c], 1, 30) + "df"
        else labelshort = varinfo[1,c] + "df"
        if (varinfo[4,c] == "1"){
            // The API supplies value definitions for this variable.
            vardef = getvardefs(varinfo[1,c], varinfo[5,c])
            labeldef = "qui label define " + labelshort + " "
            for (r=1; r<=length(vardef[1,.]); r++){
                labeldef = labeldef + vardef[1,r] + " " + `"""' + vardef[2,r] + `"""'
                if (r != length(vardef[1,.])) labeldef = labeldef + " "
            }
            labeldef = labeldef + ", replace"
            stata(labeldef)
            stata("qui label values " + varinfo[1,c] + " " + labelshort)
        }
        else if (varinfo[3,c] == "float"){
            // Numeric variables without definitions get the standard
            // missing-value codes.
            labeldef = "qui label define " + labelshort + " -1 " + `"""' + "Missing/Not reported" + `"""' + " -2 " + `"""' + "Not applicable" + `"""' + " -3 " + `"""' + "Suppressed data" + `"""' + ", replace"
            stata(labeldef)
            stata("qui label values " + varinfo[1,c] + " " + labelshort)
        }
        else if (varinfo[3,c] == "double"){
            labeldef = "qui label define " + labelshort + " -1 " + `"""' + "Missing/Not reported" + `"""' + " -2 " + `"""' + "Not applicable" + `"""' + " -3 " + `"""' + "Suppressed data" + `"""' + ", replace"
            stata(labeldef)
            stata("qui label values " + varinfo[1,c] + " " + labelshort + ", nofix")
        }
    }
    return(1)
}


// Helper function to create dataset from an already-fetched varinfo matrix
// (same layout as the matrix returned by getvarinfo()). Always returns 1.
real scalar createdataset_summaries_ep(string matrix varinfo){
    string matrix vardef
    string scalar labeldef
    string scalar labelshort
    for (c=1; c<=length(varinfo[1,.]); c++){
        varinfo[1,c] = strlower(varinfo[1,c])
    }
    temp1 = st_addvar(varinfo[3,.],varinfo[1,.])
    for (c=1; c<=length(varinfo[1,.]); c++){
        varinfo[2,c] = subinstr(varinfo[2,c], "—", "—")
        varinfo[2,c] = subinstr(varinfo[2,c], "–", "–")
        stata("qui label var " + varinfo[1,c] + " " + `"""' + varinfo[2,c] + `"""')
        if (strlen(varinfo[1,c]) > 30) labelshort = substr(varinfo[1,c], 1, 30) + "df"
        else labelshort = varinfo[1,c] + "df"
        if (varinfo[4,c] == "1"){
            vardef = getvardefs(varinfo[1,c], varinfo[5,c])
            labeldef = "qui label define " + labelshort + " "
            for (r=1; r<=length(vardef[1,.]); r++){
                labeldef = labeldef + vardef[1,r] + " " + `"""' + vardef[2,r] + `"""'
                if (r != length(vardef[1,.])) labeldef = labeldef + " "
            }
            labeldef = labeldef + ", replace"
            stata(labeldef)
            stata("qui label values " + varinfo[1,c] + " " + labelshort)
        }
        else if (varinfo[3,c] == "float"){
            labeldef = "qui label define " + labelshort + " -1 " + `"""' + "Missing/Not reported" + `"""' + " -2 " + `"""' + "Not applicable" + `"""' + " -3 " + `"""' + "Suppressed data" + `"""' + ", replace"
            stata(labeldef)
            stata("qui label values " + varinfo[1,c] + " " + labelshort)
        }
        else if (varinfo[3,c] == "double"){
            labeldef = "qui label define " + labelshort + " -1 " + `"""' + "Missing/Not reported" + `"""' + " -2 " + `"""' + "Not applicable" + `"""' + " -3 " + `"""' + "Suppressed data" + `"""' + ", replace"
            stata(labeldef)
            stata("qui label values " + varinfo[1,c] + " " + labelshort + ", nofix")
        }
    }
    return(1)
}

// Helper function to translate short dataset name to full name
// Replaces the first word of shortname ("school", "district" or "college")
// with the long category name and returns the words joined by single spaces.
// Returns "Error1" when fewer than two words are given and "Error2" when the
// first word is not one of the three categories. eps is not used.
string scalar shorttolongname(string scalar shortname, string matrix eps){
	string rowvector words
	string scalar category

	words = tokens(shortname)
	if (length(words) < 2) return("Error1")
	if (words[1] == "school") category = "schools"
	else if (words[1] == "district") category = "school-districts"
	else if (words[1] == "college") category = "college-university"
	else return("Error2")
	words[1] = category
	return(invtokens(words))
}

// Helper function to reformat summaries subcommand to endpoint URLs
// dataoptions: space-separated path pieces (each becomes one URL segment).
// summaries_cmd: "<stat> <var> <word> <by1> [<by2> ...]"; token 1 is the
// statistic, token 2 the variable, token 3 is skipped, tokens 4+ are joined
// with commas into the "by" parameter.
// Aborts with a subscript error if summaries_cmd has fewer than two tokens.
string scalar getsummariesurl(string scalar dataoptions, string scalar summaries_cmd){
	string scalar ep_url
	string scalar agg_by
	string rowvector path_parts
	string rowvector token_cmd
	real scalar c

	path_parts = tokens(dataoptions)
	ep_url = "/api/v1/"
	for (c=1; c<=length(path_parts); c++){
		ep_url = ep_url + path_parts[c] + "/"
	}
	ep_url = ep_url + "summaries/"

	token_cmd = tokens(summaries_cmd)
	agg_by = ""
	for (c=4; c<=length(token_cmd); c++){
		if (c > 4) agg_by = agg_by + ","
		agg_by = agg_by + token_cmd[c]
	}

	return(ep_url + "?stat=" + token_cmd[1] + "&by=" + agg_by + "&var=" + token_cmd[2])
}

// Helper function for time taken
// timeper is a duration in milliseconds. Hours and minutes are computed
// arithmetically (not with the clock functions hhC()/mmC()) so durations of
// 24 hours or more are reported in full instead of wrapping around.
string scalar timeit(real scalar timeper){
	real scalar hours
	real scalar minutes

	hours = floor(timeper / 3600000)
	minutes = floor(mod(timeper, 3600000) / 60000)
	if (hours == 0 && minutes == 0) return("less than one minute")
	if (hours == 0) return(strofreal(minutes) + " minute(s)")
	return(strofreal(hours) + " hour(s) and " + strofreal(minutes) + " minute(s)")
}

// Provide CSV download with numbered list of columns that should be strings
// For every column of varinfo2 whose row 5 equals "string", looks up the
// variable (row 1) among the variables currently in memory via
// iteminlistnum() and returns the resulting numbers separated by spaces
// ("" when there are none).
string scalar numliststr(string matrix varinfo2){
	string rowvector varnames
	string scalar nliststr
	real scalar listnum
	real scalar c

	varnames = st_varname((1..st_nvar()))
	nliststr = ""
	for (c=1; c<=length(varinfo2[1,.]); c++){
		if (varinfo2[5,c] == "string"){
			listnum = iteminlistnum(varinfo2[1,c], varnames)
			if (nliststr == "") nliststr = strofreal(listnum)
			else nliststr = nliststr + " " + strofreal(listnum)
		}
	}
	return(nliststr)
}

// Label CSV dataset appropriately when it is loaded in
// Applies variable labels and value labels to the data in memory.
// Value labels are (re)defined only when init1 == 1; otherwise the labels
// are assumed to exist already and are only attached. Always returns 1.
real scalar labelcsv(string matrix varinfo2, real scalar init1){
	string matrix vardef
	string scalar labeldef
	string scalar labelshort
	string scalar q
	string scalar misscodes
	real scalar c
	real scalar r

	q = `"""'
	// Shared value-label text for the reserved negative codes
	misscodes = " -1 " + q + "Missing/Not reported" + q + " -2 " + q + "Not applicable" + q + " -3 " + q + "Suppressed data" + q + ", replace"
	for (c=1; c<=length(varinfo2[1,.]); c++){
		stata("qui label var " + varinfo2[1,c] + " " + q + varinfo2[2,c] + q)
		// Value-label name: variable name cut to 30 characters, plus "df"
		if (strlen(varinfo2[1,c]) > 30) labelshort = substr(varinfo2[1,c], 1, 30) + "df"
		else labelshort = varinfo2[1,c] + "df"
		if (varinfo2[4,c] == "1"){
			if (init1 == 1){
				vardef = getvardefs(varinfo2[1,c], varinfo2[5,c])
				labeldef = "qui label define " + labelshort + " "
				for (r=1; r<=length(vardef[1,.]); r++){
					labeldef = labeldef + vardef[1,r] + " " + q + vardef[2,r] + q
					if (r != length(vardef[1,.])) labeldef = labeldef + " "
				}
				labeldef = labeldef + ", replace"
				stata(labeldef)
			}
			stata("qui label values " + varinfo2[1,c] + " " + labelshort)
		}
		else if (varinfo2[3,c] == "float"){
			if (init1 == 1) stata("qui label define " + labelshort + misscodes)
			stata("qui label values " + varinfo2[1,c] + " " + labelshort)
		}
		else if (varinfo2[3,c] == "double"){
			if (init1 == 1) stata("qui label define " + labelshort + misscodes)
			stata("qui label values " + varinfo2[1,c] + " " + labelshort + ", nofix")
		}
	}
	return(1)
}

// Correct grade list for subsetting CSV files
// Maps "grade-pk"/"pk" to "-1", "grade-k"/"k" to "0", and otherwise strips
// the "grade-" prefix.
string scalar correctgrade(string scalar vopt1){
	if (vopt1 == "grade-pk" || vopt1 == "pk") return("-1")
	else if (vopt1 == "grade-k" || vopt1 == "k") return("0")
	else return(subinstr(vopt1, "grade-", ""))
}

// Subset and keep relevant variables - keep if inlist(varname,val1,val2,etc.)
// spops2: row 1 = option names, row 2 = "name=value" selections (a value of
// "alldata" means no subsetting on that option).
// querystring2: "?a=1&b=2" style filters, each applied as keep if inlist(a,1).
// vlist2: optional variable list to keep.
// Side effect: "?" is removed from the caller's querystring2. Returns 1.
real scalar subsetkeep(string matrix spops2, string scalar querystring2, real scalar epid2, string scalar vlist2){
	string rowvector spopsres
	string rowvector voptions
	string rowvector queryparams
	string rowvector queryparamvals
	string scalar keepstate
	string scalar keepbase
	real scalar r
	real scalar c

	keepbase = "qui keep if inlist("
	for (r=1; r<=length(spops2[1,.]); r++){
		t = tokeninit("=")
		s = tokenset(t, spops2[2,r])
		spopsres = tokengetall(t)
		keepstate = keepbase + spops2[1,r]
		if (spopsres[2] != "alldata"){
			voptions = validoptions(spops2[2,r], epid2)
			for (c=1; c<=length(voptions); c++){
				keepstate = keepstate + "," + correctgrade(voptions[c])
			}
			keepstate = keepstate + ")"
			stata(keepstate)
		}
	}
	querystring2 = subinstr(querystring2, "?", "")
	t = tokeninit("&")
	s = tokenset(t, querystring2)
	queryparams = tokengetall(t)
	for (r=1; r<=length(queryparams); r++){
		t = tokeninit("=")
		s = tokenset(t, queryparams[r])
		queryparamvals = tokengetall(t)
		keepstate = keepbase + queryparamvals[1] + "," + queryparamvals[2] + ")"
		stata(keepstate)
	}
	if (vlist2 != "") stata("keep " + vlist2)
	return(1)
}

// Download Local CSV to parse column order, keep it
// Copies the remote file (spaces removed from its name) to a fixed temporary
// file in the working directory and imports only its first row. Returns 1.
real scalar copycsv(string scalar tval1, string scalar tbaseurl1){
	stata("qui copy " + tbaseurl1 + subinstr(tval1, " ", "") + " temp_eddata_file_gen_012345.csv, replace")
	stata("qui import delimited temp_eddata_file_gen_012345.csv, clear rowrange(1:1)")
	return(1)
}

// Download from CSV instead
// Looks up the downloadable files for endpoint eid1, imports the one(s)
// matching the requested years, labels and subsets them, and appends them
// into the data in memory.
// Returns 1 on success and 0 when no suitable CSV file is available.
real scalar downloadcsv(string scalar eid1, string matrix spops1, string scalar ds1, real scalar epid1, string matrix varinfo1, string scalar querystring1, string scalar vlist1){
	pointer (class libjson scalar) scalar results1
	pointer (class libjson scalar) scalar trow
	string rowvector yearslist
	string rowvector relfiles
	string scalar tval
	string scalar tbaseurl
	string scalar addstrings
	string scalar liststrings
	string scalar relfilesstr
	real scalar dlyear
	real scalar numresults
	real scalar temp1
	real scalar temp2
	real scalar temp3
	real scalar r
	real scalar c

	tbaseurl = st_global("base_url") + "/csv/" + ds1 + "/"
	results1 = getresults(st_global("base_url") + "/api/v1/api-downloads/?endpoint_id=" + eid1)
	yearslist = validoptions(spops1[2,1], epid1)
	numresults = results1->arrayLength()
	temp1 = 0
	if (numresults == 1){
		return(0)
	}
	if (numresults == 2){
		printf("Downloading file, please wait...\n")
		tval = results1->getArrayValue(2)->getString("file_name", "")
		temp3 = copycsv(tval, tbaseurl)
		liststrings = numliststr(varinfo1)
		if (liststrings == "") addstrings = ""
		else addstrings = " stringcols(" + liststrings + ")"
		stata("clear")
		stata("qui import delimited temp_eddata_file_gen_012345.csv, clear" + addstrings)
		stata("qui rm temp_eddata_file_gen_012345.csv")
		temp1 = labelcsv(varinfo1, 1)
		temp2 = subsetkeep(spops1, querystring1, epid1, vlist1)
	}
	else{
		printf("Progress for each CSV file will print to your screen. Please wait...\n\n")
		// Collect the files whose name contains ".csv" and exactly one requested year
		relfilesstr = ""
		for (r=1; r<=numresults; r++){
			trow = results1->getArrayValue(r);
			tval = trow->getString("file_name","");
			dlyear = 0
			if (subinstr(tval, ".csv", "") != tval) dlyear = dlyear + 1
			else dlyear = -10
			for (c=1; c<=length(yearslist); c++){
				if (subinstr(tval, yearslist[c], "") != tval) dlyear = dlyear + 1
			}
			if (dlyear == 2){
				if (relfilesstr == "") relfilesstr = tval
				else relfilesstr = relfilesstr + ";" + tval
			}
		}
		t = tokeninit(";")
		s = tokenset(t, relfilesstr)
		relfiles = tokengetall(t)
		// Nothing matched: bail out before trying to remove a temporary .dta
		// file that was never written (that rm would abort the command).
		if (length(relfiles) == 0) return(0)
		for (r=1; r<=length(relfiles); r++){
			if (r == 1){
				temp3 = copycsv(relfiles[r], tbaseurl)
				liststrings = numliststr(varinfo1)
				if (liststrings == "") addstrings = ""
				else addstrings = " stringcols(" + liststrings + ")"
				stata("clear")
			}
			printf("Processing file " + strofreal(r) + " of " + strofreal(length(relfiles)) + "\n")
			// Import each file on the side, then append it to the accumulated data
			stata("qui preserve")
			if (r == 1) {
				stata("qui import delimited temp_eddata_file_gen_012345.csv, clear" + addstrings)
				stata("qui rm temp_eddata_file_gen_012345.csv")
			}
			else stata("qui import delimited " + tbaseurl + subinstr(relfiles[r], " ", "") + ", clear" + addstrings)
			if (temp1 == 0) temp1 = labelcsv(varinfo1, 1)
			else temp1 = labelcsv(varinfo1, 0)
			temp2 = subsetkeep(spops1, querystring1, epid1, vlist1)
			stata("qui save temp_eddata_file_gen_012345, replace")
			stata("qui restore")
			stata("qui append using temp_eddata_file_gen_012345")
		}
		stata("qui rm temp_eddata_file_gen_012345.dta")
	}
	stata("qui compress")
	return(1)
}

// Gets all tables, using API to get the varlist and vartypes, and looping through all "nexts", calling gettable
// eid: endpoint id; url2: endpoint URL relative to the base_url global;
// totallen1 / epcount1: total number of endpoint calls and the index of this
// one (used for progress output; the time estimate prints only on the first).
// Always returns 1.
real scalar getalltables(string scalar eid, string scalar url2, real scalar totallen1, real scalar epcount1){
	pointer (class libjson scalar) scalar root
	pointer (class libjson scalar) scalar results1
	string matrix varinfo
	string scalar nextpage
	string scalar timea
	string scalar timetaken1
	string scalar timetaken2
	real scalar pagesize
	real scalar totalpages
	real scalar countpage
	real scalar timeper1
	real scalar timeper2
	real scalar spos

	varinfo = getvarinfo(st_global("base_url") + "/api/v1/api-endpoint-varlist/?endpoint_id=" + eid)
	if (st_global("debug_ind") == "1") printf(urlmode(st_global("base_url") + url2) + "\n")
	root = libjson::webcall(urlmode(st_global("base_url") + url2),"");
	results1 = root->getNode("results")
	pagesize = results1->arrayLength()
	// ceil() rather than floor()+1: the latter reports one page too many
	// whenever count is an exact multiple of the page size (which includes
	// every single-page result).
	totalpages = ceil(strtoreal(root->getString("count", "")) / pagesize)
	// Missing count or an empty first page: treat as a single page
	if (missing(totalpages) | totalpages < 1) totalpages = 1
	// New observations are written after whatever is already in memory
	spos = st_nobs() + 1
	countpage = 1
	if (epcount1 == 1){
		timeper1 = 1500 * totalpages * totallen1
		timeper2 = 10000 * totalpages * totallen1
		timetaken1 = timeit(timeper1)
		timetaken2 = timeit(timeper2)
		timea = "\nI estimate that the download for the entire file you requested will take "
		if (timetaken1 == "less than one minute" && timetaken2 == "less than one minute") printf(timea + "less than one minute.\n")
		else if (timetaken1 == "less than one minute" && timetaken2 != "less than one minute") printf(timea + "less than " + timetaken2 + ".\n")
		else printf(timea + "between %s and %s.\n", timetaken1, timetaken2)
		printf("This is only an estimate, so actual time may vary due to internet speed and file size differences.\n\n")
		printf("Progress for each endpoint and call to the API will print to your screen. Please wait...\n")
		printf("If this time is too long for you to wait, try adding the " + `"""' + "csv" + `"""' + " option to the end of your command to download the full csv directly.\n")
	}
	printf("\nGetting data from %s, endpoint %s of %s (%s records).\n", url2, strofreal(epcount1), strofreal(totallen1), root->getString("count", ""))
	nextpage = gettable(st_global("base_url") + url2, spos, varinfo)
	// gettable() returns the next page's URL, or "null" after the last page
	while (nextpage != "null"){
		spos = spos + pagesize
		countpage = countpage + 1
		printf("Endpoint %s of %s: On page %s of %s\n", strofreal(epcount1), strofreal(totallen1), strofreal(countpage), strofreal(totalpages))
		nextpage = gettable(nextpage, spos, varinfo)
	}
	return(1)
}

// Prints the generic "invalid command" message and returns 0
real scalar raise_error(){
	printf("\nThis Stata command is invalid. Please open the URL above in a browser for detailed error messages. More instructions can be found on the Education Data Portal Documentation Website (https://educationdata.urban.org/documentation/).")
	return(0)
}

// Gets the table for a summary endpoint, calling gettable_summaries
// varinfo: variable metadata for the columns of the summary result;
// url2: summary URL relative to the base_url global.
// Returns 1 when the web call produced a result, otherwise prints the
// generic error message and returns 0. totallen1 and epcount1 are unused.
real scalar getalltables_summaries(string matrix varinfo, string scalar url2, real scalar totallen1, real scalar epcount1){
	pointer (class libjson scalar) scalar root
	pointer (class libjson scalar) scalar results1
	string scalar nextpage
	real scalar pagesize
	real scalar spos

	if (st_global("debug_ind") == "1") printf(st_global("base_url") + url2 + "\n")
	root = libjson::webcall(st_global("base_url") + url2, "");
	if (root){
		// Kept from the original flow: touches the "results" array before loading
		results1 = root->getNode("results")
		pagesize = results1->arrayLength()
		spos = st_nobs() + 1
		nextpage = gettable_summaries(st_global("base_url") + url2, spos, varinfo)
		return(1)
	} else{
		error=raise_error()
		return(0)
	}
}

// helper function to get data from summary endpoints
// Builds the summary URL, assembles variable metadata for year, each
// group-by variable and the aggregated variable, creates the dataset and
// loads the result. Always returns "".
string scalar getsummarydata(string scalar dataoptions, string scalar summaries, string scalar opts, string scalar vlist){
	string matrix varinfo
	string matrix varinfo1
	string matrix varinfo_var_to_agg
	string matrix varinfo_groupby_var
	string scalar summary_ep_url
	string scalar var_to_agg
	string scalar agg_by
	string rowvector allopts
	string rowvector token_cmd
	string rowvector groupby_lst
	real scalar tempdata
	real scalar totallen
	real scalar epcount
	real scalar alldata
	real scalar num_var
	real scalar var_attr
	real scalar temp
	real scalar i
	real scalar j
	real scalar c
	real scalar r

	summary_ep_url = getsummariesurl(dataoptions, summaries)
	allopts = tokens(opts)
	for (i=1; i<=length(allopts); i++){
		summary_ep_url = summary_ep_url + "&" + allopts[i]
	}
	// URL passed as an argument so "%" in a filter is not read as a format
	printf("\n\nGetting data from: %s\n", st_global("base_url") + summary_ep_url)

	// Rebuild the group-by list; "year" is dropped because its metadata is
	// always placed in the first column below.
	token_cmd = tokens(summaries)
	var_to_agg = token_cmd[2]
	agg_by = ""
	token_cmd = select(token_cmd, token_cmd[1,.]:!="year")
	for (c=4; c<=length(token_cmd); c++){
		if (c != length(token_cmd)){
			agg_by = agg_by + token_cmd[c] + ","
		} else {
			agg_by = agg_by + token_cmd[c]
		}
	}
	if (strmatch(agg_by, "*,*") == 1) {
		groupby_lst = tokens(agg_by, ",")
	} else {
		groupby_lst = tokens(agg_by)
	}

	varinfo1 = getvarinfo(st_global("base_url") + "/api/v1/api-variables/?variable=year")
	varinfo_var_to_agg = getvarinfo(st_global("base_url") + "/api/v1/api-variables/?variable=" + var_to_agg)
	// Columns: year + one per group-by variable (comma tokens excluded) + aggregated variable
	num_var = 2 + sum(groupby_lst :!= ",")
	var_attr = 6
	varinfo = J(var_attr, num_var, "")
	for (r=1; r<=var_attr; r++){
		varinfo[r, 1] = varinfo1[r, 1]
	}
	temp = 2
	for (j=1; j<=length(groupby_lst); j++){
		if (groupby_lst[j] != ","){
			varinfo_groupby_var = getvarinfo(st_global("base_url") + "/api/v1/api-variables/?variable=" + groupby_lst[j])
			for (r=1; r<=var_attr; r++){
				varinfo[r, temp] = varinfo_groupby_var[r, 1]
			}
			temp = temp + 1
		}
	}
	for (r=1; r<=var_attr; r++){
		varinfo[r, temp] = varinfo_var_to_agg[r, 1]
	}

	epcount = 0
	totallen = 1
	tempdata = createdataset_summaries_ep(varinfo)
	for (i=1; i<=totallen; i++){
		epcount = epcount + 1
		alldata = getalltables_summaries(varinfo, summary_ep_url, totallen, epcount)
	}
	if (alldata == 1){
		stata("qui compress")
		if (vlist != "") stata("keep " + vlist)
		else printf("\n\nData successfully loaded into Stata and ready to use.")
	}
	return("")
}

// Main function to get data based on Stata request - calls other helper functions
// dataoptions: dataset name from the "using" clause; vlist: variables to keep;
// opts: space-separated "name=value" selections and filters;
// summaries: summaries subcommand ("" for a regular download);
// the remaining arguments are on/off flags (> 0 means on).
// Returns "" normally, or a failure message for an invalid option.
string scalar getalldata(string scalar dataoptions, string scalar vlist, string scalar opts, string scalar summaries, real scalar clearme, real scalar metadataonly, real scalar staging, real scalar csv, real scalar clearcache, real scalar debugind){
	string matrix endpoints
	string matrix spops
	string matrix varinfo
	string rowvector allopts
	string rowvector validopts
	string rowvector res2
	string rowvector respre
	string rowvector temp1
	string rowvector temp2
	string scalar eid
	string scalar urltemp
	string scalar urladds
	string scalar querystring
	string scalar dataoptions1
	string scalar validfilters
	string scalar ds
	real scalar epid
	real scalar spos
	real scalar spos1
	real scalar hidereturn
	real scalar totallen
	real scalar epcount
	real scalar tempdata
	real scalar temp3
	real scalar i
	real scalar j
	real scalar c

	st_global("base_url","https://educationdata.urban.org")
	st_global("staging_url","https://educationdata-stg.urban.org")
	if (staging > 0) st_global("base_url", st_global("staging_url"))
	st_global("cc","0")
	if (clearcache > 0) st_global("cc","1")
	st_global("debug_ind", "0")
	if (debugind > 0) st_global("debug_ind", "1")

	// Refuse to overwrite data in memory unless asked to. The check uses
	// the observation and variable counts instead of copying the whole
	// dataset into a Mata matrix.
	if (clearme <= 0 && st_nobs() * st_nvar() > 0){
		printf("Error: You currently have data loaded in Stata. Please add " + `"""' + "clear" + `"""' + " to the end of this command if you wish to remove your current data.")
		return("")
	}
	stata("clear")

	endpoints = endpointstrings()
	dataoptions1 = shorttolongname(strlower(dataoptions), endpoints)

	// Validate the dataset name before anything is requested, so the
	// summaries path also gets these messages instead of a malformed URL.
	if (dataoptions1 == "Error1"){
		printf("Error: You must enter the complete name of a dataset in the 'using' statement. The first is the 'short' name for the data category, and the remaining words are the unique name of the dataset. E.g., using " + `"""' + "school directory" + `"""' + ". Type " + `"""' + "help educationdata" + `"""' + " to learn more.")
		return("")
	}
	if (dataoptions1 == "Error2"){
		printf("Error: The option you selected was invalid. The three options are: " + `"""' + "school" + `"""' + ", " + `"""' + "district" + `"""' + ", and " + `"""' + "college" + `"""' + ". Type " + `"""' + "help educationdata" + `"""' + " to learn more.")
		return("")
	}

	if (strlen(summaries) > 0){
		getsummarydata(dataoptions1, summaries, opts, vlist)
		return("")
	}

	epid = validendpoints(dataoptions1)
	if (epid == 0 || dataoptions1 == "Error3"){
		printf("Error: The name of the category ('school', 'district', or 'college') is correct, but the name of the dataset you chose is not. Please verify the list of allowed options by typing " + `"""' + "help educationdata" + `"""' + ".")
		return("")
	}
	eid = endpoints[1,epid]
	varinfo = getvarinfo(st_global("base_url") + "/api/v1/api-endpoint-varlist/?endpoint_id=" + eid)
	varinfo[1,.] = strlower(varinfo[1,.])

	// Variables that may be filtered on several values at once
	validfilters = ""
	for (c=1; c<=length(varinfo[6,.]); c++){ /* varinfo[6,c] indicates is_filter */
		if (varinfo[6,c] == "1" && varinfo[3,c] == "double"){ /* note that no float variables are filters per metadata */
			if (validfilters == "") validfilters = varinfo[1,c]
			else validfilters = validfilters + ", " + varinfo[1,c]
		}
	}
	t = tokeninit(", ")
	s = tokenset(t, validfilters)
	respre = tokengetall(t)

	// Split the user's options into URL path selections (spops) and
	// query-string filters (urladds)
	allopts = tokens(opts)
	validopts = parseurls(endpoints[2,epid], "optional")
	spops = J(2,length(validopts),"")
	spops[1,.] = validopts[1,.]
	urladds = ""
	if (length(varinfo[1,.]) > 0){
		for (i=1; i<=length(allopts); i++){
			t = tokeninit("=")
			s = tokenset(t, allopts[i])
			res2 = tokengetall(t)
			spos = stringpos(res2[1], validopts)
			if (spos > 0) spops[2,spos] = allopts[i]
			else{
				spos1 = stringpos(res2[1], varinfo[1,.])
				if (spos1 > 0){
					// A ":" or "," in the value means a multi-value filter
					if (res2[2] != subinstr(subinstr(res2[2], ":", ""), ",", "")){
						if (iteminlist(res2[1], respre) == 0){
							printf("Error, option " + allopts[i] + " is not valid, because it may only be filtered on a single value, not multiple values.\n")
							printf("Decimal variables may not be filtered at all. The variables that can be filtered on multiple values in this dataset are as follows:\n\n")
							printf(validfilters)
							return("\n\nDownload failed. Please try again.")
						}
					}
					if (urladds == "") {
						urladds = urladds + allopts[i]
					}
					else {
						urladds = urladds + ";" + allopts[i]
					}
				}
				else {
					printf("Error, option " + allopts[i] + " is not valid. Valid variable selections are as follows:\n")
					// List the valid variables five per line, and print the
					// list (it used to be built but never shown)
					urladds = ""
					for (c=1; c<=length(varinfo[1,.]); c++){
						if (mod(c, 5) == 1) urladds = urladds + varinfo[1,c]
						else urladds = urladds + ", " + varinfo[1,c]
						if (mod(c, 5) == 0) urladds = urladds + "\n"
					}
					printf(urladds)
					return("\n\nDownload failed. Please try again.")
				}
			}
		}
	}
	querystring = getquerystrings(urladds)
	for (i=1; i<=length(spops[1,.]); i++){
		if (spops[2,i] == "") spops[2,i] = spops[1,i] + "=alldata"
	}
	temp1 = validoptions(spops[2,1], epid)
	if (tokens(temp1[1])[1] == "Invalid"){
		printf(temp1[1])
		return("")
	}
	epcount = 0
	if (metadataonly <= 0) printf("Please be patient - downloading data.\n")
	if (csv > 0 && metadataonly <= 0){
		printf("\nNote that this function temporarily writes data to the current working directory.\n")
		printf("If you do not have read and write privileges to the current directory, please change your working directory.\n")
		printf("For example, you can enter " + `"""' + "cd D:/Users/[Your username here]/Documents" + `"""' + ".\n\n")
		ds = tokens(dataoptions)[2]
		temp3 = downloadcsv(eid,spops,ds,epid,varinfo,querystring,vlist)
		if (temp3 == 0){
			printf("Error: Sorry, there is no CSV file available for download for this dataset at this time.")
			// Stop here: nothing was loaded, so neither "keep" nor the
			// success message below applies
			return("")
		}
	}
	else{
		tempdata = createdataset(eid)
		if (metadataonly <= 0){
			if (length(spops[1,.]) == 1){
				totallen = length(temp1)
				for (i=1; i<=length(temp1); i++){
					epcount = epcount + 1
					urltemp = subinstr(endpoints[2,epid], "{" + spops[1,1] + "}", temp1[i]) + querystring
					hidereturn = getalltables(eid, urltemp, totallen, epcount)
				}
			}
			else{
				temp2 = validoptions(spops[2,2], epid)
				if (tokens(temp2[1])[1] == "Invalid"){
					printf(temp2[1])
					return("")
				}
				totallen = length(temp1) * length(temp2)
				for (i=1; i<=length(temp1); i++){
					for (j=1; j<=length(temp2); j++){
						epcount = epcount + 1
						urltemp = subinstr(subinstr(endpoints[2,epid], "{" + spops[1,1] + "}", temp1[i]), "{" + spops[1,2] + "}", temp2[j]) + querystring
						hidereturn = getalltables(eid, urltemp, totallen, epcount)
					}
				}
			}
			stata("qui compress")
		}
		if (metadataonly > 0) {
			printf("Metadata successfully loaded into Stata and ready to view. Remove the " + `"""' + "metadata" + `"""' + " argument if you want to load the data itself.\n\n")
			printf("Note: You may filter this dataset on any variable (as long as it does not have a decimal value) using a single value (e.g. grade=1), however only the following variables allow filtering on multiple values (e.g.grade=1:3 or grade=1,2):\n\n")
			printf(validfilters)
		}
	}
	if (vlist != "") stata("keep " + vlist)
	else printf("\nData successfully loaded into Stata and ready to use.")
	return("")
}

end
--------------------------------------------------------------------------------