├── test ├── stata_tests.do ├── simple.do ├── test_detect.py ├── bad.do └── bad_corrected.do ├── .gitattributes ├── stata.toc ├── stata_linter.pkg ├── src-py └── setup.py ├── LICENSE ├── admin ├── ssc-meta-info.md └── checklist-submitting-SSC.md ├── .github └── workflows │ └── python_test.yaml ├── .gitignore ├── run └── lint.do ├── src ├── stata_linter_utils.py ├── lint.sthlp ├── lint.ado ├── stata_linter_correct.py └── stata_linter_detect.py └── README.md /test/stata_tests.do: -------------------------------------------------------------------------------- 1 | lint bad.do 2 | 3 | lint simple.do 4 | 5 | -------------------------------------------------------------------------------- /test/simple.do: -------------------------------------------------------------------------------- 1 | set obs 3 2 | gen x = _n 3 | 4 | summary x, det 5 | 6 | exit, clear -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /stata.toc: -------------------------------------------------------------------------------- 1 | v 1.02 2 | d DIME Analytics, World Bank Group, Development Economics Research 3 | p stata_linter 4 | -------------------------------------------------------------------------------- /stata_linter.pkg: -------------------------------------------------------------------------------- 1 | v 1.02 2 | d DIME Analytics, World Bank Group, Development Economics Research 3 | p stata_linter 4 | f /src/stata_linter_detect.py 5 | f /src/stata_linter_correct.py 6 | f /src/stata_linter_utils.py 7 | f /src/lint.ado 8 | f /src/lint.sthlp 9 | e 10 | -------------------------------------------------------------------------------- /src-py/setup.py: 
-------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | setup( 3 | name='stata_linter', 4 | version='1.0', 5 | entry_points={ 6 | 'console_scripts': [ 7 | 'stata_linter_detect=stata_linter_detect:run' 8 | ] 9 | }, 10 | install_requires=[ 11 | 'pandas', 12 | 'openpyxl' 13 | ] 14 | ) 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) DIME Analytics, DIME, DEC, The World Bank Group. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /test/test_detect.py: -------------------------------------------------------------------------------- 1 | from stata_linter_detect import stata_linter_detect_py 2 | import subprocess 3 | 4 | class TestCLI: 5 | def test_cli_bad(self): 6 | assert subprocess.run(["stata_linter_detect", "test/bad.do"]).returncode == 1 7 | def test_cli_simple(self): 8 | assert subprocess.run(["stata_linter_detect", "test/simple.do"]).returncode == 0 9 | 10 | class TestDetect: 11 | def test_basic(self): 12 | assert stata_linter_detect_py( 13 | input_file="test/bad.do", 14 | indent=4, 15 | suppress="0", 16 | summary="0", 17 | excel="", 18 | linemax=80, 19 | tab_space=4 20 | ) == 1 21 | 22 | def test_excel(self): 23 | assert stata_linter_detect_py( 24 | input_file="test/bad.do", 25 | indent=4, 26 | suppress="0", 27 | summary="0", 28 | excel="linter.xlsx", 29 | linemax=80, 30 | tab_space=4 31 | ) == 1 32 | 33 | def test_simple(self): 34 | assert stata_linter_detect_py( 35 | input_file="test/simple.do", 36 | indent=4, 37 | suppress="0", 38 | summary="0", 39 | excel="", 40 | linemax=80, 41 | tab_space=4 42 | ) == 0 43 | -------------------------------------------------------------------------------- /admin/ssc-meta-info.md: -------------------------------------------------------------------------------- 1 | ### PACKAGE NAME: 2 | STATA_LINTER 3 | 4 | ### TITLE: 5 | 'STATA_LINTER': tool to detect and correct bad Stata coding practices 6 | 7 | ### DESCRIPTION: 8 | The stata_linter package provides a linter for Stata code. 9 | Read about what a linter is here: https://en.wikipedia.org/wiki/Lint_(software). 10 | The package contains a command that detects bad Stata coding practices in a do-file so that users can manually correct them. 11 | The command can also correct some of the issues flagged in a new do-file. 
12 | The purpose of the command is to help users improve code clarity, readability, and organization in Stata do-files. 13 | This linter is based on the best practices outlined in The DIME Analytics Coding Guide published as an appendix to the book Development Research in Practice. 14 | See here https://worldbank.github.io/dime-data-handbook/coding.html. For more info about this linter, see https://github.com/worldbank/stata-linter. 15 | 16 | ### AUTHOR: 17 | "DIME Analytics, DIME, The World Bank Group", dimeanalytics@worldbank.org 18 | 19 | ### KEYWORDS: 20 | - linter 21 | - style guide 22 | - code best practices 23 | 24 | ### STATA VERSION REQUIREMENT: 25 | Stata 16 26 | 27 | ### FILES REQUIRED TO BE IN PACKAGE: 28 | - lint.ado 29 | - lint.sthlp 30 | - stata_linter_correct.py 31 | - stata_linter_detect.py 32 | - stata_linter_utils.py 33 | -------------------------------------------------------------------------------- /.github/workflows/python_test.yaml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | python-version: [3.6, 3.7, 3.8, 3.9] 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install flake8 pytest pandas openpyxl 29 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 30 | - name: Lint with flake8 31 | run: | 32 | # stop the build if there are Python syntax errors or undefined names 33 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 34 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 35 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 36 | - name: Install Module 37 | run: python -m pip install -e src 38 | - name: Test with pytest 39 | run: | 40 | pytest --doctest-modules --junitxml=junit/test-results-${{ matrix.python-version }}.xml 41 | - name: Publish Unit Test Results 42 | uses: EnricoMi/publish-unit-test-result-action@v2 43 | if: always() 44 | with: 45 | files: junit/test-results-*.xml 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ######################################################################## 2 | # 3 | # Based on DIME .gitignore template. Follow the instructions in the URL 4 | # below to set up this template in your own repository 5 | # https://github.com/worldbank/dime-github-trainings/tree/master/GitHub-resources/DIME-GitHub-Templates 6 | # 7 | # Note that if you are using GitKraken, you need to use version 5.x or more 8 | # recent for this template to work properly 9 | # 10 | ######################################################################## 11 | 12 | ####################### 13 | # Start by ignoring everything, and below we are explicitly saying 14 | # what to not ignore 15 | * 16 | 17 | ####################### 18 | # List of files with GitHub functionality anywhere in the repo 19 | # that we do not want to ignore 20 | 21 | # These files include GitHub settings 22 | !.gitignore 23 | !.gitattributes 24 | !.github/workflows/*.yaml 25 | 26 | # Keep markdown files used for documentation on GitHub 27 | !README.md 28 | !CONTRIBUTING.md 29 | !LICENSE* 30 | 31 | ####################### 32 | # For performance reasons, if a folder is already ignored, then 33 | # GitHub does not check the content for that folder for matches 34 | # with additional rules. 
The line below includes folder in the 35 | # top folder (but not their content), so that anything matching 36 | # the rules below will still not be ignored. 37 | !*/ 38 | 39 | ####################### 40 | # The following file types are code that should always be 41 | # included no matter where in the repository folder they are 42 | # located unless you explicitly ignore that folder 43 | 44 | # Stata 45 | !/**/*.do 46 | !/**/*.ado 47 | !/**/*.sthlp 48 | !/**/*.pkg 49 | !/**/stata.toc 50 | 51 | # R 52 | !/**/*.R 53 | !/**/*.Rmd 54 | 55 | # LaTeX 56 | !/**/*.tex 57 | !/**/*.bib 58 | 59 | # Python 60 | !/**/*.py 61 | !/**/*.ipynb 62 | # Still ignore .ipynb files in checkpoint folders 63 | .ipynb_checkpoints 64 | 65 | # Matlab 66 | !/**/*.m 67 | 68 | # Markdown 69 | !/**/*.md 70 | 71 | # Julia 72 | !/**/*.jl 73 | 74 | 75 | ####################### 76 | # Some admin data in txt format 77 | !/**/admin/**/*.txt 78 | 79 | 80 | ####################### 81 | # Include all the files with passwords or tokens here. All files named 82 | # password or passwords are with this template ignored no matter which 83 | # format you are using. Additionally, all content in any folder called 84 | # password or passwords are also ignored. NOTE that your project might be 85 | # using different names and then you must edit the lines below accordingly.
86 | password.* 87 | passwords.* 88 | password/ 89 | passwords/ 90 | .Rproj.user 91 | -------------------------------------------------------------------------------- /run/lint.do: -------------------------------------------------------------------------------- 1 | * Set the global to folder where test files are stored 2 | 3 | global project "/Users/bbdaniels/GitHub/stata-linter" 4 | global test_dir "${project}/test" 5 | adopath ++ "${project}/src" 6 | 7 | // net install stata_linter, from("https://raw.githubusercontent.com/worldbank/stata-linter/develop") replace 8 | run "${project}/src/lint.ado" 9 | 10 | // Detect -------------------------------------------------------------------- 11 | lint "${test_dir}/bad.do", 12 | lint "${test_dir}/bad.do", verbose 13 | lint "${test_dir}/bad.do", verbose nosummary 14 | lint "${test_dir}/bad.do", nosummary 15 | 16 | // Lint with results in excel file 17 | lint "${test_dir}/bad.do", nosummary /// 18 | excel("${test_dir}/detect_lint.xlsx") 19 | 20 | // Lint a folder 21 | lint "${test_dir}" 22 | lint "${test_dir}", verbose 23 | 24 | // Lint a folder and create an excel file 25 | lint "${test_dir}", /// 26 | excel("${test_dir}/detect_output_all.xlsx") 27 | 28 | // Correct ------------------------------------------------------------------- 29 | lint "${test_dir}/bad.do" /// 30 | using "${test_dir}/bad_corrected.do", /// 31 | nosummary /// 32 | replace 33 | 34 | lint "${test_dir}/bad.do" /// 35 | using "${test_dir}/bad_corrected.do", /// 36 | nosummary /// 37 | replace automatic 38 | 39 | 40 | // detecting + correcting + excel file results 41 | lint "${test_dir}/bad.do" /// 42 | using "${test_dir}/bad_corrected.do", /// 43 | excel("${test_dir}/detect_lint.xlsx") /// 44 | replace /// 45 | automatic 46 | 47 | // Check errors -------------------------------------------------------------- 48 | 49 | // Invalid file paths 50 | 51 | cap lint "oi" 52 | assert _rc == 601 53 | 54 | cap lint oi 55 | assert _rc == 601 56 | 57 | cap lint "oi.do" 
58 | assert _rc == 601 59 | 60 | cap lint oi.do 61 | assert _rc == 601 62 | 63 | cap lint "C:\Users\wb501238\Documents\GitHub\iefieldkit\run\output/iecorrect-template.xlsx" 64 | assert _rc == 198 65 | 66 | // This should return an error. Input file is not a do file 67 | cap lint "${test_dir}" /// 68 | using "${test_dir}/bad_corrected.do", /// 69 | nosummary /// 70 | replace automatic debug 71 | 72 | assert _rc == 198 73 | 74 | // ----------------------------------------------------------------------------- 75 | 76 | adopath - "${project}/src" 77 | -------------------------------------------------------------------------------- /test/bad.do: -------------------------------------------------------------------------------- 1 | * Rules ===================== 2 | * Hard tabs should not be used 3 | * "delimit" should not be used 4 | * In brackets after "for" or "if", indentation should be used 5 | * Too long lines should be divided into multiple lines 6 | * Before an opening curly bracket "{", put a whitespace 7 | * Remove blank lines before closing brackets 8 | * Remove duplicated blank lines 9 | 10 | * Stata codes to be corrected ================= 11 | 12 | * All hard tabs are replaced with soft tabs (= whitespaces) 13 | 14 | * delimit is corrected and three forward slashes will be used instead 15 | #delimit ; 16 | 17 | foreach something in something something something something something something 18 | something something{ ; // some comment 19 | do something ; 20 | } ; 21 | 22 | #delimit cr 23 | 24 | * Add indentation in brackets 25 | if something { 26 | do something 27 | if another == 1 { 28 | do that 29 | } 30 | } 31 | 32 | foreach ii in potato potato cassava maize potato /// 33 | cassava maize potato cassava maize potato cassava maize potato cassava maize potato cassava maize potato cassava maize potato cassava maize { 34 | if something ~= 1 & something != . 
{ 35 | do something // some very very very very very very very very very very very very very very very very very very very very very very long comment 36 | } 37 | } 38 | 39 | * Split a long line into multiple lines 40 | * (for now, too long comments are not corrected) 41 | foreach ii in potato potato cassava maize potato cassava maize potato cassava maize potato cassava maize potato cassava maize potato cassava maize potato cassava maize potato cassava maize { 42 | if something ~= 1 & something != . { 43 | do something // some very very very very very very very very very very very very very very very very very very very very very very long comment 44 | } 45 | } 46 | 47 | * Add a whitespace before an opening curly bracket "{" 48 | if something ~= 1 & something != .{ 49 | do something 50 | } 51 | 52 | * Remove blank lines before a closing bracket "}" 53 | if something ~= 1 & something != .{ 54 | 55 | do something 56 | 57 | } 58 | 59 | * Remove duplicated blank lines 60 | if something ~= 1 & something != .{ /* some comment */ 61 | 62 | 63 | do something 64 | 65 | 66 | } 67 | 68 | * Forvalues with quietly option 69 | qui forv i = 1/`theN' { 70 | ivregress 2sls indiv_theta_mean hh_faultdist /// 71 | ( m_indiv_edu_binary m_edu_fault = instrument i_d ) /// 72 | `fault_controls' `other_controls' `mother_controls' /// 73 | if group != `i' /// 74 | , cl(village_code) 75 | noi noi di "`i'/`theN' done!" 
76 | 77 | mat a = r(table) 78 | local lower = a[5,2] 79 | local upper = a[6,2] 80 | 81 | replace b_alt = _b[m_edu_fault] if group == `i' 82 | replace b_min = `lower' if group == `i' 83 | replace b_max = `upper' if group == `i' 84 | } 85 | -------------------------------------------------------------------------------- /test/bad_corrected.do: -------------------------------------------------------------------------------- 1 | * Rules ===================== 2 | * Hard tabs should not be used 3 | * "delimit" should not be used 4 | * In brackets after "for" or "if", indentation should be used 5 | * Too long lines should be divided into multiple lines 6 | * Before an opening curly bracket " {", put a whitespace 7 | * Remove blank lines before closing brackets 8 | * Remove duplicated blank lines 9 | 10 | * Stata codes to be corrected ================= 11 | 12 | * All hard tabs are replaced with soft tabs (= whitespaces) 13 | 14 | * delimit is corrected and three forward slashes will be used instead 15 | 16 | foreach something in something something something something something something /// 17 | something something { // some comment 18 | do something 19 | } 20 | 21 | * Add indentation in brackets 22 | if something { 23 | do something 24 | if another == 1 { 25 | do that 26 | } 27 | } 28 | 29 | foreach ii in potato potato cassava maize potato /// 30 | cassava maize potato cassava maize potato cassava maize /// 31 | potato cassava maize potato cassava maize potato cassava maize /// 32 | potato cassava maize { 33 | if something ~= 1 & something != . 
{ 34 | do something // some very very very very very very very very very very very very very very very very very very very very very very long comment 35 | } 36 | } 37 | 38 | * Split a long line into multiple lines 39 | * (for now, too long comments are not corrected) 40 | foreach ii in potato potato cassava maize potato cassava maize /// 41 | potato cassava maize potato cassava maize potato cassava maize potato /// 42 | cassava maize potato cassava maize potato cassava maize { 43 | if something ~= 1 & something != . { 44 | do something // some very very very very very very very very very very very very very very very very very very very very very very long comment 45 | } 46 | } 47 | 48 | * Add a whitespace before an opening curly bracket " {" 49 | if something ~= 1 & something != . { 50 | do something 51 | } 52 | 53 | * Remove blank lines before a closing bracket "}" 54 | if something ~= 1 & something != . { 55 | 56 | do something 57 | } 58 | 59 | * Remove duplicated blank lines 60 | if something ~= 1 & something != . { /* some comment */ 61 | 62 | do something 63 | } 64 | 65 | * Forvalues with quietly option 66 | qui forv i = 1/`theN' { 67 | ivregress 2sls indiv_theta_mean hh_faultdist /// 68 | ( m_indiv_edu_binary m_edu_fault = instrument i_d ) /// 69 | `fault_controls' `other_controls' `mother_controls' /// 70 | if group != `i' /// 71 | , cl(village_code) 72 | noi noi di "`i'/`theN' done!" 
73 | 74 | mat a = r(table) 75 | local lower = a[5,2] 76 | local upper = a[6,2] 77 | 78 | replace b_alt = _b[m_edu_fault] if group == `i' 79 | replace b_min = `lower' if group == `i' 80 | replace b_max = `upper' if group == `i' 81 | } 82 | 83 | -------------------------------------------------------------------------------- /src/stata_linter_utils.py: -------------------------------------------------------------------------------- 1 | # version 1.02 06apr2023 DIME Analytics dimeanalytics@worldbank.org 2 | # Import packages ==================== 3 | import re 4 | import pandas as pd 5 | import stata_linter_detect as sld 6 | 7 | # functions 8 | 9 | def read_dofile(file, include_comments=False): 10 | 11 | ''' 12 | Returns a list of the lines in the dofile 13 | Omits comment lines or commented-out code by default 14 | ''' 15 | 16 | with open(file, "r") as f: 17 | dofile_lines = f.readlines() 18 | 19 | if include_comments: 20 | return dofile_lines 21 | 22 | dofile_lines2 = [] 23 | comment_delimiter = 0 24 | 25 | for line in dofile_lines: 26 | 27 | comment_delimiter = sld.update_comment_delimiter(comment_delimiter, line) 28 | 29 | if comment_delimiter == 0: 30 | # Removing end-of-line comments 31 | filtered_line = re.sub(r"\s*((\/\/)|(\/\*)).*", r"", line) 32 | dofile_lines2.append(filtered_line) 33 | 34 | return dofile_lines2 35 | 36 | def detect_duplicated_blank_line_in_file(file): 37 | 38 | dofile_lines = read_dofile(file, include_comments=True) 39 | 40 | for line_index, line in enumerate(dofile_lines): 41 | 42 | if sld.detect_duplicated_blank_line(line_index, line, dofile_lines): 43 | return True 44 | 45 | return False 46 | 47 | def detect_blank_line_before_curly_close_in_file(file): 48 | 49 | dofile_lines = read_dofile(file, include_comments=True) 50 | 51 | for line_index, line in enumerate(dofile_lines): 52 | 53 | if sld.detect_blank_line_before_curly_close(line_index, line, dofile_lines): 54 | return True 55 | 56 | return False 57 | 58 | def 
detect_no_space_before_curly_bracket_in_file(file): 59 | 60 | dofile_lines = read_dofile(file) 61 | 62 | for line in dofile_lines: 63 | 64 | if sld.detect_no_space_before_curly_bracket(line): 65 | return True 66 | 67 | return False 68 | 69 | def detect_line_too_long_in_file(file, linemax): 70 | 71 | dofile_lines = read_dofile(file) 72 | linemax = int(linemax) 73 | 74 | for line in dofile_lines: 75 | 76 | if sld.detect_line_too_long(line, linemax): 77 | return True 78 | 79 | return False 80 | 81 | def detect_bad_indent_in_file(file, indent, tab_space): 82 | 83 | dofile_lines = read_dofile(file) 84 | indent = int(indent) 85 | tab_space = int(tab_space) 86 | 87 | for line_index, line in enumerate(dofile_lines): 88 | 89 | if sld.detect_bad_indent(line_index, line, dofile_lines, indent, tab_space): 90 | return True 91 | 92 | return False 93 | 94 | def detect_hard_tab_in_file(file): 95 | 96 | dofile_lines = read_dofile(file) 97 | 98 | for line in dofile_lines: 99 | 100 | if sld.detect_hard_tab(line): 101 | return True 102 | 103 | # No hard tabs detected in any line 104 | return False 105 | 106 | def detect_delimit_in_file(file): 107 | 108 | dofile_lines = read_dofile(file) 109 | 110 | for line in dofile_lines: 111 | 112 | if sld.detect_delimit(line): 113 | # whenever the first delimiter is detected, return True 114 | # and interrupt script 115 | return True 116 | 117 | # if delimiters were never detected, return False 118 | return False 119 | -------------------------------------------------------------------------------- /admin/checklist-submitting-SSC.md: -------------------------------------------------------------------------------- 1 | # Checklist for submitting new versions to SSC 2 | 3 | *Copy the list below to an issue when starting the process of publishing a new version of stata_linter* 4 | 5 | - [ ] 1. **Merge to *develop*** - Merge all branches with the changes that should be included in the new version first to the `develop` branch. 6 | - [ ] 2. 
**Create version branch** - This branch _MUST_ be created from the `master` branch. Name this branch the same as the version number you are about to release. For example, `v1.1`, `v2.32` etc. 7 | - [ ] 3. **Merge *develop* to the version branch** - Solve all the conflicts in the version branch and then make sure that steps 3.1-3.4 are done in the version branch and nowhere else. 8 | - [ ] 3.1 **Test in different operating systems** - This step is not necessary every time, but testing the commands in Stata on each of the PC, Mac and Linux operating systems should be done from time to time. A particularly good time to do this is after writing or editing code that depends on file paths, the console, special settings etc. If small updates are needed, then do them in the _version_ branch, otherwise do them in branches of the `develop` branch, merge those to `develop` and then re-merge `develop` to the version branch and test again. 9 | - [ ] 3.2 **Update version and date** - In the _version_ branch, update the version number and date in all ado-files and all dates in all help files. See section below for details. 10 | - [ ] 3.3 **Update version globals** - Update the _version_ado_ local in the file _lint.ado_ and the _VERSION_ global in _stata_linter_detect.py_ and _stata_linter_correct.py_. 11 | - [ ] 3.4 **Update version in .pkg and .toc** - This has nothing to do with SSC but should be kept up to date too. This is for when people install directly through GitHub using `net install`. If any new command has been added, remember to add the files for that command to the `.pkg` file. 12 | - [ ] 3.5 **Create a .zip file** - Create a .zip file with the files listed below (ado-files, Python scripts, and help files). If a version update ever includes a new ado-file or Python script necessary to run the linter, include that new file in the .zip too. These files are not allowed to be in a sub-folder in this .zip file. No other files should be in this folder.
Make a copy of this file in the archive folder of this package. 13 | - [ ] 4. **Email Prof. Baum** - Email the .zip file created in step 3.5 to **kit.baum@bc.edu**. 14 | - [ ] 4.1 - If any commands are added or deleted, make note of that in the email. 15 | - [ ] 4.2 - If any of the meta info (title, description, keywords, version or author/contact) has changed then include those updates in your email. 16 | - [ ] 5. **Draft release note** - Go to the [release notes](https://github.com/worldbank/stata-linter/releases) and draft a new release note for the new version. Follow the format from previous releases with links to [issues](https://github.com/worldbank/stata-linter/issues) solved. 17 | - [ ] 6. **Wait for publication confirmation** - Do not proceed past this step until Prof. Baum has confirmed that the new version is uploaded to the servers. 18 | - [ ] 7. **Merge version branch to *master*** - If steps 2 and 3 were done correctly, then there should not be any merge conflicts in this step. Once merged, delete the `version` branch. 19 | - [ ] 8. **Rebase *develop* to *master*** - This step brings edits done in 3 and 3.1, as well as version updates done in 3.2 and 3.3 into the *develop* branch. The same result can be accomplished - although by creating a slightly messier history - by merging *master* into *develop*. Regardless of whether the branches are merged or rebased, if any branches created off *develop* were not included in this version, make sure to rebase them to *develop* afterwards, otherwise there is a big risk for very messy conflicts in the future. 20 | - [ ] 9. **Publish release note** - Once the new version is up on SSC, publish the release note. 21 | - [ ] 10. **Close issues** - When the new version is up, close all the [issues](https://github.com/worldbank/stata-linter/issues) that were solved in the new version. 22 | - [ ] 11.
**Send announce email** - If it is a major release (new commands or significant updates to existing commands), send an email to DIME Team to announce the new version. 23 | 24 | ### Version number and dates in ado-files, Python files in src, and help files. 25 | 26 | The version number is on the format `number.number` where the first number is incremented if it is a major release. If the first number is incremented the second number is reset to 0. If it is not a major release, then the first number is left unchanged and the second number is incremented. 27 | 28 | Version number and date in ado-file. Change both version number and date. Make sure that this line is the very first line in the ado-file. 29 | ``` 30 | *! version 1.0 06dec2022 DIME Analytics dimeanalytics@worldbank.org 31 | 32 | 33 | capture program drop lint 34 | program lint 35 | ``` 36 | 37 | Date at the top of the help file. Change only the date, there is no version number in the help file. 38 | ``` 39 | {smcl} 40 | {* 06 Dec 2022}{...} 41 | {hline} 42 | help for {hi:ietoolkit} 43 | {hline} 44 | ``` 45 | -------------------------------------------------------------------------------- /src/lint.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* 06 Apr 2023}{...} 3 | {hline} 4 | help for {hi:lint} 5 | {hline} 6 | 7 | {title:Title} 8 | 9 | {p 4 4 2} 10 | 11 | {cmdab:lint} {hline 2} detects and corrects bad coding practices in Stata do-files following the {browse "https://worldbank.github.io/dime-data-handbook/coding.html#the-dime-analytics-stata-style-guide":DIME Analytics Stata Style Guide}. 12 | 13 | {p 4 4 2} 14 | For this command to run, you will need Stata version 16 or greater, Python, 15 | and the Python package {browse "https://pandas.pydata.org/":Pandas} installed. 
{break} 16 | To install Python and integrate it with Stata, refer to {browse "https://blog.stata.com/2020/08/18/stata-python-integration-part-1-setting-up-stata-to-use-python/":this page}. {break} 17 | To install Python packages, refer to {browse "https://blog.stata.com/2020/09/01/stata-python-integration-part-3-how-to-install-python-packages/":this page}. 18 | 19 | {title:Basic syntax} 20 | 21 | {p 4 6 6} 22 | {cmdab:lint} "{it:input_file}" [using "{it:output_file}"] , [{it:options}] 23 | {p_end} 24 | {break} 25 | {p 4 4 2} The lint command can be broken into two functionalities: 26 | {break}1. {hi:Detection} identifies bad coding practices in a Stata do-file. 27 | {break}2. {hi:Correction} corrects bad coding practices in a Stata do-file. 28 | {p_end} 29 | {break} 30 | {p 4 4 6} If an {it:output_file} is specified with {opt using}, 31 | then the linter will apply the {hi:Correction} functionality and will write 32 | a new file with corrections.{break} 33 | If not, the command will only apply the {hi:Detection} functionality, returning 34 | a report of suggested corrections and potential issues of the do-file 35 | in Stata's Results window.{break} 36 | Users should note that not all the bad practices identified in {hi:Detection} 37 | can be amended by {hi:Correction}.{p_end} 38 | 39 | {marker opts}{...} 40 | {synoptset 25}{...} 41 | {synopthdr:Option} 42 | {synoptline} 43 | 44 | {synopt :{cmdab:v:erbose}}Report bad practices and issues found on each line of the do-file.{p_end} 45 | {synopt :{cmdab:nosum:mary}}Suppress summary table of bad practices and potential issues.{p_end} 46 | {synopt :{cmdab:i:ndent(}{it:integer}{cmd:)}}Number of whitespaces used when checking indentation coding practices (default: 4).{p_end} 47 | {synopt :{cmdab:s:pace(}{it:integer}{cmd:)}}Number of whitespaces used instead of hard tabs when checking indentation practices (default: same as {it:indent}).{p_end} 48 | {synopt :{cmdab:l:inemax(}{it:integer}{cmd:)}}Maximum number of characters
in a line when checking line extension practices (default: 80).{p_end} 49 | {synopt :{cmdab:e:xcel(}{it:{help filename}}{cmd:)}}Save an Excel file of line-by-line results.{p_end} 50 | {synopt :{cmdab:force}}Allow the output file name to be the same as the name of the input file; 51 | overwriting the original do-file. {hi:The use of this option is not recommended} because it is 52 | slightly possible that the corrected do-file created by the command will break something 53 | in your code and you should always keep a backup of it.{p_end} 54 | {synopt :{cmdab:auto:matic}}Correct all bad coding practices without asking 55 | if you want each bad coding practice to be corrected or not. 56 | By default, the command will ask the user about each correction interactively 57 | after producing the summary report.{p_end} 58 | {synopt :{cmdab:replace}}Overwrite any existing {it:output} file.{p_end} 59 | 60 | {synoptline} 61 | 62 | 63 | {title:{it:Detect} functionality: Bad style practices and potential issues detected} 64 | 65 | {pstd}{hi:Use whitespaces instead of hard tabs} 66 | {break} 67 | Use whitespaces (usually 2 or 4) instead of hard tabs. 68 | 69 | {pstd}{hi:Avoid abstract index names} 70 | {break} 71 | In for-loop statements, index names should describe what the code is looping over. 72 | For example, avoid writing code like this: 73 | 74 | {pmore}{input:foreach i of varlist cassava maize wheat { }} 75 | 76 | {pstd}Instead, looping commands should name the index local descriptively: 77 | 78 | {pmore}{input:foreach crop of varlist cassava maize wheat { }} 79 | 80 | {pstd}{hi:Use proper indentations} 81 | {break} 82 | After declaring for-loop statements or if-else statements, add indentation with 83 | whitespaces (usually 2 or 4) in the lines inside the loop. 84 | 85 | {pstd}{hi:Use indentations after declaring newline symbols (///)} 86 | {break} 87 | After a new line statement (///), add indentation (usually 2 or 4 whitespaces). 
88 | 89 | {pstd}{hi:Use the "{cmdab:!missing()}" function for conditions with missing values} 90 | {break} 91 | For clarity, use {cmdab:!missing(var)} instead of {cmdab:var < .} or {cmdab:var != .} 92 | 93 | {pstd}{hi:Add whitespaces around math symbols ({cmdab:+, =, <, >})} 94 | {break} 95 | For better readability, add whitespaces around math symbols. 96 | For example, do {cmdab:gen a = b + c if d == e} instead of {cmdab:gen a=b+c if d==e}. 97 | 98 | {pstd}{hi:Specify the condition in an "if" statement} 99 | {break} 100 | Always explicitly specify the condition in the if statement. 101 | For example, declare {cmdab:if var == 1} instead of just using {cmdab:if var}. 102 | 103 | {pstd}{hi:Do not use "{cmdab:#delimit}", instead use "///" for line breaks} 104 | {break} 105 | More information about the use of line breaks {browse "https://worldbank.github.io/dime-data-handbook/coding.html#line-breaks":here}. 106 | 107 | {pstd}{hi:Do not use cd to change current folder} 108 | {break} 109 | Use absolute and dynamic file paths. More about this {browse "https://worldbank.github.io/dime-data-handbook/coding.html#writing-file-paths":here}. 110 | 111 | {pstd}{hi:Use line breaks in long lines} 112 | {break} 113 | For lines that are too long, use {cmdab:///} to divide them into multiple lines. 114 | It is recommended to restrict the number of characters in a line to 80 or less. 115 | 116 | {pstd}{hi:Use curly brackets for global macros} 117 | {break} 118 | Always use {cmdab:${ }} for global macros. 119 | For example, use {cmdab:${global_name}} instead of {cmdab:$global_name}. 120 | 121 | {pstd}{hi:Include missing values in condition expressions} 122 | {break} 123 | Condition expressions like {cmdab:var != 0} or {cmdab:var > 0} are evaluated to true for missing values. 124 | Make sure to explicitly take missing values into account by using {cmdab:missing(var)} in expressions.
125 | 126 | {pstd}{hi:Check if backslashes are not used in file paths} 127 | {break} 128 | Check if backslashes ({cmdab:\}) are not used in file paths. 129 | If you are using them, then replace them with forward slashes ({cmdab:/}). 130 | Users should note that the linter might not distinguish perfectly which uses of 131 | a backslash are file paths. In general, this flag will come up every time a 132 | backslash is used in the same line as a local, global, or the {it:cd} command. 133 | 134 | {pstd}{hi:Check if tildes (~) are not used for negations} 135 | {break} 136 | If you are using tildes ({cmdab:~}) for negations, replace them with bangs ({cmdab:!}). 137 | 138 | {title:{it:Correct} functionality: coding practices to be corrected} 139 | 140 | {p 4 4 2} 141 | Users should note that the {it:Correct} feature does not correct all the bad practices detected. 142 | It only corrects the following: 143 | 144 | {pstd}- Replaces the use of {cmdab:#delimit} with three forward slashes ({cmdab:///}) in each line affected by {cmdab:#delimit} 145 | 146 | {pstd}- Replaces hard tabs with soft spaces (4 by default). The amount of spaces can be set with the {cmdab:space()} option 147 | 148 | {pstd}- Indents lines inside curly brackets with 4 spaces by default. The amount of spaces can be set with the {cmdab:indent()} option 149 | 150 | {pstd}- Breaks long lines into multiple lines. Long lines are considered to have more than 80 characters by default, 151 | but this setting can be changed with the option {cmdab:linemax()}. 152 | Note that lines can only be split in whitespaces that are not inside 153 | parentheses, curly brackets, or double quotes. If a line does not have any 154 | whitespaces, the linter will not be able to break a long line.
155 | 156 | {pstd}- Adds a whitespace before opening curly brackets, except for globals 157 | 158 | {pstd}- Removes redundant blank lines after closing curly brackets 159 | 160 | {pstd}- Removes duplicated blank lines 161 | 162 | {p 4 4 2} 163 | If the option {cmdab:automatic} is omitted, Stata will prompt the user to confirm that 164 | they want to correct each of these bad practices only in case they are detected. 165 | If none of these are detected, it will show a message saying that none of the 166 | bad practices it can correct were detected. 167 | 168 | {marker exa} 169 | {title:Examples} 170 | 171 | {p 4 4 2} 172 | The following examples illustrate the basic usage of {cmd:lint}. 173 | Additional examples can be found at 174 | {browse "https://github.com/worldbank/stata-linter/"}. 175 | 176 | {pstd}{hi:1. Detecting bad coding practices} 177 | 178 | {p 4 4 2} The basic usage is to point to a do-file that requires revision as follows: 179 | 180 | {com}. lint "test/bad.do" 181 | 182 | {p 4 4 2} For the detection feature you can use all the options but {it:automatic}, {it:force}, and {it:replace}, which are part of the correction functionality. 183 | 184 | Options: 185 | 186 | 1. Show bad coding practices line-by-line 187 | {com}. lint "test/bad.do", verbose 188 | 189 | 2. Remove the summary of bad practices 190 | {com}. lint "test/bad.do", nosummary 191 | 192 | 3. Specify the number of whitespaces used for detecting indentation practices (default: 4): 193 | {com}. lint "test/bad.do", indent(2) 194 | 195 | 4. Specify the number of whitespaces used instead of hard tabs for detecting indentation practices (default: same value used in {it:indent}): 196 | {com}. lint "test/bad.do", space(6) 197 | 198 | 5. Specify the maximum number of characters in a line allowed when detecting line extension (default: 80): 199 | {com}. lint "test/bad.do", linemax(100) 200 | 201 | 6. Export to Excel the results of the line by line analysis 202 | {com}.
lint "test/bad.do", excel("test_dir/detect_output.xlsx") 203 | 204 | 7. You can also use this command to test all the do-files in a folder: 205 | {com}. lint "test/" 206 | 207 | {pstd}{hi:2. Correcting bad coding practices} 208 | 209 | {p 4 4 2} The basic usage of the correction feature requires specifying the input do-file 210 | and the output do-file that will have the corrections. 211 | If you do not include any options, the linter will ask you to confirm if you want a specific bad practice to be corrected 212 | for each bad practice detected: 213 | 214 | 1. Basic correction use (the linter will ask what to correct): 215 | {com}. lint "test/bad.do" using "test/bad_corrected.do" 216 | 217 | 2. Automatic use (Stata will correct the file automatically): 218 | {com}. lint "test/bad.do" using "test/bad_corrected.do", automatic 219 | 220 | 3. Use the same name for the output file (note that this will overwrite the input file, this is not recommended): 221 | {com}. lint "test/bad.do" using "test/bad.do", automatic force 222 | 223 | 4. Replace the output file if it already exists 224 | {com}. lint "test/bad.do" using "test/bad_corrected.do", automatic replace 225 | 226 | {title:Acknowledgements} 227 | 228 | {phang}This work is a product of the initial idea and work of Mizuhiro Suzuki. 229 | Rony Rodriguez Ramirez, Luiza Cardoso de Andrade and Luis Eduardo San Martin also contributed to this command, 230 | and Kristoffer Bjärkefur and Benjamin B. Daniels provided comments and code reviews. 231 | 232 | {title:Authors} 233 | 234 | {phang}This command was developed by DIME Analytics at DIME, The World Bank's department for Development Impact Evaluations.
235 | 236 | {phang}Please send bug reports, suggestions, and requests for clarifications 237 | writing "Stata linter" in the subject line to:{break} 238 | dimeanalytics@worldbank.org 239 | 240 | {phang}You can also see the code, make comments to the code, see the version 241 | history of the code, and submit additions or edits to the code through {browse "https://github.com/worldbank/stata-linter":the GitHub repository of this package}.{p_end} 242 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # stata_linter - Stata command for do file linter 2 | 3 | ## Installation 4 | 5 | ### Installing published versions of `stata_linter` 6 | 7 | To install `stata_linter`, type `ssc install stata_linter` and restart Stata. 8 | 9 | This will install the most recent published version of `stata_linter`. 10 | The main version of the code in this repository (the `master` branch) is what 11 | is published on SSC as well. 12 | 13 | ### Python stand-alone installation 14 | 15 | To install the linter to run directly with Python and not via Stata, clone this repository and then run the following command on your terminal: 16 | 17 | ```python 18 | pip install -e src-py/ 19 | ``` 20 | 21 | This will also install `pandas` and `openpyxl` if they are not currently installed. 22 | 23 | ## Requirements 24 | 25 | 1. Stata version 16 or higher. 26 | 2. Python 3 or higher 27 | 28 | For setting up Stata to use Python, refer to [this web page](https://blog.stata.com/2020/08/18/stata-python-integration-part-1-setting-up-stata-to-use-python/). 29 | `stata_linter` also requires the Python package `pandas` and `openpyxl`. 30 | Refer to [this web page](https://blog.stata.com/2020/09/01/stata-python-integration-part-3-how-to-install-python-packages/) to know more about installing Python packages. 
31 | 32 | ## Content 33 | 34 | The `stata_linter` package works through the `lint` command. 35 | `lint` is an opinionated detector that attempts to improve the readability and organization of Stata do files. 36 | The command is written based on the good coding practices of the Development Impact Evaluation Unit at The World Bank. 37 | For these standards, refer to [DIME's Stata Coding practices](https://dimewiki.worldbank.org/wiki/Stata_Coding_Practices) and _Appendix: The DIME Analytics Coding Guide_ of [Development Research in Practice](https://worldbank.github.io/dime-data-handbook/). 38 | 39 | The `lint` command can be broken into two functionalities: 40 | 41 | 1. **detection** identifies bad coding practices in one or multiple Stata do-files 42 | 2. **correction** corrects a few of the bad coding practices detected in a Stata do-file 43 | 44 | > _Disclaimer_: Please note that this command is not guaranteed to correct codes without changing results. 45 | It is strongly recommended that after using this command you check if results of the do file do not change. 46 | 47 | ## Syntax and basic usage 48 | 49 | ```stata 50 | lint "input_file" using "output_file", options 51 | ``` 52 | 53 | ### 1. 
Detection 54 | 55 | To detect bad practices in a do-file you can run the following: 56 | 57 | ```stata 58 | lint "test/bad.do" 59 | ``` 60 | 61 | and on your Stata console you will get a summary of bad coding practices that were found in your code: 62 | 63 | ```stata 64 | ------------------------------------------------------------------------------------- 65 | Bad practice Occurrences 66 | ------------------------------------------------------------------------------------- 67 | Hard tabs used instead of soft tabs: Yes 68 | One-letter local name in for-loop: 3 69 | Non-standard indentation in { } code block: 7 70 | No indentation on line following ///: 1 71 | Missing whitespaces around operators: 0 72 | Implicit logic in if-condition: 1 73 | Delimiter changed: 1 74 | Working directory changed: 0 75 | Lines too long: 5 76 | Global macro reference without { }: 0 77 | Use of . where missing() is appropriate: 6 78 | Backslash detected in potential file path: 0 79 | Tilde (~) used instead of bang (!) in expression: 5 80 | ------------------------------------------------------------------------------------- 81 | ``` 82 | 83 | If you want to get the lines where those bad coding practices appear you can use the option `verbose`. For example: 84 | 85 | ```stata 86 | lint "test/bad.do", verbose 87 | ``` 88 | 89 | Gives the following information before the regular output of the command. 90 | 91 | ```stata 92 | (line 14): Use 4 white spaces instead of tabs. (This may apply to other lines as well.) 93 | (line 15): Avoid to use "delimit". For line breaks, use "///" instead. 94 | (line 17): This line is too long (82 characters). Use "///" for line breaks so that one line has at m 95 | > ost 80 characters. 96 | (line 25): After declaring for loop statement or if-else statement, add indentation (4 whitespaces). 97 | (line 25): Always explicitly specify the condition in the if statement. (For example, declare "if var 98 | > == 1" instead of "if var".) 99 | ... 
100 | ``` 101 | 102 | You can also pass a folder path to detect all the bad practices in all the do-files that are in the same folder. 103 | 104 | ### 2. Correction 105 | 106 | If you would like to correct bad practices in a do-file you can run the following: 107 | 108 | ```stata 109 | lint "test/bad.do" using "test/bad_corrected.do" 110 | ``` 111 | 112 | In this case, the lint command will create a do-file called `bad_corrected.do`. 113 | Stata will ask you if you would like to perform a set of corrections for each bad practice detected, one by one. 114 | You can add the option `automatic` to perform the corrections automatically and skip the manual confirmations. 115 | It is strongly recommended that the output file has a different name from the input file, as the original do-file should be kept as a backup. 116 | 117 | As a result of this command, a piece of Stata code as the following: 118 | 119 | ```stata 120 | #delimit ; 121 | 122 | foreach something in something something something something something something 123 | something something{ ; // some comment 124 | do something ; 125 | } ; 126 | 127 | #delimit cr 128 | 129 | ``` 130 | 131 | becomes: 132 | 133 | ```stata 134 | foreach something in something something something something something something /// 135 | something something { // some comment 136 | do something 137 | } 138 | ``` 139 | 140 | and 141 | 142 | ```stata 143 | if something ~= 1 & something != . { 144 | do something 145 | if another == 1 { 146 | do that 147 | } 148 | } 149 | ``` 150 | 151 | becomes 152 | 153 | ```stata 154 | if something ~= 1 & something != . { 155 | do something 156 | if another == 1 { 157 | do that 158 | } 159 | } 160 | ``` 161 | 162 | ### Other options 163 | 164 | You can use the following options with the `lint` command: 165 | 166 | - Options related to the **detection** feature: 167 | - `verbose`: show all the lines where bad practices appear. 168 | - `nosummary`: suppress the summary of bad practices. 
169 | - `excel()`: export detection results to Excel. 170 | 171 | - Options exclusive to the **correction** feature: 172 | - `automatic`: correct all bad coding practices without asking if you want each bad coding practice detected to be corrected or not. 173 | - `replace`: replace the existing output file. 174 | - `force`: allow the output file name to be the same as the name of the input file (not recommended). 175 | 176 | - Options for **both** features: 177 | - `indent()`: specify the number of whitespaces used for indentation (default is 4). 178 | - `linemax()`: maximum number of characters in a line (default: 80) 179 | - `space()`: number of whitespaces used instead of hard tabs (default: same value as `indent()`). 180 | 181 | ## Coding practices to be detected 182 | 183 | - **Use soft tabs (i.e., whitespaces), not hard tabs:** 184 | Use white spaces (usually 2 or 4 whitespaces are used) instead of hard tabs. 185 | You can change this option in the do-file editor preferences. 186 | 187 | - **Avoid using abstract index names:** 188 | In *for loops*, index names should describe what the code is looping over. 189 | Hence, for example, avoid coding like this: 190 | 191 | ```{stata} 192 | foreach i of var cassava maize wheat { } 193 | ``` 194 | 195 | Instead, looping commands should name the index local descriptively: 196 | 197 | ```{stata} 198 | foreach crop of var cassava maize wheat { } 199 | ``` 200 | 201 | - **Use proper indentations:** 202 | After declaring a for loop statement or if-else statement, add indentation with whitespaces (usually 2 or 4 whitespaces). 203 | 204 | - **Use indentations after declaring newline symbols `///`:** 205 | After a new line statement `(///)`, add indentation (usually 2 or 4 whitespaces).
206 | 207 | - **Use `!missing()` function for conditions of missing values:** 208 | For clarity, use `!missing(var)` instead of `var < .` or `var != .` 209 | 210 | - **Add whitespaces around math symbols (`+`, `=`, `<`, `>`):** 211 | For better readability, add whitespaces around math symbols. 212 | For example, write `gen a = b + c if d == e` instead of `gen a=b+c if d==e`. 213 | 214 | - **Specify the condition in the if statement:** 215 | Always explicitly specify the condition in the if statement. 216 | For example, declare `if var == 1` instead of `if var`. 217 | 218 | - **Do not use `delimit`, instead use `///` for line breaks:** 219 | More information about the use of line breaks [here](https://worldbank.github.io/dime-data-handbook/coding.html#line-breaks). 220 | 221 | - **Do not use the `cd` command to change the current folder:** 222 | Use absolute and dynamic file paths. More about this [here](https://worldbank.github.io/dime-data-handbook/coding.html#writing-file-paths). 223 | 224 | - **Use line breaks for too long lines:** 225 | For lines that are too long, use `///` for line breaks and divide them into multiple lines. 226 | It is recommended to restrict the number of characters in a line under 80. 227 | Though sometimes this is difficult since, for example, Stata does not allow line 228 | breaks within double quotes, try to follow this rule when possible. 229 | 230 | - **Use curly brackets for global macros:** 231 | Always use `${ }` for global macros. 232 | For instance, use `${global}` instead of `$global`. 233 | 234 | - **Include missing values in condition expressions:** 235 | Condition expressions like `var != 0` or `var > 0` are evaluated to true for missing values. 236 | Make sure to explicitly take missing values into account by using `missing()` in expressions. 237 | 238 | - **Check if backslashes are not used in file paths:** 239 | Check if backslashes `(\)` are not used in file paths. 
240 | If you are using them, then replace them with forward slashes `(/)`. 241 | 242 | - **Check if tildes `(~)` are not used for negations:** 243 | If you are using tildes `(~)` for negations, replace them with the bang symbol `(!)`. 244 | 245 | ## Coding practices to be corrected 246 | 247 | The `correction` feature does not correct all the bad practices detected by `detect`. 248 | It only corrects the following: 249 | 250 | - Replaces the use of `delimit` with three forward slashes (`///`) in each line affected by `delimit` 251 | - Replaces hard tabs with soft spaces (4 by default). The amount of spaces can be set with the `space()` option 252 | - Indents lines inside curly brackets with 4 spaces by default. The amount of spaces can be set with the `indent()` option 253 | - Breaks long lines into two lines. Long lines are considered to have more than 80 characters by default, but this setting can be changed with the option `linemax()` 254 | - Adds a whitespace before opening curly brackets, except for globals 255 | - Removes redundant blank lines after closing curly brackets 256 | - Removes duplicated blank lines 257 | 258 | If the option `automatic` is omitted, `lint` will prompt the user to confirm that they want to correct each of these bad practices only in case they are detected. If none of these are detected, it will show the message: 259 | 260 | ```{stata} 261 | Nothing to correct. 262 | The issues lint is able to correct are not present in your dofile. 263 | No output files were generated. 264 | ``` 265 | 266 | ## Recommended use 267 | 268 | To minimize the risk of crashing a do-file, the `correction` feature works based on fewer rules than the `detection` feature. 269 | That is, we can detect more bad coding practices with `lint "input_file"` in comparison to `lint "input_file" using "output_file"`.
270 | Therefore, after writing a do-file, you can first `detect` bad practices to check how many bad coding practices are contained in the do-file and later decide whether you would like to use the correction feature. 271 | 272 | If there are not too many bad practices, you can go through the lines flagged by the `detection` feature and manually correct them. 273 | This also avoids potential crashes by the `correction` feature. 274 | 275 | If there are many bad practices detected, you can use the `correction` feature first to correct some of the flagged lines, and then you can `detect` again and `correct` the remaining bad practices manually. 276 | We strongly recommend not overwriting the original input do-file so it can remain as a backup in case `correct` introduces unintended changes in the code. 277 | Additionally, we recommend checking that the results of the do-file are not changed by the correction feature. 278 | 279 | ## Bug Reports and Feature Requests 280 | 281 | If you are familiar with GitHub go to the [**Contributions**](https://github.com/worldbank/stata-linter#contributions) section below for advanced instructions. 282 | 283 | An easy but still very efficient way to provide any feedback on these commands is to create an *issue* in GitHub. You can read *issues* submitted by other users or create a new *issue* in the top menu below [**worldbank**/**stata-linter**](https://github.com/worldbank/stata-linter). If you have an idea for a new command, or a new feature on an existing command, creating an *issue* is a great tool for suggesting that. Please read already existing *issues* to check whether someone else has made the same suggestion or reported the same error before creating a new *issue*. 284 | 285 | While we have a slight preference for receiving feedback here on GitHub, you are still very welcome to send a regular email with your feedback to [dimeanalytics@worldbank.org](mailto:dimeanalytics@worldbank.org). 
286 | 287 | ## Contributions 288 | 289 | If you are not familiar with GitHub see the [**Bug reports and feature requests**](https://github.com/worldbank/stata-linter#bug-reports-and-feature-requests) section above for a less technical but still very helpful way to contribute to **stata-linter**. 290 | 291 | We appreciate contributions directly to the code and will give credit to anyone providing contributions that we merge to the master branch. 292 | If you have any questions on anything in this section, please do not hesitate to email [dimeanalytics@worldbank.org](mailto:dimeanalytics@worldbank.org). 293 | 294 | The files on the `master` branch are the files most recently released on the SSC server. 295 | README, LICENSE and similar files are updated directly to `master` in between releases. 296 | All the other files are updated in the `develop` branch before being merged into `master`. 297 | Check out the `develop` branch if you want to see what future updates we are currently working on. 298 | 299 | Please make pull requests to the `master` branch **only** if you wish to contribute to README, LICENSE or similar meta data files. 300 | If you wish to make a contribution to any other file, then please **do not** use the `master` branch. 301 | Instead, please fork this repository from `develop` and make your pull request to that branch. 302 | The `develop` branch includes all minor edits we have made to already published commands since the last release that we will include in the next version released on the SSC server. 303 | 304 | ## License 305 | 306 | **stata_linter** is developed under MIT license. See http://adampritchard.mit-license.org/ or see [the `LICENSE` file](https://github.com/worldbank/ietoolkit/blob/master/LICENSE) for details. 
307 | 308 | ## Main Contact 309 | 310 | Luis Eduardo San Martin ([dimeanalytics@worldbank.org](mailto:dimeanalytics@worldbank.org)) 311 | 312 | ## **Authors** 313 | 314 | This command is developed by DIME Analytics at DIME, The World Bank's department for Development Impact Evaluations. 315 | 316 | ## About DIME Analytics 317 | 318 | [DIME](https://www.worldbank.org/en/research/dime) is the World Bank's impact evaluation department. Part of DIME’s mission is to intensify the production of and access to public goods that improve the quantity and quality of global development research, while lowering the costs of doing IE for the entire research community. This Library is developed and maintained by [DIME Analytics](https://www.worldbank.org/en/research/dime/data-and-analytics). DIME Analytics supports quality research processes across the DIME portfolio, offers public trainings, and develops tools for the global community of development researchers. 319 | 320 | Other DIME Analytics public goods are: 321 | 322 | - [Development Research in Practice:](https://worldbank.github.io/dime-data-handbook/) the DIME Analytics Data Handbook 323 | - [DIME Wiki:](https://dimewiki.worldbank.org/wiki/Main_Page) a one-stop-shop for impact evaluation resources 324 | - [ietoolkit:](https://github.com/worldbank/ietoolkit) Stata package for impact evaluations 325 | - [iefieldkit:](https://github.com/worldbank/iefieldkit) Stata package for primary data collection 326 | - [Stata Visual Library](https://github.com/worldbank/stata-visual-library) 327 | - [R Econ Visual Library](https://github.com/worldbank/r-econ-visual-library) 328 | - [DIME Research Standards:](https://github.com/worldbank/dime-standards/blob/master/dime-research-standards/) DIME's commitments to best practices 329 | -------------------------------------------------------------------------------- /src/lint.ado: -------------------------------------------------------------------------------- 1 | *! 
version 1.02 06apr2023 DIME Analytics dimeanalytics@worldbank.org

* lint: entry point of the stata_linter package.
*
*   lint "input" [using "output"], [options]
*
* "input" is a do-file or a folder. Without [using], the command only runs
* the detection step (_detect) on the file, or on every *.do file found in
* the folder. With [using], detection is followed by the correction step
* (_correct), which writes the corrected do-file to "output".
*
* Options:
*   verbose       report issues line by line (turns suppress_flag off)
*   nosummary     do not show the summary table (turns summary_flag off)
*   indent()      number of whitespaces for indentation (default 4)
*   linemax()     maximum number of characters per line (default 80)
*   space()       whitespaces that replace a hard tab (default: indent)
*   correct()     accepted by the parser but not referenced in this program
*   excel()       path of an .xls/.xlsx file for line-by-line results
*   automatic     passed on to _correct
*   replace       passed on to _correct
*   force         allow "output" to be the same file as "input"
*   debug         print progress information
*
* Exits with error 198 when [using] is combined with a folder, or when
* input and output have the same name and [force] was not specified.

capture program drop lint
program lint

    version 16

    syntax anything [using/], ///
        /// Options
        [ ///
            Verbose ///
            NOSUMmary ///
            Indent(string) ///
            Linemax(string) ///
            Space(string) ///
            Correct(string) ///
            Excel(string) ///
            AUTOmatic ///
            replace ///
            force ///
            debug ///
        ]

/*******************************************************************************
********************************************************************************

    PART 1: Prepare inputs

********************************************************************************
*******************************************************************************/

/*******************************************************************************
    Set defaults
*******************************************************************************/

    * set indent size = 4 if missing
    if missing("`indent'") local indent "4"

    * set whitespaces for tab (space) = indent size if space is missing
    if missing("`space'") local space "`indent'"

    * set linemax = 80 if missing
    if missing("`linemax'") local linemax "80"

    * NOTE: a previous Excel report is NOT erased here. The path given in
    * excel() has not been validated yet, and erasing it at this point would
    * delete whatever file the user typed, even when the extension check
    * below rejects it. The file is erased after validation instead.

    * set a constant for the suppress option being used
    local suppress_flag "1"
    if !missing("`verbose'") local suppress_flag "0"

    * set a constant for the summary option being used
    local summary_flag "1"
    if !missing("`nosummary'") local summary_flag "0"

    * In debug mode, print status
    if !missing("`debug'") di "Inputs prepared"


/*******************************************************************************
    Prepare file paths
*******************************************************************************/

// Check format of do-file to be linted ----------------------------------------

    * File or Folder to be detected
    gettoken anything : anything

    * Check if main input is a file or a folder
    local input = `"`anything'"'

    _testpath "`input'", ext(`"".do", ".ado""') argument(lint's main argument) exists `debug'
    local folder = "`r(folder)'"
    local file = "`r(file)'"

// Check do-file with corrections ----------------------------------------------

    if !missing("`using'") {

        * Can only be used when linting a do-file
        if missing("`file'") {
            di as error "{phang}Option [using] cannot be used when linting a directory. To use this option, specify a do-file as lint's main argument.{p_end}"
            error 198
        }

        _testpath "`using'", ext(`"".do", ".ado""') argument(lint's [using] argument) `debug'
        local output = "`r(file)'"

        * Unless force is used, the output file should have a different name than the input
        if missing("`force'") & ("`input'" == "`output'") {
            di as error "{phang}It is recommended to use different file names for lint's main argument and its [using] argument. This is because it is slightly possible that the corrected do-file created by the command will break something in your code, and you may want to keep a backup. If you want still to replace the current do-file with the do-file corrected by lint, use the option [force]. {p_end}"
            error 198
        }
    }

// Check Excel with corrections ------------------------------------------------

    if !missing("`excel'") {

        _checkopenpyxlinstall

        _testpath "`excel'", ext(`"".xls", ".xlsx""') argument(lint's [excel] argument) `debug'
        local excel = "`r(file)'"

        * Remove a report left over from a previous run. This is done only
        * now that _testpath has accepted the path, and the path is quoted
        * so that file names containing spaces are handled. capture keeps
        * this silent when there is no file to erase.
        cap erase "`excel'"
    }

// In debug mode, print file paths ---------------------------------------------

    if !missing("`debug'") {
        di "Folder: `folder'"
        di "File: `file'"
        di "Excel: `excel'"
        di "Input: `input'"
        di "Output: `output'"
    }

// Check if python is installed ------------------------------------------------

    _checkpyinstall

    * Check that the Python function is defined
    qui: findfile stata_linter_detect.py
    if c(os) == "Windows" {
        * Python receives the path as text, so use forward slashes
        local ado_path = subinstr(r(fn), "\", "/", .)
    }
    else {
        local ado_path = r(fn)
    }

// Check that versions of all auxiliary files are the same ---------------------

    _checkversions

/*******************************************************************************
********************************************************************************

    PART 2: Execute linter

********************************************************************************
*******************************************************************************/

/*******************************************************************************
    Detect issues
*******************************************************************************/

    * Check a single do-file
    if !missing("`file'") {

        * The header is only requested when no correction step follows
        if missing("`using'") {
            local header header
        }

        * The footer is requested whenever any output is produced
        if (!missing("`verbose'") | (`summary_flag' == 1) | !missing("`excel'") | !missing("`using'")) {
            local footer footer
        }

        _detect, ///
            file("`file'") excel("`excel'") ado_path("`ado_path'") ///
            indent("`indent'") linemax("`linemax'") space("`space'") ///
            suppress_flag("`suppress_flag'") summary_flag("`summary_flag'") ///
            `header' `footer'
    }

    * Check all do-files in a folder
    else if !missing("`folder'") {

        local files: dir "`folder'" files "*.do"

        foreach file of local files {

            _detect, ///
                file("`folder'/`file'") excel("`excel'") ado_path("`ado_path'") ///
                indent("`indent'") linemax("`linemax'") space("`space'") ///
                suppress_flag("`suppress_flag'") summary_flag("`summary_flag'") ///
                header footer
        }
    }

    * In debug mode, print status
    if !missing("`debug'") noi di "Exiting detect function"

/*******************************************************************************
    Correct issues
*******************************************************************************/

    if !missing("`using'") {

        _correct, ///
            input("`input'") output("`output'") ///
            indent("`indent'") space("`space'") linemax("`linemax'") ///
            `replace' `force' `automatic' `debug'

    }

end

/*******************************************************************************
********************************************************************************

    PART 3: Auxiliary functions

********************************************************************************
*******************************************************************************/

// Correct ---------------------------------------------------------------------

capture program drop _correct
program _correct

    syntax, ///
        input(string) output(string) ///
        indent(string) space(string) linemax(string) ///
        [replace force automatic debug]

    * Check that the Python function
is defined 218 | qui: findfile stata_linter_correct.py 219 | if c(os) == "Windows" { 220 | local ado_path = subinstr(r(fn), "\", "/", .) 221 | } 222 | else { 223 | local ado_path = r(fn) 224 | } 225 | 226 | * Display a message if the correct option is added, so the output can be separated 227 | display as text " " 228 | display as result _dup(60) "-" 229 | display as result "Correcting {bf:do-file}" 230 | display as result _dup(60) "-" 231 | display as text " " 232 | 233 | * Import relevant python libraries 234 | python: import sys, os 235 | python: from sfi import Macro 236 | python: sys.path.append(os.path.dirname(r"`ado_path'")) 237 | python: from stata_linter_correct import * 238 | python: import stata_linter_detect as sld 239 | python: import stata_linter_utils as slu 240 | 241 | * Checking which issues are present in the dofile so we ask for their correction 242 | python: Macro.setLocal('_delimiter', str(slu.detect_delimit_in_file(r"`input'"))) 243 | python: Macro.setLocal('_hard_tab', str(slu.detect_hard_tab_in_file(r"`input'"))) 244 | python: Macro.setLocal('_bad_indent', str(slu.detect_bad_indent_in_file(r"`input'", "`indent'", "`space'"))) 245 | python: Macro.setLocal('_long_lines', str(slu.detect_line_too_long_in_file(r"`input'", "`linemax'"))) 246 | python: Macro.setLocal('_no_space_before_curly', str(slu.detect_no_space_before_curly_bracket_in_file(r"`input'"))) 247 | python: Macro.setLocal('_blank_before_curly', str(slu.detect_blank_line_before_curly_close_in_file(r"`input'"))) 248 | python: Macro.setLocal('_dup_blank_line', str(slu.detect_duplicated_blank_line_in_file(r"`input'"))) 249 | 250 | * If no issue was found, the function ends here. 251 | * Otherwise _correct continues. 
252 | if ("`_delimiter'" == "False" & /// 253 | "`_hard_tab'" == "False" & /// 254 | "`_bad_indent'" == "False" & /// 255 | "`_long_lines'" == "False" & /// 256 | "`_no_space_before_curly'" == "False" & /// 257 | "`_blank_before_curly'" == "False" & /// 258 | "`_dup_blank_line'" == "False") { 259 | display as result `"{phang}Nothing to correct.{p_end}"' 260 | display as result `"{phang}The issues lint is able to correct are not present in your dofile.{p_end}"' 261 | display as result `"{phang}No output files were generated.{p_end}"' 262 | } 263 | else { 264 | 265 | * Counter of number of issues being corrected 266 | local _n_to_correct 0 267 | 268 | * Correct the output file, looping for each python command 269 | foreach fun in delimit_to_three_forward_slashes /// 270 | tab_to_space /// 271 | indent_in_bracket /// 272 | too_long_line /// 273 | space_before_curly /// 274 | remove_blank_lines_before_curly_close /// 275 | remove_duplicated_blank_lines { 276 | 277 | * If the issue is not present, we continue with the next one 278 | if ("`_delimiter'" == "False" & "`fun'" == "delimit_to_three_forward_slashes") { 279 | continue 280 | } 281 | else if ("`_hard_tab'" == "False" & "`fun'" == "tab_to_space") { 282 | continue 283 | } 284 | else if ("`_bad_indent'" == "False" & "`fun'" == "indent_in_bracket") { 285 | continue 286 | } 287 | else if ("`_long_lines'" == "False" & "`fun'" == "too_long_line") { 288 | continue 289 | } 290 | else if ("`_no_space_before_curly'" == "False" & "`fun'" == "space_before_curly") { 291 | continue 292 | } 293 | else if ("`_blank_before_curly'" == "False" & "`fun'" == "remove_blank_lines_before_curly_close") { 294 | continue 295 | } 296 | else if ("`_dup_blank_line'" == "False" & "`fun'" == "remove_duplicated_blank_lines") { 297 | continue 298 | } 299 | 300 | if missing("`automatic'") { 301 | 302 | noi di "" 303 | global confirmation "" //Reset global 304 | 305 | while (upper("${confirmation}") != "Y" & upper("${confirmation}") != "N" & 
upper("${confirmation}") != "BREAK") { 306 | if ("${confirmation}" != "") { 307 | noi di as txt "{pstd} Invalid input. {p_end}" 308 | noi di as txt "{pstd} Please type {bf:Y} or {bf:N} and hit enter. Type {bf:BREAK} and hit enter to exit. {p_end}" 309 | noi di "" 310 | } 311 | if ("`fun'" == "delimit_to_three_forward_slashes") { 312 | di as result "{pstd} Avoid using [delimit], use three forward slashes (///) instead. {p_end}" 313 | } 314 | else if ("`fun'" == "tab_to_space") { 315 | di as result "{pstd} Avoid using hard tabs, use soft tabs (white spaces) instead. {p_end}" 316 | } 317 | else if ("`fun'" == "indent_in_bracket") { 318 | di as result "{pstd} Indent commands inside curly brackets. {p_end}" 319 | } 320 | else if ("`fun'" == "space_before_curly") { 321 | di as result "{pstd} Use white space before opening curly brackets. {p_end}" 322 | } 323 | else if ("`fun'" == "too_long_line") { 324 | di as result "{pstd} Limit line length to `linemax' characters. {p_end}" 325 | } 326 | else if ("`fun'" == "remove_blank_lines_before_curly_close") { 327 | di as result "{pstd} Remove redundant blank lines before closing brackets. {p_end}" 328 | } 329 | else if ("`fun'" == "remove_duplicated_blank_lines") { 330 | di as result "{pstd} Remove duplicated blank lines. {p_end}" 331 | } 332 | noi di as txt "{pstd} Do you want to correct this? To confirm type {bf:Y} and hit enter, to abort type {bf:N} and hit enter. Type {bf:BREAK} and hit enter to stop the code. See option {help lint:automatic} to not be prompted before creating files. 
// Detect ----------------------------------------------------------------------

capture program drop _detect
program _detect

    * Runs the Python detection routine on one do-file and prints the optional
    * Stata header/footer around its output.
    *   file()          do-file to lint
    *   ado_path()      path of a file located in the folder that holds the
    *                   Python scripts; only its directory is used
    *   indent(), linemax(), space(), suppress_flag(), summary_flag()
    *                   forwarded unchanged, as strings, to Python
    *   excel()         optional report path, forwarded to Python
    *   header, footer  print the header / footer blocks
    syntax ,                                                ///
        file(string) ado_path(string)                       ///
        indent(string) linemax(string) space(string)        ///
        suppress_flag(string) summary_flag(string)          ///
        [excel(string) header footer]

    * Import relevant python functions
    python: import sys, os
    python: sys.path.append(os.path.dirname(r"`ado_path'"))
    python: from stata_linter_detect import *

    * Stata result header
    if !missing("`header'") {
        di as result ""
        di as result "Linting file: `file'"
        di as result ""
    }

    * Actually run the Python code.
    * Paths are passed as raw string literals so that a backslash in a file
    * name (e.g. C:\temp\new.do) is not read by Python as an escape sequence.
    * The result is assigned to r so that it is not echoed to the console.
    python: r = stata_linter_detect_py(r"`file'", "`indent'", "`suppress_flag'", "`summary_flag'", r"`excel'", "`linemax'", "`space'")

    * Stata result footer
    if !missing("`footer'") {

        display as result _dup(85) "-"

        if "`excel'" != "" {
            display as result `"{phang}File {browse "`excel'":`excel'} created.{p_end}"'
        }

        display as result `"{phang}For more information about coding guidelines visit the {browse "https://dimewiki.worldbank.org/Stata_Linter":Stata linter wiki.}{p_end}"'
    }

end

// File Paths ------------------------------------------------------------------

cap program drop _testpath
program _testpath, rclass

    * Classifies a path as a folder (no period in it) or a file, validates it
    * with _testdirectory or _testfile, and returns it in r(folder) or r(file).
    * The other of the two results is returned empty.
    syntax anything, argument(string) ext(string) [details(string) debug exists]

    if !missing("`debug'") di "Entering subcommand _testpath"

    * Standardize file path
    local path = subinstr(`"`anything'"', "\", "/", .)

    * If a folder, test that folder exists
    if !regex(`"`path'"', "\.") {
        _testdirectory `path' , argument(`argument') details(`details') `debug'
        local folder `path'
    }

    * If a file, parse information
    else {
        _testfile `path' , argument(`argument') ext(`"`ext'"') `exists' `debug'
        local file `path'
    }

    return local folder "`folder'"
    if !missing("`debug'") di `"Folder: `folder'"'

    return local file "`file'"
    if !missing("`debug'") di `"File: `file'"'

    if !missing("`debug'") di "Exiting subcommand _testpath"

end
// Check if folder exists ------------------------------------------------------

cap program drop _testdirectory
program _testdirectory

    * Stops with error 601 when the directory given in `anything' does not
    * exist. argument() and details() are only used in the error message.
    syntax anything, argument(string) [details(string) debug]

    if !missing("`debug'") di "Entering subcommand _testdirectory"

    * Test that the folder for the report file exists
    * NOTE(review): `anything' is handed to Mata as typed, so it presumably has
    * to arrive already enclosed in double quotes -- confirm against callers.
    mata : st_numscalar("r(dirExist)", direxists(`anything'))
    if `r(dirExist)' == 0 {
        noi di as error `"{phang}Directory `anything', used `argument', does not exist. `details'{p_end}"'
        error 601
    }

end


// Error checks ----------------------------------------------------------------

capture program drop _checkpyinstall
program _checkpyinstall

    * Check if python is installed
    cap python search
    if _rc {
        noi di as error `"{phang}For this command, Python installation is required. Refer to {browse "https://blog.stata.com/2020/08/18/stata-python-integration-part-1-setting-up-stata-to-use-python/":this page} for how to integrate Python to Stata. {p_end}"'
        * A bare -exit- returns code 0 and lets the caller carry on as if the
        * check had passed; return a non-zero code so the command stops here.
        exit 198
    }

    * Check if pandas package is installed
    cap python which pandas
    if _rc {
        noi di as error `"{phang}For this command to run, the Python package "pandas" needs to be installed. Refer to {browse "https://blog.stata.com/2020/09/01/stata-python-integration-part-3-how-to-install-python-packages/":this page} for how to install Python packages. {p_end}"'
        exit 198
    }

end

capture program drop _checkopenpyxlinstall
program _checkopenpyxlinstall

    * Check if openpyxl package is installed
    cap python which openpyxl
    if _rc {
        noi di as error `"{phang}For this command to run, the Python package "openpyxl" needs to be installed. Refer to {browse "https://blog.stata.com/2020/09/01/stata-python-integration-part-3-how-to-install-python-packages/":this page} for how to install Python packages. {p_end}"'
        * Non-zero return code so that the caller stops (see _checkpyinstall)
        exit 198
    }

end

// Check that version of lint.ado and Python scripts are the same

capture program drop _checkversions
program _checkversions

    * IMPORTANT: Every time we have a package update, update the version number here
    * Otherwise we'd be introducing a major bug!
    local version_ado 1.02

    * Check versions of .py files
    * NOTE(review): these imports presumably rely on the folder of the Python
    * scripts already being on sys.path -- confirm the call order in -lint-.
    python: from sfi import Macro
    python: import stata_linter_detect as sld
    python: import stata_linter_correct as slc
    python: Macro.setLocal('version_detect', sld.VERSION)
    python: Macro.setLocal('version_correct', slc.VERSION)

    * Checking that versions are the same.
    * -error- is given an explicit return code instead of being called bare.
    cap assert "`version_ado'" == "`version_detect'"
    if _rc {
        noi di as error `"{phang}For this command to run, the versions of all its auxiliary files need to be the same. Please update the command to the newest version with: {bf:ssc install stata_linter, replace} , restart Stata, and try again{p_end}"'
        error 198
    }
    cap assert "`version_ado'" == "`version_correct'"
    if _rc {
        noi di as error `"{phang}For this command to run, the versions of all its auxiliary files need to be the same. Please update the command to the newest version with: {bf:ssc install stata_linter, replace} , restart Stata, and try again{p_end}"'
        error 198
    }

end

************************************************************* Have a lovely day!
# Trailing comment of a code line: everything from the first "//" or "/*"
# through the end of the line.
_TRAILING_COMMENT = re.compile(r"(//|/\*).*")


# Function to update comment delimiter =============
# (detection works only when comment delimiter == 0)
def update_comment_delimiter(comment_delimiter, line):
    '''
    Return the block-comment depth after reading one line of a do-file.

    Comment sections are delimited by "/*" and "*/". A line that opens and
    closes a comment leaves the depth unchanged, a line with only "/*" adds
    one, and a line with only "*/" subtracts one (never going below zero).
    '''
    # "/*" and "*/" in the same line: nothing changes
    if re.search(r"/\*.*\*/", line):
        return comment_delimiter
    # opening detected
    if "/*" in line:
        return comment_delimiter + 1
    # closing detected
    if "*/" in line and comment_delimiter > 0:
        return comment_delimiter - 1
    return comment_delimiter


# Functions for auto-correction ===================

# Convert delimit to three forward slashes -------------------
def delimit_to_three_forward_slashes(input_file, output_file, indent, tab_space, linemax):
    '''
    Rewrite "#delimit" sections so that they use "///" line continuation.

    The "#delimit ..." lines themselves are dropped. Inside a delimit section
    a code line that does not end with the delimiter gets " ///" appended,
    a trailing delimiter is removed, and any other delimiter in the code part
    becomes a newline. Lines inside /* */ comments, blank lines and lines
    outside a delimit section are copied unchanged.

    indent, tab_space and linemax are not used; they are accepted so that
    every corrector can be called with the same arguments.
    '''
    with open(input_file, "r") as reader:
        input_lines = reader.readlines()

    output_list = []
    delimit_on = False
    delimit_symbol = ";"
    comment_delimiter = 0
    for line in input_lines:
        comment_delimiter = update_comment_delimiter(comment_delimiter, line)
        if comment_delimiter > 0:
            output_list.append(line)
            continue

        stripped = line.lstrip()
        # "#delimit (something other than cr)" starts a delimit section
        if re.search(r"^#delimit(?! cr)", stripped):
            delimit_on = True
            # the delimiter is the second token of the code part; splitting
            # on any whitespace keeps it from being read as an empty string
            # when several blanks follow "#delimit" (default is ";")
            tokens = line.partition("//")[0].split()
            delimit_symbol = tokens[1] if len(tokens) > 1 else ";"
            continue
        # "#delimit cr" ends the delimit section
        if re.search(r"^#delimit cr", stripped):
            delimit_on = False
            continue
        if not delimit_on:
            output_list.append(line)
            continue

        # split at the FIRST "//" only, so that a comment which itself
        # contains "//" (a URL, for instance) is kept whole together with
        # its line ending
        line_main, comment_mark, line_comment = line.partition("//")
        line_main = line_main.rstrip()
        # nothing but whitespace and/or a comment: keep the line as it is
        if not line_main:
            output_list.append(line)
            continue

        if line_main.endswith(delimit_symbol):
            # the command ends here: drop the delimiter
            output_line = line_main[:-len(delimit_symbol)]
        else:
            # the command continues on the next line
            output_line = line_main + " ///"
        # remaining delimiters separate commands; plain text replacement,
        # the delimiter is never interpreted as a regular expression
        output_line = output_line.replace(delimit_symbol, "\n")

        if comment_mark:
            output_line = output_line + " //" + line_comment
        else:
            output_line = output_line + " \n"
        output_list.append(output_line)

    with open(output_file, "w") as writer:
        writer.writelines(output_list)


# Convert hard tabs to soft tabs (= whitespaces) ----------------------
def tab_to_space(input_file, output_file, indent, tab_space, linemax):
    '''
    Replace every hard tab in the leading whitespace of each line with
    int(tab_space) blanks. Tabs that come after the first non-blank character
    are left alone. indent and linemax are not used.
    '''
    spaces = " " * int(tab_space)
    with open(input_file, "r") as reader:
        input_lines = reader.readlines()

    output_list = []
    for line in input_lines:
        # the pattern can match the empty string, so there is always a match
        leading = re.match(r"[ \t]*", line).group(0)
        if "\t" in leading:
            line = leading.replace("\t", spaces) + line[len(leading):]
        output_list.append(line)

    with open(output_file, "w") as writer:
        writer.writelines(output_list)


# Use indents in brackets after for and while loops or if/else conditions --------------------
def indent_in_bracket(input_file, output_file, indent, tab_space, linemax):
    '''
    Indent the commands inside curly brackets.

    Every line whose code part ends with "{" opens a block; its header is the
    first line of that statement (continuation lines joined with "///" are
    followed backwards). Every line whose code part ends with "}" closes the
    most recently opened block, unless the line contains a "${" style macro
    reference. Non-blank lines strictly between the header and the closing
    line are then re-indented to the header's indentation plus int(indent)
    blanks, outer blocks first.

    Lines inside /* */ comments are not inspected, and a "}" without a
    matching "{" is ignored. tab_space and linemax are not used.
    '''
    with open(input_file, "r") as reader:
        input_lines = reader.readlines()

    step = int(indent)
    open_headers = []   # header line index of each block that is still open
    blocks = []         # (header line index, closing line index)
    comment_delimiter = 0
    for line_index, line in enumerate(input_lines):
        comment_delimiter = update_comment_delimiter(comment_delimiter, line)
        if comment_delimiter > 0:
            continue
        # code part of the line, without trailing comment and whitespace
        code = _TRAILING_COMMENT.sub("", line).rstrip()
        if not code:
            continue
        if code.endswith("{"):
            # walk back to the line on which the statement starts
            header = line_index
            while header > 0 and "///" in input_lines[header - 1]:
                header -= 1
            open_headers.append(header)
        elif code.endswith("}") and not re.search(r"\$.?{", line):
            if open_headers:
                blocks.append((open_headers.pop(), line_index))

    # an enclosing block always starts before the blocks it contains, so
    # sorting by header line re-indents from the outside in
    for header, closing in sorted(blocks):
        header_line = input_lines[header]
        start_indent = len(header_line) - len(header_line.lstrip())
        for j in range(header + 1, closing):
            content = input_lines[j].lstrip()
            # blank lines are left untouched
            if content:
                input_lines[j] = " " * (start_indent + step) + content

    with open(output_file, "w") as writer:
        writer.writelines(input_lines)
line_main.rstrip() + "\n" 228 | else: 229 | line_main = line_main.rstrip() 230 | if len(line_split_for_comment) > 1: 231 | line_comment = line_split_for_comment[1] 232 | line_indent = ( 233 | len(line_main.rstrip()) - 234 | len(line_main.rstrip().expandtabs(int(indent)).lstrip()) 235 | ) 236 | 237 | i = 0 238 | break_line = [] 239 | potential_break_line = [] 240 | double_quote_count = 0 241 | parenthesis_count = 0 242 | curly_count = 0 243 | # looking at each character of a line, tag where to break the line 244 | for j, c in enumerate(line_main.lstrip()): 245 | 246 | position = j + len(line_main) - len(line_main.lstrip()) 247 | 248 | if c == '''"''': 249 | double_quote_count = 1 - double_quote_count 250 | elif c == "(": 251 | parenthesis_count += 1 252 | elif c == ")": 253 | parenthesis_count -= 1 254 | elif c == "{": 255 | curly_count += 1 256 | elif c == "}": 257 | curly_count -= 1 258 | 259 | # We check "potential" break lines first 260 | if ((c == "," or c == " ") and # break line at "," or " " 261 | (double_quote_count == 0) and # ignore if in double quotes 262 | (parenthesis_count == 0) and # ignore if in parentheses 263 | (curly_count == 0)# ignore if in curly brackets 264 | ): 265 | 266 | if c == " ": 267 | 268 | position2 = line_indent + i + 4 269 | potential_break_line.append(position) 270 | 271 | # If the soon-to-be new line is equal to the linemax, 272 | # we add the last potential line break position 273 | if position2 >= int(linemax): 274 | break_line.append(potential_break_line[-1]) 275 | i = int(indent) + position - potential_break_line[-1] 276 | else: 277 | i += 1 278 | 279 | elif c == ",": 280 | 281 | position2 = line_indent + i + 5 282 | 283 | # If the soon-to-be new line is equal to the linemax, 284 | # we add the last potential line break position 285 | if position2 >= int(linemax): 286 | break_line.append(potential_break_line[-1]) 287 | i = int(indent) + position - potential_break_line[-1] 288 | else: 289 | i += 1 290 | 291 | 
potential_break_line.append(position + 1) 292 | 293 | else: 294 | 295 | position2 = line_indent + i + 4 296 | if position2 >= int(linemax): 297 | break_line.append(potential_break_line[-1]) 298 | i = int(indent) + position - potential_break_line[-1] 299 | else: 300 | i += 1 301 | 302 | # break lines 303 | line_split = [] 304 | break_line_index = [0] 305 | break_line_index.extend(break_line) 306 | break_line_index.append(len(line_main)) 307 | for k in range(len(break_line_index) - 1): 308 | # if no line break is needed, just append the line 309 | if (break_line_index == 2): 310 | line_split.append( 311 | line_main[break_line_index[k]:break_line_index[k + 1]].rstrip() 312 | ) 313 | # otherwise, break the line according to the positions of characters tagged above 314 | else: 315 | line_split.append(line_main[break_line_index[k]:break_line_index[k + 1]]) 316 | 317 | # if no line break is needed, then just append the line 318 | # with appropriate indentations (and commends if needed) 319 | if len(line_split) == 1: 320 | if len(line_split_for_comment) > 1: 321 | output_list.append( 322 | " " * line_indent + line_split[0].lstrip() + " //" + line_comment 323 | ) 324 | elif len(line_split_for_comment) == 1: 325 | output_list.append(" " * line_indent + line_split[0].lstrip() + "\n") 326 | # otherwise, break the line 327 | elif len(line_split) > 1: 328 | for i, temp_line in enumerate(line_split): 329 | # the first line 330 | if i == 0: 331 | new_line = " " * line_indent + temp_line.lstrip() + " ///\n" 332 | # from the second to the last to the second line 333 | elif (i > 0) & (i < len(line_split) - 1): 334 | # if the previous line does not include a line break, then 335 | # add an appropriate indentations 336 | if newline_flag == 0: 337 | new_line = " " * (line_indent + int(indent)) + temp_line.lstrip() + " ///\n" 338 | # if the previous line does include a line break, then 339 | # assuming that the indentation is correctly done, 340 | # add no indentations 341 | elif 
# Add a white space before a curly bracket
# (but not if the curly bracket is used for global macro, as in "${}") --------------------
def space_before_curly(input_file, output_file, indent, tab_space, linemax):
    '''
    Insert a blank before every "{" that directly follows a character other
    than a blank or "$". Lines inside /* */ comments are copied unchanged.

    indent, tab_space and linemax are not used.
    '''
    with open(input_file, "r") as reader:
        input_lines = reader.readlines()

    output_list = []
    comment_delimiter = 0
    for line in input_lines:
        comment_delimiter = update_comment_delimiter(comment_delimiter, line)
        if comment_delimiter == 0:
            # "x{" becomes "x {"; " {" and "${" are left as they are
            line = re.sub(r"([^ $])\{", r"\1 {", line)
        output_list.append(line)

    with open(output_file, "w") as writer:
        writer.writelines(output_list)


# Remove blank lines before curly brackets are closed --------------------
def remove_blank_lines_before_curly_close(input_file, output_file, indent, tab_space, linemax):
    '''
    Drop blank lines that directly precede a closing curly bracket.

    A blank line is removed when the next non-blank line, with any "//"
    comment stripped, ends with "}" and does not match the macro pattern
    "$...{". All other lines are copied unchanged, including blank lines at
    the end of the file, which have no following line to be compared with.

    indent, tab_space and linemax are not used.
    '''
    with open(input_file, "r") as reader:
        input_lines = reader.readlines()

    output_list = []
    comment_delimiter = 0
    for line_index, line in enumerate(input_lines):
        comment_delimiter = update_comment_delimiter(comment_delimiter, line)
        if comment_delimiter == 0 and not line.strip():
            # first non-blank line after this one, or None at end of file
            following = next(
                (
                    input_lines[k]
                    for k in range(line_index + 1, len(input_lines))
                    if input_lines[k].strip()
                ),
                None,
            )
            if following is not None:
                code = re.sub(r"//.*", r"", following).rstrip()
                if code.endswith("}") and not re.search(r"\$.*{", following):
                    # redundant blank line before a closing bracket
                    continue
        output_list.append(line)

    with open(output_file, "w") as writer:
        writer.writelines(output_list)
437 | writer.write(output_line) 438 | -------------------------------------------------------------------------------- /src/stata_linter_detect.py: -------------------------------------------------------------------------------- 1 | # version 1.02 06apr2023 DIME Analytics dimeanalytics@worldbank.org 2 | # Import packages ==================== 3 | import os 4 | import re 5 | import sys 6 | import pandas as pd 7 | import argparse 8 | 9 | # Version Global 10 | ## VERY IMPORTANT: Update the version number here every time there's an update 11 | ## in the package. Otherwise this will cause a major bug 12 | VERSION = "1.02" 13 | 14 | # simple run entry point 15 | def run(): 16 | parser = argparse.ArgumentParser(description='Lint a Stata do-file.') 17 | parser.add_argument('filename', metavar='file', type=str, nargs='?', 18 | help='The name of the file to lint.') 19 | parser.add_argument('--indent', type=int, nargs='?', default=4, 20 | help="Number of spaces to use for each indentation" 21 | ) 22 | parser.add_argument('--suppress', action='store_true', 23 | help="Suppress line item printout" 24 | ) 25 | parser.add_argument('--summary', action='store_true', 26 | help="Print a summary of bad practices detected" 27 | ) 28 | parser.add_argument('--linemax', type=int, nargs='?', default=80, 29 | help="Maximum number of characters per line" 30 | ) 31 | parser.add_argument('--excel_output', type=str, nargs='?', default="", 32 | help="If specified, save results to Excel workbook" 33 | ) 34 | 35 | 36 | args=parser.parse_args() 37 | return stata_linter_detect_py( 38 | input_file=args.filename, 39 | indent=args.indent, 40 | suppress="1" if args.suppress else "0", 41 | summary="1" if args.summary else "0", 42 | excel=args.excel_output, 43 | linemax=args.linemax, 44 | tab_space=args.indent 45 | ) 46 | 47 | # Style =================== 48 | 49 | # Avoid to use abstract index names ---------------- 50 | def abstract_index_name( 51 | line_index, line, input_lines, indent, 52 | suppress, 
def loop_close(line):

    '''
    Detects if a line is closing a loop

    The part of the line before any "//" comment must end with "}".
    A line whose "}" belongs to a global macro reference such as
    "use ${data}" is not a closing bracket and returns False.
    '''
    # Drop a trailing "//" comment and any whitespace before it
    relevant_part = line.split('//')[0].rstrip()

    if not relevant_part.endswith('}'):
        return False

    # "${...}" contains a "{" after the "$"; a real closing line does not
    if re.search(r"\$.*\{", relevant_part):
        return False

    return True
indentation of 120 | the first line of the loop 121 | ''' 122 | line_ws = line.expandtabs(tab_space) 123 | line_left_spaces1 = len(open_loop_line) - len(open_loop_line.lstrip()) 124 | line_left_spaces2 = len(line_ws) - len(line_ws.lstrip()) 125 | if (line_left_spaces2 - line_left_spaces1 < indent) & (len(line_ws.strip()) > 0): 126 | return True 127 | else: 128 | return False 129 | 130 | # Use proper indentations in for-loops, while-loops, and if/else statements ---------------- 131 | def detect_bad_indent(line_index, line, input_lines, indent, tab_space): 132 | 133 | if loop_open(line): 134 | line_ws = line.expandtabs(tab_space) 135 | j = 1 136 | embedded_loops = 0 137 | 138 | # Checking the lines inside the loop 139 | while j + line_index < len(input_lines): 140 | next_line = input_lines[line_index + j] 141 | 142 | # (next) line is opening another loop 143 | if loop_open(next_line): 144 | embedded_loops += 1 145 | j += 1 146 | continue 147 | 148 | # (next) line is closing a loop 149 | if loop_close(next_line): 150 | if embedded_loops > 0: 151 | # closing an embedded loop 152 | embedded_loops -= 1 153 | else: 154 | # closing the main loop 155 | break 156 | 157 | # (next) line is inside an embedded loop, we don't check it here. 
# Use indentations after line breaks (///) ----------------
def indent_after_newline(
    line_index, line, input_lines, indent,
    suppress, style_dictionary, excel_output_list,
    tab_space
    ):
    '''
    Warn when a line that continues a "///" statement is not indented.

    The line is compared with the first line of the statement, i.e. the
    earliest line of the uninterrupted run of "///" lines right above it.
    It must be indented at least `indent` columns more than that line,
    with tabs counted as `tab_space` columns.

    Returns [style_dictionary, excel_output_list]; on a warning the
    "indent_after_newline" counter is incremented and a
    [line number, "style", message] entry is appended.
    '''
    # The first line of the do-file, or a line whose previous line has
    # no "///", is not a continuation line
    if line_index == 0 or not re.search(r"\/\/\/", input_lines[line_index - 1]):
        return([style_dictionary, excel_output_list])

    # Walk back to the first line of the statement. The walk stops at the
    # top of the file so that it never wraps around to the last lines
    # through negative indices.
    first_index = line_index - 1
    while first_index > 0 and re.search(r"\/\/\/", input_lines[first_index - 1]):
        first_index -= 1

    first_line = input_lines[first_index].expandtabs(tab_space)
    first_line_indent = len(first_line) - len(first_line.lstrip())

    line_ws = line.expandtabs(tab_space)
    line_left_spaces = len(line_ws) - len(line_ws.lstrip())

    if line_left_spaces - first_line_indent < indent:
        print_output = (
            '''After new line statement ("///"), add indentation ({:d} whitespaces).'''.format(indent)
        )

        if suppress != "1":
            print(
                '''(line {:d}): '''.format(line_index + 1) +
                print_output
            )

        style_dictionary["indent_after_newline"] += 1
        excel_output_list.append([line_index + 1, "style", print_output])

    return([style_dictionary, excel_output_list])
# For missing values "var < ." or "var != ." are used (!missing(var) is recommended) ----------------
def has_condition_missing(line):

    '''
    Detect a comparison against the missing value "."

    True when "<", "<=", "!=" or "~=" is followed, after optional spaces,
    by a dot that is not the start of a decimal number.
    '''
    missing_comparison = re.search(r"(<|<=|!=|~=)( )*(\.(?![0-9]))", line)
    return missing_comparison is not None
# If a line is too long, it should be broken into multiple lines
def detect_line_too_long(line, linemax):

    '''
    Detect if a line has more than linemax characters

    A single line break at the end of the line is not counted.
    '''
    visible_part = line[:-1] if line.endswith('\n') else line
    return len(visible_part) > linemax
# "if" condition should be explicit
def detect_implicit_if(line):

    '''
    Detect an "if" or "else if" whose condition is not explicit

    The condition is the text from the "if" to the end of the line. It is
    explicit when it calls missing(), inrange() or inlist(), or contains
    "=", "<" or ">". Returns True only for an "if" with none of these.
    '''
    # Search and slice the same left-stripped string. Slicing the original
    # line with an offset found in the stripped one would pull text from
    # before the "if" (such as the "=" of "replace x = 1 if y") into the
    # condition on indented lines.
    stripped = line.lstrip()
    search_if = re.search(r"(?:^|\s)(?:if|else if)\s", stripped)

    if search_if is None:
        return False

    condition = stripped[search_if.start():]
    is_explicit = re.search(r"missing\(|inrange\(|inlist\(|=|<|>", condition)

    return is_explicit is None
# Ask if missing variables are properly taken into account
def check_missing_expression(line):

    '''
    Detect if a line already deals with missing values

    True when the line compares against "." with "<", "!=" or "~="
    (a dot starting a decimal number does not count) or calls !missing().
    '''
    handles_missing = re.search(r"(<|!=|~=)( )*(\.(?![0-9]))|!missing\(.+\)", line)
    return handles_missing is not None
def check_backslash(line):

    '''
    Detect if a line contains at least one backslash
    '''
    return "\\" in line
def bang_not_tilde(
    line_index, line, input_lines, indent,
    suppress, check_dictionary, excel_output_list,
    tab_space
    ):
    '''
    Ask if tilde (~) is being used for negation

    A line is reported when "~=" is followed by anything other than the
    missing value ".". Returns [check_dictionary, excel_output_list]; on a
    report the "bang_not_tilde" counter is incremented and a
    [line number, "check", message] entry is appended.
    '''
    tilde_comparison = re.search(r"~=\s*([^\s.]|\.[0-9]+)", line)

    # nothing to report: hand the inputs back unchanged
    if tilde_comparison is None:
        return([check_dictionary, excel_output_list])

    line_number = line_index + 1
    print_output = (
        '''Are you using tilde (~) for negation? ''' +
        '''If so, for negation, use bang (!) instead of tilde (~).'''
    )

    if suppress != "1":
        print('''(line {:d}): '''.format(line_number) + print_output)

    check_dictionary["bang_not_tilde"] += 1
    excel_output_list.append([line_number, "check", print_output])

    return([check_dictionary, excel_output_list])
# Run linter program to detect bad coding practices ===================
def stata_linter_detect_py(
    input_file, indent,
    suppress, summary, excel, linemax,
    tab_space
    ):
    '''
    Lint a do-file and report the bad practices detected

    input_file : path of the do-file to read
    indent     : number of whitespaces expected per indentation level
    suppress   : "1" to skip the line-by-line printout
    summary    : "1" to print a table with the number of occurrences
    excel      : path of an Excel workbook for the results, "" for none
    linemax    : maximum number of characters allowed in a line
    tab_space  : number of columns a hard tab counts for

    indent, linemax and tab_space are converted with int().

    Returns True when at least one issue was recorded, False otherwise.
    '''
    indent = int(indent)
    linemax = int(linemax)
    tab_space = int(tab_space)

    # The do-file is read once; every pass below goes over the same lines
    with open(input_file, "r") as f:
        input_lines = f.readlines()

    excel_output_list = []

    # style ============
    # Any hard tabs in the do file.
    # The flag is set before the loop so that the summary can be printed
    # for an empty file or a file with no line outside block comments.
    hard_tab = "No"
    comment_delimiter = 0
    for line_index, line in enumerate(input_lines):

        comment_delimiter = update_comment_delimiter(comment_delimiter, line)

        if comment_delimiter == 0 and detect_hard_tab(line):
            hard_tab = "Yes"
            print_output = (
                '''Use {:d} white spaces instead of tabs. '''.format(indent) +
                '''(This may apply to other lines as well.)'''
            )
            excel_output_list.append([line_index + 1, "style", print_output])
            if suppress != "1":
                print(
                    '''(line {:d}): '''.format(line_index + 1) +
                    print_output
                )
            # only the first hard tab is reported
            break

    def run_pass(checks, counters, findings):
        # Apply the checks, in order, to every line that is neither a
        # comment line nor inside a block comment
        delimiter = 0
        for line_index, line in enumerate(input_lines):
            delimiter = update_comment_delimiter(delimiter, line)
            is_comment_line = re.search(r"^(\*|\/\/)", line.lstrip()) is not None
            if is_comment_line or delimiter > 0:
                continue
            for check in checks:
                counters, findings = check(
                    line_index, line, input_lines, indent,
                    suppress, counters, findings,
                    tab_space
                )
        return counters, findings

    def too_long_line_check(
        line_index, line, input_lines, indent,
        suppress, style_dictionary, excel_output_list,
        tab_space
        ):
        # too_long_line takes linemax as an extra argument; this wrapper
        # gives it the same signature as the other checks
        return too_long_line(
            line_index, line, input_lines, indent, linemax,
            suppress, style_dictionary, excel_output_list,
            tab_space
        )

    # Other line-by-line bad practices
    style_dictionary = {
        "abstract_index_name": 0,
        "proper_indent": 0,
        "indent_after_newline": 0,
        "whitespace_symbol": 0,
        "condition_missing": 0,
        "explicit_if": 0,
        "dont_use_delimit": 0,
        "dont_use_cd": 0,
        "too_long_line": 0,
        "parentheses_for_global_macro": 0
    }
    style_checks = [
        abstract_index_name,
        proper_indent,
        indent_after_newline,
        whitespace_symbol,
        condition_missing,
        explicit_if,
        dont_use_delimit,
        dont_use_cd,
        too_long_line_check,
        parentheses_for_global_macro
    ]
    style_dictionary, excel_output_list = run_pass(
        style_checks, style_dictionary, excel_output_list
    )

    # check ============
    check_dictionary = {
        "check_missing": 0,
        "backslash_in_path": 0,
        "bang_not_tilde": 0,
    }
    check_checks = [check_missing, backslash_in_path, bang_not_tilde]
    check_dictionary, excel_output_list = run_pass(
        check_checks, check_dictionary, excel_output_list
    )

    print("")

    if summary == "1":
        print("-------------------------------------------------------------------------------------")
        print("{:69s} {:30s}".format("Bad practice", "Occurrences"))
        print("-------------------------------------------------------------------------------------")

        print("{:69s} {:10s}".format("Hard tabs used instead of soft tabs: ", hard_tab))

        # one row per counter, in the order of the printed table
        summary_rows = [
            ("One-letter local name in for-loop: ", style_dictionary["abstract_index_name"]),
            ("Non-standard indentation in { } code block: ", style_dictionary["proper_indent"]),
            ("No indentation on line following ///: ", style_dictionary["indent_after_newline"]),
            ("Use of . where missing() is appropriate: ", style_dictionary["condition_missing"]),
            ("Missing whitespaces around operators: ", style_dictionary["whitespace_symbol"]),
            ("Implicit logic in if-condition: ", style_dictionary["explicit_if"]),
            ("Delimiter changed: ", style_dictionary["dont_use_delimit"]),
            ("Working directory changed: ", style_dictionary["dont_use_cd"]),
            ("Lines too long: ", style_dictionary["too_long_line"]),
            ("Global macro reference without { }: ", style_dictionary["parentheses_for_global_macro"]),
            ("Potential omission of missing values in expression: ", check_dictionary["check_missing"]),
            ("Backslash detected in potential file path: ", check_dictionary["backslash_in_path"]),
            ("Tilde (~) used instead of bang (!) in expression: ", check_dictionary["bang_not_tilde"]),
        ]
        for label, occurrences in summary_rows:
            print("{:60s} {:10d}".format(label, occurrences))

    output_df = pd.DataFrame(excel_output_list)
    if excel != "":
        if output_df.empty:
            output_df = pd.DataFrame(columns = ["Line", "Type", "Problem"])
        output_df.columns = ["Line", "Type", "Problem"]
        # the sheet is named after the do-file, cut to 20 characters
        sheet_name = os.path.basename(input_file)[:20]
        if os.path.exists(excel):
            # add a sheet to the existing workbook
            with pd.ExcelWriter(excel, engine = "openpyxl", mode = "a") as writer:
                output_df.to_excel(writer, index = False, sheet_name = sheet_name)
        else:
            with pd.ExcelWriter(excel) as writer:
                output_df.to_excel(writer, index = False, sheet_name = sheet_name)

    return( not output_df.empty )