├── +UnitTest └── +tokernizer │ ├── FileCheckTests.m │ └── TokenizeTests.m ├── README.md ├── Token.m ├── analyze_file.m ├── check.m ├── check_settings.m ├── run_unittests.m ├── testFiles ├── MatlabArgumentClass.m └── MatlabIndentedClass.m ├── test_MatlabIndentedClass.m ├── test_check.m └── tokenize_code.m

/+UnitTest/+tokernizer/FileCheckTests.m:
--------------------------------------------------------------------------------
classdef FileCheckTests < matlab.unittest.TestCase
    %FILECHECKTESTS Runs CHECK on the sample files and expects no warnings

    methods(TestClassSetup)
        % Shared setup for the entire test class
        function setPathDef(testCase)
            %SETPATHDEF puts the sample files in 'testFiles' on the path
            %   The folder is added through a PathFixture instead of a
            %   manual addpath/rmpath pair. The fixture restores the
            %   previous path when the test class is torn down, so a
            %   'testFiles' entry that was already on the path before the
            %   tests ran is no longer removed, and no separate teardown
            %   method is needed.
            testCase.applyFixture( ...
                matlab.unittest.fixtures.PathFixture('testFiles'));
        end
    end

    methods(Test)
        % Test methods

        function testMatlabIndentedClass(testCase)
            % Matlab indentation class test
            H = @() check('MatlabIndentedClass.m');
            testCase.verifyWarningFree(H);
        end

        function testMatlabArgumentValidation(testCase)
            % Argument validation test

            % Argument validation not supported by versions earlier than 9.7
            % (earlier than R2019b)
            testCase.assumeFalse(verLessThan('matlab', '9.7'))
            H = @() check('MatlabArgumentClass.m');
            testCase.verifyWarningFree(H);
        end

    end

end
--------------------------------------------------------------------------------
/+UnitTest/+tokernizer/TokenizeTests.m:
--------------------------------------------------------------------------------
classdef TokenizeTests < matlab.unittest.TestCase
    %TOKENIZETESTS Tests for tokenize_code

    methods(Test)
        function testText(obj)
            %TESTTEXT Tokenizing a text should not change the content

            % Read file
            text = fileread('check.m');

            % Tokenize code
            tokens = tokenize_code(text);

            % Reconstruct text from tokens
            reconstructed_text = horzcat(tokens.text);

            % Compare with actual
            % text
            obj.assertEqual(reconstructed_text, text)
        end

        function testDoubleQuote(obj)
            %TESTDOUBLEQUOTE Tests a double quoted string

            % Input data for the test
            input_str = '"test"'; % String: "test"

            % Construct expected output for comparison
            expected = Token('string', input_str, 1, 1);

            % Get actual output
            actual = tokenize_code(input_str);

            % Compare actual output with expected output
            obj.verifyEqual(actual, expected);
        end

        function testSoloDoubleQuote(obj)
            %TESTSOLODOUBLEQUOTE Tests an assignment of a double quoted string

            % Input data for the test
            input_str = 'output = "test"'; % String: output = "test"

            % Construct expected output for comparison
            expected(1) = Token('identifier', 'output', 1, 1);
            expected(2) = Token('space', ' ', 1, 7);
            expected(3) = Token('punctuation', '=', 1, 8);
            expected(4) = Token('space', ' ', 1, 9);
            expected(5) = Token('string', '"test"', 1, 10);

            % Get actual output
            actual = tokenize_code(input_str);

            % Compare actual output with expected output
            obj.verifyEqual(actual, expected);
        end

        function testNestedQuote(obj)
            %TESTNESTEDQUOTE Tests a single quote inside a double quoted string

            % Input data for the test
            input_str = '"let''s go"'; % String: "let's go"

            % Construct expected output for comparison
            expected = Token('string', input_str, 1, 1);

            % Get actual output
            actual = tokenize_code(input_str);

            % Compare actual output with expected output
            obj.verifyEqual(actual, expected);
        end

        function testNestedQuote2(obj)
            %TESTNESTEDQUOTE2 Tests double quotes inside a single quoted string

            % Input data for the test
            input_str = '''He said, "hi"'''; % String: 'He said, "hi"'

            % Construct expected output for comparison
            expected = Token('string', input_str, 1, 1);

            % Get actual output
            actual = tokenize_code(input_str);

            % Compare actual output with expected output
            obj.verifyEqual(actual, expected);
        end

        function testFunctionNames(obj)
            %TESTFUNCTIONNAMES Function names should be extracted
            report = analyze_file('', tokenize_code('function foo(); end'));
            obj.assertEqual(report.name.text, 'foo')

            report = analyze_file('', tokenize_code('function x = foo(); end'));
            obj.assertEqual(report.name.text, 'foo')

            report = analyze_file('', tokenize_code('function [x, y] = foo(); end'));
            obj.assertEqual(report.name.text, 'foo')
        end

        function testFunctionReturnNames(obj)
            %TESTFUNCTIONRETURNNAMES Function return names should be extracted
            report = analyze_file('', tokenize_code('function foo(); end'));
            obj.assertEmpty(report.returns)

            report = analyze_file('', tokenize_code('function x = foo(); end'));
            obj.assertEqual(report.returns(1).text, 'x')
            obj.assertLength(report.returns, 1)

            report = analyze_file('', tokenize_code('function [x, y] = foo(); end'));
            obj.assertEqual(report.returns(1).text, 'x')
            obj.assertEqual(report.returns(2).text, 'y')
            obj.assertLength(report.returns, 2)
        end

        function testFunctionArguments(obj)
            %TESTFUNCTIONARGUMENTS Function arguments should be extracted
            report = analyze_file('', tokenize_code('function foo(); end'));
            obj.assertEmpty(report.arguments)

            report = analyze_file('', tokenize_code('function foo(x); end'));
            obj.assertEqual(report.arguments(1).text, 'x')
            obj.assertLength(report.arguments, 1)

            report = analyze_file('', tokenize_code('function foo(x, y); end'));
            obj.assertEqual(report.arguments(1).text, 'x')
            obj.assertEqual(report.arguments(2).text, 'y')
            obj.assertLength(report.arguments, 2)

        end

        function testOperatorsGeneral(obj)
            %TESTOPERATORSGENERAL Operators should be parsed correctly
            % '>=' must stay one token and the unary '-' a separate one
            tokens = tokenize_code('a>=-b');
            obj.assertTrue(tokens(2).hasText('>='))
            obj.assertTrue(tokens(3).hasText('-'))
        end

        function testOperatorsTranspose(obj)
            %TESTOPERATORSTRANSPOSE Transpose Operators should not be strings
            % Code under test: a'
            tokens = tokenize_code('a''');
            obj.assertTrue(tokens(2).isEqual('punctuation', ''''))

            % Code under test: a.'
            tokens = tokenize_code('a.''');
            obj.assertTrue(tokens(2).isEqual('punctuation', '.'''))

            % Code under test: a'+'a'.'
            tokens = tokenize_code('a''+''a''.''');
            obj.assertTrue(tokens(2).isEqual('punctuation', ''''))
            obj.assertTrue(tokens(4).isEqual('string', '''a'''))
            obj.assertTrue(tokens(5).isEqual('punctuation', '.'''))
        end

        function testCommands(obj)
            %TESTCOMMANDS Differentiate commands from expressions
            % In command syntax the arguments are expected as strings
            tokens = tokenize_code('help me please % test');
            obj.assertTrue(tokens(1).isEqual('identifier', 'help'))
            obj.assertTrue(tokens(3).isEqual('string', 'me'))
            obj.assertTrue(tokens(5).isEqual('string', 'please'))
            obj.assertTrue(tokens(7).isEqual('comment', '% test'))
        end

        function testEnd(obj)
            %TESTEND Differentiate keyword end from variable end
            tokens = tokenize_code('if a(end); end');
            obj.assertTrue(tokens(5).isEqual('identifier', 'end'))
            obj.assertTrue(tokens(9).isEqual('keyword', 'end'))
        end

        function testSimicolon(obj)
            %TESTSIMICOLON Differentiate semicolons from linebreaks
            % (the method name keeps its original spelling)
            tokens = tokenize_code('[1;2];3');
            obj.assertTrue(tokens(3).isEqual('punctuation', ';'))
            obj.assertTrue(tokens(6).isEqual('linebreak', ';'))
        end

        function testBlock(obj)
            %TESTBLOCK Identify block comments
            comment = sprintf('%%{ \n foo bar \n %%}');
            tokens = tokenize_code(comment);
            obj.assertLength(tokens, 1)
            obj.assertTrue(tokens.isEqual('comment', comment))

            % Block comment surrounded by code lines
            tokens = tokenize_code(sprintf('x\n%s\nx', comment));
            obj.assertLength(tokens, 5)
            obj.assertTrue(tokens(3).isEqual('comment', comment))
        end

        function testLinebreak(obj)
            %TESTLINEBREAK Test line breaks

            % Line breaks should break lines
            tokens = tokenize_code(',foo bar');
            obj.assertTrue(tokens(1).hasType('linebreak'))
            obj.assertTrue(tokens(4).hasType('string'))

            tokens = tokenize_code(';foo bar');
            obj.assertTrue(tokens(1).hasType('linebreak'))
            obj.assertTrue(tokens(4).hasType('string'))

            % Line breaks should not break lines within brackets
            tokens = tokenize_code('[a;b];');
            obj.assertTrue(tokens(3).hasType('punctuation'))
            obj.assertTrue(tokens(6).hasType('linebreak'))

            tokens = tokenize_code('[a,b],');
            obj.assertTrue(tokens(3).hasType('punctuation'))
            obj.assertTrue(tokens(6).hasType('linebreak'))
        end

        function testComment(obj)
            %TESTCOMMENT Test conventional comments in text

            % Conventional comments in text
            tokens = tokenize_code('% this is a comment');
            obj.assertLength(tokens, 1)
            obj.assertTrue(tokens(1).hasType('comment'));

            tokens = tokenize_code(' % this is a comment');
            obj.assertLength(tokens, 2)
            obj.assertTrue(tokens(1).hasType('space'));
            obj.assertTrue(tokens(2).hasType('comment'));

            % Comment line followed by a code line
            txt = sprintf('%s\n%s', ...
                ' % this is a comment', ...
                ' && ...');
            tokens = tokenize_code(txt);
            obj.assertLength(tokens, 7)
            obj.assertTrue(tokens(1).hasType('space'));
            obj.assertTrue(tokens(2).hasType('comment'));
            obj.assertTrue(tokens(3).hasType('linebreak'));
            obj.assertTrue(tokens(4).hasType('space'));
            obj.assertTrue(tokens(5).hasType('punctuation'));
            obj.assertTrue(tokens(6).hasType('space'));
            obj.assertTrue(tokens(7).hasType('punctuation'));
        end

        function testCommentContinuationOperator(obj)
            %TESTCOMMENTCONTINUATIONOPERATOR Test comments that follow continuation operator

            % Test comments that follow continuation operator
            tokens = tokenize_code('... % this is a comment');
            obj.assertLength(tokens, 3)
            obj.assertTrue(tokens(1).hasType('punctuation'));
            obj.assertTrue(tokens(2).hasType('space'));
            obj.assertTrue(tokens(3).hasType('comment'));

            % Text after '...' is a comment even without a '%'
            tokens = tokenize_code('... this is a comment');
            obj.assertLength(tokens, 3)
            obj.assertTrue(tokens(1).hasType('punctuation'));
            obj.assertTrue(tokens(2).hasType('space'));
            obj.assertTrue(tokens(3).hasType('comment'));

            tokens = tokenize_code(' ... % this is a comment');
            obj.assertLength(tokens, 4)
            obj.assertTrue(tokens(1).hasType('space'));
            obj.assertTrue(tokens(2).hasType('punctuation'));
            obj.assertTrue(tokens(3).hasType('space'));
            obj.assertTrue(tokens(4).hasType('comment'));

            % A fourth dot already belongs to the comment
            tokens = tokenize_code('....');
            obj.assertLength(tokens, 2)
            obj.assertTrue(tokens(1).hasType('punctuation'));
            obj.assertTrue(tokens(2).hasType('comment'));

            tokens = tokenize_code('..., this is a comment');
            obj.assertLength(tokens, 2)
            obj.assertTrue(tokens(1).hasType('punctuation'));
            obj.assertTrue(tokens(2).hasType('comment'));

            % Continuation directly after another operator
            tokens = tokenize_code('.*...');
            obj.assertLength(tokens, 2)
            obj.assertTrue(tokens(1).hasType('punctuation'));
            obj.assertTrue(tokens(2).hasType('punctuation'));

            tokens = tokenize_code(' &&...this is a comment');
            obj.assertLength(tokens, 4)
            obj.assertTrue(tokens(1).hasType('space'));
            obj.assertTrue(tokens(2).hasType('punctuation'));
            obj.assertTrue(tokens(3).hasType('punctuation'));
            obj.assertTrue(tokens(4).hasType('comment'));

            tokens = tokenize_code('&... this is a comment');
            obj.assertLength(tokens, 4)
            obj.assertTrue(tokens(1).hasType('punctuation'));
            obj.assertTrue(tokens(2).hasType('punctuation'));
            obj.assertTrue(tokens(3).hasType('space'));
            obj.assertTrue(tokens(4).hasType('comment'));

            % Test comments that follow continuation operator with line break
            txt = sprintf('%s\n%s', ...
                ' |... this is a comment', ...
                ' ||.... this is a comment');
            tokens = tokenize_code(txt);
            obj.assertLength(tokens, 10)
            obj.assertTrue(tokens(1).hasType('space'));
            obj.assertTrue(tokens(2).hasType('punctuation'));
            obj.assertTrue(tokens(3).hasType('punctuation'));
            obj.assertTrue(tokens(4).hasType('space'));
            obj.assertTrue(tokens(5).hasType('comment'));
            obj.assertTrue(tokens(6).hasType('linebreak'));
            obj.assertTrue(tokens(7).hasType('space'));
            obj.assertTrue(tokens(8).hasType('punctuation'));
            obj.assertTrue(tokens(9).hasType('punctuation'));
            obj.assertTrue(tokens(10).hasType('comment'));

            % Mix of '%' comments and continuation comments over three lines
            txt = sprintf('%s\n%s\n%s', ...
                ' % this is a comment', ...
                ' true||.... this is a comment', ...
                ' false% this is a comment');
            tokens = tokenize_code(txt);
            obj.assertLength(tokens, 12)
            obj.assertTrue(tokens(1).hasType('space'));
            obj.assertTrue(tokens(2).hasType('comment'));
            obj.assertTrue(tokens(3).hasType('linebreak'));
            obj.assertTrue(tokens(4).hasType('space'));
            obj.assertTrue(tokens(5).hasType('identifier'));
            obj.assertTrue(tokens(6).hasType('punctuation'));
            obj.assertTrue(tokens(7).hasType('punctuation'));
            obj.assertTrue(tokens(8).hasType('comment'));
            obj.assertTrue(tokens(9).hasType('linebreak'));
            obj.assertTrue(tokens(10).hasType('space'));
            obj.assertTrue(tokens(11).hasType('identifier'));
            obj.assertTrue(tokens(12).hasType('comment'));
        end
    end
end
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Matlab Code Analyzer
==================

MATLAB comes with the very important tool MLINT, which can check your code for common defects. Experience shows that these hints can be very helpful for cleaning up MATLAB code, and preventing simple errors.
5 | 6 | Crucially though, MLINT is not a style checker. That is where this program comes in: 7 | 8 | Say you have some code in `ugly_code.m`. You can analyze this code for problems using one simple command: 9 | 10 | ```matlab 11 | check ugly_code.m 12 | ``` 13 | 14 | This might produce a report like this: 15 | 16 | ``` 17 | Code Analysis for ugly_code.m 18 | 19 | Required files: ugly_code.m, ugly_toolbox.m 20 | Required toolboxes: MATLAB, Signal Processing Toolbox 21 | 22 | Function ugly_code (Line 1, col 18): 23 | 24 | Number of lines: 67 (high) 25 | Number of function arguments: 2 (good) 26 | Number of used variables: 5 (good) 27 | Max level of nesting: 3 (high) 28 | Code complexity: 6 (good) 29 | 30 | Line 1, col 1: too few comments (2 comments for 67 lines of code) 31 | Line 1, col 10: return argument 'szOut' is very short (used 5 times across 38 lines) 32 | Line 1, col 18: function argument 'testInput' is not mentioned in the documentation 33 | Line 15, col 84: very long line 34 | Line 20, col 22: no spaces after operator ',' 35 | Line 27, col 1: incorrect indentation 36 | Line 27, col 1: variable 'szOut' is very short (used 5 times across 38 lines) 37 | Line 27, col 23: variable 'text' shadows a built-in 38 | Line 27, col 34: Eval should never be used 39 | Line 39, col 10: no spaces around operator '=' 40 | ``` 41 | 42 | A report like this will be printed for every function in the file, for script-files, and for classes. The more serious of these comments will be highlighted in red, whereas less important ones will stay black. Every line number is clickable and opens directly in the editor. 43 | 44 | Additionally, this comes with a settings file `check_settings.m`, which can change the thresholds on all warnings, and even enable or disable whole categories of warnings entirely. 
45 | 46 | Contributing 47 | ------------ 48 | 49 | While this program works well for our current applications, it is a complex piece of software, and it has not been thoroughly tested yet. If you find a bug, or would like to see a new feature, or would like to contribute a new feature, please feel free to open an issue or pull request. 50 | 51 | However, this is not my job, and I cannot guarantee an immediate response, or support for every problem. That said, the code is available under the terms of the BSD 3-clause license, so feel free to use it however you like as long as you honor my authorship of it. 52 | 53 | Also, please bear in mind that all of the warnings generated by this program are just that: Warnings. They are *not* laws. If a slightly longer line improves readability, please *do not* make it shorter just to make the style analyzer happy. Please watch [this video](https://www.youtube.com/watch?v=wf-BqAjZb8M) for some context. 54 | 55 | And finally, while this style checker can find many issues, it is by no means perfect. It cannot comment on whether your variable names are good or not, whether your comments are out of date or not, or whether your code makes intuitive sense when reading, or just results in confusion. For more in-depth heuristics on how to improve these aspects of your code, please read the wonderful [MATLAB Style Guidelines 2.0](http://mathworks.com/matlabcentral/fileexchange/46056-matlab-style-guidelines-2-0). 56 | -------------------------------------------------------------------------------- /Token.m: -------------------------------------------------------------------------------- 1 | classdef Token < handle 2 | properties 3 | type 4 | text 5 | line 6 | col 7 | end 8 | 9 | methods 10 | function obj = Token(type, text, line, col) 11 | %TOKEN an atomic piece of source code 12 | % Each token references an atomic piece of source code TEXT at a 13 | % specific LINE and COL. Each TOKEN is tagged as a certain TYPE. 14 | % returns a new OBJ. 
15 | 16 | obj.type = type; 17 | obj.text = text; 18 | obj.line = line; 19 | obj.col = col; 20 | end 21 | 22 | function yesNo = hasType(obj, type) 23 | %HASTYPE checks it OBJ has matching TYPE 24 | % YESNO is a boolean. 25 | 26 | yesNo = any(strcmp(obj.type, type)); 27 | end 28 | 29 | function yesNo = hasText(obj, text) 30 | %HASTEXT checks it OBJ has matching TEXT 31 | % YESNO is a boolean. 32 | 33 | yesNo = any(strcmp(obj.text, text)); 34 | end 35 | 36 | function yesNo = isEqual(obj, type, text) 37 | %ISEQUAL checks it OBJ has matching TYPE and TEXT 38 | % YESNO is a boolean. 39 | 40 | yesNo = obj.hasType(type) && obj.hasText(text); 41 | end 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /analyze_file.m: -------------------------------------------------------------------------------- 1 | function blocks = analyze_file(filename, tokenlist) 2 | %ANALYZE_FILE analyzes TOKENLIST and extracts information about BLOCKS 3 | % in FILENAME. TOKENLIST is assumed to be the content of FILENAME. 4 | % 5 | % Returns a struct array with fields: 6 | % - name: the function name 7 | % - body: the tokens that make up the body of the function 8 | % - nesting: how deeply is this block nested within other blocks 9 | % - children: other blocks nested within this block 10 | % (again as a struct array) 11 | % - variables: variables defined in this block, or properties if the 12 | % block is a class. 13 | % - arguments: function arguments of this block (if a function) 14 | % - returns: return variable names of this block (if a function) 15 | % - type: one of 'Function', 'Nested Function', 'Subfunction', 16 | % 'Class', or 'Script'. 17 | % - filename: the FILENAME. 18 | 19 | % (c) 2016, Bastian Bechtold 20 | % This code is licensed under the terms of the BSD 3-clause license 21 | 22 | beginnings = check_settings('beginnings'); 23 | 24 | blocks = struct('name', {}, 'body', {}, 'nesting', {}, ... 25 | 'children', {}, 'variables', {}, ... 
26 | 'arguments', {}, 'returns', {}, ... 27 | 'type', {}, 'filename', {}); 28 | function_stack = struct('start', {}, 'nesting', {}, 'children', {}); 29 | nesting = 0; 30 | is_first_block = true; 31 | main_type = ''; 32 | for current_pos = 1:length(tokenlist) 33 | current_token = tokenlist(current_pos); 34 | 35 | % count the 'end's to figure out function extents: 36 | if current_token.isEqual('keyword', beginnings) 37 | nesting = nesting + 1; 38 | elseif current_token.isEqual('keyword', 'end') 39 | nesting = nesting - 1; 40 | end 41 | 42 | % determine file type (Script, Function, or Class): 43 | if isempty(main_type) && ... 44 | ~current_token.hasType({'linebreak', 'comment'}) 45 | if current_token.isEqual('keyword', 'function') 46 | main_type = 'Function'; 47 | elseif current_token.isEqual('keyword', 'classdef') 48 | main_type = 'Class'; 49 | else 50 | main_type = 'Script'; 51 | end 52 | end 53 | 54 | % pre-compute intermediate values for better readability: 55 | is_end_of_block = current_token.isEqual('keyword', 'end') && ... 56 | ~isempty(function_stack) && ... 57 | nesting == function_stack(end).nesting; 58 | is_end_of_function_file = current_pos == length(tokenlist) && ... 59 | ~isempty(function_stack); 60 | is_end_of_other_file = current_pos == length(tokenlist) && ... 61 | any(strcmp(main_type, {'Script' 'Class'})); 62 | 63 | % build a stack of function definitions: 64 | % We don't know where these functions end, yet. As soon as we 65 | % know the end, it will get appended to the block list. For 66 | % now, only record where the function starts. 
67 | if current_token.isEqual('keyword', 'function') 68 | % include any leading space in the function body, so that 69 | % later analysis steps can figure out the initial 70 | % indentation of the function: 71 | if current_pos > 1 && tokenlist(current_pos-1).hasType('space') 72 | function_start = current_pos - 1; 73 | else 74 | function_start = current_pos; 75 | end 76 | 77 | % save the new function on the function stack: 78 | stack_frame = struct('start', function_start, ... 79 | 'nesting', nesting-1, ... 80 | 'children', []); 81 | function_stack = [function_stack stack_frame]; %#ok 82 | 83 | elseif is_end_of_block || is_end_of_function_file 84 | function_body = ... 85 | tokenlist(function_stack(end).start:current_pos); 86 | 87 | % determine function type (Top-Level, Nested, or Subfunction): 88 | if nesting > 0 && current_pos ~= length(tokenlist) 89 | block_type = 'Nested Function'; 90 | elseif is_first_block 91 | block_type = main_type; 92 | is_first_block = false; 93 | else 94 | block_type = 'Subfunction'; 95 | end 96 | 97 | % build block struct: 98 | new_block = struct( ... 99 | 'name', get_funcname(function_body), ... 100 | 'body', function_body, ... 101 | 'nesting', function_stack(end).nesting, ... 102 | 'children', function_stack(end).children, ... 103 | 'variables', {get_funcvariables(function_body)}, ... 104 | 'arguments', {get_funcarguments(function_body)}, ... 105 | 'returns', {get_funcreturns(function_body)}, ... 106 | 'type', block_type, 'filename', filename); 107 | 108 | % update function stack with new block struct: 109 | function_stack(end) = []; 110 | if nesting > 0 && ~isempty(function_stack) 111 | if isempty(function_stack(end).children) 112 | function_stack(end).children = new_block; 113 | else 114 | function_stack(end).children = ... 
115 | [function_stack(end).children new_block]; 116 | end 117 | else 118 | blocks = [blocks new_block]; %#ok 119 | end 120 | 121 | elseif is_end_of_other_file 122 | % in classes, variables contains properties: 123 | if strcmp(main_type, 'Script') 124 | variables = {get_variables(tokenlist)}; 125 | else 126 | variables = {get_properties(tokenlist)}; 127 | end 128 | blocks = struct('name', Token('special', filename, 0, 0), ... 129 | 'body', tokenlist, ... 130 | 'nesting', 0, ... 131 | 'children', blocks, ... 132 | 'variables', variables, ... 133 | 'arguments', [], ... 134 | 'returns', [], ... 135 | 'type', main_type, ... 136 | 'filename', filename); 137 | end 138 | end 139 | end 140 | 141 | 142 | function variables = get_properties(tokenlist) 143 | %GET_PROPERTIES extracts all assigned property VARIABLES from TOKENLIST 144 | % returns an object array of Tokens. 145 | 146 | variables = Token.empty; 147 | in_properties = false; % true whenever the loop is inside a properties 148 | % block. 149 | is_first = false; % true whenever the loop is between a line break and 150 | % the beginning of the line's content. 
151 | for pos = 1:length(tokenlist) 152 | token = tokenlist(pos); 153 | if token.isEqual('keyword', 'properties') 154 | in_properties = true; 155 | is_first = false; 156 | elseif in_properties && token.isEqual('keyword', 'end') 157 | in_properties = false; 158 | end 159 | if token.hasType('linebreak') 160 | is_first = true; 161 | elseif token.hasType('identifier') && is_first && in_properties 162 | variables = [variables token]; %#ok 163 | is_first = false; 164 | end 165 | end 166 | end 167 | 168 | 169 | function variables = get_funcvariables(tokenlist) 170 | %GET_FUNCVARIABLES extracts all assigned VARIABLES from TOKENLIST 171 | % 172 | % See also: get_variables 173 | 174 | % skip the function declaration: 175 | end_declaration = search_token('pair', ')', tokenlist, 1, +1); 176 | variables = get_variables(tokenlist(end_declaration+1:end)); 177 | end 178 | 179 | 180 | function variables = get_variables(tokenlist) 181 | %GET_VARIABLES extracts all assigned VARIABLES from TOKENLIST 182 | % Variables are things on the left hand side of equal signs which are not 183 | % enclosed in braces. 184 | 185 | variables = containers.Map(); 186 | for token_idx = 1:length(tokenlist) 187 | token = tokenlist(token_idx); 188 | if token.isEqual('punctuation', '=') 189 | start = search_token('linebreak', [], tokenlist, token_idx, -1); 190 | lhs_tokens = tokenlist(start:token_idx); 191 | % all non-nested identifiers are assigned variable names 192 | nesting = 0; 193 | for this_token = lhs_tokens 194 | if this_token.isEqual('pair', {'{' '('}) 195 | nesting = nesting + 1; 196 | elseif this_token.isEqual('pair', {'}' ')'}) 197 | nesting = nesting - 1; 198 | elseif this_token.hasType('identifier') && ... 199 | nesting == 0 && ... 
200 | ~variables.isKey(this_token.text) 201 | variables(this_token.text) = this_token; 202 | end 203 | end 204 | end 205 | end 206 | variables = variables.values(); 207 | variables = [variables{:}]; % convert to object array 208 | if ~isempty(variables) 209 | % sort by column: 210 | [~, sort_idx] = sort([variables.col]); 211 | variables = variables(sort_idx); 212 | % sort by line (this preserves column ordering for variables 213 | % on the same line): 214 | [~, sort_idx] = sort([variables.line]); 215 | variables = variables(sort_idx); 216 | end 217 | end 218 | 219 | 220 | function name = get_funcname(tokenlist) 221 | %GET_FUNCNAME analyzes TOKENLIST to find function name 222 | % NAME is a Token 223 | 224 | pos = search_token('pair', '(', tokenlist, 1, +1); 225 | pos = search_token('identifier', [], tokenlist, pos, -1); 226 | name = tokenlist(pos); 227 | end 228 | 229 | 230 | function arguments = get_funcarguments(tokenlist) 231 | %GET_FUNCARGUMENTS analyzes TOKENLIST to find function return values 232 | % ARGUMENTS is an object array of Tokens. 233 | 234 | start = search_token('pair', '(', tokenlist, 1, +1); 235 | stop = search_token('pair', ')', tokenlist, start, +1); 236 | arguments = tokenlist(start+1:stop-1); 237 | % extract all identifiers: 238 | arguments = arguments(strcmp({arguments.type}, 'identifier')); 239 | end 240 | 241 | 242 | function returns = get_funcreturns(tokenlist) 243 | %GET_FUNCRETURNS analyzes TOKENLIST to find function return values 244 | % RETURNS is an object array of Tokens. 
245 | 246 | start = search_token('keyword', 'function', tokenlist, 1, +1); 247 | pos = search_token('pair', '(', tokenlist, start, +1); 248 | stop = search_token('identifier', [], tokenlist, pos, -1); 249 | returns = tokenlist(start+1:stop-1); 250 | % extract all identifiers: 251 | returns = returns(strcmp({returns.type}, 'identifier')); 252 | end 253 | 254 | 255 | function token_idx = search_token(token_type, token_text, tokenlist, token_idx, increment) 256 | %SEARCH_TOKEN search TOKENLIST for token with TOKEN_TYPE and TOKEN_TEXT 257 | % starting from TOKEN_IDX and stepping with INCREMENT. 258 | % 259 | % To search for any Token with a given TOKEN_TYPE, leave TOKEN_TEXT empty 260 | % To search for any Token with a given TOKEN_TEXT, leave TOKEN_TYPE empty 261 | % Set INCREMENT to 1 for forward searching and -1 for backward searching 262 | % 263 | % Returns the TOKEN_IDX of the first matching token. 264 | 265 | if ~isempty(token_type) && ~isempty(token_text) 266 | while ~tokenlist(token_idx).isEqual(token_type, token_text) 267 | if token_idx + increment < 1 || ... 268 | token_idx + increment > length(tokenlist) 269 | break 270 | end 271 | token_idx = token_idx + increment; 272 | end 273 | elseif ~isempty(token_text) 274 | while ~tokenlist(token_idx).hasText(token_text) 275 | if token_idx + increment < 1 || ... 276 | token_idx + increment > length(tokenlist) 277 | break 278 | end 279 | token_idx = token_idx + increment; 280 | end 281 | elseif ~isempty(token_type) 282 | while ~tokenlist(token_idx).hasType(token_type) 283 | if token_idx + increment < 1 || ... 
284 | token_idx + increment > length(tokenlist) 285 | break 286 | end 287 | token_idx = token_idx + increment; 288 | end 289 | end 290 | end 291 | -------------------------------------------------------------------------------- /check.m: -------------------------------------------------------------------------------- 1 | function check(filename) 2 | %CHECK a source file FILENAME for problems 3 | % 4 | % CHECK does a deep analysis of the code in FILENAME, and reports on 5 | % problems with the code. 6 | % 7 | % Each function defined in the file is reported separately, with 8 | % separate statistics and warnings. Minor warnings are written in 9 | % black, while major warnings are printed red. Even though some 10 | % warnings are somewhat subjective, in general, at least all red 11 | % issues *should* be fixed. 12 | % 13 | % Every warning is presented as a clickable link that will jump to the 14 | % correct line in the editor. 15 | % 16 | % Many warnings have configurable settings in CHECK_SETTINGS. Note 17 | % though that *disabling* a warning does not count as *fixing* it. 18 | % 19 | % Warnings include: 20 | % - Required files to run the code 21 | % - Required toolboxes to run the code 22 | % - High number of lines 23 | % - High number of function arguments 24 | % - High number of used variables 25 | % - Too many levels of nesting 26 | % - Too much function complexity 27 | % - MLINT warnings 28 | % - missing documentation, or missing documentation of function arguments 29 | % - not enough comments 30 | % - incorrect or insufficient indentation 31 | % - excessive line length 32 | % - too short variable names 33 | % - no spaces around some operators 34 | % - use of dangerous functions like eval 35 | 36 | % (c) 2016, Bastian Bechtold 37 | % This code is licensed under the terms of the BSD 3-clause license 38 | 39 | [requiredFiles, requiredProducts] = ... 
40 | matlab.codetools.requiredFilesAndProducts(filename); 41 | % manually fetch file name, since checkcode won't do it correctly 42 | fullfilename = which(filename); 43 | mlintInfo = ... 44 | checkcode(fullfilename, '-cyc', '-id', '-struct', '-fullpath'); 45 | 46 | source_code = fileread(filename); 47 | tokens = tokenize_code(source_code); 48 | func_report = analyze_file(fullfilename, tokens); 49 | 50 | fprintf('Code Analysis for %s\n\n', filename); 51 | 52 | fprintf(' Required files: '); 53 | for file_idx = 1:length(requiredFiles) 54 | [~, basename, ext] = fileparts(requiredFiles{file_idx}); 55 | fprintf('%s%s', basename, ext); 56 | if file_idx < length(requiredFiles) 57 | fprintf(', '); 58 | else 59 | fprintf('\n'); 60 | end 61 | end 62 | 63 | fprintf(' Required toolboxes: '); 64 | for product_idx = 1:length(requiredProducts) 65 | fprintf('%s%s', requiredProducts(product_idx).Name); 66 | if product_idx < length(requiredProducts) 67 | fprintf(', '); 68 | else 69 | fprintf('\n\n'); 70 | end 71 | end 72 | 73 | for func = func_report 74 | print_code_report(func, mlintInfo, 2); 75 | end 76 | end 77 | 78 | 79 | function print_code_report(func, mlintInfo, indentation) 80 | %PRINT_CODE_REPORT prints a comprehensive report about a code block FUNC 81 | % The printed text is indented at INDENTATION spaces. 82 | % 83 | % FUNC is analyzed for many common defects and stylistic mishaps, and 84 | % prints a nicely formatted list of issues, plus some additional 85 | % statistics about the code block. 86 | % 87 | % Depending on the type of code block (Function, Subfunction, Nested 88 | % Function, Class, Script) different kinds of statistics are reported. 89 | % 90 | % Additionally, many warnings are collected and presented, including 91 | % MLINT warnings from MLINTINFO. 92 | 93 | prefix = repmat(' ', 1, indentation); 94 | link = sprintf('Line %i, col %i', ... 95 | open_file_link(func.filename, func.name.line), ... 
96 | func.name.line, func.name.col); 97 | fprintf('%s%s %s (%s):\n\n', ... 98 | prefix, func.type, func.name.text, link); 99 | 100 | functypes = {'Function', 'Subfunction', 'Nested Function'}; 101 | if any(strcmp(func.type, functypes)) 102 | stats = get_function_stats(func, mlintInfo); 103 | print_function_stats(stats, indentation+2); 104 | fprintf('\n'); 105 | elseif strcmp(func.type, 'Class') 106 | stats = get_class_stats(func); 107 | print_class_stats(stats, indentation+2); 108 | fprintf('\n'); 109 | elseif strcmp(func.type, 'Script') 110 | stats = get_script_stats(func); 111 | print_script_stats(stats, indentation+2); 112 | fprintf('\n'); 113 | end 114 | 115 | reports = [report_documentation(func) ... 116 | report_comments(func.body) ... 117 | report_mlint_warnings(mlintInfo, func.body) ... 118 | report_indentation(func) ... 119 | report_line_length(func.body) ... 120 | report_variables(func.variables, func.body, 'variable') ... 121 | report_operators(func.body) ... 122 | report_eval(func.body)]; 123 | 124 | if any(strcmp(func.type, functypes)) 125 | reports = [reports ... 126 | report_variables(func.name, func.body, ... 127 | 'function') ... 128 | report_variables(func.arguments, func.body, ... 129 | 'function argument') ... 130 | report_variables(func.returns, func.body, ... 
131 | 'return argument')]; 132 | end 133 | 134 | if ~isempty(reports) 135 | % First, secondary sort by column 136 | report_tokens = [reports.token]; 137 | [~, sort_idx] = sort([report_tokens.col]); 138 | reports = reports(sort_idx); 139 | % Second, primary sort by line (preserves secondary 140 | % sorting order in case of collisions) 141 | report_tokens = [reports.token]; 142 | [~, sort_idx] = sort([report_tokens.line]); 143 | reports = reports(sort_idx); 144 | print_report(reports, indentation+2, func.filename); 145 | end 146 | 147 | fprintf('\n\n'); 148 | 149 | for subfunc = func.children 150 | print_code_report(subfunc, mlintInfo, indentation+4) 151 | end 152 | end 153 | 154 | 155 | function class_stats = get_class_stats(class_struct) 156 | %GET_CLASS_STATS analyzes a script CLASS_STRUCT and 157 | % gathers some statistics CLASS_STATS about them. 158 | % 159 | % Statistics gathered (fieldname): 160 | % - number of lines (num_lines) 161 | % - number of properties (num_properties) 162 | % - number of methods (num_methods) 163 | % 164 | % The statistics are returned as struct CLASS_STATS 165 | 166 | class_stats.num_lines = length(split_lines(class_struct.body)); 167 | class_stats.num_properties = length(class_struct.variables); 168 | class_stats.num_methods = length(class_struct.children); 169 | end 170 | 171 | 172 | function print_class_stats(class_stats, indentation) 173 | %PRINT_CLASS_STATS prints some general statistics CLSS_STATS about 174 | % a class. The printed text is indented at INDENTATION spaces. 175 | % 176 | % This function prints an evaluation of 177 | % - the number of lines in the function 178 | % - the number of properties 179 | % - the number of methods 180 | % 181 | % All of these values are evaluated as `good` if they are below a 182 | % certain low threshold; as `high` if they are above this threshold 183 | % and as `too high` and in red text if they exceed a high threshold. 
184 | % The thresholds can be controlled using the settings 185 | % - `lo_class_num_lines` and `hi_class_num_lines` 186 | % - `lo_class_num_properties` and `hi_class_num_properties` 187 | % - `lo_class_num_methods` and `hi_class_num_methods` 188 | 189 | prefix = repmat(' ', 1, indentation); 190 | 191 | fprintf('%sNumber of lines: ', prefix); 192 | print_evaluation(class_stats.num_lines, ... 193 | check_settings('lo_class_num_lines'), ... 194 | check_settings('hi_class_num_lines')); 195 | 196 | fprintf('%sNumber of properties: ', prefix); 197 | print_evaluation(class_stats.num_properties, ... 198 | check_settings('lo_class_num_properties'), ... 199 | check_settings('hi_class_num_properties')); 200 | 201 | fprintf('%sNumber of methods: ', prefix); 202 | print_evaluation(class_stats.num_methods, ... 203 | check_settings('lo_class_num_methods'), ... 204 | check_settings('hi_class_num_methods')); 205 | end 206 | 207 | 208 | function script_stats = get_script_stats(script_struct) 209 | %GET_SCRIPT_STATS analyzes a script SCRIPT_STRUCT and 210 | % gathers some statistics SCRIPT_STATS about them. 
211 | % 212 | % Statistics gathered (fieldname): 213 | % - number of lines (num_lines) 214 | % - number of variables used in the function (num_variables) 215 | % - the maximum level of indentation in the function (max_indentation) 216 | % 217 | % The statistics are returned as struct SCRIPT_STATS 218 | 219 | script_stats.num_lines = length(split_lines(script_struct.body)); 220 | script_stats.num_variables = length(script_struct.variables); 221 | 222 | % max indentation 223 | keyword_indices = strcmp({script_struct.body.type}, 'keyword'); 224 | keywords = script_struct.body(keyword_indices); 225 | indentation = 1; 226 | max_indentation = 0; 227 | for keyword = keywords 228 | if keyword.hasText({'if' 'for' 'parfor' 'while' 'switch'}) 229 | indentation = indentation + 1; 230 | max_indentation = max(max_indentation, indentation); 231 | elseif keyword.hasText('end') 232 | indentation = indentation - 1; 233 | end 234 | end 235 | script_stats.max_indentation = max_indentation; 236 | end 237 | 238 | 239 | function print_script_stats(script_stats, indentation) 240 | %PRINT_SCRIPT_STATS prints some general statistics SCRIPT_STATS about 241 | % a script. The printed text is indented at INDENTATION spaces. 242 | % 243 | % This function prints an evaluation of 244 | % - the number of lines in the function 245 | % - the number of variables used in the script 246 | % - the maximum level of indentation in the script 247 | % 248 | % All of these values are evaluated as `good` if they are below a 249 | % certain low threshold; as `high` if they are above this threshold 250 | % and as `too high` and in red text if they exceed a high threshold. 
251 | % The thresholds can be controlled using the settings 252 | % - `lo_script_num_lines` and `hi_script_num_lines` 253 | % - `lo_script_num_variables` and `hi_script_num_variables` 254 | % - `lo_script_max_indentation` and `hi_script_max_indentation` 255 | prefix = repmat(' ', 1, indentation); 256 | 257 | fprintf('%sNumber of lines: ', prefix); 258 | print_evaluation(script_stats.num_lines, ... 259 | check_settings('lo_script_num_lines'), ... 260 | check_settings('hi_script_num_lines')); 261 | 262 | fprintf('%sNumber of variables: ', prefix); 263 | print_evaluation(script_stats.num_variables, ... 264 | check_settings('lo_script_num_variables'), ... 265 | check_settings('hi_script_num_variables')); 266 | 267 | fprintf('%sNumber of variables: ', prefix); 268 | print_evaluation(script_stats.max_indentation, ... 269 | check_settings('lo_script_max_indentation'), ... 270 | check_settings('hi_script_max_indentation')); 271 | end 272 | 273 | 274 | function func_stats = get_function_stats(func_struct, mlintInfo) 275 | %GET_FUNCTION_STATS analyzes a function FUNC_STRUCT and MLINTINFO and 276 | % gathers some statistics FUNC_STATS about them. 
277 | % 278 | % Statistics gathered (fieldname): 279 | % - number of lines (num_lines) 280 | % - number of function arguments (num_arguments) 281 | % - number of variables used in the function (num_variables) 282 | % - the maximum level of indentation in the function (max_indentation) 283 | % - the function complexity (complexity) 284 | % 285 | % The statistics are returned as struct FUNC_STATS 286 | 287 | func_stats.num_lines = length(split_lines(func_struct.body)); 288 | func_stats.num_arguments = length(func_struct.arguments); 289 | func_stats.num_variables = length(func_struct.variables); 290 | 291 | % max indentation 292 | keyword_indices = strcmp({func_struct.body.type}, 'keyword'); 293 | keywords = func_struct.body(keyword_indices); 294 | indentation = 1; 295 | max_indentation = 0; 296 | for keyword = keywords 297 | if keyword.hasText({'if' 'for' 'parfor' 'while' 'switch'}) 298 | indentation = indentation + 1; 299 | max_indentation = max(max_indentation, indentation); 300 | elseif keyword.hasText('end') 301 | indentation = indentation - 1; 302 | end 303 | end 304 | func_stats.max_indentation = max_indentation; 305 | 306 | % cyclomatic complexity 307 | mlintInfo = mlintInfo(strcmp({mlintInfo.id}, 'CABE')); 308 | mlintInfo = mlintInfo([mlintInfo.line] == func_struct.body(1).line); 309 | assert(length(mlintInfo) == 1); 310 | pattern = '''(?[^'']+)'' is (?[0-9]+)'; 311 | matches = regexp(mlintInfo.message, pattern, 'names'); 312 | func_stats.complexity = str2double(matches.n); 313 | end 314 | 315 | 316 | function print_function_stats(func_stats, indentation) 317 | %PRINT_FUNCTION_STATS prints some general statistics FUNC_STATS about 318 | % a function. The printed text is indented at INDENTATION spaces. 
319 | % 320 | % This function prints an evaluation of 321 | % - the number of lines in the function 322 | % - the number of function arguments 323 | % - the number of variables used in the function 324 | % - the maximum level of indentation in the function 325 | % - the function complexity 326 | % 327 | % All of these values are evaluated as `good` if they are below a 328 | % certain low threshold; as `high` if they are above this threshold 329 | % and as `too high` and in red text if they exceed a high threshold. 330 | % The thresholds can be controlled using the settings 331 | % - `lo_function_num_lines` and `hi_function_num_lines` 332 | % - `lo_function_num_arguments` and `hi_function_num_arguments` 333 | % - `lo_function_num_variables` and `hi_function_num_variables` 334 | % - `lo_function_max_indentation` and `hi_function_max_indentation` 335 | % - `lo_function_complexity` and `hi_function_complexity` 336 | 337 | prefix = repmat(' ', 1, indentation); 338 | 339 | fprintf('%sNumber of lines: ', prefix); 340 | print_evaluation(func_stats.num_lines, ... 341 | check_settings('lo_function_num_lines'), ... 342 | check_settings('hi_function_num_lines')); 343 | 344 | fprintf('%sNumber of function arguments: ', prefix); 345 | print_evaluation(func_stats.num_arguments, ... 346 | check_settings('lo_function_num_arguments'), ... 347 | check_settings('hi_function_num_arguments')); 348 | 349 | fprintf('%sNumber of used variables: ', prefix); 350 | print_evaluation(func_stats.num_variables, ... 351 | check_settings('lo_function_num_variables'), ... 352 | check_settings('hi_function_num_variables')); 353 | 354 | fprintf('%sMax level of nesting: ', prefix); 355 | print_evaluation(func_stats.max_indentation, ... 356 | check_settings('lo_function_max_indentation'), ... 357 | check_settings('hi_function_max_indentation')); 358 | 359 | fprintf('%sCode complexity: ', prefix); 360 | print_evaluation(func_stats.complexity, ... 361 | check_settings('lo_function_complexity'), ... 
362 | check_settings('hi_function_complexity')); 363 | end 364 | 365 | 366 | function print_evaluation(value, low_thr, high_thr) 367 | %PRINT_EVALUATION prints an evaluation of VALUE. 368 | % LOW_THR and HIGH_THR mark thresholds, above which the value is 369 | % described as "(good)" -> "(high)" -> "(too high)" in red 370 | 371 | if value < low_thr 372 | fprintf('%i (good)\n', value); 373 | elseif value < high_thr 374 | fprintf('%i (high)\n', value); 375 | else 376 | fprintf('%i [\b(too high)]\b\n', value); 377 | end 378 | end 379 | 380 | 381 | function print_report(report, indentation, filename) 382 | %PRINT_REPORT prints the contents of REPORT at INDENTATION. Each REPORT 383 | % item is written as a link to the appropriate place in FILENAME. 384 | 385 | prefix = repmat(' ', 1, indentation); 386 | 387 | for report_entry = report 388 | % print severe report_entrys in red: 389 | % red text is created by surrounding it with `[` and 390 | % `]`. The `` will delete the preceding 391 | % bracket and not show up in the text itself, but it will be 392 | % interpreted as a flag to change the text color. This is an 393 | % ancient ASCII convention. 394 | if report_entry.severity == 2 395 | fprintf('%sLine %i, col %i: [\b%s]\b\n', ... 396 | prefix, ... 397 | open_file_link(filename, report_entry.token.line), ... 398 | report_entry.token.line, ... 399 | report_entry.token.col, ... 400 | report_entry.message); 401 | 402 | % print regular report_entrys in black: 403 | else 404 | fprintf('%sLine %i, col %i: %s\n', ... 405 | prefix, ... 406 | open_file_link(filename, report_entry.token.line), ... 407 | report_entry.token.line, ... 408 | report_entry.token.col, ... 409 | report_entry.message); 410 | end 411 | end 412 | end 413 | 414 | 415 | function report = report_comments(tokenlist) 416 | %REPORT_COMMENTS REPORTs on the number of comments in TOKENLIST. 417 | % 418 | % Comments should not describe the code itself, but provide context 419 | % for reading the code. 
In other words, they should describe the 420 | % *why*, not the *what. 421 | % 422 | % returns a struct array REPORT with fields `token`, `message`, and 423 | % `severity`. 424 | % 425 | % This check can be switched off by setting `do_check_comments` in 426 | % CHECK_SETTINGS to FALSE. 427 | 428 | report = struct('token', {}, 'severity', {}, 'message', {}); 429 | if ~check_settings('do_check_comments') 430 | return 431 | end 432 | 433 | linelist = split_lines(tokenlist); 434 | num_lines = length(linelist); 435 | num_comments = 0; 436 | for line_idx = 1:length(linelist) 437 | line_tokens = linelist{line_idx}; 438 | if any(strcmp({line_tokens.type}, 'comment')) 439 | num_comments = num_comments + 1; 440 | end 441 | end 442 | 443 | usage = sprintf('(%i comments for %i lines of code)', ... 444 | num_comments, num_lines); 445 | if num_comments/num_lines < 0.1 446 | report = struct('token', tokenlist(1), ... 447 | 'severity', 2, ... 448 | 'message', ['too few comments ' usage]); 449 | elseif num_comments/num_lines < 0.2 450 | report = struct('token', tokenlist(1), ... 451 | 'severity', 1, ... 452 | 'message', ['very few comments ' usage]); 453 | end 454 | end 455 | 456 | 457 | function report = report_documentation(func_struct) 458 | %REPORT_DOCUMENTATION REPORTs on problems with the documentation of the 459 | % function in FUNC_STRUCT. 460 | % 461 | % Documentation is very important for humans. Code is not primarily 462 | % written for the machine to execute, but mostly for humans to read. 463 | % But many ideas are more efficiently described in prose than in code, 464 | % hence we write documentation. Functions in particular should always 465 | % be documented. 
466 | % 467 | % Problems might be: 468 | % - the function name is not mentioned in the documentation 469 | % - the function arguments are not mentioned 470 | % - the function return values are not mentioned 471 | % - there is no documentation 472 | % 473 | % returns a struct array REPORT with fields `token`, `message`, and 474 | % `severity`. 475 | % 476 | % This check can be switched off by setting `do_check_documentation` in 477 | % CHECK_SETTINGS to FALSE. 478 | 479 | report = struct('token', {}, 'severity', {}, 'message', {}); 480 | if ~check_settings('do_check_documentation') 481 | return 482 | end 483 | 484 | doc_text = get_function_documentation(func_struct.body); 485 | if isempty(doc_text) 486 | msg = 'there is no documentation'; 487 | report = [report struct('token', func_struct.body(1), ... 488 | 'severity', 2, ... 489 | 'message', msg)]; 490 | return 491 | end 492 | template = '%s ''%s'' is not mentioned in the documentation'; 493 | [~, funcname, ~] = fileparts(func_struct.name.text); 494 | if isempty(strfind(lower(doc_text), lower(funcname))) 495 | msg = sprintf(template, 'function name', func_struct.name.text); 496 | report = [report struct('token', func_struct.name, ... 497 | 'severity', 2, ... 498 | 'message', msg)]; 499 | end 500 | for variable = func_struct.arguments 501 | if isempty(strfind(lower(doc_text), lower(variable.text))) && ... 502 | ~strcmp(doc_text, 'varargin') 503 | msg = sprintf(template, 'function argument', variable.text); 504 | report = [report struct('token', variable, ... 505 | 'severity', 2, ... 506 | 'message', msg)]; %#ok 507 | end 508 | end 509 | for variable = func_struct.returns 510 | if isempty(strfind(lower(doc_text), lower(variable.text))) && ... 511 | ~strcmp(doc_text, 'varargout') 512 | msg = sprintf(template, 'return argument', variable.text); 513 | report = [report struct('token', variable, ... 514 | 'severity', 2, ... 
515 | 'message', msg)]; %#ok 516 | end 517 | end 518 | end 519 | 520 | 521 | function doc_text = get_function_documentation(tokenlist) 522 | %GET_FUNCTION_DOCUMENTATION extracts function documentation from TOKENLIST 523 | % 524 | % returns DOC_TEXT as a string 525 | 526 | % skip function declaration 527 | token_idx = 1; 528 | while token_idx <= length(tokenlist) && ... 529 | ~tokenlist(token_idx).isEqual('pair', ')') 530 | token_idx = token_idx + 1; 531 | end 532 | token_idx = token_idx + 2; 533 | 534 | % find documentation 535 | doc_types = {'comment' 'space' 'linebreak'}; 536 | start = token_idx; 537 | while token_idx <= length(tokenlist) && ... 538 | tokenlist(token_idx).hasType(doc_types) 539 | token_idx = token_idx + 1; 540 | end 541 | 542 | % extract documentation text 543 | comment_tokens = tokenlist(start:token_idx-1); 544 | comment_tokens = ... 545 | comment_tokens(strcmp({comment_tokens.type}, 'comment')); 546 | doc_text = [comment_tokens.text]; 547 | end 548 | 549 | 550 | function report = report_eval(tokenlist) 551 | %REPORT_EVAL REPORTs on uses of `eval` in TOKENLIST. 552 | % 553 | % Using `eval` is *never* the right thing to do. There is *always* 554 | % a better way. Seriously. 555 | % 556 | % returns a struct array REPORT with fields `token`, `message`, and 557 | % `severity`. 558 | % 559 | % This check can be switched off by setting `do_check_eval` in 560 | % CHECK_SETTINGS to FALSE. 561 | 562 | report = struct('token', {}, 'severity', {}, 'message', {}); 563 | if ~check_settings('do_check_eval') 564 | return 565 | end 566 | 567 | eval_tokens = tokenlist(strcmp({tokenlist.text}, 'eval') & ... 568 | strcmp({tokenlist.type}, 'identifier')); 569 | for t = eval_tokens 570 | msg = 'Eval should never be used'; 571 | report = [report struct('token', t, ... 572 | 'severity', 2, ... 
573 | 'message', msg)]; %#ok 574 | end 575 | end 576 | 577 | 578 | function report = report_operators(tokenlist) 579 | %REPORT_OPERATORS reports on incorrectly used operators in TOKENLIST 580 | % 581 | % To improve readability, operators should be treated like punctuation 582 | % in regular English, i.e. be preceded and followed by spaces just like 583 | % in English and math. In particular: 584 | % - relational operators such as `>`, `<`, `==`, `~=`, `<=`, `>=`, `=`, 585 | % `||`, and `&&` should be surrounded by spaces. 586 | % - punctuation such as `,` and `;` should be followed by a space. 587 | % - unary operators such as `@` and `...` should be preceded by a space. 588 | % 589 | % returns a struct array REPORT with fields `token`, `message`, and 590 | % `severity`. 591 | % 592 | % This check can be switched off by setting `do_check_operators` in 593 | % CHECK_SETTINGS to FALSE. 594 | 595 | report = struct('token', {}, 'severity', {}, 'message', {}); 596 | if ~check_settings('do_check_operators') 597 | return 598 | end 599 | 600 | space_around_operators = { '>' '<' '==' '>=' '<=' '~=' ... 601 | '=' '||' '&&'}; 602 | space_after_operators = { ',' ';' }; 603 | space_before_operators = { '@' '...' }; 604 | 605 | op_indices = find(strcmp({tokenlist.type}, 'punctuation')); 606 | for op_idx = op_indices 607 | has_space_before = op_idx > 1 && ... 608 | tokenlist(op_idx-1).hasType('space'); 609 | has_space_after = op_idx < length(tokenlist) && ... 610 | tokenlist(op_idx+1).hasType('space'); 611 | has_newline_after = op_idx < length(tokenlist) && ... 612 | tokenlist(op_idx+1).hasText(sprintf('\n')); 613 | if tokenlist(op_idx).hasText(space_around_operators) && ... 614 | (~has_space_before || ~has_space_after) 615 | msg = sprintf('no spaces around operator ''%s''', ... 616 | tokenlist(op_idx).text); 617 | report = [report struct('token', tokenlist(op_idx), ... 618 | 'severity', 1, ... 
619 | 'message', msg)]; %#ok 620 | elseif tokenlist(op_idx).hasText(space_after_operators) && ... 621 | ~has_space_after && ~has_newline_after 622 | msg = sprintf('no spaces after operator ''%s''', ... 623 | tokenlist(op_idx).text); 624 | report = [report struct('token', tokenlist(op_idx), ... 625 | 'severity', 1, ... 626 | 'message', msg)]; %#ok 627 | elseif tokenlist(op_idx).hasText(space_before_operators) && ... 628 | ~has_space_before 629 | msg = sprintf('no spaces before operator ''%s''', ... 630 | tokenlist(op_idx).text); 631 | report = [report struct('token', tokenlist(op_idx), ... 632 | 'severity', 1, ... 633 | 'message', msg)]; %#ok 634 | end 635 | end 636 | end 637 | 638 | 639 | function report = report_variables(varlist, tokenlist, description) 640 | %REPORT_VARIABLES checks all variables in VARLIST, as used in TOKENLIST, 641 | % and REPORTs on problems with these variables. DESCRIPTION is used 642 | % to describe the variable in REPORT. 643 | % 644 | % Problems with variables can be: 645 | % - The variable shadows a built-in 646 | % - The variable has a very short name and is used very often. 647 | % 648 | % In general, variable name lengths should correlate with the amount 649 | % of code they are used in. If variables are used over a long piece 650 | % of code, the programmer will stumble across the variable often, 651 | % and it should have a descriptive name. Short variable names are 652 | % only allowed if they are ephemeral, such as loop counters in small 653 | % loops. There, they don't need to be remembered for long, thus a short 654 | % name is permissible. 655 | % 656 | % returns a struct array REPORT with fields `token`, `message`, and 657 | % `severity`. 658 | % 659 | % This check can be switched off by setting `do_check_variables` in 660 | % CHECK_SETTINGS to FALSE. 
661 | 662 | report = struct('token', {}, 'severity', {}, 'message', {}); 663 | if ~check_settings('do_check_variables') 664 | return 665 | end 666 | 667 | for variable = varlist 668 | if does_shadow(variable.text) && ... 669 | ~any(strcmp(variable.text, {'varargin', 'varargout'})) 670 | msg = sprintf('%s ''%s'' shadows a built-in', ... 671 | description, variable.text); 672 | report = [report struct('token', variable, ... 673 | 'severity', 2, ... 674 | 'message', msg)]; %#ok 675 | end 676 | [numuses, spread] = get_variable_usage(variable.text, tokenlist); 677 | usage_descr = sprintf('(used %i times across %i lines)', ... 678 | numuses, spread); 679 | varlen = length(variable.text); 680 | 681 | short_spread = check_settings('lo_varname_short_spread'); 682 | short_length = check_settings('lo_varname_short_length'); 683 | long_spread = check_settings('lo_varname_long_spread'); 684 | long_length = check_settings('lo_varname_long_length'); 685 | slightly_too_short = ... 686 | (spread > short_spread && varlen <= short_length) || ... 687 | (spread > long_spread && varlen <= long_length); 688 | 689 | short_spread = check_settings('hi_varname_short_spread'); 690 | short_length = check_settings('hi_varname_short_length'); 691 | long_spread = check_settings('hi_varname_long_spread'); 692 | long_length = check_settings('hi_varname_long_length'); 693 | much_too_short = ... 694 | (spread > short_spread && varlen <= short_length) || ... 695 | (spread > long_spread && varlen <= long_length); 696 | 697 | 698 | if slightly_too_short 699 | msg = sprintf('%s ''%s'' is very short %s', ... 700 | description, variable.text, usage_descr); 701 | report = [report struct('token', variable, ... 702 | 'severity', 1, ... 703 | 'message', msg)]; %#ok 704 | elseif much_too_short 705 | msg = sprintf('%s ''%s'' is too short %s', ... 706 | description, variable.text, usage_descr); 707 | report = [report struct('token', variable, ... 708 | 'severity', 2, ... 
709 | 'message', msg)]; %#ok 710 | end 711 | end 712 | end 713 | 714 | 715 | function [numuses, linerange] = get_variable_usage(varname, tokenlist) 716 | %GET_VARIABLE_USAGE finds all uses of variable VARNAME in TOKENLIST 717 | % Returns the number of uses NUMUSES and the range of lines LINERANGE 718 | % in which the variable is used. 719 | 720 | uses = tokenlist(strcmp({tokenlist.text}, varname) & ... 721 | strcmp({tokenlist.type}, 'identifier')); 722 | numuses = length(uses); 723 | linelist = [uses.line]; 724 | linerange = max(linelist)-min(linelist); 725 | end 726 | 727 | 728 | function report = report_mlint_warnings(mlint_info, tokenlist) 729 | %REPORT_MLINT_WARNINGS reads through MLINT_INFO and REPORTs on all messages 730 | % that refer to the code in TOKENLIST. 731 | % 732 | % returns a struct array REPORT with fields `token`, `message`, and 733 | % `severity`. 734 | % 735 | % This check can be switched off by setting `do_check_mlint_warnings` in 736 | % CHECK_SETTINGS to FALSE. 737 | 738 | report = struct('token', {}, 'severity', {}, 'message', {}); 739 | if ~check_settings('do_check_mlint_warnings') 740 | return 741 | end 742 | 743 | mlint_info = mlint_info([mlint_info.line] >= tokenlist(1).line); 744 | mlint_info = mlint_info([mlint_info.line] <= tokenlist(end).line); 745 | mlint_info = mlint_info(~strcmp({mlint_info.id}, 'CABE')); 746 | if isempty(mlint_info) 747 | return 748 | end 749 | for idx = 1:length(mlint_info) 750 | mlint_msg = mlint_info(idx); 751 | token = Token('special', 'mlint warning', ... 752 | mlint_msg.line, mlint_msg.column(1)); 753 | report = [report struct('token', token, ... 754 | 'severity', 2, ... 755 | 'message', mlint_msg.message)]; %#ok 756 | end 757 | end 758 | 759 | 760 | function is_builtin = does_shadow(varname) 761 | %DOES_SHADOW figures out if variable with name VARNAME shadows a built-in 762 | % function or variable. 763 | % 764 | % returns a boolean IS_BUILTIN. 
765 | 766 | if any(exist(varname) == [2 3 4 5 6 8]) %#ok 767 | % now we know that something with name `varname` exists. But is it 768 | % a built-in, or something I wrote? 769 | % `which` can tell, in one of three spellings: 770 | shadows = which(varname, '-all'); 771 | builtinfun = 'is a built-in method'; 772 | builtinstr = 'built-in'; 773 | for idx = 1:length(shadows) 774 | shadow = shadows{idx}; 775 | if ( length(shadow) >= length(matlabroot) && ... 776 | strcmp(shadow(1:length(matlabroot)), matlabroot) ) || ... 777 | ( length(shadow) >= length(builtinstr) && ... 778 | strcmp(shadow(1:length(builtinstr)), builtinstr) ) || ... 779 | ( length(shadow) >= length(builtinfun) && ... 780 | strcmp(shadow(end-length(builtinfun)+1:end), builtinfun) ) 781 | is_builtin = true; 782 | return 783 | end 784 | end 785 | end 786 | is_builtin = false; 787 | end 788 | 789 | 790 | function report = report_line_length(tokenlist) 791 | %REPORT_LINE_LENGTH walks through TOKENLIST and REPORTs on the length of 792 | % all lines. 793 | % 794 | % While line length should not matter with today's high-resolution 795 | % displays, it is still useful to limit line lengths in order to be 796 | % able to fit several editor panes next to one another, or to be able 797 | % print the source code. 798 | % 799 | % - By default, lines longer than 75 characters are flagged 800 | % as `very long`, and 801 | % - lines longer than 90 characters are flagged as `too long`. 802 | % 803 | % returns a struct array REPORT with fields `token`, `message`, and 804 | % `severity`. 805 | % 806 | % This check can be switched off by setting `do_check_line_length` in 807 | % CHECK_SETTINGS to FALSE. 
808 | 809 | report = struct('token', {}, 'message', {}, 'severity', {}); 810 | if ~check_settings('do_check_line_length') 811 | return 812 | end 813 | lo_line_length = check_settings('lo_line_length'); 814 | hi_line_length = check_settings('hi_line_length'); 815 | 816 | linelist = split_lines(tokenlist); 817 | for line_idx = 1:length(linelist) 818 | line_tokens = linelist{line_idx}; 819 | line_text = [line_tokens.text]; 820 | if length(line_text) > lo_line_length 821 | report_token = Token('special', 'line warning', ... 822 | line_tokens(1).line, ... 823 | length(line_text)); 824 | report = [report struct('token', report_token, ... 825 | 'message', 'line very long', ... 826 | 'severity', 1)]; %#ok 827 | elseif length(line_text) > hi_line_length 828 | report_token = Token('special', 'line warning', ... 829 | line_tokens(1).line, ... 830 | length(line_text)); 831 | report = [report struct('token', report_token, ... 832 | 'message', 'line too long', ... 833 | 'severity', 2)]; %#ok 834 | end 835 | end 836 | end 837 | 838 | 839 | function report = report_indentation(func_struct) 840 | %REPORT_INDENTATION parses FUNC_STRUCT and REPORTs about its indentation. 841 | % 842 | % Indentation is one of the primary means of making code easy to read, 843 | % by highlighting the structure of the code. If code is not indented 844 | % correctly, it can be hard to see where where nested blocks (if, for, 845 | % etc.) begin and end. 846 | % 847 | % The first line is assumed to be indented correctly, and subsequent 848 | % indentation follows the normal MATLAB indentation rules: 849 | % 850 | % - Indent after `for`, `parfor`, `while`, `if`, `switch`, `classdef`, 851 | % `events`, `properties`, `enumeration`, `methods`, 852 | % `function`. 853 | % - Dedent for `end` 854 | % - Dedent momentarily for `else`, `elseif`, `case`, `otherwise`. 855 | % - Comments are allowed to be indented one level out, and any amount of 856 | % deeper indentation than the source code. 
857 | % - Continuation lines must be indented deeper than the surrounding 858 | % source code. 859 | % 860 | % returns a struct array REPORT with fields `token`, `message`, and 861 | % `severity`. 862 | % 863 | % This check can be switched off by setting `do_check_indentation` in 864 | % CHECK_SETTINGS to FALSE. 865 | % 866 | % The setting `indentation_check_like_matlab` controls whether 867 | % indentation should be checked like MATLAB does it (top-level function 868 | % bodies are not indented in function files) or how every other language 869 | % on this planet does it (function bodies are always indented). 870 | 871 | report = struct('token', {}, 'message', {}, 'severity', {}); 872 | if ~check_settings('do_check_indentation') 873 | return 874 | end 875 | 876 | linelist = split_lines(func_struct.body); 877 | 878 | nesting = func_struct.nesting; 879 | function_nesting = func_struct.nesting; 880 | 881 | is_switch_nesting = false; 882 | 883 | for line_idx = 1:length(linelist) 884 | line_tokens = linelist{line_idx}; 885 | is_continuation = is_continuation_line(line_idx, linelist); 886 | 887 | if isempty(line_tokens) 888 | continue 889 | end 890 | 891 | first_nonspace = get_first_nonspace(line_tokens); 892 | 893 | 894 | if ~is_continuation 895 | [nesting, function_nesting, correction] = ... 
indentation_rule(nesting, function_nesting, first_nonspace);

            % Special case for switch
            if first_nonspace.isEqual('keyword', 'switch')
                % Increment nesting by 1 for switch statement
                nesting = nesting + 1;
                correction = correction - 1;
                is_switch_nesting = true;
            end

            if first_nonspace.isEqual('keyword', 'end') && is_switch_nesting
                % Reverse nesting increment for switch statement at 'end'
                nesting = nesting - 1;
                is_switch_nesting = false;
            end
        end

        increment = check_settings('indentation_step');
        expected_indent = (nesting+correction) * increment;
        expected_indent = max(expected_indent, 0);

        current_indent = get_line_indentation(line_tokens);

        incorrect_comment = ...
            first_nonspace.hasType('comment') && ...
            ~(current_indent >= expected_indent) && ...
            current_indent ~= expected_indent-increment;
        incorrect_normal_line = ...
            ~first_nonspace.hasType('comment') && ...
            ~is_continuation && ...
            current_indent ~= expected_indent;
        incorrect_continuation_line = ...
            ~first_nonspace.hasType('comment') && ...
            is_continuation && ...
            current_indent <= expected_indent;

        if incorrect_comment || incorrect_normal_line || ...
           incorrect_continuation_line
            report_token = Token('special', 'indentation warning', ...
                                 line_tokens(1).line, line_tokens(1).col);
            report_entry = struct('token', report_token, ...
                                  'message', 'incorrect indentation', ...
                                  'severity', 2);
            report = [report report_entry]; %#ok
        end
    end
end


function yesNo = is_continuation_line(line_idx, linelist)
%IS_CONTINUATION_LINE checks if LINELIST{LINE_IDX} is a continuation
%   of the previous line. YESNO is a boolean.
%
%   A line is a continuation if any token of the line before it has the
%   text `...`. The first line is never a continuation.

    yesNo = false;
    if line_idx <= 1
        return
    end

    tokens_before = linelist{line_idx-1};
    yesNo = any(strcmp({tokens_before.text}, '...'));
end


function [nesting, function_nesting, correction] = indentation_rule(nesting, function_nesting, first_token)
%INDENTATION_RULE decides about the indentation of the current line
%   NESTING and FUNCTION_NESTING are updated according to the
%   FIRST_TOKEN of the current line and returned.
%
%   NESTING is the current depth within if/for/function/... blocks,
%   FUNCTION_NESTING is the current depth within function blocks only.
%   CORRECTION is an offset on NESTING that applies to the current line
%   only (e.g. -1 for the line that opens a block, or for `else`).
%
%   FUNCTION_NESTING is set to NaN, which disables every comparison
%   against it, as soon as a `classdef` is seen or if the setting
%   `indentation_check_like_matlab` is false. Otherwise it is used to
%   recognize top-level functions (FUNCTION_NESTING == 1), whose bodies
%   are expected one level further left than NESTING alone would say.
%
%   All values are integer levels of indentation. The caller multiplies
%   (NESTING + CORRECTION) with the setting `indentation_step` to get the
%   expected number of spaces.

    beginnings = check_settings('beginnings');
    middles = check_settings('middles');

    % function file rules do not apply in class files, or if the user
    % does not want MATLAB-style indentation:
    if first_token.isEqual('keyword', 'classdef')
        function_nesting = nan;
    end
    if ~check_settings('indentation_check_like_matlab')
        function_nesting = nan;
    end

    opens_function = first_token.isEqual('keyword', 'function');
    if opens_function || first_token.isEqual('keyword', beginnings)
        % the opening line itself still belongs to the outer level:
        nesting = nesting + 1;
        correction = -1;
        if opens_function
            function_nesting = function_nesting + 1;
        end
    elseif first_token.isEqual('keyword', 'end')
        % this `end` closes a function if the block depth equals the
        % function depth (never true while FUNCTION_NESTING is NaN):
        closes_function = (nesting == function_nesting);
        nesting = nesting - 1;
        correction = 0;
        if closes_function
            function_nesting = function_nesting - 1;
            if function_nesting == 1
                % cancels the top-level shift applied below
                correction = +1;
            end
        end
    elseif first_token.isEqual('keyword', middles)
        % else, elseif, case, ... line up with the block opener:
        correction = -1;
    else
        % a normal line:
        correction = 0;
    end

    % bodies of top-level functions are shifted one level to the left:
    if function_nesting == 1
        correction = correction - 1;
    end
end


function indentation = get_line_indentation(line_tokens)
%GET_LINE_INDENTATION returns the number of spaces at the beginning of
%   LINE_TOKENS. INDENTATION is an integer.
%
%   INDENTATION is the text length of the first token if that token is
%   of type space, and 0 otherwise (including for an empty line).

    indentation = 0;
    if isempty(line_tokens)
        return
    end

    leading_token = line_tokens(1);
    if leading_token.hasType('space')
        indentation = length(leading_token.text);
    end
end


function token = get_first_nonspace(tokenlist)
%GET_FIRST_NONSPACE returns the first TOKEN in TOKENLIST that is not a
%   token of type space.
%   This can be useful to return the first "real" token on a line.
%
%   If all tokens before the last one are spaces, the last token is
%   returned, whatever its type.

    last_idx = length(tokenlist);
    token_idx = 1;
    while true
        if token_idx >= last_idx || ~tokenlist(token_idx).hasType('space')
            break
        end
        token_idx = token_idx + 1;
    end
    token = tokenlist(token_idx);
end


function linelist = split_lines(tokens)
%SPLIT_LINES splits TOKENS into lines.
%   returns a cell array LINELIST of Token-arrays.
%
%   Lines are separated by linebreak tokens with the text `\n` or
%   `\r\n`. The separating tokens themselves are not part of any line.
%   Whatever follows the last separator forms the last line, so LINELIST
%   always has at least one (possibly empty) entry.

    num_tokens = length(tokens);
    newline_texts = {sprintf('\n'), sprintf('\r\n')};

    linelist = {};
    first_idx = 1;
    for idx = 1:num_tokens
        if tokens(idx).isEqual('linebreak', newline_texts)
            linelist{end+1} = tokens(first_idx:idx-1); %#ok
            first_idx = idx + 1;
        end
    end
    linelist{end+1} = tokens(first_idx:num_tokens);
end


function link = open_file_link(filename, linenum)
%OPEN_FILE_LINK returns a link target for HTML links
%   the LINK is supposed to be used as the target (href) of HTML links
%   inside MATLAB. It is a `matlab:` command that opens FILENAME at
%   LINENUM in the MATLAB editor.
%
%   Single quotes in FILENAME are doubled, so that the file name remains
%   one valid character vector literal within the generated command.

    prefix = 'matlab.desktop.editor.openAndGoToLine';
    % FILENAME is pasted into a single-quoted literal. An unescaped `'`
    % would end that literal early and break the command:
    quoted_name = strrep(filename, '''', '''''');
    link = sprintf('matlab:%s(''%s'', %i);', prefix, quoted_name, linenum);
end
--------------------------------------------------------------------------------
/check_settings.m:
--------------------------------------------------------------------------------
function value = check_settings(name)
%CHECK_SETTINGS returns settings for CHECK.
%   CHECK_SETTINGS(NAME) returns the VALUE of the setting called NAME.
%   NAME must be one of the field names defined below. Any other NAME
%   raises MATLAB's error for accessing a non-existent struct field.
%
%   Create a local copy of this file and overwrite values if you want
%   custom behavior in a specific project.

    % thresholds for the number of lines in classes:
    config.lo_class_num_lines = 200;
    config.hi_class_num_lines = 400;
    % thresholds for the number of properties in classes:
    config.lo_class_num_properties = 10;
    config.hi_class_num_properties = 15;
    % thresholds for the number of methods in classes:
    config.lo_class_num_methods = 10;
    config.hi_class_num_methods = 20;

    % thresholds for the number of lines in scripts:
    config.lo_script_num_lines = 100;
    config.hi_script_num_lines = 200;
    % thresholds for the number of variables in scripts:
    config.lo_script_num_variables = 10;
    config.hi_script_num_variables = 20;
    % thresholds for the level of indentation in scripts:
    config.lo_script_max_indentation = 4;
    config.hi_script_max_indentation = 8;

    % thresholds for the number of lines in functions:
    config.lo_function_num_lines = 50;
    config.hi_function_num_lines = 100;
    % thresholds for the number of arguments in functions:
    config.lo_function_num_arguments = 3;
    config.hi_function_num_arguments = 5;
    % thresholds for the number of variables in functions:
    config.lo_function_num_variables = 7;
    config.hi_function_num_variables = 15;
    % thresholds for the level of indentation in functions:
    config.lo_function_max_indentation = 3;
    config.hi_function_max_indentation = 6;
    % thresholds for the complexity of functions:
    config.lo_function_complexity = 10;
    config.hi_function_complexity = 15;

    % thresholds for the line length of files:
    config.lo_line_length = 75;
    config.hi_line_length = 90;

    % threshold for the variable length and spread (spread is the
    % number of lines in which a variable is used).
    % Read this as "if a variable name is less than 3 characters
    % long, it should be used in no more than 3 lines":
    config.lo_varname_short_length = 3;
    config.lo_varname_short_spread = 3;
    config.lo_varname_long_length = 5;
    config.lo_varname_long_spread = 10;
    config.hi_varname_short_length = 3;
    config.hi_varname_short_spread = 5;
    config.hi_varname_long_length = 5;
    config.hi_varname_long_spread = 15;

    % switches to switch whole modules on or off:
    config.do_check_comments = true;
    config.do_check_documentation = true;
    config.do_check_eval = true;
    config.do_check_operators = true;
    config.do_check_variables = true;
    config.do_check_mlint_warnings = true;
    config.do_check_line_length = true;
    config.do_check_indentation = true;

    % indent by this many spaces per level of indentation:
    config.indentation_step = 4;
    % Matlab does not indent top-level function bodies. Most other
    % languages would think this behavior funny:
    config.indentation_check_like_matlab = true;

    % keywords for tokenize_code
    config.keywords = {'for' 'try' 'while' 'if' 'else' 'elseif' 'switch' ...
                       'case' 'otherwise' 'function' 'classdef' 'methods' ...
                       'properties' 'events' 'enumeration' 'parfor' ...
                       'return' 'break' 'continue' 'catch', 'arguments'};

    % keyword beginnings which are considered for indentation calculation
    config.beginnings = {'for' 'parfor' 'while' 'if' 'switch' 'classdef' ...
                         'events' 'properties' 'enumeration' 'methods' ...
                         'function' 'try', 'arguments'};
    % keyword middles which are considered for indentation calculation
    config.middles = {'else' 'elseif' 'case' 'otherwise' 'catch'};

    value = config.(name);
end
--------------------------------------------------------------------------------
/run_unittests.m:
--------------------------------------------------------------------------------
function run_unittests()
%RUN_UNITTESTS Runs all unit tests
%   Runs every test of the package `UnitTest` and its subpackages with
%   text output, then displays the results as a table and as an array.
%
%   Any error raised while building or running the suite is caught and
%   its report is displayed. The function itself does not throw.

    import matlab.unittest.TestSuite
    import matlab.unittest.TestRunner

    try
        % Collect the tests of the package and all of its subpackages
        suite = TestSuite.fromPackage('UnitTest', ...
                                      'IncludingSubpackages', true);

        % Run all tests
        runner = TestRunner.withTextOutput;
        result = runner.run(suite);

        % Display results
        disp(table(result));
        disp(result);

        % Raise an error if any test failed or did not complete.
        % NOTE(review): this error is caught by the `catch` below and
        % only displayed, so callers (e.g. a CI job) cannot detect
        % failing tests from this function -- confirm this is intended.
        if any([result.Failed]) || any([result.Incomplete])
            error('There are failing unittests!')
        end
    catch err
        disp(err.getReport)
    end
end
--------------------------------------------------------------------------------
/testFiles/MatlabArgumentClass.m:
--------------------------------------------------------------------------------
classdef MatlabArgumentClass < matlab.mixin.Heterogeneous
    %MATLABARGUMENTCLASS This is an example class for testing the
    %   argument validation
    %
    %   Some more comments to make the checker happy
    %   Some more comments to make the checker happy
    %   returns a new OBJ.
8 | 9 | % 10 | % 11 | properties (Access = private) 12 | property1 (1,1) string = "Hello World" 13 | 14 | property2 (1,:) char = 'Hello World' 15 | 16 | property3 {mustBeTextScalar} 17 | end 18 | 19 | methods (Access = protected) 20 | 21 | function obj = foo_function(input1, input2, options) 22 | %FOO_FUNCTION This is an example function for testing the 23 | % indentation check 24 | % output1 = foo_function: input1, input2 25 | % Some more comments to make the checker happy 26 | 27 | arguments 28 | input1 (1,1) string 29 | input2 {mustBeText} 30 | options.?matlab.mixin.Heterogeneous 31 | end 32 | 33 | try 34 | input1 = 42; 35 | catch 36 | input2 = 42; 37 | end 38 | % Some more comments to make the checker happy 39 | if input1 40 | obj = 1; 41 | elseif input2 42 | obj = 2; 43 | else 44 | obj = 0; 45 | end 46 | 47 | obj.property3 = options; 48 | 49 | end 50 | 51 | function foobar = second_function(barfoo) 52 | %SECOND_FUNCTION This is an example function for testing the 53 | % indentation check 54 | % foobar, barfoo 55 | foobar = barfoo; 56 | end 57 | 58 | function varargout = variable_length_of_in_and_output(varargin) 59 | %VARIABLE_LENGTH_OF_IN_AND_OUTPUT is provided with input param 60 | % VARARGIN and output parameter VARARGOUT 61 | varargout = varargin; 62 | end 63 | 64 | function output = test_linebreak_with_continuation_operator(inputarg) 65 | %TEST_LINEBREAK_WITH_CONTINUATION_OPERATOR is a test to verify 66 | % line continuation operator 67 | % INPUTARG, OUTPUT 68 | 69 | assignment_at_first_line = ... 70 | inputarg; 71 | 72 | assignment_at_second_line = ... some comment 73 | assignment_at_first_line; 74 | 75 | output = .... 
4 dots give also comment 76 | assignment_at_second_line; 77 | end 78 | 79 | function test_switch_case(inputarg) 80 | %TEST_SWITCH_CASE test indentation of switch case 81 | % INPUTARG 82 | % Some more comments to make the checker happy 83 | 84 | switch inputarg 85 | case 1 86 | return 87 | case 2 88 | return 89 | otherwise 90 | return 91 | end 92 | end 93 | end 94 | end 95 | -------------------------------------------------------------------------------- /testFiles/MatlabIndentedClass.m: -------------------------------------------------------------------------------- 1 | classdef MatlabIndentedClass 2 | %MATLABINDENTEDCLASS This is an example class for testing the 3 | % indentation check 4 | % 5 | % Some more comments to make the checker happy 6 | % Some more comments to make the checker happy 7 | % returns a new OBJ. 8 | 9 | % 10 | % 11 | properties(Access = private) 12 | foobar 13 | end 14 | 15 | methods(Access = protected) 16 | 17 | function output1 = foo_function(input1, input2) 18 | %FOO_FUNCTION This is an example function for testing the 19 | % indentation check 20 | % output1 = foo_function: input1, input2 21 | % Some more comments to make the checker happy 22 | 23 | try 24 | input1 = 42; 25 | catch 26 | input2 = 42; 27 | end 28 | % Some more comments to make the checker happy 29 | if input1 30 | output1 = 1; 31 | elseif input2 32 | output1 = 2; 33 | else 34 | output1 = 0; 35 | end 36 | 37 | end 38 | 39 | function foobar = second_function(barfoo) 40 | %SECOND_FUNCTION This is an example function for testing the 41 | % indentation check 42 | % foobar, barfoo 43 | foobar = barfoo; 44 | end 45 | 46 | function varargout = variable_length_of_in_and_output(varargin) 47 | %VARIABLE_LENGTH_OF_IN_AND_OUTPUT is provided with input param 48 | % VARARGIN and output parameter VARARGOUT 49 | varargout = varargin; 50 | end 51 | 52 | function output = test_linebreak_with_continuation_operator(inputarg) 53 | %TEST_LINEBREAK_WITH_CONTINUATION_OPERATOR is a test to verify 54 | 
% line continuation operator 55 | % INPUTARG, OUTPUT 56 | 57 | assignment_at_first_line = ... 58 | inputarg; 59 | 60 | assignment_at_second_line = ... some comment 61 | assignment_at_first_line; 62 | 63 | output = .... 4 dots give also comment 64 | assignment_at_second_line; 65 | end 66 | 67 | function test_switch_case(inputarg) 68 | %TEST_SWITCH_CASE test indentation of switch case 69 | % INPUTARG 70 | % Some more comments to make the checker happy 71 | 72 | switch inputarg 73 | case 1 74 | return 75 | case 2 76 | return 77 | otherwise 78 | return 79 | end 80 | end 81 | end 82 | end 83 | -------------------------------------------------------------------------------- /test_MatlabIndentedClass.m: -------------------------------------------------------------------------------- 1 | function test_MatlabIndentedClass() 2 | 3 | assert(check_settings('indentation_check_like_matlab') == true) 4 | 5 | addpath('testFiles') 6 | check('testFiles/MatlabIndentedClass.m'); 7 | 8 | end -------------------------------------------------------------------------------- /test_check.m: -------------------------------------------------------------------------------- 1 | %% Tokenizing a text should not change the content 2 | text = fileread('check.m'); 3 | tokens = tokenize_code(text); 4 | reconstructed_text = horzcat(tokens.text); 5 | assert(strcmp(reconstructed_text, text)) 6 | 7 | 8 | %% Function names should be extracted 9 | report = analyze_file('', tokenize_code('function foo(); end')); 10 | assert(strcmp(report.name.text, 'foo')) 11 | 12 | report = analyze_file('', tokenize_code('function x = foo(); end')); 13 | assert(strcmp(report.name.text, 'foo')) 14 | 15 | report = analyze_file('', tokenize_code('function [x, y] = foo(); end')); 16 | assert(strcmp(report.name.text, 'foo')) 17 | 18 | 19 | %% Function return names should be extracted 20 | report = analyze_file('', tokenize_code('function foo(); end')); 21 | assert(isempty(report.returns)) 22 | 23 | report = analyze_file('', 
tokenize_code('function x = foo(); end')); 24 | assert(strcmp(report.returns(1).text, 'x')) 25 | assert(length(report.returns) == 1) 26 | 27 | report = analyze_file('', tokenize_code('function [x, y] = foo(); end')); 28 | assert(strcmp(report.returns(1).text, 'x')) 29 | assert(strcmp(report.returns(2).text, 'y')) 30 | assert(length(report.returns) == 2) 31 | 32 | 33 | %% Function arguments should be extracted 34 | report = analyze_file('', tokenize_code('function foo(); end')); 35 | assert(isempty(report.arguments)) 36 | 37 | report = analyze_file('', tokenize_code('function foo(x); end')); 38 | assert(strcmp(report.arguments(1).text, 'x')) 39 | assert(length(report.arguments) == 1) 40 | 41 | report = analyze_file('', tokenize_code('function foo(x, y); end')); 42 | assert(strcmp(report.arguments(1).text, 'x')) 43 | assert(strcmp(report.arguments(2).text, 'y')) 44 | assert(length(report.arguments) == 2) 45 | 46 | 47 | %% Operators should be parsed correctly 48 | tokens = tokenize_code('a>=-b'); 49 | assert(tokens(2).hasText('>=')) 50 | assert(tokens(3).hasText('-')) 51 | 52 | 53 | %% Transpose Operators should not be strings 54 | tokens = tokenize_code('a'''); 55 | assert(tokens(2).isEqual('punctuation', '''')) 56 | 57 | tokens = tokenize_code('a.'''); 58 | assert(tokens(2).isEqual('punctuation', '.''')) 59 | 60 | tokens = tokenize_code('a''+''a''.'''); 61 | assert(tokens(2).isEqual('punctuation', '''')) 62 | assert(tokens(4).isEqual('string', '''a''')) 63 | assert(tokens(5).isEqual('punctuation', '.''')) 64 | 65 | 66 | %% differentiate commands from expressions 67 | tokens = tokenize_code('help me please % test'); 68 | assert(tokens(1).isEqual('identifier', 'help')) 69 | assert(tokens(3).isEqual('string', 'me')) 70 | assert(tokens(5).isEqual('string', 'please')) 71 | assert(tokens(7).isEqual('comment', '% test')) 72 | 73 | 74 | %% differentiate keyword end from variable end 75 | tokens = tokenize_code('if a(end); end'); 76 | assert(tokens(5).isEqual('identifier', 
'end')) 77 | assert(tokens(9).isEqual('keyword', 'end')) 78 | 79 | 80 | %% differentiate semicolons from linebreaks 81 | tokens = tokenize_code('[1;2];3'); 82 | assert(tokens(3).isEqual('punctuation', ';')) 83 | assert(tokens(6).isEqual('linebreak', ';')) 84 | 85 | 86 | %% Identify block comments 87 | comment = sprintf('%%{ \n foo bar \n %%}'); 88 | tokens = tokenize_code(comment); 89 | assert(length(tokens) == 1) 90 | assert(tokens.isEqual('comment', comment)) 91 | 92 | tokens = tokenize_code(sprintf('x\n%s\nx', comment)); 93 | assert(length(tokens) == 5) 94 | assert(tokens(3).isEqual('comment', comment)) 95 | 96 | 97 | %% line breaks should break lines 98 | tokens = tokenize_code(',foo bar'); 99 | assert(tokens(1).hasType('linebreak')) 100 | assert(tokens(4).hasType('string')) 101 | 102 | tokens = tokenize_code(';foo bar'); 103 | assert(tokens(1).hasType('linebreak')) 104 | assert(tokens(4).hasType('string')) 105 | 106 | 107 | %% line breaks should not break lines within brackets 108 | tokens = tokenize_code('[a;b];'); 109 | assert(tokens(3).hasType('punctuation')) 110 | assert(tokens(6).hasType('linebreak')) 111 | 112 | tokens = tokenize_code('[a,b],'); 113 | assert(tokens(3).hasType('punctuation')) 114 | assert(tokens(6).hasType('linebreak')) 115 | 116 | %% comments follow continuation operator 117 | tokens = tokenize_code('... % this is a comment'); 118 | assert(tokens(1).hasType('punctuation')); 119 | assert(tokens(3).hasType('comment')); 120 | 121 | tokens = tokenize_code('... 
this is a comment');
assert(tokens(1).hasType('punctuation'));
assert(tokens(2).hasType('space'));
assert(tokens(3).hasType('comment'));

tokens = tokenize_code('....');
assert(tokens(1).hasType('punctuation'));
assert(tokens(2).hasType('comment'));

tokens = tokenize_code('.*...');
assert(tokens(1).hasType('punctuation'));
assert(tokens(2).hasType('punctuation'));
--------------------------------------------------------------------------------
/tokenize_code.m:
--------------------------------------------------------------------------------
function tokenlist = tokenize_code(source_code)
%TOKENIZE_CODE splits M-code into Tokens
%   TOKENIZE_CODE(SOURCE_CODE) splits the SOURCE_CODE into interpretable
%   parts. It returns an object array of Tokens TOKENLIST, where each
%   token has a 'type', a 'text', a 'line', and a 'col'. Concatenating
%   all 'text's recreates the original SOURCE_CODE.
%   'type' can be one of:
%   - 'keyword'
%   - 'identifier'
%   - 'space'
%   - 'punctuation'
%   - 'property'
%   - 'string'
%   - 'number'
%   - 'pair'
%   - 'linebreak'
%   - 'comment'
%   - 'escape'
%
%   'line' and 'col' are the line number and the column of the first
%   character of the token, both counted from 1.
%
%   Raises an error for characters or operator sequences that the
%   tokenizer does not know.
%
%   See also: Token

% (c) 2016, Bastian Bechtold
% This code is licensed under the terms of the BSD 3-clause license

    punctuation = '=.&|><~+-*^/\:@?';
    open_pairs = '{[(';
    close_pairs = '}])';
    escapes = '!%';

    keywords = check_settings('keywords');

    operators = { '+'  '-'  '*'  '/'  '^'  '\' ...
                  '.+' '.-' '.*' './' '.^' '.\' ...
                  '>' '<' '~' '==' '>=' '<=' '~=' ...
                  '@' '=' ',' ';' '||' '&&' '|' '&' '...' ':' '.?'};
    unary_operators = '+-@~.';

    spaces = sprintf(' \t');
    breaks = sprintf('\n\r');
    number_start = '0123456789';
    number_body = [number_start 'eEij.'];
    name_start = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    name_body = [name_start '0123456789_'];

    tokenlist = Token.empty;
    pos = 1;                % the current character position in the source_code
    line_num = 1;           % the current line number
    line_start = pos;       % where the current line started
    is_first_symbol = true; % the first symbol can have special meaning
    source_code = [source_code sprintf('\n')]; % ensure proper file end
    nesting = 0;            % count braces, since some operators have different
                            % meaning inside and outside braces
    while pos < length(source_code)
        letter = source_code(pos);
        % a variable or a function or a keyword:
        if any(letter == name_start)
            symbol = skip(name_body);
            % keywords such as `if` or `classdef`
            if any(strcmp(symbol, keywords))
                is_first_symbol = false;
                add_token('keyword', symbol);
            % the keyword `end`:
            elseif strcmp(symbol, 'end') && nesting == 0
                add_token('keyword', symbol);
            % anything else is just a variable or function name:
            else
                add_token('identifier', symbol);
                % if this is the start of a command, the rest of the line
                % needs to be interpreted as strings.
                % Note: this is not the case if the identifier is inside a
                % 'properties' or 'arguments' block. In that case, the rest
                % of the line needs to be interpreted as validation routine.
                % The block test scans all tokens so far, so it is only
                % evaluated if the cheap conditions hold.
                if is_first_symbol && nesting == 0 && ~in_validation_block()
                    is_first_symbol = false;
                    saved_pos = pos;
                    first_space = skip(spaces);
                    first_word = skip_unless([spaces breaks ';,%']);
                    pos = saved_pos;
                    % commands are any single identifier that is not
                    % followed by space-operator-space:
                    if ~any(strcmp(first_word, operators)) && ...
                       ~isempty(first_space)
                        parse_command()
                    end
                end
            end
        % a sequence of one or more spaces or tabs:
        elseif any(letter == spaces)
            symbol = skip(spaces);
            add_token('space', symbol);
        % any binary or unary operator, such as `+`, `>=`, or `.foo`
        elseif any(letter == punctuation)
            is_first_symbol = false;
            % property access begins with a `.` operator, and includes a
            % name, such as `.foo`. Classifying this as punctuation makes
            % it easier to differentiate it from variable/function names.
            if letter == '.' && pos < length(source_code) && ...
               any(source_code(pos+1) == name_start)
                pos = pos + 1;
                symbol = [letter skip(name_body)];
                add_token('property', symbol);
            % any other operator:
            else
                symbol = skip(punctuation);
                % one operator:
                % Multiple operators can be present in 'symbol', e.g.
                % '&&...' or '|...'. Find largest operator at start of
                % symbol.
                largest_start_operator = find_pattern(operators);
                if ~isempty(largest_start_operator)
                    % Keep the remainder of symbol for the next iteration.
                    % POS has to be rewound before the token is added,
                    % since add_token derives the column from POS.
                    pos = pos - length(symbol) + length(largest_start_operator);
                    add_token('punctuation', largest_start_operator);
                    % All text on the same line after '...' must be
                    % interpreted as a comment.
                    if strcmp(largest_start_operator, '...')
                        symbol = skip(spaces);
                        if ~isempty(symbol)
                            add_token('space', symbol)
                        end
                        symbol = skip_unless(breaks);
                        if ~isempty(symbol)
                            add_token('comment', symbol);
                        end
                    end
                % a binary operator, followed by a unary operator:
                % NOTE(review): this looks unreachable, since a symbol
                % that starts with an operator is handled above -- confirm
                % before removing.
                elseif any(symbol(end) == unary_operators) && ...
                       any(strcmp(symbol(1:end-1), operators))
                    add_token('punctuation', symbol(1:end-1));
                    add_token('punctuation', symbol(end));
                % element-wise transpose operator:
                % This has to be parsed here, so as to not confuse the `'`
                % with the beginning of a string.
                elseif strcmp(symbol, '.') && source_code(pos) == ''''
                    pos = pos + 1;
                    add_token('punctuation', '.''');
                % struct access operator such as `.(foo)`:
                % There is normally no `.` operator, but it makes sense to
                % classify `.(` as such here.
                elseif strcmp(symbol, '.') && source_code(pos) == '('
                    add_token('punctuation', '.');
                % this should never happen:
                else
                    error(['unknown operator ''' symbol '''']);
                end
            end
        % strings and transpose begin with `'`. The `.'` operator has
        % already been handled above:
        elseif letter == ''''
            % the first symbol cannot be transpose, so must be string
            if is_first_symbol
                string = skip_string('''');
                add_token('string', string);
            else
                previous = tokenlist(end);

                % transpose operator:
                % To differentiate the start of a string from the
                % transpose operator, we need to check whether the
                % previous token was a value or an operator. If a value,
                % `'` means transpose. If an operator, `'` marks the start
                % of a string.
                if previous.isEqual('pair', {'}' ']' ')'}) || ...
                   previous.hasType({'identifier' 'number' 'property'})
                    pos = pos + 1;
                    add_token('punctuation', letter);
                % strings:
                else
                    string = skip_string('''');
                    add_token('string', string);
                end
            end
            is_first_symbol = false;
        % string that starts with double quotes (")
        elseif letter == '"'
            is_first_symbol = false;
            string = skip_string('"');
            add_token('string', string);
        % we don't make any distinction between different kinds of parens:
        elseif any(letter == open_pairs)
            is_first_symbol = false;
            pos = pos + 1;
            nesting = nesting + 1;
            add_token('pair', letter);
        elseif any(letter == close_pairs)
            pos = pos + 1;
            nesting = nesting - 1;
            add_token('pair', letter);
        % new lines are line breaks and increment the line:
        elseif any(letter == breaks)
            % split into individual line breaks
            start = pos;
            line_breaks = regexp(skip(breaks), '(\n)|(\r\n)', 'match');
            pos = start;
            for line_break = line_breaks
                pos = pos + length(line_break{1});
                add_token('linebreak', line_break{1});
                % add the token before incrementing the line to avoid
                % confusing add_token
                line_num = line_num + 1;
                line_start = pos;
            end
            is_first_symbol = true;
        % `,` and `;` are line breaks that do not increment the line,
        % or simple operators if they occur within a pair
        elseif any(letter == ';,')
            pos = pos + 1;
            if nesting == 0
                add_token('linebreak', letter);
                is_first_symbol = true;
            else
                add_token('punctuation', letter);
            end
        % numbers are easy, and may contain `.`, `e`, `E`, `i`, and `j`
        elseif any(letter == number_start)
            is_first_symbol = false;
            symbol = skip(number_body);
            add_token('number', symbol);
        % finally, comments and `!` include the rest of the line,
        % unless they are block comments, in which case they might include
        % much more.
        elseif any(letter == escapes)
            comment = skip_line();
            if letter == '%'
                if ~isempty(regexp(comment, '^\%\{\s*$', 'once')) && ...
                   is_first_symbol
                    comment = [comment skip_block_comment()]; %#ok
                end
                add_token('comment', comment);
                % A block comment contains line breaks that never pass
                % through the line break branch above. Account for them
                % here (after the token was added, so that the comment
                % itself is annotated with its first line), or all
                % following tokens get a wrong line and column.
                newline_idx = find(comment == sprintf('\n'));
                if ~isempty(newline_idx)
                    line_num = line_num + length(newline_idx);
                    line_start = pos - length(comment) + newline_idx(end);
                end
            else
                add_token('escape', comment);
            end
        else
            error('unknown identifier');
        end
    end

    function add_token(token_type, token_text)
    %ADD_TOKEN adds a new token to the token list, and annotates it
    %   with the current line number and column. TOKEN_TYPE and TOKEN_TEXT
    %   become the Token's `type` and `text` property.
    %   POS must point to the character right after TOKEN_TEXT, as the
    %   column is calculated backwards from there.
    %   this modifies TOKENLIST!

        char_num = pos-line_start-length(token_text)+1;
        tokenlist(length(tokenlist)+1) = Token(token_type, token_text, ...
                                               line_num, char_num);
    end

    function yesNo = in_validation_block()
    %IN_VALIDATION_BLOCK checks if the most recent keyword token in
    %   TOKENLIST is `properties` or `arguments`. YESNO is a boolean, and
    %   false if there is no keyword token yet.

        keyword_idx = find(strcmp({tokenlist.type}, 'keyword'), 1, 'last');
        yesNo = ~isempty(keyword_idx) && ...
            any(strcmp(tokenlist(keyword_idx).text, {'properties' 'arguments'}), 2);
    end

    function string = skip(letters)
    %SKIP skips LETTERS and returns skipped letters as STRING
    %   this modifies POS!

        string_start = pos;
        while any(source_code(pos) == letters) && pos < length(source_code)
            pos = pos + 1;
        end
        string = source_code(string_start:pos-1);
    end

    function string = skip_unless(letters)
    %SKIP_UNLESS skips letters not in LETTERS and returns skipped letters
    %   as STRING.
    %   this modifies POS!

        string_start = pos;
        while all(source_code(pos) ~= letters)
            pos = pos + 1;
        end
        string = source_code(string_start:pos-1);
    end

    function string = skip_line()
    %SKIP_LINE skips to the end of the line and returns the line as STRING
    %   this modifies POS!

        string_start = pos;
        while all(source_code(pos) ~= sprintf('\r\n'))
            pos = pos + 1;
        end
        string = source_code(string_start:pos-1);
    end

    function string = skip_string(quote_type)
    %SKIP_STRING skips to the end of the string and returns the STRING
    %   the STRING includes both quotation marks. QUOTE_TYPE is the
    %   type of quote character to look for (' or "). A doubled quote
    %   character inside the string does not end the string.
    %   this modifies POS!

        string_start = pos;
        while true
            if source_code(pos) ~= quote_type || pos == string_start
                pos = pos + 1;
            elseif length(source_code) > pos ...
                   && source_code(pos+1) == quote_type
                pos = pos + 2;
            else % source_code(pos) == quote_type
                pos = pos + 1;
                break;
            end
        end
        string = source_code(string_start:pos-1);
    end

    function string = skip_block_comment()
    %SKIP_BLOCK_COMMENT skips to the end of the block comment and returns
    %   the whole multi-line block comment as STRING.
    %   this modifies POS!

        block_start = pos;
        is_first_statement = false;
        while pos <= length(source_code)
            % line break:
            if any(source_code(pos) == sprintf('\n\r'))
                is_first_statement = true;
            % don't change `is_first_statement` while skipping spaces:
            elseif any(source_code(pos) == sprintf('\t '))
                % nothing changes
            % block comment ends must be alone on the line:
            elseif source_code(pos) == '%' && is_first_statement && ...
                   pos < length(source_code) && source_code(pos+1) == '}'
                pos = pos + 2;
                break
            % any other character is just part of the comment:
            else
                is_first_statement = false;
            end
            pos = pos + 1;
        end
        string = source_code(block_start:pos-1);
    end

    function parse_command()
    %PARSE_COMMAND parses to the end of a command, and appends all args
    %   to the token list.
    %   this modifies POS and TOKENLIST!

        while pos < length(source_code)
            letter = source_code(pos);
            % commands can contain literal strings:
            if letter == ''''
                string_literal = skip_string('''');
                add_token('string', string_literal);
            elseif letter == '"'
                string_literal = skip_string('"');
                add_token('string', string_literal);
            % commands can contain spaces:
            elseif any(letter == spaces)
                symbol = skip(spaces);
                add_token('space', symbol);
            % commands end at `\n`, `%`, `,`, or `;`:
            elseif any(letter == [breaks '%,;'])
                break
            % any other non-space sequence is interpreted as a string:
            else
                str = skip_unless([breaks spaces '%,;']);
                add_token('string', str);
            end
        end
    end

    function pat_out = find_pattern(pat)
    %FIND_PATTERN Find pattern with most characters in symbol.
    %   pat_out = FIND_PATTERN(pat) returns the pattern with which
    %   SYMBOL starts and that has the most characters. The input
    %   pat is a cell array of character vectors that represent the
    %   patterns that should be tested. If symbol does not start
    %   with any pattern defined by pat, pat_out is empty.

        pat_out = '';
        % One boolean per pattern: is the pattern a prefix of symbol?
        % (strncmp is false if symbol is shorter than the pattern.) This
        % keeps the result aligned with pat, even if a pattern occurs
        % more than once in symbol.
        is_prefix = cellfun(@(p) strncmp(symbol, p, length(p)), pat);
        if any(is_prefix)
            start_pat_array = pat(is_prefix);
            [~, longest_idx] = max(cellfun(@length, start_pat_array));
            pat_out = start_pat_array{longest_idx};
        end
    end
end
--------------------------------------------------------------------------------