├── +UnitTest
│   └── +tokernizer
│       ├── FileCheckTests.m
│       └── TokenizeTests.m
├── README.md
├── Token.m
├── analyze_file.m
├── check.m
├── check_settings.m
├── run_unittests.m
├── testFiles
│   ├── MatlabArgumentClass.m
│   └── MatlabIndentedClass.m
├── test_MatlabIndentedClass.m
├── test_check.m
└── tokenize_code.m
/+UnitTest/+tokernizer/FileCheckTests.m:
--------------------------------------------------------------------------------
1 | classdef FileCheckTests < matlab.unittest.TestCase
2 | %FILECHECKTESTS Runs CHECK on the sample classes in testFiles
3 | methods(TestClassSetup)
4 | % Shared setup for the entire test class: put testFiles on the path
5 | function setPathDef(~)
6 | addpath('testFiles') % relative path: resolved against the current folder
7 | end
8 | end
9 |
10 | methods(TestClassTeardown)
11 | % Shared teardown for the entire test class: take testFiles off the path
12 | function rmPathDef(~)
13 | rmpath('testFiles')
14 | end
15 | end
16 |
17 | methods(Test)
18 | % Test methods
19 |
20 | function testMatlabIndentedClass(testCase)
21 | % CHECK must run on MatlabIndentedClass.m without issuing a warning
22 | H = @() check('MatlabIndentedClass.m');
23 | testCase.verifyWarningFree(H);
24 | end
25 |
26 | function testMatlabArgumentValidation(testCase)
27 | % CHECK must run on MatlabArgumentClass.m without issuing a warning
28 |
29 | % Argument validation not supported by versions earlier than 9.7
30 | % (earlier than R2019b), so the test is filtered out there
31 | testCase.assumeFalse(verLessThan('matlab', '9.7'))
32 | H = @() check('MatlabArgumentClass.m');
33 | testCase.verifyWarningFree(H);
34 | end
35 |
36 | end
37 |
38 | end
--------------------------------------------------------------------------------
/+UnitTest/+tokernizer/TokenizeTests.m:
--------------------------------------------------------------------------------
1 | classdef TokenizeTests < matlab.unittest.TestCase
2 | %TOKENIZETESTS Tests for tokenize_code
3 |
4 | methods(Test)
5 | function testText(obj)
6 | %TESTTEXT Tokenizing a text should not change the content
7 |
8 | % Read file
9 | text = fileread('check.m');
10 |
11 | % Tokenize code
12 | tokens = tokenize_code(text);
13 |
14 | % Reconstruct text from tokens
15 | reconstructed_text = horzcat(tokens.text);
16 |
17 | % Compare with actual text
18 | obj.assertEqual(reconstructed_text, text)
19 | end
20 |
21 | function testDoubleQuote(obj)
22 | %TESTDOUBLEQUOTE Tests a double quoted string
23 |
24 | % Input data for the test
25 | input_str = '"test"'; % String: "test"
26 |
27 | % Construct expected output for comparison
28 | expected = Token('string', input_str, 1, 1);
29 |
30 | % Get actual output
31 | actual = tokenize_code(input_str);
32 |
33 | % Compare actual output with expected output
34 | obj.verifyEqual(actual, expected);
35 | end
36 |
37 | function testSoloDoubleQuote(obj)
38 | %TESTSOLODOUBLEQUOTE Tests an assignment of a double-quoted string
39 |
40 | % Input data for the test
41 | input_str = 'output = "test"'; % String: output = "test"
42 |
43 | % Construct expected output for comparison (type, text, line, col)
44 | expected(1) = Token('identifier', 'output', 1, 1);
45 | expected(2) = Token('space', ' ', 1, 7);
46 | expected(3) = Token('punctuation', '=', 1, 8);
47 | expected(4) = Token('space', ' ', 1, 9);
48 | expected(5) = Token('string', '"test"', 1, 10);
49 |
50 | % Get actual output
51 | actual = tokenize_code(input_str);
52 |
53 | % Compare actual output with expected output
54 | obj.verifyEqual(actual, expected);
55 | end
56 |
57 | function testNestedQuote(obj)
58 | %TESTNESTEDQUOTE Tests a single quote inside a double-quoted string
59 |
60 | % Input data for the test
61 | input_str = '"let''s go"'; % String: "let's go"
62 |
63 | % Construct expected output for comparison: the whole input is one string token
64 | expected = Token('string', input_str, 1, 1);
65 |
66 | % Get actual output
67 | actual = tokenize_code(input_str);
68 |
69 | % Compare actual output with expected output
70 | obj.verifyEqual(actual, expected);
71 | end
72 |
73 | function testNestedQuote2(obj)
74 | %TESTNESTEDQUOTE2 Tests a double quote inside single quote
75 |
76 | % Input data for the test
77 | input_str = '''He said, "hi"'''; % String: 'He said, "hi"'
78 |
79 | % Construct expected output for comparison
80 | expected = Token('string', input_str, 1, 1);
81 |
82 | % Get actual output
83 | actual = tokenize_code(input_str);
84 |
85 | % Compare actual output with expected output
86 | obj.verifyEqual(actual, expected);
87 | end
88 |
89 | function testFunctionNames(obj)
90 | %TESTFUNCTIONNAMES Function names should be extracted
91 | report = analyze_file('', tokenize_code('function foo(); end'));
92 | obj.assertEqual(report.name.text, 'foo')
93 |
94 | report = analyze_file('', tokenize_code('function x = foo(); end'));
95 | obj.assertEqual(report.name.text, 'foo')
96 |
97 | report = analyze_file('', tokenize_code('function [x, y] = foo(); end'));
98 | obj.assertEqual(report.name.text, 'foo')
99 | end
100 |
101 | function testFunctionReturnNames(obj)
102 | %TESTFUNCTIONRETURNNAMES Function return names should be extracted
103 | report = analyze_file('', tokenize_code('function foo(); end'));
104 | obj.assertEmpty(report.returns)
105 |
106 | report = analyze_file('', tokenize_code('function x = foo(); end'));
107 | obj.assertEqual(report.returns(1).text, 'x')
108 | obj.assertLength(report.returns, 1)
109 |
110 | report = analyze_file('', tokenize_code('function [x, y] = foo(); end'));
111 | obj.assertEqual(report.returns(1).text, 'x')
112 | obj.assertEqual(report.returns(2).text, 'y')
113 | obj.assertLength(report.returns, 2)
114 | end
115 |
116 | function testFunctionArguments(obj)
117 | %TESTFUNCTIONARGUMENTS Function arguments should be extracted
118 | report = analyze_file('', tokenize_code('function foo(); end'));
119 | obj.assertEmpty(report.arguments)
120 |
121 | report = analyze_file('', tokenize_code('function foo(x); end'));
122 | obj.assertEqual(report.arguments(1).text, 'x')
123 | obj.assertLength(report.arguments, 1)
124 |
125 | report = analyze_file('', tokenize_code('function foo(x, y); end'));
126 | obj.assertEqual(report.arguments(1).text, 'x')
127 | obj.assertEqual(report.arguments(2).text, 'y')
128 | obj.assertLength(report.arguments, 2)
129 |
130 | end
131 |
132 | function testOperatorsGeneral(obj)
133 | %TESTOPERATORSGENERAL Operators should be parsed correctly
134 | tokens = tokenize_code('a>=-b');
135 | obj.assertTrue(tokens(2).hasText('>='))
136 | obj.assertTrue(tokens(3).hasText('-'))
137 | end
138 |
139 | function testOperatorsTranspose(obj)
140 | %TESTOPERATORSTRANSPOSE Transpose Operators should not be strings
141 | tokens = tokenize_code('a''');
142 | obj.assertTrue(tokens(2).isEqual('punctuation', ''''))
143 |
144 | tokens = tokenize_code('a.''');
145 | obj.assertTrue(tokens(2).isEqual('punctuation', '.'''))
146 |
147 | tokens = tokenize_code('a''+''a''.''');
148 | obj.assertTrue(tokens(2).isEqual('punctuation', ''''))
149 | obj.assertTrue(tokens(4).isEqual('string', '''a'''))
150 | obj.assertTrue(tokens(5).isEqual('punctuation', '.'''))
151 | end
152 |
153 | function testCommands(obj)
154 | %TESTCOMMANDS Differentiate commands from expressions
155 | tokens = tokenize_code('help me please % test');
156 | obj.assertTrue(tokens(1).isEqual('identifier', 'help'))
157 | obj.assertTrue(tokens(3).isEqual('string', 'me'))
158 | obj.assertTrue(tokens(5).isEqual('string', 'please'))
159 | obj.assertTrue(tokens(7).isEqual('comment', '% test'))
160 | end
161 |
162 | function testEnd(obj)
163 | %TESTEND Differentiate keyword end from variable end
164 | tokens = tokenize_code('if a(end); end');
165 | obj.assertTrue(tokens(5).isEqual('identifier', 'end'))
166 | obj.assertTrue(tokens(9).isEqual('keyword', 'end'))
167 | end
168 |
169 | function testSimicolon(obj)
170 | %TESTSEMICOLONS Differentiate semicolons from linebreaks
171 | tokens = tokenize_code('[1;2];3');
172 | obj.assertTrue(tokens(3).isEqual('punctuation', ';'))
173 | obj.assertTrue(tokens(6).isEqual('linebreak', ';'))
174 | end
175 |
176 | function testBlock(obj)
177 | %TESTBLOCK Identify block comments
178 | comment = sprintf('%%{ \n foo bar \n %%}');
179 | tokens = tokenize_code(comment);
180 | obj.assertLength(tokens, 1)
181 | obj.assertTrue(tokens.isEqual('comment', comment))
182 |
183 | tokens = tokenize_code(sprintf('x\n%s\nx', comment));
184 | obj.assertLength(tokens, 5)
185 | obj.assertTrue(tokens(3).isEqual('comment', comment))
186 | end
187 |
188 | function testLinebreak(obj)
189 | %TESTLINEBREAK Test line breaks
190 |
191 | % Line breaks should break lines
192 | tokens = tokenize_code(',foo bar');
193 | obj.assertTrue(tokens(1).hasType('linebreak'))
194 | obj.assertTrue(tokens(4).hasType('string'))
195 |
196 | tokens = tokenize_code(';foo bar');
197 | obj.assertTrue(tokens(1).hasType('linebreak'))
198 | obj.assertTrue(tokens(4).hasType('string'))
199 |
200 | % Line breaks should not break lines within brackets
201 | tokens = tokenize_code('[a;b];');
202 | obj.assertTrue(tokens(3).hasType('punctuation'))
203 | obj.assertTrue(tokens(6).hasType('linebreak'))
204 |
205 | tokens = tokenize_code('[a,b],');
206 | obj.assertTrue(tokens(3).hasType('punctuation'))
207 | obj.assertTrue(tokens(6).hasType('linebreak'))
208 | end
209 |
210 | function testComment(obj)
211 | %TESTCOMMENT Test conventional comments in text
212 |
213 | % Conventional comments in text
214 | tokens = tokenize_code('% this is a comment');
215 | obj.assertLength(tokens, 1)
216 | obj.assertTrue(tokens(1).hasType('comment'));
217 |
218 | tokens = tokenize_code(' % this is a comment');
219 | obj.assertLength(tokens, 2)
220 | obj.assertTrue(tokens(1).hasType('space'));
221 | obj.assertTrue(tokens(2).hasType('comment'));
222 |
223 | txt = sprintf('%s\n%s', ...
224 | ' % this is a comment', ...
225 | ' && ...');
226 | tokens = tokenize_code(txt);
227 | obj.assertLength(tokens, 7)
228 | obj.assertTrue(tokens(1).hasType('space'));
229 | obj.assertTrue(tokens(2).hasType('comment'));
230 | obj.assertTrue(tokens(3).hasType('linebreak'));
231 | obj.assertTrue(tokens(4).hasType('space'));
232 | obj.assertTrue(tokens(5).hasType('punctuation'));
233 | obj.assertTrue(tokens(6).hasType('space'));
234 | obj.assertTrue(tokens(7).hasType('punctuation'));
235 | end
236 |
237 | function testCommentContinuationOperator(obj)
238 | %TESTCOMMENTCONTINUATIONOPERATOR Test comments that follow continuation operator
239 |
240 | % Test comments that follow continuation operator
241 | tokens = tokenize_code('... % this is a comment');
242 | obj.assertLength(tokens, 3)
243 | obj.assertTrue(tokens(1).hasType('punctuation'));
244 | obj.assertTrue(tokens(2).hasType('space'));
245 | obj.assertTrue(tokens(3).hasType('comment'));
246 |
247 | tokens = tokenize_code('... this is a comment');
248 | obj.assertLength(tokens, 3)
249 | obj.assertTrue(tokens(1).hasType('punctuation'));
250 | obj.assertTrue(tokens(2).hasType('space'));
251 | obj.assertTrue(tokens(3).hasType('comment'));
252 |
253 | tokens = tokenize_code(' ... % this is a comment');
254 | obj.assertLength(tokens, 4)
255 | obj.assertTrue(tokens(1).hasType('space'));
256 | obj.assertTrue(tokens(2).hasType('punctuation'));
257 | obj.assertTrue(tokens(3).hasType('space'));
258 | obj.assertTrue(tokens(4).hasType('comment'));
259 |
260 | tokens = tokenize_code('....');
261 | obj.assertLength(tokens, 2)
262 | obj.assertTrue(tokens(1).hasType('punctuation'));
263 | obj.assertTrue(tokens(2).hasType('comment'));
264 |
265 | tokens = tokenize_code('..., this is a comment');
266 | obj.assertLength(tokens, 2)
267 | obj.assertTrue(tokens(1).hasType('punctuation'));
268 | obj.assertTrue(tokens(2).hasType('comment'));
269 |
270 | tokens = tokenize_code('.*...');
271 | obj.assertLength(tokens, 2)
272 | obj.assertTrue(tokens(1).hasType('punctuation'));
273 | obj.assertTrue(tokens(2).hasType('punctuation'));
274 |
275 | tokens = tokenize_code(' &&...this is a comment');
276 | obj.assertLength(tokens, 4)
277 | obj.assertTrue(tokens(1).hasType('space'));
278 | obj.assertTrue(tokens(2).hasType('punctuation'));
279 | obj.assertTrue(tokens(3).hasType('punctuation'));
280 | obj.assertTrue(tokens(4).hasType('comment'));
281 |
282 | tokens = tokenize_code('&... this is a comment');
283 | obj.assertLength(tokens, 4)
284 | obj.assertTrue(tokens(1).hasType('punctuation'));
285 | obj.assertTrue(tokens(2).hasType('punctuation'));
286 | obj.assertTrue(tokens(3).hasType('space'));
287 | obj.assertTrue(tokens(4).hasType('comment'));
288 |
289 | % Test comments that follow continuation operator with line break
290 | txt = sprintf('%s\n%s', ...
291 | ' |... this is a comment', ...
292 | ' ||.... this is a comment');
293 | tokens = tokenize_code(txt);
294 | obj.assertLength(tokens, 10)
295 | obj.assertTrue(tokens(1).hasType('space'));
296 | obj.assertTrue(tokens(2).hasType('punctuation'));
297 | obj.assertTrue(tokens(3).hasType('punctuation'));
298 | obj.assertTrue(tokens(4).hasType('space'));
299 | obj.assertTrue(tokens(5).hasType('comment'));
300 | obj.assertTrue(tokens(6).hasType('linebreak'));
301 | obj.assertTrue(tokens(7).hasType('space'));
302 | obj.assertTrue(tokens(8).hasType('punctuation'));
303 | obj.assertTrue(tokens(9).hasType('punctuation'));
304 | obj.assertTrue(tokens(10).hasType('comment'));
305 |
306 | txt = sprintf('%s\n%s\n%s', ...
307 | ' % this is a comment', ...
308 | ' true||.... this is a comment', ...
309 | ' false% this is a comment');
310 | tokens = tokenize_code(txt);
311 | obj.assertLength(tokens, 12)
312 | obj.assertTrue(tokens(1).hasType('space'));
313 | obj.assertTrue(tokens(2).hasType('comment'));
314 | obj.assertTrue(tokens(3).hasType('linebreak'));
315 | obj.assertTrue(tokens(4).hasType('space'));
316 | obj.assertTrue(tokens(5).hasType('identifier'));
317 | obj.assertTrue(tokens(6).hasType('punctuation'));
318 | obj.assertTrue(tokens(7).hasType('punctuation'));
319 | obj.assertTrue(tokens(8).hasType('comment'));
320 | obj.assertTrue(tokens(9).hasType('linebreak'));
321 | obj.assertTrue(tokens(10).hasType('space'));
322 | obj.assertTrue(tokens(11).hasType('identifier'));
323 | obj.assertTrue(tokens(12).hasType('comment'));
324 | end
325 | end
326 | end
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Matlab Code Analyzer
2 | ==================
3 |
4 | MATLAB comes with the very important tool MLINT, which can check your code for common defects. Experience shows that these hints can be very helpful for cleaning up MATLAB code, and preventing simple errors.
5 |
6 | Crucially though, MLINT is not a style checker. That is where this program comes in:
7 |
8 | Say you have some code in `ugly_code.m`. You can analyze this code for problems using one simple command:
9 |
10 | ```matlab
11 | check ugly_code.m
12 | ```
13 |
14 | This might produce a report like this:
15 |
16 | ```
17 | Code Analysis for ugly_code.m
18 |
19 | Required files: ugly_code.m, ugly_toolbox.m
20 | Required toolboxes: MATLAB, Signal Processing Toolbox
21 |
22 | Function ugly_code (Line 1, col 18):
23 |
24 | Number of lines: 67 (high)
25 | Number of function arguments: 2 (good)
26 | Number of used variables: 5 (good)
27 | Max level of nesting: 3 (high)
28 | Code complexity: 6 (good)
29 |
30 | Line 1, col 1: too few comments (2 comments for 67 lines of code)
31 | Line 1, col 10: return argument 'szOut' is very short (used 5 times across 38 lines)
32 | Line 1, col 18: function argument 'testInput' is not mentioned in the documentation
33 | Line 15, col 84: very long line
34 | Line 20, col 22: no spaces after operator ','
35 | Line 27, col 1: incorrect indentation
36 | Line 27, col 1: variable 'szOut' is very short (used 5 times across 38 lines)
37 | Line 27, col 23: variable 'text' shadows a built-in
38 | Line 27, col 34: Eval should never be used
39 | Line 39, col 10: no spaces around operator '='
40 | ```
41 |
42 | A report like this will be printed for every function in the file, for script-files, and for classes. The more serious of these comments will be highlighted in red, whereas less important ones will stay black. Every line number is clickable and opens directly in the editor.
43 |
44 | Additionally, this comes with a settings file `check_settings.m`, which can change the thresholds on all warnings, and even enable or disable whole categories of warnings entirely.
45 |
46 | Contributing
47 | ------------
48 |
49 | While this file works well for our current applications, it is a complex piece of software, and it has not been thoroughly tested yet. If you find a bug, or would like to see a new feature, or would like to contribute a new feature, please feel free to open an issue or pull request.
50 |
51 | However, this is not my job, and I can not guarantee an immediate response, or support for every problem. That said, the code is available under the terms of the BSD 3-clause license, so feel free to use it however you like as long as you honor my authorship of it.
52 |
53 | Also, please bear in mind that all of the warnings generated by this program are just that: Warnings. They are *not* laws. If a slightly longer line improves readability, please *do not* make it shorter just to make the style analyzer happy. Please watch [this video](https://www.youtube.com/watch?v=wf-BqAjZb8M) for some context.
54 |
55 | And finally, while this style checker can find many issues, it is by no means perfect. It cannot comment on whether your variable names are good or not, whether your comments are out of date or not, or whether your code makes intuitive sense when reading, or just results in confusion. For more in-depth heuristics on how to improve these aspects of your code, please read the wonderful [MATLAB Style Guidelines 2.0](http://mathworks.com/matlabcentral/fileexchange/46056-matlab-style-guidelines-2-0).
56 |
--------------------------------------------------------------------------------
/Token.m:
--------------------------------------------------------------------------------
1 | classdef Token < handle
2 | properties
3 | type % category tag, e.g. 'identifier', 'keyword' or 'comment'
4 | text % the piece of source code this token stands for
5 | line % line of the token in the source code
6 | col % column of the token in the source code
7 | end
8 |
9 | methods
10 | function obj = Token(type, text, line, col)
11 | %TOKEN an atomic piece of source code
12 | % Each token references an atomic piece of source code TEXT at a
13 | % specific LINE and COL. Each TOKEN is tagged as a certain TYPE.
14 | % Returns a new OBJ.
15 |
16 | obj.type = type;
17 | obj.text = text;
18 | obj.line = line;
19 | obj.col = col;
20 | end
21 |
22 | function yesNo = hasType(obj, type)
23 | %HASTYPE checks if OBJ has matching TYPE
24 | % TYPE is a char vector or a cell array of candidates (any may match). YESNO is a boolean.
25 |
26 | yesNo = any(strcmp(obj.type, type));
27 | end
28 |
29 | function yesNo = hasText(obj, text)
30 | %HASTEXT checks if OBJ has matching TEXT
31 | % TEXT is a char vector or a cell array of candidates (any may match). YESNO is a boolean.
32 |
33 | yesNo = any(strcmp(obj.text, text));
34 | end
35 |
36 | function yesNo = isEqual(obj, type, text)
37 | %ISEQUAL checks if OBJ has matching TYPE and TEXT
38 | % Both checks must succeed (see HASTYPE and HASTEXT). YESNO is a boolean.
39 |
40 | yesNo = obj.hasType(type) && obj.hasText(text);
41 | end
42 | end
43 | end
44 |
--------------------------------------------------------------------------------
/analyze_file.m:
--------------------------------------------------------------------------------
1 | function blocks = analyze_file(filename, tokenlist)
2 | %ANALYZE_FILE analyzes TOKENLIST and extracts information about BLOCKS
3 | % in FILENAME. TOKENLIST is assumed to be the content of FILENAME.
4 | %
5 | % Returns a struct array with fields:
6 | % - name: a Token holding the function name (for scripts and classes: a 'special' Token holding FILENAME)
7 | % - body: the tokens that make up the body of the function
8 | % - nesting: how deeply is this block nested within other blocks
9 | % - children: other blocks nested within this block
10 | % (again as a struct array)
11 | % - variables: variables defined in this block, or properties if the
12 | % block is a class.
13 | % - arguments: function arguments of this block (if a function)
14 | % - returns: return variable names of this block (if a function)
15 | % - type: one of 'Function', 'Nested Function', 'Subfunction',
16 | % 'Class', or 'Script'.
17 | % - filename: the FILENAME.
18 |
19 | % (c) 2016, Bastian Bechtold
20 | % This code is licensed under the terms of the BSD 3-clause license
21 |
22 | beginnings = check_settings('beginnings');
23 | % (BEGINNINGS presumably lists the keywords that open an 'end'-terminated block -- confirm in check_settings)
24 | blocks = struct('name', {}, 'body', {}, 'nesting', {}, ...
25 | 'children', {}, 'variables', {}, ...
26 | 'arguments', {}, 'returns', {}, ...
27 | 'type', {}, 'filename', {});
28 | function_stack = struct('start', {}, 'nesting', {}, 'children', {});
29 | nesting = 0;
30 | is_first_block = true;
31 | main_type = '';
32 | for current_pos = 1:length(tokenlist)
33 | current_token = tokenlist(current_pos);
34 |
35 | % count the 'end's to figure out function extents:
36 | if current_token.isEqual('keyword', beginnings)
37 | nesting = nesting + 1;
38 | elseif current_token.isEqual('keyword', 'end')
39 | nesting = nesting - 1;
40 | end
41 |
42 | % determine file type (Script, Function, or Class) from the first token that is neither a line break nor a comment:
43 | if isempty(main_type) && ...
44 | ~current_token.hasType({'linebreak', 'comment'})
45 | if current_token.isEqual('keyword', 'function')
46 | main_type = 'Function';
47 | elseif current_token.isEqual('keyword', 'classdef')
48 | main_type = 'Class';
49 | else
50 | main_type = 'Script';
51 | end
52 | end
53 |
54 | % pre-compute intermediate values for better readability:
55 | is_end_of_block = current_token.isEqual('keyword', 'end') && ...
56 | ~isempty(function_stack) && ...
57 | nesting == function_stack(end).nesting;
58 | is_end_of_function_file = current_pos == length(tokenlist) && ...
59 | ~isempty(function_stack);
60 | is_end_of_other_file = current_pos == length(tokenlist) && ...
61 | any(strcmp(main_type, {'Script' 'Class'}));
62 |
63 | % build a stack of function definitions:
64 | % We don't know where these functions end, yet. As soon as we
65 | % know the end, it will get appended to the block list. For
66 | % now, only record where the function starts.
67 | if current_token.isEqual('keyword', 'function')
68 | % include any leading space in the function body, so that
69 | % later analysis steps can figure out the initial
70 | % indentation of the function:
71 | if current_pos > 1 && tokenlist(current_pos-1).hasType('space')
72 | function_start = current_pos - 1;
73 | else
74 | function_start = current_pos;
75 | end
76 |
77 | % save the new function on the function stack (nesting-1 presumably undoes the increment made above for this 'function' keyword -- confirm):
78 | stack_frame = struct('start', function_start, ...
79 | 'nesting', nesting-1, ...
80 | 'children', []);
81 | function_stack = [function_stack stack_frame]; %#ok
82 |
83 | elseif is_end_of_block || is_end_of_function_file
84 | function_body = ...
85 | tokenlist(function_stack(end).start:current_pos);
86 |
87 | % determine function type (Top-Level, Nested, or Subfunction):
88 | if nesting > 0 && current_pos ~= length(tokenlist)
89 | block_type = 'Nested Function';
90 | elseif is_first_block
91 | block_type = main_type;
92 | is_first_block = false;
93 | else
94 | block_type = 'Subfunction';
95 | end
96 |
97 | % build block struct (cell-wrapped values keep struct() from expanding them into a struct array):
98 | new_block = struct( ...
99 | 'name', get_funcname(function_body), ...
100 | 'body', function_body, ...
101 | 'nesting', function_stack(end).nesting, ...
102 | 'children', function_stack(end).children, ...
103 | 'variables', {get_funcvariables(function_body)}, ...
104 | 'arguments', {get_funcarguments(function_body)}, ...
105 | 'returns', {get_funcreturns(function_body)}, ...
106 | 'type', block_type, 'filename', filename);
107 |
108 | % update function stack with new block struct (it becomes a child of the enclosing function, or a top-level block):
109 | function_stack(end) = [];
110 | if nesting > 0 && ~isempty(function_stack)
111 | if isempty(function_stack(end).children)
112 | function_stack(end).children = new_block;
113 | else
114 | function_stack(end).children = ...
115 | [function_stack(end).children new_block];
116 | end
117 | else
118 | blocks = [blocks new_block]; %#ok
119 | end
120 |
121 | elseif is_end_of_other_file
122 | % in classes, variables contains properties:
123 | if strcmp(main_type, 'Script')
124 | variables = {get_variables(tokenlist)};
125 | else
126 | variables = {get_properties(tokenlist)};
127 | end
128 | blocks = struct('name', Token('special', filename, 0, 0), ...
129 | 'body', tokenlist, ...
130 | 'nesting', 0, ...
131 | 'children', blocks, ...
132 | 'variables', variables, ...
133 | 'arguments', [], ...
134 | 'returns', [], ...
135 | 'type', main_type, ...
136 | 'filename', filename);
137 | end
138 | end
139 | end
140 |
141 |
142 | function variables = get_properties(tokenlist)
143 | %GET_PROPERTIES collects the property VARIABLES declared in TOKENLIST
144 | % VARIABLES is an object array of Tokens: inside a properties block, the
145 | % first identifier that follows each line break is collected.
146 |
147 | variables = Token.empty;
148 | inside_block = false; % between a 'properties' keyword and the next 'end'
149 | at_line_start = false; % a line break was seen, but no identifier yet
150 | for idx = 1:length(tokenlist)
151 |     current = tokenlist(idx);
152 |     if current.isEqual('keyword', 'properties')
153 |         inside_block = true;
154 |         at_line_start = false;
155 |     elseif inside_block && current.isEqual('keyword', 'end')
156 |         inside_block = false;
157 |     end
158 |     % a line break re-arms the search for the line's first identifier:
159 |     if current.hasType('linebreak')
160 |         at_line_start = true;
161 |     elseif at_line_start && inside_block && current.hasType('identifier')
162 |         variables = [variables current]; %#ok
163 |         at_line_start = false;
164 |     end
165 | end
166 | end
167 |
168 |
169 | function variables = get_funcvariables(tokenlist)
170 | %GET_FUNCVARIABLES lists the VARIABLES assigned after the declaration
171 | % Everything up to the first ')' in TOKENLIST is skipped.
172 | % See also: get_variables
173 |
174 | declaration_end = search_token('pair', ')', tokenlist, 1, +1);
175 | body_tokens = tokenlist(declaration_end+1:end);
176 | variables = get_variables(body_tokens);
177 | end
178 |
179 |
180 | function variables = get_variables(tokenlist)
181 | %GET_VARIABLES collects the VARIABLES that are assigned in TOKENLIST
182 | % For every '=' punctuation token, the tokens from the preceding line
183 | % break up to the '=' are scanned, and each identifier outside of
184 | % parentheses and braces counts as an assigned variable.
185 | % VARIABLES is an object array of Tokens (the first occurrence of each
186 | % name), ordered by line and then by column.
187 |
188 | first_seen = containers.Map(); % variable name -> Token
189 | for eq_idx = 1:length(tokenlist)
190 |     if ~tokenlist(eq_idx).isEqual('punctuation', '=')
191 |         continue
192 |     end
193 |     % the left hand side starts at the closest preceding line break:
194 |     lhs_start = search_token('linebreak', [], tokenlist, eq_idx, -1);
195 |     depth = 0; % current level of () and {} nesting
196 |     for candidate = tokenlist(lhs_start:eq_idx)
197 |         if candidate.isEqual('pair', {'{' '('})
198 |             depth = depth + 1;
199 |         elseif candidate.isEqual('pair', {'}' ')'})
200 |             depth = depth - 1;
201 |         elseif depth == 0 && candidate.hasType('identifier') && ...
202 |                 ~first_seen.isKey(candidate.text)
203 |             first_seen(candidate.text) = candidate;
204 |         end
205 |     end
206 | end
207 | collected = first_seen.values();
208 | variables = [collected{:}]; % convert to object array
209 | if ~isempty(variables)
210 |     % two stable sorts: by column first, then by line, which keeps the
211 |     % column order of variables that share a line:
212 |     [~, by_col] = sort([variables.col]);
213 |     variables = variables(by_col);
214 |     [~, by_line] = sort([variables.line]);
215 |     variables = variables(by_line);
216 | end
217 | end
218 |
219 |
220 | function name = get_funcname(tokenlist)
221 | %GET_FUNCNAME finds the Token that holds the function NAME in TOKENLIST
222 | % NAME is the closest identifier before the first opening parenthesis.
223 |
224 | paren_idx = search_token('pair', '(', tokenlist, 1, +1);
225 | name_idx = search_token('identifier', [], tokenlist, paren_idx, -1);
226 | name = tokenlist(name_idx);
227 | end
228 |
229 |
230 | function arguments = get_funcarguments(tokenlist)
231 | %GET_FUNCARGUMENTS analyzes TOKENLIST to find function arguments
232 | % ARGUMENTS is an object array of the identifier Tokens between the
233 | % first opening parenthesis and the next closing parenthesis.
234 |
235 | open_idx = search_token('pair', '(', tokenlist, 1, +1);
236 | close_idx = search_token('pair', ')', tokenlist, open_idx, +1);
237 | enclosed = tokenlist(open_idx+1:close_idx-1);
238 | arguments = enclosed(strcmp({enclosed.type}, 'identifier'));
239 | end
240 |
241 |
242 | function returns = get_funcreturns(tokenlist)
243 | %GET_FUNCRETURNS analyzes TOKENLIST to find function return values
244 | % RETURNS is an object array of the identifier Tokens between the
245 | % 'function' keyword and the function name.
246 |
247 | keyword_idx = search_token('keyword', 'function', tokenlist, 1, +1);
248 | paren_idx = search_token('pair', '(', tokenlist, keyword_idx, +1);
249 | name_idx = search_token('identifier', [], tokenlist, paren_idx, -1);
250 | before_name = tokenlist(keyword_idx+1:name_idx-1);
251 | returns = before_name(strcmp({before_name.type}, 'identifier'));
252 | end
253 |
254 |
255 | function token_idx = search_token(token_type, token_text, tokenlist, token_idx, increment)
256 | %SEARCH_TOKEN search TOKENLIST for token with TOKEN_TYPE and TOKEN_TEXT
257 | % starting from TOKEN_IDX and stepping with INCREMENT.
258 | %
259 | % Leave TOKEN_TEXT empty to match on TOKEN_TYPE alone, and leave
260 | % TOKEN_TYPE empty to match on TOKEN_TEXT alone. If both are empty,
261 | % TOKEN_IDX is returned unchanged.
262 | % INCREMENT is +1 for a forward search and -1 for a backward search.
263 | %
264 | % Returns the TOKEN_IDX of the first matching token. If the search
265 | % runs into either end of TOKENLIST without a match, the index of the
266 | % last visited token is returned instead.
267 |
268 | by_type = ~isempty(token_type);
269 | by_text = ~isempty(token_text);
270 | % pick the matching rule once, so that a single search loop suffices:
271 | if by_type && by_text
272 |     is_match = @(token) token.isEqual(token_type, token_text);
273 | elseif by_text
274 |     is_match = @(token) token.hasText(token_text);
275 | elseif by_type
276 |     is_match = @(token) token.hasType(token_type);
277 | else
278 |     return % nothing to search for
279 | end
280 |
281 | last_idx = length(tokenlist);
282 | while ~is_match(tokenlist(token_idx))
283 |     next_idx = token_idx + increment;
284 |     % stop at the edges of TOKENLIST:
285 |     if next_idx < 1 || next_idx > last_idx
286 |         break
287 |     end
288 |     token_idx = next_idx;
289 | end
290 | end
291 |
--------------------------------------------------------------------------------
/check.m:
--------------------------------------------------------------------------------
1 | function check(filename)
2 | %CHECK a source file FILENAME for problems
3 | %
4 | % CHECK does a deep analysis of the code in FILENAME, and reports on
5 | % problems with the code.
6 | %
7 | % FILENAME is the name of the file to analyze, e.g. 'ugly_code.m'.
8 | %
9 | % Each function defined in the file is reported separately, with
10 | % separate statistics and warnings. Minor warnings are written in
11 | % black, while major warnings are printed red. Even though some
12 | % warnings are somewhat subjective, in general, at least all red
13 | % issues *should* be fixed.
14 | %
15 | % Every warning is presented as a clickable link that will jump to the
16 | % correct line in the editor.
17 | %
18 | % Many warnings have configurable settings in CHECK_SETTINGS. Note
19 | % though that *disabling* a warning does not count as *fixing* it.
20 | %
21 | % Warnings include:
22 | % - Required files to run the code
23 | % - Required toolboxes to run the code
24 | % - High number of lines
25 | % - High number of function arguments
26 | % - High number of used variables
27 | % - Too many levels of nesting
28 | % - Too much function complexity
29 | % - MLINT warnings
30 | % - missing documentation, or missing documentation of function arguments
31 | % - not enough comments
32 | % - incorrect or insufficient indentation
33 | % - excessive line length
34 | % - too short variable names
35 | % - no spaces around some operators
36 | % - use of dangerous functions like eval
37 |
38 | % (c) 2016, Bastian Bechtold
39 | % This code is licensed under the terms of the BSD 3-clause license
40 |
41 | [requiredFiles, requiredProducts] = ...
42 |     matlab.codetools.requiredFilesAndProducts(filename);
43 | % manually fetch file name, since checkcode won't do it correctly
44 | fullfilename = which(filename);
45 | mlintInfo = ...
46 |     checkcode(fullfilename, '-cyc', '-id', '-struct', '-fullpath');
47 |
48 | % tokenize the source and split it into blocks:
49 | source_code = fileread(filename);
50 | tokens = tokenize_code(source_code);
51 | func_report = analyze_file(fullfilename, tokens);
52 |
53 | fprintf('Code Analysis for %s\n\n', filename);
54 |
55 | % list the required files by base name only (folders are dropped):
56 | file_labels = cell(1, numel(requiredFiles));
57 | for file_idx = 1:numel(requiredFiles)
58 |     [~, basename, ext] = fileparts(requiredFiles{file_idx});
59 |     file_labels{file_idx} = [basename ext];
60 | end
61 | % strjoin copes with an empty list, so the header line is always
62 | % terminated with a newline, even if nothing is required:
63 | fprintf(' Required files: %s\n', strjoin(file_labels, ', '));
64 |
65 | % same for the products; Name is the only field that is printed:
66 | product_labels = cell(1, numel(requiredProducts));
67 | for product_idx = 1:numel(requiredProducts)
68 |     product_labels{product_idx} = requiredProducts(product_idx).Name;
69 | end
70 | fprintf(' Required toolboxes: %s\n\n', strjoin(product_labels, ', '));
71 |
72 | % print one report per block returned by analyze_file, indented by 2:
73 | for func = func_report
74 |     print_code_report(func, mlintInfo, 2);
75 | end
76 | end
77 |
78 |
function print_code_report(func, mlintInfo, indentation)
%PRINT_CODE_REPORT prints a comprehensive report about a code block FUNC
%   The printed text is indented at INDENTATION spaces.
%
%   FUNC is a struct describing one code block. This function reads its
%   fields `type`, `name`, `filename`, `body`, `variables` and `children`,
%   and, for function-like blocks, `arguments` and `returns`.
%   MLINTINFO is handed on to the statistics and MLINT report helpers.
%
%   The report consists of
%   - a headline with the block type, its name, and a clickable link to
%     the position of the name,
%   - a statistics section that depends on the type of the block
%     (Function, Subfunction, Nested Function, Class, Script),
%   - all collected warnings, ordered by line and then by column,
%   - the same report for every entry of FUNC.children, indented four
%     spaces deeper.

    prefix = repmat(' ', 1, indentation);
    % One conversion per argument: the link target (%s) plus the two
    % numbers shown as link text. Without the <a href> wrapper the target
    % string would be consumed by `%i` and garble the headline.
    % NOTE(review): assumes open_file_link returns a string that is valid
    % as an href target in the command window -- confirm against its
    % definition.
    link = sprintf('<a href="%s">Line %i, col %i</a>', ...
                   open_file_link(func.filename, func.name.line), ...
                   func.name.line, func.name.col);
    fprintf('%s%s %s (%s):\n\n', ...
            prefix, func.type, func.name.text, link);

    % statistics differ per kind of code block
    functypes = {'Function', 'Subfunction', 'Nested Function'};
    is_function = any(strcmp(func.type, functypes));
    if is_function
        stats = get_function_stats(func, mlintInfo);
        print_function_stats(stats, indentation+2);
        fprintf('\n');
    elseif strcmp(func.type, 'Class')
        stats = get_class_stats(func);
        print_class_stats(stats, indentation+2);
        fprintf('\n');
    elseif strcmp(func.type, 'Script')
        stats = get_script_stats(func);
        print_script_stats(stats, indentation+2);
        fprintf('\n');
    end

    % warnings that apply to every kind of code block
    reports = [report_documentation(func) ...
               report_comments(func.body) ...
               report_mlint_warnings(mlintInfo, func.body) ...
               report_indentation(func) ...
               report_line_length(func.body) ...
               report_variables(func.variables, func.body, 'variable') ...
               report_operators(func.body) ...
               report_eval(func.body)];

    % names, arguments and return values only exist for functions
    if is_function
        reports = [reports ...
                   report_variables(func.name, func.body, ...
                                    'function') ...
                   report_variables(func.arguments, func.body, ...
                                    'function argument') ...
                   report_variables(func.returns, func.body, ...
                                    'return argument')];
    end

    if ~isempty(reports)
        % First, secondary sort by column
        report_tokens = [reports.token];
        [~, sort_idx] = sort([report_tokens.col]);
        reports = reports(sort_idx);
        % Second, primary sort by line (preserves secondary
        % sorting order in case of collisions)
        report_tokens = [reports.token];
        [~, sort_idx] = sort([report_tokens.line]);
        reports = reports(sort_idx);
        print_report(reports, indentation+2, func.filename);
    end

    fprintf('\n\n');

    % recurse into nested code blocks
    for subfunc = func.children
        print_code_report(subfunc, mlintInfo, indentation+4)
    end
end
153 |
154 |
function class_stats = get_class_stats(class_struct)
%GET_CLASS_STATS collects statistics CLASS_STATS about the class
%   described by CLASS_STRUCT.
%
%   CLASS_STATS is a struct with the fields
%   - num_lines: number of lines in CLASS_STRUCT.body
%   - num_properties: number of entries in CLASS_STRUCT.variables
%   - num_methods: number of entries in CLASS_STRUCT.children

    class_lines = split_lines(class_struct.body);
    class_stats = struct( ...
        'num_lines', length(class_lines), ...
        'num_properties', length(class_struct.variables), ...
        'num_methods', length(class_struct.children));
end
170 |
171 |
function print_class_stats(class_stats, indentation)
%PRINT_CLASS_STATS prints the class statistics CLASS_STATS, with every
%   line indented by INDENTATION spaces.
%
%   One line is printed for each of
%   - the number of lines in the class
%   - the number of properties
%   - the number of methods
%
%   Each value is rated by PRINT_EVALUATION against a low and a high
%   threshold taken from CHECK_SETTINGS:
%   - `lo_class_num_lines` and `hi_class_num_lines`
%   - `lo_class_num_properties` and `hi_class_num_properties`
%   - `lo_class_num_methods` and `hi_class_num_methods`

    prefix = repmat(' ', 1, indentation);

    % columns: label format, statistics field, low setting, high setting
    rows = { ...
        '%sNumber of lines: ', 'num_lines', ...
            'lo_class_num_lines', 'hi_class_num_lines'; ...
        '%sNumber of properties: ', 'num_properties', ...
            'lo_class_num_properties', 'hi_class_num_properties'; ...
        '%sNumber of methods: ', 'num_methods', ...
            'lo_class_num_methods', 'hi_class_num_methods'};

    for row_idx = 1:size(rows, 1)
        fprintf(rows{row_idx, 1}, prefix);
        print_evaluation(class_stats.(rows{row_idx, 2}), ...
                         check_settings(rows{row_idx, 3}), ...
                         check_settings(rows{row_idx, 4}));
    end
end
206 |
207 |
function script_stats = get_script_stats(script_struct)
%GET_SCRIPT_STATS collects statistics SCRIPT_STATS about the script
%   described by SCRIPT_STRUCT.
%
%   SCRIPT_STATS is a struct with the fields
%   - num_lines: number of lines in SCRIPT_STRUCT.body
%   - num_variables: number of entries in SCRIPT_STRUCT.variables
%   - max_indentation: deepest nesting level reached by the block
%     keywords `if`, `for`, `parfor`, `while` and `switch`

    body = script_struct.body;
    script_stats.num_lines = length(split_lines(body));
    script_stats.num_variables = length(script_struct.variables);

    % Walk over all keywords: block openers go one level deeper, every
    % `end` goes one level up. The counter starts at 1, so the first
    % opened block is recorded as level 2; the maximum stays 0 if no
    % block is opened at all.
    block_openers = {'if' 'for' 'parfor' 'while' 'switch'};
    is_keyword = strcmp({body.type}, 'keyword');
    depth = 1;
    deepest = 0;
    for kw = body(is_keyword)
        if kw.hasText(block_openers)
            depth = depth + 1;
            deepest = max(deepest, depth);
        elseif kw.hasText('end')
            depth = depth - 1;
        end
    end
    script_stats.max_indentation = deepest;
end
237 |
238 |
function print_script_stats(script_stats, indentation)
%PRINT_SCRIPT_STATS prints some general statistics SCRIPT_STATS about
%   a script. The printed text is indented at INDENTATION spaces.
%
%   This function prints an evaluation of
%   - the number of lines in the script
%   - the number of variables used in the script
%   - the maximum level of nesting in the script
%
%   All of these values are evaluated as `good` if they are below a
%   certain low threshold; as `high` if they are above this threshold
%   and as `too high` and in red text if they exceed a high threshold.
%   The thresholds can be controlled using the settings
%   - `lo_script_num_lines` and `hi_script_num_lines`
%   - `lo_script_num_variables` and `hi_script_num_variables`
%   - `lo_script_max_indentation` and `hi_script_max_indentation`

    prefix = repmat(' ', 1, indentation);

    fprintf('%sNumber of lines: ', prefix);
    print_evaluation(script_stats.num_lines, ...
                     check_settings('lo_script_num_lines'), ...
                     check_settings('hi_script_num_lines'));

    fprintf('%sNumber of variables: ', prefix);
    print_evaluation(script_stats.num_variables, ...
                     check_settings('lo_script_num_variables'), ...
                     check_settings('hi_script_num_variables'));

    % This value is the nesting depth, not a variable count; use the same
    % label as PRINT_FUNCTION_STATS does for the same statistic.
    fprintf('%sMax level of nesting: ', prefix);
    print_evaluation(script_stats.max_indentation, ...
                     check_settings('lo_script_max_indentation'), ...
                     check_settings('hi_script_max_indentation'));
end
272 |
273 |
function func_stats = get_function_stats(func_struct, mlintInfo)
%GET_FUNCTION_STATS analyzes a function FUNC_STRUCT and MLINTINFO and
%   gathers some statistics FUNC_STATS about them.
%
%   Statistics gathered (fieldname):
%   - number of lines (num_lines)
%   - number of function arguments (num_arguments)
%   - number of variables used in the function (num_variables)
%   - the maximum level of nesting in the function (max_indentation)
%   - the function complexity (complexity)
%
%   MLINTINFO must contain exactly one message with id `CABE` on the
%   first line of the function body; the complexity is parsed from the
%   text of that message.
%
%   The statistics are returned as struct FUNC_STATS

    func_stats.num_lines = length(split_lines(func_struct.body));
    func_stats.num_arguments = length(func_struct.arguments);
    func_stats.num_variables = length(func_struct.variables);

    % max indentation: block openers go one level deeper, `end` goes one
    % level up. The counter starts at 1, so the first opened block is
    % recorded as level 2.
    keyword_indices = strcmp({func_struct.body.type}, 'keyword');
    keywords = func_struct.body(keyword_indices);
    indentation = 1;
    max_indentation = 0;
    for keyword = keywords
        if keyword.hasText({'if' 'for' 'parfor' 'while' 'switch'})
            indentation = indentation + 1;
            max_indentation = max(max_indentation, indentation);
        elseif keyword.hasText('end')
            indentation = indentation - 1;
        end
    end
    func_stats.max_indentation = max_indentation;

    % cyclomatic complexity: pick the `CABE` message that belongs to the
    % line on which this function starts
    mlintInfo = mlintInfo(strcmp({mlintInfo.id}, 'CABE'));
    mlintInfo = mlintInfo([mlintInfo.line] == func_struct.body(1).line);
    assert(length(mlintInfo) == 1);
    % Named tokens are required here: `n` is read below, and a bare
    % `(?` without a token name is not a valid group.
    % The message is expected to contain "'<name>' is <number>".
    pattern = '''(?<f>[^'']+)'' is (?<n>[0-9]+)';
    matches = regexp(mlintInfo.message, pattern, 'names');
    func_stats.complexity = str2double(matches.n);
end
314 |
315 |
function print_function_stats(func_stats, indentation)
%PRINT_FUNCTION_STATS prints the function statistics FUNC_STATS, with
%   every line indented by INDENTATION spaces.
%
%   One line is printed for each of
%   - the number of lines in the function
%   - the number of function arguments
%   - the number of variables used in the function
%   - the maximum level of nesting in the function
%   - the function complexity
%
%   Each value is rated by PRINT_EVALUATION against a low and a high
%   threshold taken from CHECK_SETTINGS:
%   - `lo_function_num_lines` and `hi_function_num_lines`
%   - `lo_function_num_arguments` and `hi_function_num_arguments`
%   - `lo_function_num_variables` and `hi_function_num_variables`
%   - `lo_function_max_indentation` and `hi_function_max_indentation`
%   - `lo_function_complexity` and `hi_function_complexity`

    prefix = repmat(' ', 1, indentation);

    % columns: label format, statistics field, low setting, high setting
    rows = { ...
        '%sNumber of lines: ', 'num_lines', ...
            'lo_function_num_lines', 'hi_function_num_lines'; ...
        '%sNumber of function arguments: ', 'num_arguments', ...
            'lo_function_num_arguments', 'hi_function_num_arguments'; ...
        '%sNumber of used variables: ', 'num_variables', ...
            'lo_function_num_variables', 'hi_function_num_variables'; ...
        '%sMax level of nesting: ', 'max_indentation', ...
            'lo_function_max_indentation', ...
            'hi_function_max_indentation'; ...
        '%sCode complexity: ', 'complexity', ...
            'lo_function_complexity', 'hi_function_complexity'};

    for row_idx = 1:size(rows, 1)
        fprintf(rows{row_idx, 1}, prefix);
        print_evaluation(func_stats.(rows{row_idx, 2}), ...
                         check_settings(rows{row_idx, 3}), ...
                         check_settings(rows{row_idx, 4}));
    end
end
364 |
365 |
function print_evaluation(value, low_thr, high_thr)
%PRINT_EVALUATION prints VALUE followed by a rating of it.
%   Values below LOW_THR are rated "(good)", values below HIGH_THR are
%   rated "(high)", and everything else is rated "(too high)". The last
%   rating is wrapped in `[\b` and `]\b`, the markers this file uses for
%   red text.

    % choose the output format first, then print once
    if value < low_thr
        template = '%i (good)\n';
    elseif value < high_thr
        template = '%i (high)\n';
    else
        template = '%i [\b(too high)]\b\n';
    end
    fprintf(template, value);
end
379 |
380 |
function print_report(report, indentation, filename)
%PRINT_REPORT prints the contents of REPORT at INDENTATION. Each REPORT
%   item is written as a link to the appropriate place in FILENAME.
%
%   REPORT is a struct array with the fields `token`, `message` and
%   `severity`. Entries with severity 2 are printed in red, all other
%   entries in the default text color.

    prefix = repmat(' ', 1, indentation);

    for report_entry = report
        % Every format below has exactly one conversion per argument:
        % prefix, link target, line, column and message. The link target
        % must be consumed by the `%s` inside the <a href>; otherwise it
        % would be fed to `%i` and garble the output.
        % NOTE(review): assumes open_file_link returns a string that is
        % valid as an href target in the command window -- confirm
        % against its definition.
        if report_entry.severity == 2
            % print severe report entries in red:
            % red text is created by surrounding it with `[\b` and
            % `]\b`. The `\b` will delete the preceding bracket and
            % not show up in the text itself, but it will be
            % interpreted as a flag to change the text color.
            template = '%s<a href="%s">Line %i, col %i</a>: [\b%s]\b\n';
        else
            % print regular report entries in the default color
            template = '%s<a href="%s">Line %i, col %i</a>: %s\n';
        end
        fprintf(template, ...
                prefix, ...
                open_file_link(filename, report_entry.token.line), ...
                report_entry.token.line, ...
                report_entry.token.col, ...
                report_entry.message);
    end
end
413 |
414 |
function report = report_comments(tokenlist)
%REPORT_COMMENTS REPORTs whether TOKENLIST contains enough comments.
%
%   Comments should give context for reading the code rather than
%   repeat it: they should explain the *why*, not the *what*.
%
%   The share of lines that contain at least one comment token is
%   compared against two fixed limits: below 10% is reported with
%   severity 2, below 20% with severity 1. The report is attached to
%   the first token of TOKENLIST.
%
%   returns a struct array REPORT with fields `token`, `message`, and
%   `severity`.
%
%   This check can be switched off by setting `do_check_comments` in
%   CHECK_SETTINGS to FALSE.

    report = struct('token', {}, 'severity', {}, 'message', {});
    if ~check_settings('do_check_comments')
        return
    end

    % count the lines that carry a comment
    linelist = split_lines(tokenlist);
    num_lines = length(linelist);
    num_comments = 0;
    for line_idx = 1:num_lines
        token_types = {linelist{line_idx}.type};
        if any(strcmp(token_types, 'comment'))
            num_comments = num_comments + 1;
        end
    end

    usage = sprintf('(%i comments for %i lines of code)', ...
                    num_comments, num_lines);
    comment_ratio = num_comments/num_lines;
    if comment_ratio < 0.1
        report = struct('token', tokenlist(1), ...
                        'severity', 2, ...
                        'message', ['too few comments ' usage]);
    elseif comment_ratio < 0.2
        report = struct('token', tokenlist(1), ...
                        'severity', 1, ...
                        'message', ['very few comments ' usage]);
    end
end
455 |
456 |
function report = report_documentation(func_struct)
%REPORT_DOCUMENTATION REPORTs on problems with the documentation of the
%   function in FUNC_STRUCT.
%
%   Documentation is very important for humans. Code is not primarily
%   written for the machine to execute, but mostly for humans to read.
%   But many ideas are more efficiently described in prose than in code,
%   hence we write documentation. Functions in particular should always
%   be documented.
%
%   Problems might be:
%   - the function name is not mentioned in the documentation
%   - the function arguments are not mentioned
%   - the function return values are not mentioned
%   - there is no documentation
%
%   Names are searched for case-insensitively. The arguments `varargin`
%   and `varargout` are never reported as missing.
%
%   returns a struct array REPORT with fields `token`, `message`, and
%   `severity`.
%
%   This check can be switched off by setting `do_check_documentation` in
%   CHECK_SETTINGS to FALSE.

    report = struct('token', {}, 'severity', {}, 'message', {});
    if ~check_settings('do_check_documentation')
        return
    end

    doc_text = get_function_documentation(func_struct.body);
    if isempty(doc_text)
        % without documentation, none of the other checks make sense
        msg = 'there is no documentation';
        report = [report struct('token', func_struct.body(1), ...
                                'severity', 2, ...
                                'message', msg)];
        return
    end

    template = '%s ''%s'' is not mentioned in the documentation';
    lower_doc_text = lower(doc_text);

    % the name may carry a path/extension; only its base name is required
    [~, funcname, ~] = fileparts(func_struct.name.text);
    if isempty(strfind(lower_doc_text, lower(funcname)))
        msg = sprintf(template, 'function name', func_struct.name.text);
        report = [report struct('token', func_struct.name, ...
                                'severity', 2, ...
                                'message', msg)];
    end

    for variable = func_struct.arguments
        % the exemption has to look at the argument name itself, not at
        % the documentation text
        if isempty(strfind(lower_doc_text, lower(variable.text))) && ...
           ~strcmp(variable.text, 'varargin')
            msg = sprintf(template, 'function argument', variable.text);
            report = [report struct('token', variable, ...
                                    'severity', 2, ...
                                    'message', msg)]; %#ok
        end
    end

    for variable = func_struct.returns
        if isempty(strfind(lower_doc_text, lower(variable.text))) && ...
           ~strcmp(variable.text, 'varargout')
            msg = sprintf(template, 'return argument', variable.text);
            report = [report struct('token', variable, ...
                                    'severity', 2, ...
                                    'message', msg)]; %#ok
        end
    end
end
519 |
520 |
function doc_text = get_function_documentation(tokenlist)
%GET_FUNCTION_DOCUMENTATION collects the documentation comment that
%   follows the function declaration in TOKENLIST.
%
%   returns DOC_TEXT, the concatenated text of all comment tokens in the
%   run of comment, space and linebreak tokens after the declaration.

    num_tokens = length(tokenlist);

    % locate the first closing parenthesis, which is taken as the end of
    % the function declaration
    declaration_end = num_tokens + 1;
    for idx = 1:num_tokens
        if tokenlist(idx).isEqual('pair', ')')
            declaration_end = idx;
            break
        end
    end
    % step over the parenthesis and the one token that follows it
    % (presumably the linebreak ending the declaration -- TODO confirm)
    doc_start = declaration_end + 2;

    % the documentation ends at the first token that is neither a
    % comment nor white space
    doc_types = {'comment' 'space' 'linebreak'};
    doc_stop = doc_start;
    while doc_stop <= num_tokens && ...
          tokenlist(doc_stop).hasType(doc_types)
        doc_stop = doc_stop + 1;
    end

    % keep only the comment tokens and join their text
    candidates = tokenlist(doc_start:doc_stop-1);
    is_comment = strcmp({candidates.type}, 'comment');
    doc_text = [candidates(is_comment).text];
end
548 |
549 |
function report = report_eval(tokenlist)
%REPORT_EVAL REPORTs every use of `eval` in TOKENLIST.
%
%   `eval` should not be used; there is always a better alternative.
%   Every identifier token with the text `eval` is reported with
%   severity 2.
%
%   returns a struct array REPORT with fields `token`, `message`, and
%   `severity`.
%
%   This check can be switched off by setting `do_check_eval` in
%   CHECK_SETTINGS to FALSE.

    report = struct('token', {}, 'severity', {}, 'message', {});
    if ~check_settings('do_check_eval')
        return
    end

    msg = 'Eval should never be used';
    is_eval = strcmp({tokenlist.text}, 'eval');
    is_identifier = strcmp({tokenlist.type}, 'identifier');
    for eval_token = tokenlist(is_eval & is_identifier)
        entry = struct('token', eval_token, ...
                       'severity', 2, ...
                       'message', msg);
        report = [report entry]; %#ok
    end
end
576 |
577 |
function report = report_operators(tokenlist)
%REPORT_OPERATORS REPORTs operators in TOKENLIST that lack surrounding
%   white space.
%
%   Operators read best when they are spaced like punctuation in
%   English text and math:
%   - `>`, `<`, `==`, `>=`, `<=`, `~=`, `=`, `||`, and `&&` must have a
%     space on both sides.
%   - `,` and `;` must be followed by a space or a line break.
%   - `@` and `...` must be preceded by a space.
%
%   Every violation is reported with severity 1.
%
%   returns a struct array REPORT with fields `token`, `message`, and
%   `severity`.
%
%   This check can be switched off by setting `do_check_operators` in
%   CHECK_SETTINGS to FALSE.

    report = struct('token', {}, 'severity', {}, 'message', {});
    if ~check_settings('do_check_operators')
        return
    end

    space_around_operators = { '>' '<' '==' '>=' '<=' '~=' ...
                               '=' '||' '&&'};
    space_after_operators = { ',' ';' };
    space_before_operators = { '@' '...' };

    num_tokens = length(tokenlist);
    for op_idx = find(strcmp({tokenlist.type}, 'punctuation'))
        operator = tokenlist(op_idx);

        % inspect the direct neighbours, if there are any
        has_space_before = op_idx > 1 && ...
                           tokenlist(op_idx-1).hasType('space');
        has_space_after = op_idx < num_tokens && ...
                          tokenlist(op_idx+1).hasType('space');
        has_newline_after = op_idx < num_tokens && ...
                            tokenlist(op_idx+1).hasText(sprintf('\n'));

        % pick the message for the first rule that is violated
        if operator.hasText(space_around_operators) && ...
           ~(has_space_before && has_space_after)
            template = 'no spaces around operator ''%s''';
        elseif operator.hasText(space_after_operators) && ...
               ~(has_space_after || has_newline_after)
            template = 'no spaces after operator ''%s''';
        elseif operator.hasText(space_before_operators) && ...
               ~has_space_before
            template = 'no spaces before operator ''%s''';
        else
            continue
        end

        msg = sprintf(template, operator.text);
        report = [report struct('token', operator, ...
                                'severity', 1, ...
                                'message', msg)]; %#ok
    end
end
637 |
638 |
function report = report_variables(varlist, tokenlist, description)
%REPORT_VARIABLES checks all variables in VARLIST, as used in TOKENLIST,
%   and REPORTs on problems with these variables. DESCRIPTION is used
%   to describe the variable in REPORT.
%
%   Problems with variables can be:
%   - The variable shadows a built-in
%   - The variable has a very short name and is used very often.
%
%   In general, variable name lengths should correlate with the amount
%   of code they are used in. If variables are used over a long piece
%   of code, the programmer will stumble across the variable often,
%   and it should have a descriptive name. Short variable names are
%   only allowed if they are ephemeral, such as loop counters in small
%   loops. There, they don't need to be remembered for long, thus a short
%   name is permissible.
%
%   A name is rated against two sets of thresholds from CHECK_SETTINGS,
%   `lo_varname_*` and `hi_varname_*`. If the `hi_` set matches, the name
%   is reported as `too short` with severity 2; otherwise, if the `lo_`
%   set matches, it is reported as `very short` with severity 1.
%
%   returns a struct array REPORT with fields `token`, `message`, and
%   `severity`.
%
%   This check can be switched off by setting `do_check_variables` in
%   CHECK_SETTINGS to FALSE.

    report = struct('token', {}, 'severity', {}, 'message', {});
    if ~check_settings('do_check_variables')
        return
    end

    % the thresholds do not depend on the variable, so read them once
    lo_short_spread = check_settings('lo_varname_short_spread');
    lo_short_length = check_settings('lo_varname_short_length');
    lo_long_spread = check_settings('lo_varname_long_spread');
    lo_long_length = check_settings('lo_varname_long_length');
    hi_short_spread = check_settings('hi_varname_short_spread');
    hi_short_length = check_settings('hi_varname_short_length');
    hi_long_spread = check_settings('hi_varname_long_spread');
    hi_long_length = check_settings('hi_varname_long_length');

    for variable = varlist
        if does_shadow(variable.text) && ...
           ~any(strcmp(variable.text, {'varargin', 'varargout'}))
            msg = sprintf('%s ''%s'' shadows a built-in', ...
                          description, variable.text);
            report = [report struct('token', variable, ...
                                    'severity', 2, ...
                                    'message', msg)]; %#ok
        end

        [numuses, spread] = get_variable_usage(variable.text, tokenlist);
        if isempty(spread)
            % no use found in TOKENLIST: there is no line range, and an
            % empty value would break the `&&` expressions below
            spread = 0;
        end
        usage_descr = sprintf('(used %i times across %i lines)', ...
                              numuses, spread);
        varlen = length(variable.text);

        slightly_too_short = ...
            (spread > lo_short_spread && varlen <= lo_short_length) || ...
            (spread > lo_long_spread && varlen <= lo_long_length);
        much_too_short = ...
            (spread > hi_short_spread && varlen <= hi_short_length) || ...
            (spread > hi_long_spread && varlen <= hi_long_length);

        % The severe case has to be tested first: a name that exceeds the
        % `hi_` thresholds usually exceeds the `lo_` thresholds as well,
        % so the opposite order would never report severity 2.
        if much_too_short
            msg = sprintf('%s ''%s'' is too short %s', ...
                          description, variable.text, usage_descr);
            report = [report struct('token', variable, ...
                                    'severity', 2, ...
                                    'message', msg)]; %#ok
        elseif slightly_too_short
            msg = sprintf('%s ''%s'' is very short %s', ...
                          description, variable.text, usage_descr);
            report = [report struct('token', variable, ...
                                    'severity', 1, ...
                                    'message', msg)]; %#ok
        end
    end
end
713 |
714 |
function [numuses, linerange] = get_variable_usage(varname, tokenlist)
%GET_VARIABLE_USAGE finds all uses of variable VARNAME in TOKENLIST
%   Returns the number of uses NUMUSES and the range of lines LINERANGE
%   in which the variable is used.
%
%   A use is an identifier token whose text equals VARNAME. LINERANGE is
%   the difference between the last and the first line with a use, so it
%   is 0 if all uses are on one line. If there is no use at all, NUMUSES
%   and LINERANGE are both 0.

    uses = tokenlist(strcmp({tokenlist.text}, varname) & ...
                     strcmp({tokenlist.type}, 'identifier'));
    numuses = length(uses);
    if numuses == 0
        % max/min of nothing would be empty, which callers can neither
        % compare with `&&` nor print
        linerange = 0;
        return
    end
    linelist = [uses.line];
    linerange = max(linelist)-min(linelist);
end
726 |
727 |
function report = report_mlint_warnings(mlint_info, tokenlist)
%REPORT_MLINT_WARNINGS turns the MLINT messages in MLINT_INFO that lie
%   within the lines of TOKENLIST into a REPORT.
%
%   Messages with the id `CABE` are left out. Every remaining message
%   is reported with severity 2 at its line and first column.
%
%   returns a struct array REPORT with fields `token`, `message`, and
%   `severity`.
%
%   This check can be switched off by setting `do_check_mlint_warnings` in
%   CHECK_SETTINGS to FALSE.

    report = struct('token', {}, 'severity', {}, 'message', {});
    if ~check_settings('do_check_mlint_warnings')
        return
    end

    % keep the messages between the first and the last token's line
    first_line = tokenlist(1).line;
    last_line = tokenlist(end).line;
    message_lines = [mlint_info.line];
    is_relevant = message_lines >= first_line & ...
                  message_lines <= last_line & ...
                  ~strcmp({mlint_info.id}, 'CABE');
    relevant_info = mlint_info(is_relevant);

    % an empty selection simply skips the loop
    for idx = 1:length(relevant_info)
        mlint_msg = relevant_info(idx);
        marker = Token('special', 'mlint warning', ...
                       mlint_msg.line, mlint_msg.column(1));
        report = [report struct('token', marker, ...
                                'severity', 2, ...
                                'message', mlint_msg.message)]; %#ok
    end
end
758 |
759 |
function is_builtin = does_shadow(varname)
%DOES_SHADOW checks whether a variable called VARNAME would shadow a
%   built-in function or variable.
%
%   returns a boolean IS_BUILTIN.

    is_builtin = false;

    % nothing with this name exists at all, so nothing can be shadowed
    if ~any(exist(varname) == [2 3 4 5 6 8]) %#ok
        return
    end

    % Something called `varname` exists, but it may be user code. `which`
    % marks built-ins in one of three ways: the location starts with the
    % MATLAB root directory, starts with `built-in`, or ends with
    % `is a built-in method`.
    root_dir = matlabroot;
    builtin_prefix = 'built-in';
    builtin_suffix = 'is a built-in method';
    suffix_length = length(builtin_suffix);

    locations = which(varname, '-all');
    for idx = 1:length(locations)
        location = locations{idx};
        in_matlabroot = strncmp(location, root_dir, length(root_dir));
        has_prefix = strncmp(location, builtin_prefix, ...
                             length(builtin_prefix));
        has_suffix = length(location) >= suffix_length && ...
            strcmp(location(end-suffix_length+1:end), builtin_suffix);
        if in_matlabroot || has_prefix || has_suffix
            is_builtin = true;
            return
        end
    end
end
788 |
789 |
function report = report_line_length(tokenlist)
%REPORT_LINE_LENGTH walks through TOKENLIST and REPORTs on the length of
%   all lines.
%
%   While line length should not matter with today's high-resolution
%   displays, it is still useful to limit line lengths in order to be
%   able to fit several editor panes next to one another, or to be able
%   to print the source code.
%
%   - Lines longer than the setting `lo_line_length` are flagged as
%     `very long` (severity 1), and
%   - lines longer than the setting `hi_line_length` are flagged as
%     `too long` (severity 2).
%
%   The reported column is the length of the line.
%
%   returns a struct array REPORT with fields `token`, `message`, and
%   `severity`.
%
%   This check can be switched off by setting `do_check_line_length` in
%   CHECK_SETTINGS to FALSE.

    report = struct('token', {}, 'message', {}, 'severity', {});
    if ~check_settings('do_check_line_length')
        return
    end
    lo_line_length = check_settings('lo_line_length');
    hi_line_length = check_settings('hi_line_length');

    linelist = split_lines(tokenlist);
    for line_idx = 1:length(linelist)
        line_tokens = linelist{line_idx};
        line_length = length([line_tokens.text]);

        % The stricter limit has to be tested first: a line beyond the
        % high limit is also beyond the low limit, so the opposite order
        % would never report `line too long`.
        if line_length > hi_line_length
            msg = 'line too long';
            severity = 2;
        elseif line_length > lo_line_length
            msg = 'line very long';
            severity = 1;
        else
            continue
        end

        report_token = Token('special', 'line warning', ...
                             line_tokens(1).line, ...
                             line_length);
        report = [report struct('token', report_token, ...
                                'message', msg, ...
                                'severity', severity)]; %#ok
    end
end
837 |
838 |
function report = report_indentation(func_struct)
%REPORT_INDENTATION checks the indentation of every line of
%   FUNC_STRUCT.body and REPORTs the lines that are indented incorrectly.
%
%   Indentation makes the structure of code visible. If it is wrong, it
%   is hard to see where nested blocks (if, for, etc.) begin and end.
%
%   The nesting level starts at FUNC_STRUCT.nesting and is updated for
%   every non-continuation line by INDENTATION_RULE, following the
%   normal MATLAB indentation rules:
%
%   - Indent after `for`, `parfor`, `while`, `if`, `switch`, `classdef`,
%     `events`, `properties`, `enumeration`, `methods`, `function`.
%   - Dedent for `end`
%   - Dedent momentarily for `else`, `elseif`, `case`, `otherwise`.
%
%   A line is then reported with severity 2 if
%   - it starts with a comment and is indented less than expected, unless
%     it sits exactly one indentation step further out,
%   - it is a normal line and not indented exactly as expected, or
%   - it is a continuation line and not indented deeper than expected.
%
%   returns a struct array REPORT with fields `token`, `message`, and
%   `severity`.
%
%   This check can be switched off by setting `do_check_indentation` in
%   CHECK_SETTINGS to FALSE. The width of one indentation step is read
%   from the setting `indentation_step`.

    report = struct('token', {}, 'message', {}, 'severity', {});
    if ~check_settings('do_check_indentation')
        return
    end

    linelist = split_lines(func_struct.body);

    nesting = func_struct.nesting;
    function_nesting = func_struct.nesting;
    inside_switch = false;

    for line_idx = 1:length(linelist)
        line_tokens = linelist{line_idx};
        is_continuation = is_continuation_line(line_idx, linelist);

        % empty lines have no indentation to check
        if isempty(line_tokens)
            continue
        end

        first_nonspace = get_first_nonspace(line_tokens);

        % Continuation lines keep the nesting and correction of the line
        % they continue; only new statements update the state.
        if ~is_continuation
            [nesting, function_nesting, correction] = ...
                indentation_rule(nesting, function_nesting, ...
                                 first_nonspace);

            if first_nonspace.isEqual('keyword', 'switch')
                % the body of a `switch` is nested one level deeper, but
                % the `switch` line itself is not
                nesting = nesting + 1;
                correction = correction - 1;
                inside_switch = true;
            elseif first_nonspace.isEqual('keyword', 'end') && ...
                   inside_switch
                % take back the extra `switch` level at the next `end`
                nesting = nesting - 1;
                inside_switch = false;
            end
        end

        step = check_settings('indentation_step');
        expected = max((nesting+correction) * step, 0);
        actual = get_line_indentation(line_tokens);

        % each kind of line has its own rule
        if first_nonspace.hasType('comment')
            is_incorrect = ~(actual >= expected) && ...
                           actual ~= expected-step;
        elseif is_continuation
            is_incorrect = actual <= expected;
        else
            is_incorrect = actual ~= expected;
        end

        if is_incorrect
            marker = Token('special', 'indentation warning', ...
                           line_tokens(1).line, line_tokens(1).col);
            entry = struct('token', marker, ...
                           'message', 'incorrect indentation', ...
                           'severity', 2);
            report = [report entry]; %#ok
        end
    end
end
943 |
944 |
function yesNo = is_continuation_line(line_idx, linelist)
%IS_CONTINUATION_LINE tells whether a line continues the previous line
%   YESNO = IS_CONTINUATION_LINE(LINE_IDX, LINELIST) is TRUE if the line
%   before LINELIST{LINE_IDX} contains a token whose text is exactly
%   `...`, and FALSE otherwise. LINELIST is a cell array of token arrays.
%   The first line can never be a continuation line.

    % the first line has no predecessor that it could continue:
    if ~(line_idx > 1)
        yesNo = false;
        return
    end

    preceding_tokens = linelist{line_idx-1};
    preceding_texts = {preceding_tokens.text};
    yesNo = any(strcmp(preceding_texts, '...'));
end
956 |
957 |
function [nesting, function_nesting, correction] = indentation_rule(nesting, function_nesting, first_token)
%INDENTATION_RULE updates the nesting levels for the current line
%   [NESTING, FUNCTION_NESTING, CORRECTION] = INDENTATION_RULE(NESTING,
%   FUNCTION_NESTING, FIRST_TOKEN) inspects FIRST_TOKEN, the first
%   non-space token of the current line, and returns the updated levels.
%
%   NESTING counts all open blocks (if/for/function/...),
%   FUNCTION_NESTING counts open function blocks only, and CORRECTION is
%   an offset on NESTING that applies to the current line only.
%
%   FUNCTION_NESTING is set to NaN (which never compares equal to
%   anything) as soon as a `classdef` is seen, or if the setting
%   `indentation_check_like_matlab` is off. This disables the special
%   treatment of top-level functions, whose bodies are otherwise expected
%   not to be indented.
%
%   All values are integer levels, not numbers of spaces. The expected
%   indentation of the current line is
%       (nesting + correction) * check_settings('indentation_step')

    block_openers = check_settings('beginnings');
    block_middles = check_settings('middles');

    % NaN switches the function file rules off for the rest of the file:
    is_class_header = first_token.isEqual('keyword', 'classdef');
    like_matlab = check_settings('indentation_check_like_matlab');
    if is_class_header || ~like_matlab
        function_nesting = nan;
    end

    % a normal line needs no correction, unless a rule below applies:
    correction = 0;

    opens_function = first_token.isEqual('keyword', 'function');
    if opens_function || first_token.isEqual('keyword', block_openers)
        % the opening line itself stays at the outer level:
        if opens_function
            function_nesting = function_nesting + 1;
        end
        nesting = nesting + 1;
        correction = -1;
    elseif first_token.isEqual('keyword', 'end')
        % an `end` closes a function if no other block is open inside it:
        closes_function = nesting == function_nesting;
        nesting = nesting - 1;
        if closes_function
            function_nesting = function_nesting - 1;
            % compensates for the top-level correction applied below:
            if function_nesting == 1
                correction = +1;
            end
        end
    elseif first_token.isEqual('keyword', block_middles)
        % else, elseif, case, ... are dedented for this line only:
        correction = -1;
    end

    % bodies of top-level functions are not indented:
    if function_nesting == 1
        correction = correction - 1;
    end
end
1028 |
1029 |
function indentation = get_line_indentation(line_tokens)
%GET_LINE_INDENTATION measures the leading whitespace of a line
%   INDENTATION = GET_LINE_INDENTATION(LINE_TOKENS) returns the length of
%   the text of the first token in LINE_TOKENS if that token is of type
%   space. INDENTATION is 0 for empty lines and for lines that start with
%   any other token.

    indentation = 0;
    if isempty(line_tokens)
        return
    end

    leading_token = line_tokens(1);
    if leading_token.hasType('space')
        indentation = length(leading_token.text);
    end
end
1040 |
1041 |
function token = get_first_nonspace(tokenlist)
%GET_FIRST_NONSPACE returns the first "real" token of a token list
%   TOKEN = GET_FIRST_NONSPACE(TOKENLIST) skips leading tokens of type
%   space and returns the first TOKEN that has any other type. If
%   TOKENLIST consists of space tokens only, the last token is returned.
%   TOKENLIST must not be empty.

    last_idx = length(tokenlist);
    token_idx = 1;
    % never step past the last token, even if it is a space:
    while token_idx < last_idx
        if ~tokenlist(token_idx).hasType('space')
            break
        end
        token_idx = token_idx + 1;
    end
    token = tokenlist(token_idx);
end
1054 |
1055 |
function linelist = split_lines(tokens)
%SPLIT_LINES groups TOKENS into lines
%   LINELIST = SPLIT_LINES(TOKENS) returns a cell array LINELIST with one
%   Token-array per line. Lines are separated by linebreak tokens whose
%   text is `\n` or `\r\n`; these separators are not part of any line.
%   Other linebreak tokens (such as `;`) do not start a new line.

    newline_texts = {sprintf('\n'), sprintf('\r\n')};

    linelist = {};
    first_of_line = 1;
    for pos = 1:length(tokens)
        if tokens(pos).isEqual('linebreak', newline_texts)
            linelist{end+1} = tokens(first_of_line:pos-1); %#ok
            first_of_line = pos + 1;
        end
    end
    % whatever follows the last line break is the final (maybe empty) line:
    linelist{end+1} = tokens(first_of_line:length(tokens));
end
1071 |
1072 |
function link = open_file_link(filename, linenum)
%OPEN_FILE_LINK returns a link target for HTML links
%   LINK = OPEN_FILE_LINK(FILENAME, LINENUM) returns a `matlab:` link
%   target that is meant to be used as the target (href) of HTML links
%   displayed inside MATLAB. The link calls
%   matlab.desktop.editor.openAndGoToLine with FILENAME and LINENUM, so
%   that FILENAME is opened at line LINENUM in the MATLAB editor.
%
%   FILENAME is a char array, LINENUM is an integer line number.

    prefix = 'matlab.desktop.editor.openAndGoToLine';
    % FILENAME ends up inside a single-quoted char literal, so single
    % quotes in the name have to be doubled to keep the command valid:
    escaped_filename = strrep(filename, '''', '''''');
    link = sprintf('matlab:%s(''%s'', %i);', prefix, escaped_filename, linenum);
end
1082 |
--------------------------------------------------------------------------------
/check_settings.m:
--------------------------------------------------------------------------------
function value = check_settings(name)
%CHECK_SETTINGS returns settings for CHECK.
%   VALUE = CHECK_SETTINGS(NAME) returns the VALUE of the setting called
%   NAME. An error with identifier `check_settings:unknownSetting` is
%   raised if there is no setting called NAME.
%
%   Create a local copy of this file and overwrite values if you want
%   custom behavior in a specific project.

    % thresholds for the number of lines in classes:
    settings.lo_class_num_lines = 200;
    settings.hi_class_num_lines = 400;
    % thresholds for the number of properties in classes:
    settings.lo_class_num_properties = 10;
    settings.hi_class_num_properties = 15;
    % thresholds for the number of methods in classes:
    settings.lo_class_num_methods = 10;
    settings.hi_class_num_methods = 20;

    % thresholds for the number of lines in scripts:
    settings.lo_script_num_lines = 100;
    settings.hi_script_num_lines = 200;
    % thresholds for the number of variables in scripts:
    settings.lo_script_num_variables = 10;
    settings.hi_script_num_variables = 20;
    % thresholds for the level of indentation in scripts:
    settings.lo_script_max_indentation = 4;
    settings.hi_script_max_indentation = 8;

    % thresholds for the number of lines in functions:
    settings.lo_function_num_lines = 50;
    settings.hi_function_num_lines = 100;
    % thresholds for the number of arguments in functions:
    settings.lo_function_num_arguments = 3;
    settings.hi_function_num_arguments = 5;
    % thresholds for the number of variables in functions:
    settings.lo_function_num_variables = 7;
    settings.hi_function_num_variables = 15;
    % thresholds for the level of indentation in functions:
    settings.lo_function_max_indentation = 3;
    settings.hi_function_max_indentation = 6;
    % thresholds for the complexity of functions:
    settings.lo_function_complexity = 10;
    settings.hi_function_complexity = 15;

    % thresholds for the line length of files:
    settings.lo_line_length = 75;
    settings.hi_line_length = 90;

    % threshold for the variable length and spread (spread is the
    % number of lines in which a variable is used).
    % Read this as "if a variable name is less than 3 characters
    % long, it should be used in no more than 3 lines":
    settings.lo_varname_short_length = 3;
    settings.lo_varname_short_spread = 3;
    settings.lo_varname_long_length = 5;
    settings.lo_varname_long_spread = 10;
    settings.hi_varname_short_length = 3;
    settings.hi_varname_short_spread = 5;
    settings.hi_varname_long_length = 5;
    settings.hi_varname_long_spread = 15;

    % switches to switch whole modules on or off:
    settings.do_check_comments = true;
    settings.do_check_documentation = true;
    settings.do_check_eval = true;
    settings.do_check_operators = true;
    settings.do_check_variables = true;
    settings.do_check_mlint_warnings = true;
    settings.do_check_line_length = true;
    settings.do_check_indentation = true;

    % indent by this many spaces per level of indentation:
    settings.indentation_step = 4;
    % Matlab does not indent top-level function bodies. Most other
    % languages would think this behavior funny:
    settings.indentation_check_like_matlab = true;

    % keywords for tokenize_code
    settings.keywords = {'for' 'try' 'while' 'if' 'else' 'elseif' 'switch' ...
                         'case' 'otherwise' 'function' 'classdef' 'methods' ...
                         'properties' 'events' 'enumeration' 'parfor' ...
                         'return' 'break' 'continue' 'catch', 'arguments'};

    % keyword beginnings which are considered for indentation calculation
    settings.beginnings = {'for' 'parfor' 'while' 'if' 'switch' 'classdef' ...
                           'events' 'properties' 'enumeration' 'methods' ...
                           'function' 'try', 'arguments'};
    % keyword middles which are considered for indentation calculation
    settings.middles = {'else' 'elseif' 'case' 'otherwise' 'catch'};

    % fail with a message that names the offending setting instead of a
    % generic "non-existent field" error:
    if ~isfield(settings, name)
        error('check_settings:unknownSetting', ...
              'There is no setting called ''%s''.', name);
    end

    value = settings.(name);
end
92 |
--------------------------------------------------------------------------------
/run_unittests.m:
--------------------------------------------------------------------------------
function run_unittests()
%RUN_UNITTESTS Runs all unit tests
%   RUN_UNITTESTS() runs all tests in the package `UnitTest` and its
%   subpackages and displays the results.
%
%   An error is raised if any test failed or did not complete, or if
%   the tests could not be run at all. The error report is displayed
%   before the error is passed on to the caller.

    import matlab.unittest.TestSuite
    import matlab.unittest.TestRunner

    try
        % Create a test suite
        suite = ...
            TestSuite.fromPackage('UnitTest', ...
                                  'IncludingSubpackages', true);

        % Run all tests
        runner = TestRunner.withTextOutput;
        result = runner.run(suite);

        % Display results
        disp(table(result));
        disp(result);

        % Throw an error if any test failed
        if any([result.Failed]) || any([result.Incomplete])
            error('There are failing unittests!')
        end
    catch err
        disp(err.getReport)
        % do not swallow the error, or failing tests would go unnoticed
        % by whoever called this function:
        rethrow(err)
    end
end
29 |
--------------------------------------------------------------------------------
/testFiles/MatlabArgumentClass.m:
--------------------------------------------------------------------------------
classdef MatlabArgumentClass < matlab.mixin.Heterogeneous
    %MATLABARGUMENTCLASS This is an example class for testing the
    % argument validation
    %
    % Some more comments to make the checker happy
    % Some more comments to make the checker happy
    % returns a new OBJ.

    % Test fixture for UnitTest.tokernizer.FileCheckTests, which runs
    % CHECK on this file and expects no warnings. Edit with care.
    %
    %
    properties (Access = private)
        property1 (1,1) string = "Hello World"

        property2 (1,:) char = 'Hello World'

        property3 {mustBeTextScalar}
    end

    methods (Access = protected)

        function obj = foo_function(input1, input2, options)
            %FOO_FUNCTION This is an example function for testing the
            % indentation check
            % output1 = foo_function: input1, input2
            % Some more comments to make the checker happy

            arguments
                input1 (1,1) string
                input2 {mustBeText}
                options.?matlab.mixin.Heterogeneous
            end

            try
                input1 = 42;
            catch
                input2 = 42;
            end
            % Some more comments to make the checker happy
            if input1
                obj = 1;
            elseif input2
                obj = 2;
            else
                obj = 0;
            end

            obj.property3 = options;

        end

        function foobar = second_function(barfoo)
            %SECOND_FUNCTION This is an example function for testing the
            % indentation check
            % foobar, barfoo
            foobar = barfoo;
        end

        function varargout = variable_length_of_in_and_output(varargin)
            %VARIABLE_LENGTH_OF_IN_AND_OUTPUT is provided with input param
            % VARARGIN and output parameter VARARGOUT
            varargout = varargin;
        end

        function output = test_linebreak_with_continuation_operator(inputarg)
            %TEST_LINEBREAK_WITH_CONTINUATION_OPERATOR is a test to verify
            % line continuation operator
            % INPUTARG, OUTPUT

            assignment_at_first_line = ...
                inputarg;

            assignment_at_second_line = ... some comment
                assignment_at_first_line;

            output = .... 4 dots give also comment
                assignment_at_second_line;
        end

        function test_switch_case(inputarg)
            %TEST_SWITCH_CASE test indentation of switch case
            % INPUTARG
            % Some more comments to make the checker happy

            switch inputarg
                case 1
                    return
                case 2
                    return
                otherwise
                    return
            end
        end
    end
end
95 |
--------------------------------------------------------------------------------
/testFiles/MatlabIndentedClass.m:
--------------------------------------------------------------------------------
classdef MatlabIndentedClass
    %MATLABINDENTEDCLASS This is an example class for testing the
    % indentation check
    %
    % Some more comments to make the checker happy
    % Some more comments to make the checker happy
    % returns a new OBJ.

    % Test fixture for UnitTest.tokernizer.FileCheckTests, which runs
    % CHECK on this file and expects no warnings. Edit with care.
    %
    %
    properties(Access = private)
        foobar
    end

    methods(Access = protected)

        function output1 = foo_function(input1, input2)
            %FOO_FUNCTION This is an example function for testing the
            % indentation check
            % output1 = foo_function: input1, input2
            % Some more comments to make the checker happy

            try
                input1 = 42;
            catch
                input2 = 42;
            end
            % Some more comments to make the checker happy
            if input1
                output1 = 1;
            elseif input2
                output1 = 2;
            else
                output1 = 0;
            end

        end

        function foobar = second_function(barfoo)
            %SECOND_FUNCTION This is an example function for testing the
            % indentation check
            % foobar, barfoo
            foobar = barfoo;
        end

        function varargout = variable_length_of_in_and_output(varargin)
            %VARIABLE_LENGTH_OF_IN_AND_OUTPUT is provided with input param
            % VARARGIN and output parameter VARARGOUT
            varargout = varargin;
        end

        function output = test_linebreak_with_continuation_operator(inputarg)
            %TEST_LINEBREAK_WITH_CONTINUATION_OPERATOR is a test to verify
            % line continuation operator
            % INPUTARG, OUTPUT

            assignment_at_first_line = ...
                inputarg;

            assignment_at_second_line = ... some comment
                assignment_at_first_line;

            output = .... 4 dots give also comment
                assignment_at_second_line;
        end

        function test_switch_case(inputarg)
            %TEST_SWITCH_CASE test indentation of switch case
            % INPUTARG
            % Some more comments to make the checker happy

            switch inputarg
                case 1
                    return
                case 2
                    return
                otherwise
                    return
            end
        end
    end
end
83 |
--------------------------------------------------------------------------------
/test_MatlabIndentedClass.m:
--------------------------------------------------------------------------------
function test_MatlabIndentedClass()
%TEST_MATLABINDENTEDCLASS runs CHECK on the MatlabIndentedClass test file
%   TEST_MATLABINDENTEDCLASS() asserts that the setting
%   `indentation_check_like_matlab` is switched on, and then calls CHECK
%   on testFiles/MatlabIndentedClass.m.
%
%   The folder `testFiles` is on the MATLAB path while CHECK runs. The
%   previous path is restored afterwards, even if CHECK raises an error.

    assert(check_settings('indentation_check_like_matlab') == true)

    % ADDPATH returns the path as it was before the folder was added;
    % the cleanup object puts it back as soon as this function exits:
    previous_path = addpath('testFiles');
    restore_path = onCleanup(@() path(previous_path)); %#ok<NASGU>

    check('testFiles/MatlabIndentedClass.m');

end
--------------------------------------------------------------------------------
/test_check.m:
--------------------------------------------------------------------------------
%% Tokenizing a text should not change the content
% Round trip: concatenating the text of all tokens has to reproduce the
% tokenized file exactly.
text = fileread('check.m');
tokens = tokenize_code(text);
reconstructed_text = horzcat(tokens.text);
assert(strcmp(reconstructed_text, text))


%% Function names should be extracted
% ANALYZE_FILE is called with an empty file name and a token list. The
% `name` field of the report holds the token of the function name,
% regardless of the number of return values.
report = analyze_file('', tokenize_code('function foo(); end'));
assert(strcmp(report.name.text, 'foo'))

report = analyze_file('', tokenize_code('function x = foo(); end'));
assert(strcmp(report.name.text, 'foo'))

report = analyze_file('', tokenize_code('function [x, y] = foo(); end'));
assert(strcmp(report.name.text, 'foo'))


%% Function return names should be extracted
% The `returns` field of the report holds one token per return value, in
% the order of declaration. It is empty if nothing is returned.
report = analyze_file('', tokenize_code('function foo(); end'));
assert(isempty(report.returns))

report = analyze_file('', tokenize_code('function x = foo(); end'));
assert(strcmp(report.returns(1).text, 'x'))
assert(length(report.returns) == 1)

report = analyze_file('', tokenize_code('function [x, y] = foo(); end'));
assert(strcmp(report.returns(1).text, 'x'))
assert(strcmp(report.returns(2).text, 'y'))
assert(length(report.returns) == 2)


%% Function arguments should be extracted
% The `arguments` field of the report holds one token per argument, in
% the order of declaration. It is empty if there are no arguments.
report = analyze_file('', tokenize_code('function foo(); end'));
assert(isempty(report.arguments))

report = analyze_file('', tokenize_code('function foo(x); end'));
assert(strcmp(report.arguments(1).text, 'x'))
assert(length(report.arguments) == 1)

report = analyze_file('', tokenize_code('function foo(x, y); end'));
assert(strcmp(report.arguments(1).text, 'x'))
assert(strcmp(report.arguments(2).text, 'y'))
assert(length(report.arguments) == 2)


%% Operators should be parsed correctly
% `>=-` must be split into the binary operator `>=` and the unary `-`
tokens = tokenize_code('a>=-b');
assert(tokens(2).hasText('>='))
assert(tokens(3).hasText('-'))


%% Transpose Operators should not be strings
% a quote after a value is a transpose; the tokenized text is: a'
tokens = tokenize_code('a''');
assert(tokens(2).isEqual('punctuation', ''''))

% the tokenized text is: a.'
tokens = tokenize_code('a.''');
assert(tokens(2).isEqual('punctuation', '.'''))

% the tokenized text is: a'+'a'.'
% (transpose, plus, the string 'a', and an element-wise transpose)
tokens = tokenize_code('a''+''a''.''');
assert(tokens(2).isEqual('punctuation', ''''))
assert(tokens(4).isEqual('string', '''a'''))
assert(tokens(5).isEqual('punctuation', '.'''))


%% differentiate commands from expressions
% in command syntax, the words after the command name are strings.
% Only the odd tokens are checked; the tokens in between are presumably
% the separating spaces.
tokens = tokenize_code('help me please % test');
assert(tokens(1).isEqual('identifier', 'help'))
assert(tokens(3).isEqual('string', 'me'))
assert(tokens(5).isEqual('string', 'please'))
assert(tokens(7).isEqual('comment', '% test'))


%% differentiate keyword end from variable end
% an `end` inside parentheses is an index (identifier), whereas the `end`
% that closes the `if` block is a keyword
tokens = tokenize_code('if a(end); end');
assert(tokens(5).isEqual('identifier', 'end'))
assert(tokens(9).isEqual('keyword', 'end'))


%% differentiate semicolons from linebreaks
% the `;` inside the brackets is punctuation, the `;` after the closing
% bracket is a linebreak
tokens = tokenize_code('[1;2];3');
assert(tokens(3).isEqual('punctuation', ';'))
assert(tokens(6).isEqual('linebreak', ';'))


%% Identify block comments
% a block comment, including its line breaks, is one single comment token
comment = sprintf('%%{ \n foo bar \n %%}');
tokens = tokenize_code(comment);
assert(length(tokens) == 1)
assert(tokens.isEqual('comment', comment))

% the block comment is the third of five tokens here
% (presumably: x, line break, comment, line break, x)
tokens = tokenize_code(sprintf('x\n%s\nx', comment));
assert(length(tokens) == 5)
assert(tokens(3).isEqual('comment', comment))


%% line breaks should break lines
% `,` and `;` are linebreak tokens outside of brackets. The `foo bar`
% that follows is parsed as a command, so `bar` is a string.
tokens = tokenize_code(',foo bar');
assert(tokens(1).hasType('linebreak'))
assert(tokens(4).hasType('string'))

tokens = tokenize_code(';foo bar');
assert(tokens(1).hasType('linebreak'))
assert(tokens(4).hasType('string'))


%% line breaks should not break lines within brackets
% inside of brackets, `;` and `,` are punctuation instead of linebreaks
tokens = tokenize_code('[a;b];');
assert(tokens(3).hasType('punctuation'))
assert(tokens(6).hasType('linebreak'))

tokens = tokenize_code('[a,b],');
assert(tokens(3).hasType('punctuation'))
assert(tokens(6).hasType('linebreak'))

%% comments follow continuation operator
% everything after `...` on the same line is a comment, with or without
% a leading `%`
tokens = tokenize_code('... % this is a comment');
assert(tokens(1).hasType('punctuation'));
assert(tokens(3).hasType('comment'));

tokens = tokenize_code('... this is a comment');
assert(tokens(1).hasType('punctuation'));
assert(tokens(2).hasType('space'));
assert(tokens(3).hasType('comment'));

% four dots are a continuation operator followed by a comment
tokens = tokenize_code('....');
assert(tokens(1).hasType('punctuation'));
assert(tokens(2).hasType('comment'));

% an operator directly followed by the continuation operator gives two
% punctuation tokens
tokens = tokenize_code('.*...');
assert(tokens(1).hasType('punctuation'));
assert(tokens(2).hasType('punctuation'));
--------------------------------------------------------------------------------
/tokenize_code.m:
--------------------------------------------------------------------------------
1 | function tokenlist = tokenize_code(source_code)
2 | %TOKENIZE_CODE splits M-code into Tokens
3 | % TOKENIZE(SOURCE_CODE) splits the SOURCE_CODE into interpretable
4 | % parts. It returns an object array of Tokens TOKENLIST, where each
5 | % token has a 'type', a 'text', a 'line', and a 'col'. Concatenating
6 | % all 'text's recreates the original SOURCE_CODE.
7 | % 'type' can be one of:
8 | % - 'keyword'
9 | % - 'identifier'
10 | % - 'space'
11 | % - 'punctuation'
12 | % - 'property'
13 | % - 'string'
14 | % - 'number'
15 | % - 'pair'
16 | % - 'linebreak'
17 | % - 'comment'
18 | % - 'escape'
19 | %
20 | % See also: Token
21 |
22 | % (c) 2016, Bastian Bechtold
23 | % This code is licensed under the terms of the BSD 3-clause license
24 |
25 | punctuation = '=.&|><~+-*^/\:@?';
26 | open_pairs = '{[(';
27 | close_pairs = '}])';
28 | escapes = '!%';
29 |
30 | keywords = check_settings('keywords');
31 |
32 | operators = { '+' '-' '*' '/' '^' '\' ...
33 | '.+' '.-' '.*' './' '.^' '.\' ...
34 | '>' '<' '~' '==' '>=' '<=' '~=' ...
35 | '@' '=' ',' ';' '||' '&&' '|' '&' '...' ':' '.?'};
36 | unary_operators = '+-@~.';
37 |
38 | spaces = sprintf(' \t');
39 | breaks = sprintf('\n\r');
40 | number_start = '0123456789';
41 | number_body = [number_start 'eEij.'];
42 | name_start = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
43 | name_body = [name_start '0123456789_'];
44 |
45 | tokenlist = Token.empty;
46 | pos = 1; % the current character position in the source_code
47 | line_num = 1; % the current line number
48 | line_start = pos; % where the current line started
49 | is_first_symbol = true; % the first symbol can have special meaning
50 | source_code = [source_code sprintf('\n')]; % ensure proper file end
51 | nesting = 0; % count braces, since some operators have different
52 | % meaning inside and outside braces
53 | while pos < length(source_code)
54 | letter = source_code(pos);
55 | % a variable or a function or a keyword:
56 | if any(letter == name_start)
57 | symbol = skip(name_body);
58 | % keywords such as `if` or `classdef`
59 | if any(strcmp(symbol, keywords))
60 | is_first_symbol = false;
61 | add_token('keyword', symbol);
62 | % the keyword `end`:
63 | elseif strcmp(symbol, 'end') && nesting == 0
64 | add_token('keyword', symbol);
65 | % anything else is just a variable or function name:
66 | else
67 | add_token('identifier', symbol);
68 | % if this is the start of a command, the rest of the line
69 | % needs to be interpreted as strings.
70 | % Note: this is not the case if the identifier is inside a
71 | % 'properties' or 'arguments' block. In that case, the rest of
72 | % the line needs to be interpreted as validation routine.
73 | last_keyword_idx = find(strcmp({tokenlist.type}, 'keyword'), 1, 'last');
74 | is_argument_validation_command = ~isempty(last_keyword_idx) && ...
75 | any(strcmp(tokenlist(last_keyword_idx).text, {'properties' 'arguments'}), 2);
76 | if is_first_symbol && nesting == 0 && ~is_argument_validation_command
77 | is_first_symbol = false;
78 | saved_pos = pos;
79 | first_space = skip(spaces);
80 | first_word = skip_unless([spaces breaks ';,%']);
81 | pos = saved_pos;
82 | % commands are any single identifier that is not
83 | % followed by space-operator-space:
84 | if ~any(strcmp(first_word, operators)) && ...
85 | ~isempty(first_space)
86 | parse_command()
87 | end
88 | end
89 | end
90 | % a sequence of one or more spaces or tabs:
91 | elseif any(letter == spaces)
92 | symbol = skip(spaces);
93 | add_token('space', symbol);
94 | % any binary or unary operator, such as `+`, `>=`, or `.foo`
95 | elseif any(letter == punctuation)
96 | is_first_symbol = false;
97 | % property access begins with a `.` operator, and includes a
98 | % name, such as `.foo`. Classifying this as punctuation makes
99 | % it easier to differentiate it from variable/function names.
100 | if letter == '.' && pos < length(source_code) && ...
101 | any(source_code(pos+1) == name_start)
102 | pos = pos + 1;
103 | symbol = [letter skip(name_body)];
104 | add_token('property', symbol);
105 | % any other operator:
106 | else
107 | symbol = skip(punctuation);
108 | % one operator:
109 | % Multiple operators can be present in 'symbol', e.g. '&&...' or
110 | % '|...'. Find largest operator at start of symbol.
111 | largest_start_operator = find_pattern(operators);
112 | if ~isempty(largest_start_operator)
113 | % Add operator and keep remainder of symbol for next
114 | % iteration.
115 | add_token('punctuation', largest_start_operator);
116 | pos = pos - length(symbol) + length(largest_start_operator);
117 | % All text on the same line after '...' must be interpreted
118 | % as a comment.
119 | if strcmp(largest_start_operator, '...')
120 | symbol = skip(spaces);
121 | if ~isempty(symbol)
122 | add_token('space', symbol)
123 | end
124 | symbol = skip_unless(breaks);
125 | if ~isempty(symbol)
126 | add_token('comment', symbol);
127 | end
128 | end
129 | % a binary operator, followed by a unary operator:
130 | elseif any(symbol(end) == unary_operators) && ...
131 | any(strcmp(symbol(1:end-1), operators))
132 | add_token('punctuation', symbol(1:end-1));
133 | add_token('punctuation', symbol(end));
134 | % element-wise transpose operator:
135 | % This has to be parsed here, so as to not confuse the `'`
136 | % with the beginning of a string.
137 | elseif strcmp(symbol, '.') && source_code(pos) == ''''
138 | pos = pos + 1;
139 | add_token('punctuation', '.''');
140 | % struct access operator such as `.(foo)`:
141 | % There is normally no `.` operator, but it makes sense to
142 | % classify `.(` as such here.
143 | elseif strcmp(symbol, '.') && source_code(pos) == '('
144 | add_token('punctuation', '.');
145 | % this should never happen:
146 | else
147 | error(['unknown operator ''' symbol '''']);
148 | end
149 | end
150 | % strings and transpose begin with `'`. The `.'` operator has
151 | % already been handled above:
152 | elseif letter == ''''
153 | % the first symbol cannot be transpose, so must be string
154 | if is_first_symbol
155 | string = skip_string('''');
156 | add_token('string', string);
157 | else
158 | previous = tokenlist(end);
159 |
160 | % transpose operator:
161 | % To differentiate the start of a string from the
162 | % transpose operator, we need to check whether the
163 | % previous token was a value or an operator. If a value,
164 | % `'` means transpose. If an operator, `'` marks the start
165 | % of a string.
166 | if previous.isEqual('pair', {'}' ']' ')'}) || ...
167 | previous.hasType({'identifier' 'number' 'property'})
168 | pos = pos + 1;
169 | add_token('punctuation', letter);
170 | % strings:
171 | else
172 | string = skip_string('''');
173 | add_token('string', string);
174 | end
175 | end
176 | is_first_symbol = false;
177 | % string that starts with double quotes (")
178 | elseif letter == '"'
179 | is_first_symbol = false;
180 | string = skip_string('"');
181 | add_token('string', string);
182 | % we don't make any distinction between different kinds of parens:
183 | elseif any(letter == open_pairs)
184 | is_first_symbol = false;
185 | pos = pos + 1;
186 | nesting = nesting + 1;
187 | add_token('pair', letter);
188 | elseif any(letter == close_pairs)
189 | pos = pos + 1;
190 | nesting = nesting - 1;
191 | add_token('pair', letter);
192 | % new lines are line breaks and increment the line:
193 | elseif any(letter == breaks)
194 | % split into individual line breaks
195 | start = pos;
196 | line_breaks = regexp(skip(breaks), '(\n)|(\r\n)', 'match');
197 | pos = start;
198 | for line_break = line_breaks
199 | pos = pos + length(line_break{1});
200 | add_token('linebreak', line_break{1});
201 | % add the token before incrementing the line to avoid
202 | % confusing add_token
203 | line_num = line_num + 1;
204 | line_start = pos;
205 | end
206 | is_first_symbol = true;
207 | % `,` and `;` are line breaks that do not increment the line,
208 | % or simple operators if they occur within a pair
209 | elseif any(letter == ';,')
210 | pos = pos + 1;
211 | if nesting == 0
212 | add_token('linebreak', letter);
213 | is_first_symbol = true;
214 | else
215 | add_token('punctuation', letter);
216 | end
217 | % numbers are easy, and may contain `.`, `e`, `E`, `i`, and `j`
218 | elseif any(letter == number_start)
219 | is_first_symbol = false;
220 | symbol = skip(number_body);
221 | add_token('number', symbol);
222 | % finally, comments and `!` include the rest of the line,
223 | % unless they are block comments, in which case they might include
224 | % much more.
225 | elseif any(letter == escapes)
226 | comment = skip_line();
227 | if letter == '%'
228 | if ~isempty(regexp(comment, '^\%\{\s*$', 'once')) && ...
229 | is_first_symbol
230 | comment = [comment skip_block_comment()]; %#ok
231 | end
232 | add_token('comment', comment);
233 | else
234 | add_token('escape', comment);
235 | end
236 | else
237 | error('unknown identifier');
238 | end
239 | end
240 |
function add_token(token_type, token_text)
%ADD_TOKEN appends a new Token to the token list
%   ADD_TOKEN(TOKEN_TYPE, TOKEN_TEXT) creates a Token with the given
%   `type` and `text`, annotates it with the current line number and
%   with the column at which TOKEN_TEXT starts, and appends it.
%   this modifies TOKENLIST!

    % the column is computed backwards from POS, which assumes that POS
    % has already been advanced past TOKEN_TEXT:
    start_col = pos - length(token_text) - line_start + 1;
    new_token = Token(token_type, token_text, line_num, start_col);
    tokenlist(end+1) = new_token;
end
251 |
function string = skip(letters)
%SKIP consumes a run of characters that are in LETTERS
%   STRING = SKIP(LETTERS) advances POS as long as the current character
%   is one of LETTERS, but never beyond the last character of the source
%   code. The skipped characters are returned as STRING.
%   this modifies POS!

    first_pos = pos;
    while true
        % stop at the first foreign character or at the last character:
        if ~any(source_code(pos) == letters) || pos >= length(source_code)
            break
        end
        pos = pos + 1;
    end
    string = source_code(first_pos:pos-1);
end
262 |
function string = skip_unless(letters)
%SKIP_UNLESS consumes characters until one of LETTERS is found
%   STRING = SKIP_UNLESS(LETTERS) advances POS up to (but not including)
%   the next character that is one of LETTERS. The skipped characters
%   are returned as STRING.
%   this modifies POS!

    first_pos = pos;
    while ~any(source_code(pos) == letters)
        pos = pos + 1;
    end
    string = source_code(first_pos:pos-1);
end
274 |
function string = skip_line()
%SKIP_LINE consumes the rest of the current line
%   STRING = SKIP_LINE() advances POS up to (but not including) the next
%   `\r` or `\n`, and returns the skipped characters as STRING.
%   this modifies POS!

    % the rest of the line is everything that is not a line break:
    string = skip_unless(sprintf('\r\n'));
end
285 |
function string = skip_string(quote_type)
%SKIP_STRING skips to the end of the string and returns the STRING
%   the STRING includes both quotation marks. QUOTE_TYPE is the
%   type of quote character to look for (' or "). A doubled quote
%   character inside the string is an escaped quote and does not end
%   the string. If the closing quote is missing, everything up to the
%   end of the source code is returned instead of raising an
%   out-of-range indexing error.
%   this modifies POS!

    string_start = pos;
    % bounded by the source length so that an unterminated literal
    % cannot run past the end of the source:
    while pos <= length(source_code)
        if source_code(pos) ~= quote_type || pos == string_start
            % ordinary character, or the opening quote itself
            pos = pos + 1;
        elseif length(source_code) > pos ...
                && source_code(pos+1) == quote_type
            % doubled quote: an escaped quote, skip both characters
            pos = pos + 2;
        else % source_code(pos) == quote_type
            % closing quote: consume it and stop
            pos = pos + 1;
            break;
        end
    end
    string = source_code(string_start:pos-1);
end
306 |
function string = skip_block_comment()
%SKIP_BLOCK_COMMENT consumes the remainder of a block comment.
%   Starting at POS, the source is scanned until a `%}` is found that
%   is preceded on its line by nothing but tabs or spaces, or until
%   the source is exhausted. Everything that was scanned, including
%   the closing `%}`, is returned as STRING.
%   this modifies POS!

    block_start = pos;
    % true while only tabs/spaces were seen since the last line break:
    is_first_statement = false;
    while pos <= length(source_code)
        this_char = source_code(pos);
        if any(this_char == sprintf('\n\r'))
            % every line break starts a fresh line
            is_first_statement = true;
        elseif ~any(this_char == sprintf('\t '))
            % a non-blank character: either the closing marker ...
            if this_char == '%' && is_first_statement && ...
                    pos < length(source_code) && source_code(pos+1) == '}'
                pos = pos + 2;
                break
            end
            % ... or ordinary comment text, after which a `%}` on
            % this line no longer counts
            is_first_statement = false;
        end
        % (tabs and spaces leave `is_first_statement` untouched)
        pos = pos + 1;
    end
    string = source_code(block_start:pos-1);
end
334 |
function parse_command()
%PARSE_COMMAND tokenizes the arguments of a command-syntax call.
%   Walks from POS to the end of the command and appends a token for
%   every quoted string, every run of spaces, and every other
%   argument (the latter as 'string' tokens) to the token list.
%   this modifies POS and TOKENLIST!

    % characters that terminate the command: line breaks, `%`, `,`, `;`
    cmd_terminators = [breaks '%,;'];
    % characters that terminate a single unquoted argument
    arg_terminators = [breaks spaces '%,;'];

    while pos < length(source_code)
        letter = source_code(pos);
        if any(letter == '''"')
            % quoted literal, delimited by the quote character found
            string_literal = skip_string(letter);
            add_token('string', string_literal);
        elseif any(letter == spaces)
            % run of spaces between arguments
            symbol = skip(spaces);
            add_token('space', symbol);
        elseif any(letter == cmd_terminators)
            % end of the command; the terminator is left unconsumed
            break
        else
            % any other non-space sequence is one string argument
            str = skip_unless(arg_terminators);
            add_token('string', str);
        end
    end
end
363 |
function pat_out = find_pattern(pat)
%FIND_PATTERN Find pattern with most characters in symbol.
%   pat_out = FIND_PATTERN(pat) returns the pattern with which
%   SYMBOL starts and that has the most characters. The input
%   pat is a cell array of character vectors that represent the
%   patterns that should be tested. If symbol does not start
%   with any pattern defined by pat, pat_out is empty. If several
%   matching patterns share the maximum length, the first of them
%   in pat is returned.

    pat_out = '';
    % One logical flag per pattern: does SYMBOL start with it?
    % A prefix comparison is used instead of STRFIND, because STRFIND
    % returns one index per occurrence; a pattern that occurs more than
    % once in SYMBOL (e.g. `~` in `~~`) would otherwise produce more
    % indices than there are patterns and select the wrong entry of
    % pat or index beyond its end. Empty patterns never match.
    is_prefix = cellfun( ...
        @(x) ~isempty(x) && strncmp(symbol, x, length(x)), pat);
    if any(is_prefix)
        start_pat_array = pat(is_prefix);
        % MAX returns the first maximum, i.e. the longest prefix:
        [~, max_start_idx] = max(cellfun(@length, start_pat_array));
        pat_out = start_pat_array{max_start_idx};
    end
end
386 | end
387 |
--------------------------------------------------------------------------------