├── .github
└── workflows
│ └── python-package.yml
├── .gitignore
├── LICENSE
├── README.md
├── mathematica
├── __init__.py
├── builtins.py
├── lexer.py
└── style.py
├── mma.scss
├── requirements.txt
├── setup.cfg
├── setup.py
└── tests
├── __init__.py
└── test_lexer.py
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 |
4 | name: Python package
5 |
6 | on:
7 | push:
8 | branches: [ "master" ]
9 | pull_request:
10 | branches: [ "master" ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 | strategy:
17 | fail-fast: false
18 | matrix:
19 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
20 |
21 | steps:
22 | - uses: actions/checkout@v4
23 | - name: Set up Python ${{ matrix.python-version }}
24 | uses: actions/setup-python@v3
25 | with:
26 | python-version: ${{ matrix.python-version }}
27 | - name: Install dependencies
28 | run: |
29 | python -m pip install --upgrade pip
30 | python -m pip install pynose
31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
32 | - name: Test with pynose
33 | run: |
34 | pynose
35 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | *.iml
3 | build/
4 | dist/
5 | *.egg-info/
6 | __pycache__/
7 | *.pyc
8 | .sass-cache/
9 | scratch/
10 | .ropeproject/
11 | dev/
12 | pyproject.toml
13 | .python-version
14 | uv.lock
15 | .DS_Store
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016 rsmenon
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Mathematica lexer and highlighter for Pygments
2 |
3 | The most up-to-date lexer and highlighter for [_Mathematica_](http://wolfram.com/mathematica)/Wolfram Language
4 | source code using the [pygments](http://pygments.org) engine.
5 |
6 | 
7 | 
8 | 
9 | 
10 | ## Features
11 |
12 | It can currently lex and highlight:
13 |
14 | - All builtin functions in the ``System` `` context including unicode symbols like `π` except those
15 | that use characters from the private unicode space (e.g. `\[FormalA]`).
16 | - User defined symbols, including those in a context.
17 | - All operators including unicode operators like `∈` and `⊕`.
18 | - Comments, including multi line and nested.
19 | - Strings, including multi line and escaped quotes.
20 | - Patterns, slots (including named slots `#name` introduced in version 10) and slot sequences.
21 | - Message names (e.g. the `ivar` in `General::ivar`)
22 | - Numbers including base notation (e.g. `8 ^^ 23 == 19`) and scientific notation (e.g. `1 *^ 3 == 1000`).
23 | - Local variables in `Block`, `With` and `Module`.
24 |
25 | ### Example:
26 | ```
27 | (* An example highlighting the features of
28 | this Pygments plugin for Mathematica *)
29 | lissajous::usage = "An example Lissajous curve.\n" <>
30 | "Definition: f(t) = (sin(3t + π/2), sin(t))"
31 | lissajous = {Sin[2^^11 # + 0.005`10 * 1*^2 * Pi], Sin[#]} &;
32 |
33 | With[{max = 2 Pi, min = 0},
34 | ParametricPlot[lissajous[t], {t, min, max}] /. x_Line :> {Dashed, x}
35 | ]
36 | ```
37 |
38 |
39 | ## Installation
40 |
41 | ### Using `pip`
42 |
43 | Run `pip install pygments-mathematica` from the command line. That's it!
44 |
45 | ### From source code
46 |
47 | If you'd like to make modifications to the color scheme for personal use or if you'd like to try the
48 | most recent release that might not yet be available on PyPI, download and unzip the source code
49 | from the [latest release](https://github.com/rsmenon/pygments-mathematica/releases/latest). After
50 | you've [installed Pygments](http://pygments.org/download/) (`pip install Pygments` works well
51 | if you already have Python set up on your system), run the following from the repo's root directory:
52 |
53 | ```bash
54 | python setup.py install
55 | ```
56 |
57 | ## Usage
58 |
59 | ### Server-side syntax highlighting in Jekyll, Octopress and other static websites
60 |
61 | To highlight _Mathematica_ code using this lexer, enclose the code between these liquid tags:
62 |
63 | ```
64 | {% highlight wl %}
65 |
66 | {% endhighlight %}
67 | ```
68 |
69 | You can also use `wolfram` and `wolfram-language` as the language hint. (See the note at the end of the section.)
70 |
71 | If you are using Jekyll, depending on your setup, you might need to add the following in your `_plugins/ext.rb`:
72 |
73 | ```ruby
74 | require 'pygments'
75 | Pygments.start('/site-packages/pygments/')
76 | ```
77 |
78 | > **NOTE:** Although this lexer is registered with the names `mathematica` and `mma` for use as language hints, the
79 | default lexer that ships with Pygments overrides this. Hence until this is incorporated into the main Pygments repository
80 | please use `wl` or `wolfram` or `wolfram-language` as the language hint.
81 |
82 | ### Highlighting in LaTeX documents
83 |
84 | _Mathematica_ code can be highlighted in LaTeX documents using the [minted](http://mirrors.rit.edu/CTAN/macros/latex/contrib/minted/minted.pdf) (PDF) package.
85 | The following minimal example shows how:
86 |
87 | ```latex
88 | \documentclass{article}
89 | \usepackage[english]{babel}
90 | \usepackage{fontspec}
91 | \setmonofont{Menlo}
92 |
93 | \usepackage{minted}
94 | \usemintedstyle{mathematica}
95 |
96 | \begin{document}
97 | \begin{minted}[linenos=true]{wolfram}
98 | (* An example highlighting the features of
99 | this Pygments plugin for Mathematica *)
100 | lissajous::usage = "An example Lissajous curve.\n" <>
101 | "Definition: f(t) = (sin(3t + Pi/2), sin(t))"
102 | lissajous = {Sin[2^^11 # + 0.005`10 * 1*^2 * π], Sin[#]} &;
103 |
104 | ParametricPlot[lissajous[t], {t, 0, 2 π}] /. x_Line :> {Dashed, x}
105 | \end{minted}
106 | \end{document}
107 | ```
108 |
109 | Saving the above as `mma.tex` and running `xelatex --shell-escape mma.tex` should produce a PDF with highlighted code.
110 |
111 | > *NOTE:* If your LaTeX colors don't show up properly, try deleting your `*.aux`, `*.log` files and any `_minted-mma/` directory before running XeLaTeX again.
112 |
113 | ### Pelican static page generator
114 |
115 | The [Pelican static generator](http://blog.getpelican.com/) is written in Python and uses Pygments by default. To use it there, mark your code blocks with the usual four-space indent and prepend them with `:::wl` if you are using Markdown:
116 |
117 | ```
118 | :::wl
119 | FileNames["CodeGenerator.m", {$InstallationDirectory}, 4]
120 | (*
121 | {"/Applications/Development/Mathematica.app/SystemFiles/Links/GPUTools/CodeGenerator.m"}
122 | *)
123 | ```
124 |
125 | If you are using ReStructuredText, please mark your *Mathematica* code with
126 |
127 | ```
128 | .. code-block:: wl
129 |
130 |
131 | ```
132 |
133 | ### Command line usage
134 |
135 | The `pygmentize` command can be used to invoke this lexer and convert any _Mathematica_ file to an appropriately
136 | highlighted file in a different format. For example, to convert a file `package.m` to a HTML file, run
137 |
138 | ```bash
139 | pygmentize -O full,style=mathematica -f html -l wl -o package.html package.m
140 | ```
141 |
142 | ## Styles
143 |
144 | The default styles that come with Pygments do not go well with _Mathematica_ code. If you're using this lexer
145 | for highlighting source code on a website, use the `mma.scss` [Sass](http://sass-lang.com) file in this repository to obtain good default colors (as shown in the
146 | screenshot). You can, if you choose, modify the colors in the SCSS file and then convert it to CSS
147 | using the `scss` compiler as:
148 |
149 | ```
150 | scss mma.scss > mma.css
151 | ```
152 |
153 | For other applications including command line usage, the lexer ships with a style named `mathematica`.
154 | (See the arguments to the `pygmentize` command in the section above.) To use different colors, modify
155 | the style in `mathematica/style.py` and run `python setup.py install` again.
156 |
157 | If you fancy the default style that ships with the _Mathematica_ notebook, use the `mathematicanotebook` scheme (the name under which the style is registered in `setup.py`).
158 |
159 | ## Limitations
160 |
161 | It cannot highlight lexically and dynamically scoped variables (e.g. the `x` in `With[{x = 1}, x + 1]` or
162 | the `Plus` in `Block[{Plus = Times}, 2 + 3]`, etc.) consistently throughout their scope. This would require a
163 | parser that further processes the stream of tokens and builds an AST that captures the semantics of the language.
164 |
165 | This is currently not a high priority since it is non-trivial to implement within the framework
166 | provided by Pygments, but I am interested in supporting this eventually, so collaborations/pull requests are welcome :)
167 |
168 | ## Acknowledgments
169 |
170 | The lexing rules for _Mathematica_ syntax are largely based on two prior projects:
171 |
172 | - My [vim-mathematica](https://github.com/rsmenon/vim-mathematica) syntax highlighting plugin.
173 | - Patrick Scheibe's [Mathematica plugin for IntelliJ IDEA](https://github.com/halirutan/Mathematica-IntelliJ-Plugin) (if you develop in _Mathematica_ and
174 | haven't seen this yet, please do try it out. It's wonderful!).
175 |
176 |
--------------------------------------------------------------------------------
/mathematica/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2016 rsmenon
3 | # Licensed under the MIT License (https://opensource.org/licenses/MIT)
4 |
5 | from mathematica.lexer import MathematicaLexer
6 | from mathematica.style import MathematicaNotebookStyle, MathematicaStyle
7 |
--------------------------------------------------------------------------------
/mathematica/lexer.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2016 rsmenon
3 | # Licensed under the MIT License (https://opensource.org/licenses/MIT)
4 |
5 | from collections import defaultdict
6 |
7 | from pygments.lexer import RegexLexer, include, words, bygroups
8 | from pygments.token import Token as PToken
9 |
10 | import mathematica.builtins as mma
11 |
12 |
class Regex:
    """Regular-expression source strings for the Mathematica token classes.

    Every attribute except ``UNICODE`` is a pattern *string* (not a compiled
    pattern). The composite patterns are assembled by interpolating the
    simpler ones defined above them, so the definition order matters.
    """

    # Unicode characters allowed inside identifiers: the union of the two
    # unicode system-symbol collections from the builtins module.
    UNICODE = mma.UNICODE_SYSTEM_UNDEFINED_SYMBOLS.union(mma.UNICODE_SYSTEM_SYMBOLS)

    # A letter, ``$`` or allowed unicode character, followed by any number of
    # the same plus digits.
    IDENTIFIER = (rf'[a-zA-ZΑ-Ωα-ω\${"".join(UNICODE)}]'
                  rf'[a-zA-ZΑ-Ωα-ω0-9\${"".join(UNICODE)}]*')

    # Named characters are written as \[Name].
    NAMED_CHARACTER = rf'\\\[{IDENTIFIER}]'

    # Backtick-separated sequence of identifiers / named characters, with an
    # optional leading and trailing backtick.
    SYMBOLS = (rf'[`]?({IDENTIFIER}|{NAMED_CHARACTER})'
               rf'(`({IDENTIFIER}|{NAMED_CHARACTER}))*[`]?')

    # Numeric literals.
    INTEGER = r'[0-9]+'
    FLOAT = rf'({INTEGER})?\.[0-9]+|{INTEGER}\.'
    REAL = rf'({INTEGER}|{FLOAT})`({INTEGER}|{FLOAT})?|{FLOAT}'
    BASE_NUMBER = rf'{INTEGER}\s*\^\^\s*({REAL}|{INTEGER})'
    SCIENTIFIC_NUMBER = rf'({REAL}|{INTEGER})\s*\*\^\s*{INTEGER}'

    # One to three underscores with a symbol on at least one side.
    PATTERNS = (rf'{SYMBOLS}:?\_{{1,3}}({SYMBOLS})?'
                rf'|({SYMBOLS})?:?\_{{1,3}}{SYMBOLS}')

    # #name, #"name", and #/## optionally followed by digits.
    SLOTS = rf'#{SYMBOLS}|#\"{SYMBOLS}\"|#{{1,2}}[0-9]*'

    # Three groups: the ``::`` operator, optional whitespace, the message name.
    MESSAGES = rf'(::)(\s*)({SYMBOLS})'

    # Alternations generated by Pygments from the literal grouping / operator
    # lists in the builtins module.
    GROUPINGS = words(mma.GROUPINGS).get()
    OPERATORS = words(mma.OPERATORS).get()
29 |
30 |
class MToken:
    """Lexer-specific names for the Pygments token types this package emits."""

    # Symbol-like tokens (all live under ``Token.Name``).
    SYMBOL = PToken.Name.Variable
    BUILTIN = PToken.Name.Builtin
    LOCAL_SCOPE = PToken.Name.Variable.Class
    PATTERN = PToken.Name.Tag
    SLOT = PToken.Name.Function
    MESSAGE = PToken.Name.Exception

    # Literals and comments.
    NUMBER = PToken.Number
    STRING = PToken.String
    COMMENT = PToken.Comment

    # Structure.
    OPERATOR = PToken.Operator
    GROUP = PToken.Punctuation
    WHITESPACE = PToken.Text.Whitespace

    # Input that no lexer rule matched.
    UNKNOWN = PToken.Error
45 |
46 |
class MathematicaLexer(RegexLexer):
    """Pygments lexer for Mathematica / Wolfram Language source code.

    The regex rules in ``tokens`` produce a raw token stream;
    ``get_tokens_unprocessed`` then passes every token through the
    ``MathematicaAnnotations`` passes (builtins, unicode, lexical scope) to
    refine its type.
    """

    name = 'Mathematica'
    aliases = ['mathematica', 'mma', 'nb', 'wl', 'wolfram', 'wolfram-language']
    filenames = ['*.cdf', '*.m', '*.ma', '*.nb', '*.wl']
    mimetypes = [
        'application/mathematica',
        'application/vnd.wolfram.mathematica',
        'application/vnd.wolfram.mathematica.package',
        'application/vnd.wolfram.cdf',
        'application/vnd.wolfram.cdf.text',
    ]
    tokens = {
        'root': [
            (r'\(\*', MToken.COMMENT, 'comments'),
            (r'"', MToken.STRING, 'strings'),
            include('numbers'),
            (Regex.PATTERNS, MToken.PATTERN),
            (Regex.SLOTS, MToken.SLOT),
            (Regex.GROUPINGS, MToken.GROUP),
            (Regex.MESSAGES, bygroups(MToken.OPERATOR, MToken.WHITESPACE, MToken.MESSAGE)),
            (Regex.OPERATORS, MToken.OPERATOR),
            (Regex.SYMBOLS, MToken.SYMBOL),
            (r'\s+', MToken.WHITESPACE),
        ],
        'comments': [
            # Bulk text that cannot open or close a comment.
            (r'[^\*\(\)]+', MToken.COMMENT),
            # Nested comment open / close.
            (r'\(\*', MToken.COMMENT, '#push'),
            (r'\*\)', MToken.COMMENT, '#pop'),
            # Any other lone '*', '(' or ')'. Exactly ONE character is consumed
            # so that a following '*)' or '(*' is still seen by the rules
            # above. Consuming two characters here made comments such as
            # '(***)' never terminate and '((* ... *)' lose a nesting level.
            (r'[\*\(\)]', MToken.COMMENT),
        ],
        'numbers': [
            (Regex.BASE_NUMBER, MToken.NUMBER),
            (Regex.SCIENTIFIC_NUMBER, MToken.NUMBER),
            (Regex.REAL, MToken.NUMBER),
            (Regex.INTEGER, MToken.NUMBER),
        ],
        'strings': [
            (r'([^"\\]|\\.)+', MToken.STRING),
            (r'"', MToken.STRING, '#pop'),
        ],
    }

    def get_tokens_unprocessed(self, text, stack=('root', )):
        """Yield ``(index, token, value)`` triples for ``text``.

        The triples come from ``RegexLexer.get_tokens_unprocessed`` and are
        then refined, in order, by the ``builtins``, ``unicode`` and
        ``lexical_scope`` passes of a fresh ``MathematicaAnnotations``
        instance (fresh so that scope state never leaks between calls).

        ``stack`` is the initial state stack and is forwarded to the base
        implementation.
        """
        annotator = MathematicaAnnotations()
        passes = (annotator.builtins, annotator.unicode, annotator.lexical_scope)
        for result in RegexLexer.get_tokens_unprocessed(self, text, stack):
            for annotate in passes:
                result = annotate(*result)

            yield result
99 |
100 |
class _State(dict):
    """A dict whose keys can also be read and written as attributes.

    Reading an attribute that is not a key yields ``None`` instead of raising
    ``AttributeError``; assigning an attribute stores it as a key.
    """

    def __setattr__(self, attr, value):
        # Attribute assignment is routed straight into the mapping.
        dict.__setitem__(self, attr, value)

    def __getattr__(self, attr):
        # Only called when normal attribute lookup fails.
        if attr in self:
            return self[attr]
        return None
106 |
107 |
class MathematicaAnnotations:
    """Token post-processing passes applied after the regex lexer.

    Each public method takes an ``(index, token, value)`` triple and returns a
    triple of the same shape, so the passes can be chained. ``builtins`` and
    ``unicode`` are stateless; ``lexical_scope`` keeps state in ``self.scope``
    to follow the local variables declared by ``Block``, ``With`` and
    ``Module``.
    """

    def __init__(self):
        self.scope = _State()
        self._reset_scope_state()

    @staticmethod
    def builtins(index, token, value):
        """Retag a symbol as a builtin when its name is in ``mma.SYSTEM_SYMBOLS``."""
        if token is MToken.SYMBOL and value in mma.SYSTEM_SYMBOLS:
            return index, MToken.BUILTIN, value
        return index, token, value

    @staticmethod
    def unicode(index, token, value):
        """Classify unicode characters using the tables in the builtins module.

        Tokens the regex lexer could not match (``MToken.UNKNOWN``) are looked
        up in the unicode symbol / grouping / operator tables, in that order of
        precedence. Symbols that are unicode system symbols become builtins.
        Everything else is returned unchanged.
        """
        if token is MToken.UNKNOWN:
            if value in mma.UNICODE_SYSTEM_SYMBOLS:
                new_token = MToken.BUILTIN
            elif value in mma.UNICODE_GROUPINGS:
                new_token = MToken.GROUP
            elif value in mma.UNICODE_OPERATORS:
                new_token = MToken.OPERATOR
            elif value in mma.UNICODE_SYSTEM_UNDEFINED_SYMBOLS:
                new_token = MToken.SYMBOL
            else:
                new_token = MToken.UNKNOWN
            return index, new_token, value
        elif token is MToken.SYMBOL and value in mma.UNICODE_SYSTEM_SYMBOLS:
            new_token = MToken.BUILTIN
            return index, new_token, value
        else:
            return index, token, value

    def _reset_scope_state(self):
        """Put the scope tracker back into its initial (inactive) state."""
        # keyword = True denotes the presence of a trigger symbol such as Block, With, Module
        # When keyword is True and is followed by a [, then the parser enters an active state
        self.scope.keyword = False
        self.scope.active = False

        # level tracks the nestedness of local scopes (e.g. Block[{x = Block[{y = ...}, ...]}, ...])
        self.scope.level = 0

        # The next three variables are per-level counters that track opening and closing
        # brackets, braces and other groupings (associations, angle brackets, etc.).
        # Braces are tracked only immediately after entering an active scope, which is where the
        # local variables are defined.
        self.scope.brackets = defaultdict(int)
        self.scope.braces = defaultdict(int)
        self.scope.other_groups = defaultdict(int)

        # stack_state is a tuple of the above three counters at each level when the parser is inside
        # a local variable definition region. i.e. when the parser is at { in Block[{x = 1}, x]
        self.scope.stack_state = defaultdict(int)

        # variables is the set of symbols/builtins that have been identified as being in a local
        # scope at each level. rhs is True when the parser is in the RHS of an assignment (= or :=)
        self.scope.variables = defaultdict(set)
        self.scope.rhs = defaultdict(bool)

    def _reset_scope_level(self, level):
        """Drop every per-level entry recorded for ``level`` (no-op if absent)."""
        per_level_maps = (self.scope.brackets, self.scope.braces, self.scope.other_groups,
                          self.scope.stack_state, self.scope.variables, self.scope.rhs)
        for per_level in per_level_maps:
            per_level.pop(level, None)

    def _get_stack_state(self, level):
        """Return the ``(brackets, braces, other_groups)`` counters at ``level``."""
        return (
            self.scope.brackets[level],
            self.scope.braces[level],
            self.scope.other_groups[level],
        )

    def lexical_scope(self, index, token, value):
        """Retag local variables of ``Block``/``With``/``Module`` as ``LOCAL_SCOPE``.

        This is a small state machine driven one token at a time. Whitespace is
        passed through without touching any state. All other tokens either
        update the bracket/brace bookkeeping or, for symbols and builtins, are
        retagged when they are declared in (or refer to) the local variable
        list of the innermost active scope.
        """
        # Snapshot of the level on entry; '[' deliberately uses the updated
        # self.scope.level instead (see below).
        level = self.scope.level
        if token is MToken.WHITESPACE:
            return index, token, value

        if self.scope.active and token is MToken.GROUP and value in ('<|', u'〈', u'〚'):
            self.scope.other_groups[level] += 1
            return index, token, value
        elif self.scope.active and token is MToken.GROUP and value in ('|>', u'〛', u'〉'):
            self.scope.other_groups[level] -= 1
            return index, token, value

        if self.scope.active and token is MToken.GROUP and value == '}':
            if self.scope.braces[level]:
                self.scope.braces[level] -= 1

            # Leaving the local variables section also ends any pending RHS.
            if not self.scope.braces[level]:
                self.scope.rhs[level] = False

            return index, token, value

        if self.scope.active and token is MToken.GROUP and value == ']':
            if self.scope.brackets[level]:
                self.scope.brackets[level] -= 1
                # The bracket that opened this scope has been closed.
                if not self.scope.brackets[level] and level:
                    self._reset_scope_level(level)
                    self.scope.level -= 1

            # The outermost scope is closed: go back to the inactive state.
            if not self.scope.level:
                self._reset_scope_state()

            return index, token, value

        if token is MToken.BUILTIN and value in ('Block', 'With', 'Module'):
            self.scope.keyword = True
            return index, token, value

        if token is MToken.GROUP and value == '[':
            # Enter an active state only if the preceding non-whitespace token is one of the scope
            # keyword symbols. If it is already in an active state, the counter is incremented.
            if self.scope.keyword:
                self.scope.active = True
                self.scope.level += 1
                self.scope.keyword = False

            # Uses self.scope.level (not the local snapshot) so that the
            # bracket is counted at the level that was just entered.
            if self.scope.active:
                self.scope.brackets[self.scope.level] += 1

            return index, token, value

        if self.scope.active and token is MToken.GROUP and value == '{':
            if level not in self.scope.variables:
                # The parser is not yet in the local variables section so initialize counters and
                # containers and take a snapshot of the stack state. The frozen stack state is used
                # later to identify the end of the RHS in an assignment expression.
                self.scope.variables[level] = set()
                self.scope.braces[level] += 1
                self.scope.stack_state[level] = self._get_stack_state(level)
            elif level in self.scope.variables and self.scope.braces[level]:
                # The parser is inside the local variables section.
                self.scope.braces[level] += 1
            else:
                # In all other cases don't modify the stack.
                pass

            return index, token, value

        if (self.scope.active and self.scope.braces[level] and
                token in (MToken.SYMBOL, MToken.BUILTIN)):
            # The parser is inside the local variables section and on a builtin or a generic symbol
            # token. If it isn't in the RHS of an assignment expression, then modify the token and
            # add the value to the list of local scope variables at this level.
            if not self.scope.rhs[level]:
                self.scope.variables[level].add(value)
                return index, MToken.LOCAL_SCOPE, value
            else:
                return index, token, value

        elif self.scope.active and self.scope.braces[level]:
            # If the parser is on an assignment operator, mark rhs = True so that symbols from the
            # RHS of the assignment are not considered as local variables. The rhs value is reset
            # when:
            #   1. the parser is on a , inside the local variables section and the stack state
            #      is the same as when it entered the section. For example, in
            #      Block[{x = 1, y = 2}, x + y], the stack state is the same at { and the first ,.
            #      But in Block[{x = {1, a}, y = 2}, x + y], the stack state is not the same at {
            #      and the first , so it is still part of the RHS.
            #   2. if it has exited the local variables section (handled earlier)
            if token is MToken.OPERATOR and value in ('=', ':='):
                self.scope.rhs[level] = True
            elif (token is MToken.GROUP and value == ',' and
                  self._get_stack_state(level) == self.scope.stack_state[level]):
                self.scope.rhs[level] = False

            return index, token, value

        elif self.scope.active and token in (MToken.SYMBOL, MToken.BUILTIN):
            # If the code has reached here, the parser is outside the local variables section and in
            # the body of the scoping function.
            if value in self.scope.variables[level]:
                return index, MToken.LOCAL_SCOPE, value
            else:
                return index, token, value

        # Any other token cancels a pending scope keyword.
        self.scope.keyword = False
        return index, token, value
284 |
--------------------------------------------------------------------------------
/mathematica/style.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2016 rsmenon
3 | # Licensed under the MIT License (https://opensource.org/licenses/MIT)
4 |
5 | from pygments.style import Style
6 |
7 | from mathematica.lexer import MToken
8 |
9 |
class MathematicaStyle(Style):
    """Colour scheme for Mathematica tokens on a near-white background."""

    background_color = '#fefefe'
    default_style = ''

    styles = {
        # Symbol-like tokens
        MToken.SYMBOL: '#4b78b1',
        MToken.BUILTIN: '#353f42',
        MToken.LOCAL_SCOPE: '#5d9066',
        MToken.MESSAGE: '#ab466a',
        # Patterns and slots share one look
        MToken.PATTERN: 'italic #6E8413',
        MToken.SLOT: 'italic #6E8413',
        # Literals and comments
        MToken.NUMBER: '#b66a4b',
        MToken.STRING: '#499A9F',
        MToken.COMMENT: 'italic #aaaaaa',
        # Operators, groupings and unmatched input share one grey
        MToken.OPERATOR: '#555555',
        MToken.GROUP: '#555555',
        MToken.UNKNOWN: '#555555',
    }
27 |
28 |
class MathematicaNotebookStyle(Style):
    """All-bold colour scheme for Mathematica tokens on a white background."""

    background_color = '#ffffff'
    default_style = ''

    styles = {
        # Symbol-like tokens
        MToken.SYMBOL: 'bold #002CC3',
        MToken.BUILTIN: 'bold #000000',
        MToken.LOCAL_SCOPE: 'bold #3C7D91',
        MToken.MESSAGE: 'bold #666666',
        # Patterns and slots share one look
        MToken.PATTERN: 'bold italic #438958',
        MToken.SLOT: 'bold italic #438958',
        # Literals and comments
        MToken.NUMBER: 'bold #000000',
        MToken.STRING: 'bold #666666',
        MToken.COMMENT: 'bold #999999',
        # Operators, groupings and unmatched input are plain bold black
        MToken.OPERATOR: 'bold #000000',
        MToken.GROUP: 'bold #000000',
        MToken.UNKNOWN: 'bold #000000',
    }
46 |
--------------------------------------------------------------------------------
/mma.scss:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 rsmenon
3 | * Licensed under the MIT License (https://opensource.org/licenses/MIT)
4 | */
5 | $light-gray: rgb(153, 153, 153);
6 | $dark-gray: rgb(102, 102, 102);
7 | $black: #000;
8 | $orange: #b66a4b;
9 | $magenta: rgb(221, 17, 0);
10 | $blue: rgb(0, 44, 195);
11 | $cyan: #499A9F;
12 | $green: rgb(67, 137, 88);
13 |
14 | .highlight-wl .highlight,
15 | .language-mathematica,
16 | .language-mma,
17 | .language-nb,
18 | .language-wl,
19 | .language-wolfram,
20 | .language-wolfram-language {
21 | .c { // Comment
22 | color: $light-gray;
23 | font-style: italic;
24 | }
25 | .nb { // Builtins
26 | color: $black;
27 | }
28 | .nf { // Slots
29 | color: $green;
30 | font-style: italic;
31 | }
32 | .nt { // Patterns
33 | color: $green;
34 | font-style: italic;
35 | }
36 | .nv { // Non-builtin symbols
37 | color: $blue;
38 | }
39 | .o { // Operators
40 | color: $black;
41 | }
42 | .p { // Groupings
43 | color: $black;
44 | }
45 | .s { // Strings
46 | color: $dark-gray;
47 | }
48 | .vc { // Local scope variables
49 | color: $green;
50 | }
51 | .err { // Any unrecognized input that is not lexed correctly
52 | color: $dark-gray;
53 | }
54 | * {
55 | color: $black;
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | build==1.2.2.post1
2 | packaging==24.2
3 | Pygments==2.19.1
4 | pyproject_hooks==1.2.0
5 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 0
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2025 rsmenon
3 | # Licensed under the MIT License (https://opensource.org/licenses/MIT)
4 |
"""This is the most up-to-date lexer and highlighter for Mathematica/Wolfram Language source code \
using the pygments engine.

It currently supports:

- All builtin functions in the ``System`` context including unicode symbols like ``π`` except \
those that use characters from the private unicode space (e.g. ``\\[FormalA]``).
- User defined symbols, including those in a context.
- All operators including unicode operators like ``∈`` and ``⊕``.
- Comments, including multi line and nested.
- Strings, including multi line and escaped quotes.
- Patterns, slots (including named slots ``#name`` introduced in version 10) and slot sequences.
- Message names (e.g. the ivar in ``General::ivar``)
- Numbers including base notation (e.g. ``8 ^^ 23 == 19``) and scientific notation \
(e.g. ``1 *^ 3 == 1000``).
- Local variables in ``Block``, ``With`` and ``Module``.

A Sass file containing the styles can be obtained from the package repository for use in static \
website generators such as Jekyll, Octopress, Pelican, etc.

© 2025 rsmenon
"""

# NOTE: the module docstring above doubles as the package's long description.
# It is a normal (non-raw) string because it relies on backslash-newline line
# continuations, so the literal backslash in ``\[FormalA]`` is written as
# ``\\[``. A bare ``\[`` is an invalid escape sequence and triggers a
# SyntaxWarning on Python 3.12+.

from setuptools import setup

setup(
    name='pygments-mathematica',
    version='0.4.2',
    description='Mathematica/Wolfram Language Lexer for Pygments',
    long_description=__doc__,
    author='rsmenon',
    author_email='rsmenon@icloud.com',
    license='MIT',
    keywords='syntax highlighting mathematica',
    url='http://github.com/rsmenon/pygments-mathematica/',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: End Users/Desktop',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'Programming Language :: Python :: 3.13',
        'Topic :: Text Processing',
        'Topic :: Utilities',
    ],
    packages=['mathematica'],
    install_requires=['Pygments >= 2.19'],
    include_package_data=False,
    platforms=['any'],
    # Pygments discovers the lexer and the two styles through these entry
    # points; the entry-point names of the styles are the style names users
    # pass to Pygments.
    entry_points={
        'pygments.lexers': [
            'MathematicaLexer = mathematica:MathematicaLexer'
        ],
        'pygments.styles': [
            'mathematica = mathematica:MathematicaStyle',
            'mathematicanotebook = mathematica:MathematicaNotebookStyle'
        ],
    },
    zip_safe=False
)
69 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2016 rsmenon
3 | # Licensed under the MIT License (https://opensource.org/licenses/MIT)
4 |
--------------------------------------------------------------------------------
/tests/test_lexer.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) 2016 rsmenon
3 | # Licensed under the MIT License (https://opensource.org/licenses/MIT)
4 |
5 | from nose.tools import assert_equal
6 | from pygments.token import Token
7 |
8 | import mathematica.builtins as mma
9 | from mathematica.lexer import MathematicaLexer, MToken
10 |
11 |
12 | class TestMathematicaLexer:
13 | def setup(self):
14 | self.lexer = MathematicaLexer()
15 |
16 | def verify(self, code, expected):
17 | expected.append((Token.Text.Whitespace, '\n'))
18 | returned = list(self.lexer.get_tokens(code))
19 | assert_equal(expected, returned)
20 |
21 | def verify_all(self, code_list, expected_list):
22 | for code, expected in zip(code_list, expected_list):
23 | self.verify(code, expected)
24 |
25 | def test_comments(self):
26 | code = '(* a comment *)'
27 | expected = [
28 | (MToken.COMMENT, '(*'),
29 | (MToken.COMMENT, ' a comment '),
30 | (MToken.COMMENT, '*)')
31 | ]
32 | self.verify(code, expected)
33 |
34 | def test_comments_with_code(self):
35 | code = '(* Plot[Sin[x], {x, 0, 2 Pi}] *)'
36 | expected = [
37 | (MToken.COMMENT, '(*'),
38 | (MToken.COMMENT, ' Plot[Sin[x], {x, 0, 2 Pi}] '),
39 | (MToken.COMMENT, '*)')
40 | ]
41 | self.verify(code, expected)
42 |
43 | def test_nested_comments(self):
44 | code = '(* foo (* bar *) baz *)'
45 | expected = [
46 | (MToken.COMMENT, '(*'),
47 | (MToken.COMMENT, ' foo '),
48 | (MToken.COMMENT, '(*'),
49 | (MToken.COMMENT, ' bar '),
50 | (MToken.COMMENT, '*)'),
51 | (MToken.COMMENT, ' baz '),
52 | (MToken.COMMENT, '*)'),
53 | ]
54 | self.verify(code, expected)
55 |
56 | def test_multiline_comment(self):
57 | code = '(* a comment\non two lines *)'
58 | expected = [
59 | (MToken.COMMENT, '(*'),
60 | (MToken.COMMENT, ' a comment\non two lines '),
61 | (MToken.COMMENT, '*)'),
62 | ]
63 | self.verify(code, expected)
64 |
65 | def test_strings(self):
66 | code = [
67 | '"a string"',
68 | '"a string \\" with a quote"',
69 | '"a string with a newline\\n"',
70 | '"a string with \\ two backslashes"',
71 | ]
72 | expected = [
73 | [
74 | (MToken.STRING, '"'),
75 | (MToken.STRING, 'a string'),
76 | (MToken.STRING, '"'),
77 | ],
78 | [
79 | (MToken.STRING, '"'),
80 | (MToken.STRING, 'a string \\" with a quote'),
81 | (MToken.STRING, '"'),
82 | ],
83 | [
84 | (MToken.STRING, '"'),
85 | (MToken.STRING, 'a string with a newline\\n'),
86 | (MToken.STRING, '"'),
87 | ],
88 | [
89 | (MToken.STRING, '"'),
90 | (MToken.STRING, 'a string with \\ two backslashes'),
91 | (MToken.STRING, '"'),
92 | ]
93 | ]
94 | self.verify_all(code, expected)
95 |
def test_integers(self):
    # An integer literal is expected as a single NUMBER token.
    literal = '123'
    self.verify(literal, [(MToken.NUMBER, literal)])
100 |
def test_floats(self):
    # Decimal literals, including a leading-dot form, are each expected
    # as a single NUMBER token.
    literals = ['1.23', '10.1', '.123']
    tokens = [[(MToken.NUMBER, text)] for text in literals]
    self.verify_all(literals, tokens)
105 |
def test_precision_numbers(self):
    # Numbers carrying a backtick precision mark are each expected as a
    # single NUMBER token.
    literals = ['1`', '1.2`', '1.23`30', '20`20']
    tokens = [[(MToken.NUMBER, text)] for text in literals]
    self.verify_all(literals, tokens)
110 |
def test_base_numbers(self):
    # Base-prefixed numbers written with ^^, with or without surrounding
    # spaces, are each expected as a single NUMBER token.
    literals = ['2^^101', '8 ^^ 17', '10^^ 3.4']
    tokens = [[(MToken.NUMBER, text)] for text in literals]
    self.verify_all(literals, tokens)
115 |
def test_scientific_number(self):
    # Numbers using the *^ exponent form are each expected as a single
    # NUMBER token.
    literals = ['1*^3', '2 *^23', '1.23*^4']
    tokens = [[(MToken.NUMBER, text)] for text in literals]
    self.verify_all(literals, tokens)
120 |
def test_patterns(self):
    # Blank, named, and context-qualified pattern forms are each expected
    # as a single PATTERN token.
    anonymous = ['_Head', '__Head', '___Head']
    named = ['x_Head', 'x__Head', 'x___Head']
    context_named = ['Foo`Bar_Head', 'Foo`Bar__Integer', 'Foo`Bar___Baz']
    context_heads = [
        'Foo`Bar_Ctx`Baz', 'Foo`Bar__Ctx`Baz', 'Foo`Bar___Ctx`Baz`Qux',
    ]
    patterns = anonymous + named + context_named + context_heads
    tokens = [[(MToken.PATTERN, text)] for text in patterns]
    self.verify_all(patterns, tokens)
130 |
def test_slots(self):
    # A bare or numbered slot is expected as a single SLOT token.
    slots = ['#', '#1', '#234']
    tokens = [[(MToken.SLOT, text)] for text in slots]
    self.verify_all(slots, tokens)
135 |
def test_slot_sequences(self):
    # A bare or numbered slot sequence (##) is expected as a single SLOT
    # token.
    sequences = ['##', '##2', '##23']
    tokens = [[(MToken.SLOT, text)] for text in sequences]
    self.verify_all(sequences, tokens)
140 |
def test_association_slots(self):
    # Slots keyed by a name, a quoted string, or a context-qualified name
    # are each expected as a single SLOT token.
    slots = ['#foo', '#"foo"', '#foo`bar', '#Foo$1`Bar2$']
    tokens = [[(MToken.SLOT, text)] for text in slots]
    self.verify_all(slots, tokens)
145 |
def test_operators(self):
    # Every entry of mma.OPERATORS, lexed on its own, is expected as a
    # single OPERATOR token.
    operators = mma.OPERATORS
    tokens = [[(MToken.OPERATOR, symbol)] for symbol in operators]
    self.verify_all(operators, tokens)
150 |
def test_messages(self):
    # head::tag is expected as the head token (BUILTIN or SYMBOL), the
    # '::' OPERATOR, then the tag as a MESSAGE token.
    cases = [
        (MToken.BUILTIN, 'General', 'foo'),
        (MToken.SYMBOL, 'Foo', 'bar'),
        (MToken.SYMBOL, 'Foo`Bar', 'baz'),
    ]
    sources = [head + '::' + tag for _, head, tag in cases]
    tokens = [
        [(head_kind, head), (MToken.OPERATOR, '::'), (MToken.MESSAGE, tag)]
        for head_kind, head, tag in cases
    ]
    self.verify_all(sources, tokens)
171 |
def test_symbols(self):
    # Plain, context-qualified, and $-prefixed names are each expected as
    # a single SYMBOL token.
    plain = ['foo', 'Foo', 'camelCase']
    qualified = ['Context`symbol', '`symbol', '$foo`bar', '$Bar`Baz`Qux']
    names = plain + qualified
    tokens = [[(MToken.SYMBOL, name)] for name in names]
    self.verify_all(names, tokens)
176 |
177 | def test_get(self):
178 | code = ['<'),
339 | (MToken.NUMBER, '1'),
340 | (MToken.GROUP, ','),
341 | (MToken.SYMBOL, 'b'),
342 | (MToken.OPERATOR, '->'),
343 | (MToken.NUMBER, '2'),
344 | (MToken.GROUP, '|>'),
345 | (MToken.GROUP, '}'),
346 | (MToken.GROUP, ','),
347 | (MToken.LOCAL_SCOPE, 'y'),
348 | (MToken.GROUP, ']'),
349 | (MToken.GROUP, ','),
350 | (MToken.LOCAL_SCOPE, 'z'),
351 | (MToken.OPERATOR, '='),
352 | (MToken.BUILTIN, 'With'),
353 | (MToken.GROUP, '['),
354 | (MToken.GROUP, '{'),
355 | (MToken.LOCAL_SCOPE, 'k'),
356 | (MToken.OPERATOR, '='),
357 | (MToken.GROUP, '{'),
358 | (MToken.NUMBER, '1'),
359 | (MToken.GROUP, ','),
360 | (MToken.NUMBER, '2'),
361 | (MToken.GROUP, '}'),
362 | (MToken.GROUP, '}'),
363 | (MToken.GROUP, ','),
364 | (MToken.LOCAL_SCOPE, 'k'),
365 | (MToken.OPERATOR, '*'),
366 | (MToken.NUMBER, '3'),
367 | (MToken.GROUP, ']'),
368 | (MToken.GROUP, '}'),
369 | (MToken.GROUP, ','),
370 | (MToken.WHITESPACE, ' '),
371 | (MToken.LOCAL_SCOPE, 'x'),
372 | (MToken.OPERATOR, '+'),
373 | (MToken.SYMBOL, 'y'),
374 | (MToken.OPERATOR, '*'),
375 | (MToken.BUILTIN, 'Block'),
376 | (MToken.GROUP, '['),
377 | (MToken.GROUP, '{'),
378 | (MToken.LOCAL_SCOPE, 'k'),
379 | (MToken.OPERATOR, '='),
380 | (MToken.NUMBER, '3'),
381 | (MToken.GROUP, '}'),
382 | (MToken.GROUP, ','),
383 | (MToken.SYMBOL, 'f'),
384 | (MToken.GROUP, '['),
385 | (MToken.LOCAL_SCOPE, 'k'),
386 | (MToken.GROUP, ']'),
387 | (MToken.GROUP, ']'),
388 | (MToken.GROUP, ']'),
389 | ]
390 | self.verify(code, expected)
391 |
def test_string_closing_quote_on_newline(self):
    # The closing quote sits right after a newline in the string body.
    # The string is expected to end there, with the trailing name lexed
    # as a SYMBOL.
    body = 'test string\n'
    source = '"' + body + '"abc'
    quote = (MToken.STRING, '"')
    tokens = [quote, (MToken.STRING, body), quote, (MToken.SYMBOL, 'abc')]
    self.verify(source, tokens)
401 |
def test_unicode_greek(self):
    # Names containing non-ASCII characters are expected to lex like
    # ordinary identifiers, both as symbols and inside patterns.
    cases = [
        ('varλ1a', [(MToken.SYMBOL, 'varλ1a')]),
        ('Δ', [(MToken.SYMBOL, 'Δ')]),
        (
            'f[Δx_List] := Δx',
            [
                (MToken.SYMBOL, 'f'),
                (MToken.GROUP, '['),
                (MToken.PATTERN, 'Δx_List'),
                (MToken.GROUP, ']'),
                (MToken.WHITESPACE, ' '),
                (MToken.OPERATOR, ':='),
                (MToken.WHITESPACE, ' '),
                (MToken.SYMBOL, 'Δx'),
            ],
        ),
        ('a∂_', [(MToken.PATTERN, 'a∂_')]),
    ]
    sources = [source for source, _ in cases]
    tokens = [expected for _, expected in cases]
    self.verify_all(sources, tokens)
--------------------------------------------------------------------------------