├── .gitignore
├── README.md
├── abstractions.py
├── dom_intervals.py
├── dom_signs.py
├── sem.py
├── sem_abs.py
└── tinyast.py


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .mypy_cache
3 | 
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![License: CC0-1.0](https://licensebuttons.net/l/zero/1.0/80x15.png)](http://creativecommons.org/publicdomain/zero/1.0/)
 2 | 
 3 | # Simple Abstract Interpreter
 4 | 
 5 | This is a simple abstract interpreter for the language described in
 6 | Chapter 3 of Rival and Yi, "[Introduction to Static Analysis: An
 7 | Abstract Interpretation
 8 | Perspective](https://mitpress.mit.edu/books/introduction-static-analysis)",
 9 | MIT Press, 2020. The implementation follows that description fairly
10 | closely. The book presents an OCaml based abstract interpreter in
11 | Chapter 7, but this interpreter is not influenced by that.
12 | 
13 | It implements a Signs value abstraction and an (Integer) Intervals
14 | value abstraction.
15 | 
16 | There is a full AST, a full concrete interpreter, and a full abstract
17 | interpreter. Note that the abstract implementations of operators are
18 | not yet complete for the two value abstractions.
19 | 
20 | The source code also uses type annotations, for use with `mypy`. The
21 | annotations are not yet complete.
22 | 
23 | ## Course Website
24 | 
25 | This code accompanies Lectures [17](https://www.cs.rochester.edu/~sree/courses/csc-255-455/spring-2020/static/17-pa-ai.pdf), [18](https://www.cs.rochester.edu/~sree/courses/csc-255-455/spring-2020/static/18-ai.pdf) and [19](https://www.cs.rochester.edu/~sree/courses/csc-255-455/spring-2020/static/19-ai-3.pdf) of the [Spring 2020 edition of CSC255/455 Software Analysis and Improvement](https://www.cs.rochester.edu/~sree/courses/csc-255-455/spring-2020/) taught at the University of Rochester.
26 | 
27 | ## Non-simple Abstract Interpreters
28 | 
29 | Here is an incomplete list of abstract interpreters built for research or production use:
30 | 
31 |   1. [Astrée](http://www.astree.ens.fr/)
32 |   2. [Infer](https://fbinfer.com/)
33 |   3. [SPARTA](https://github.com/facebookincubator/SPARTA)
34 |   4. [IKOS](https://github.com/NASA-SW-VnV/ikos)
35 |   5. [Crab](https://github.com/seahorn/crab)
36 |   6. [MIRAI](https://github.com/facebookexperimental/MIRAI) (Rust-only)
37 | 


--------------------------------------------------------------------------------
/abstractions.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | #
 3 | # abstractions.py
 4 | #
 5 | # Implements abstractions over memory using value abstractions.
 6 | #
 7 | # Author: Sreepathi Pai
 8 | #
 9 | # Written for CSC2/455 Spring 2020
10 | #
11 | # To the extent possible under law, Sreepathi Pai has waived all
12 | # copyright and related or neighboring rights to abstractions.py. This
13 | # work is published from: United States.
14 | 
15 | from dom_intervals import IntervalsDomain, IntervalPoint
16 | from dom_signs import SignsDomain
17 | import logging
18 | 
19 | logger = logging.getLogger(__name__)
20 | 
class NonRelationalAbstraction(object):
    """Abstraction of sets of memories that tracks each variable separately.

    An abstract memory is a dict mapping a variable name to an abstract
    value of the value domain passed to the constructor. Every operation
    is applied variable by variable, delegating to that domain.
    """

    def __init__(self, domain):
        # the value abstraction (must provide phi, lte, lub, widen and BOT)
        self.dom = domain

    def phi(self, M):
        """Return the abstract memory describing the collection of memories M.

        As a side effect this (re)builds ``self.BOT``: the abstract memory
        that maps every variable of the result to the domain's BOT.
        """
        joined = {}

        for memory in M:
            lifted = {var: self.dom.phi(memory[var]) for var in memory}
            # the first non-empty abstraction seeds the accumulator
            joined = self.union(joined, lifted) if joined else lifted

        # also construct BOT
        self.BOT = {var: self.dom.BOT for var in joined}

        return joined

    def lte(self, M0_abs, M1_abs):
        """Return True iff every variable of M0_abs is lte its M1_abs value."""
        return all(self.dom.lte(M0_abs[var], M1_abs[var]) for var in M0_abs)

    def union(self, m0, m1):
        """Return the variable-wise least upper bound of m0 and m1.

        Only the variables of m0 are visited; m1 must define all of them.
        """
        merged = {}
        for var in m0:
            lhs, rhs = m0[var], m1[var]
            merged[var] = self.dom.lub(lhs, rhs)
            logger.debug(f"union: {lhs} U {rhs} = {merged[var]}")

        return merged

    def widen(self, m0, m1):
        """Return the variable-wise widening of m0 (previous) by m1 (current)."""
        return {var: self.dom.widen(m0[var], m1[var]) for var in m0}

    # convenience function
    def included(self, M_conc, M_abs):
        """Return True iff the concrete memories M_conc are covered by M_abs.

        Note that this calls phi(), and therefore also rebuilds self.BOT.
        """
        return self.lte(self.phi(M_conc), M_abs)
72 | 
73 | 
def test_NonRelationalAbstraction():
    """Smoke test: print the interval abstraction of four sample memories."""
    memories = [
        {'x': 25, 'y': 7, 'z': -12},
        {'x': 28, 'y': -7, 'z': -11},
        {'x': 20, 'y': 0, 'z': -10},
        {'x': 35, 'y': 8, 'z': -9},
    ]

    abstraction = NonRelationalAbstraction(IntervalsDomain())
    # no asserts: the result is meant to be inspected visually
    print(abstraction.phi(memories))
83 | 
84 | 
# When run as a script, execute the smoke test above.
if __name__ == "__main__":
    test_NonRelationalAbstraction()
87 | 
88 | 


--------------------------------------------------------------------------------
/dom_intervals.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | #
  3 | # dom_intervals.py
  4 | #
  5 | # An implementation of the intervals value abstraction. Assumes values
  6 | # are integers.
  7 | #
  8 | # Author: Sreepathi Pai
  9 | #
 10 | # Written for CSC2/455 Spring 2020
 11 | #
 12 | # To the extent possible under law, Sreepathi Pai has waived all
 13 | # copyright and related or neighboring rights to
 14 | # dom_intervals.py. This work is published from: United States.
 15 | #
 16 | # Note: this is still incomplete, and should throw NotImplementedErrors
 17 | #
 18 | 
 19 | import logging
 20 | 
 21 | logger = logging.getLogger(__name__)
 22 | 
 23 | class IntervalPoint(object):
 24 |     PINF = "+inf"
 25 |     NINF = "-inf"
 26 | 
 27 |     def __init__(self, pt):
 28 |         self.pt = pt
 29 | 
 30 |     def __eq__(self, other):
 31 |         # this equates infinity, which should be okay
 32 |         if isinstance(other, IntervalPoint):
 33 |             return other.pt == self.pt
 34 |         else:
 35 |             raise NotImplementedError(other)
 36 | 
 37 |     def __lt__(self, other: 'IntervalPoint'):
 38 |         if self.pt == self.PINF:
 39 |             # +inf, -inf/F; +inf, n/F; +inf, +inf/F
 40 |             return False
 41 | 
 42 |         if self.pt == self.NINF:
 43 |             # -inf, -inf/F; -inf, n/T; -inf, +inf/T
 44 |             return other.pt != self.NINF
 45 | 
 46 |         if other.pt == self.NINF:
 47 |             # n, -inf/F
 48 |             return False
 49 | 
 50 |         if other.pt == self.PINF:
 51 |             # n, +inf/F
 52 |             return True
 53 | 
 54 |         return self.pt < other.pt
 55 | 
 56 |     def __le__(self, other: 'IntervalPoint'):
 57 |         if self.pt == self.PINF:
 58 |             return other.pt == self.pt # +inf == +inf
 59 | 
 60 |         if self.pt == self.NINF:
 61 |             return True  # -inf <= -inf, n, +inf
 62 | 
 63 |         if other.pt == self.NINF:
 64 |             return False 
 65 | 
 66 |         if other.pt == self.PINF:
 67 |             # _, +inf
 68 |             return True
 69 | 
 70 |         return self.pt <= other.pt
 71 | 
 72 |     def __gt__(self, other: 'IntervalPoint'):
 73 |         if self.pt == self.PINF:
 74 |             return other.pt != self.PINF
 75 | 
 76 |         if self.pt == self.NINF:
 77 |             return False
 78 | 
 79 |         if other.pt == self.NINF:
 80 |             return True
 81 | 
 82 |         if other.pt == self.PINF:
 83 |             return False
 84 | 
 85 |         return self.pt > other.pt
 86 | 
 87 |     def __ge__(self, other: 'IntervalPoint'):
 88 |         if self.pt == self.PINF:
 89 |             # +inf, +inf/T; +inf, n/T; +inf, -inf/T
 90 |             return True
 91 | 
 92 |         if self.pt == self.NINF:
 93 |             # -inf, -inf/T; -inf, n/F; -inf, +inf/F
 94 |             return other.pt == self.NINF
 95 | 
 96 |         if other.pt == self.NINF:
 97 |             # n, -inf;
 98 |             return True
 99 | 
100 |         if other.pt == self.PINF:
101 |             return False
102 | 
103 |         # n, m
104 |         return self.pt >= other.pt
105 | 
106 |     def __add__(self, o):
107 |         if isinstance(o, IntervalPoint):
108 |             n = o.pt
109 |         elif isinstance(o, int):
110 |             n = o
111 |         else:
112 |             raise NotImplementedError
113 | 
114 |         sadd = {(self.NINF, self.NINF): self.NINF,
115 |                 (self.NINF, self.PINF): None,  # undefined -inf + +inf
116 |                 (self.PINF, self.NINF): None,  # undefined: +inf + -inf
117 |                 (self.PINF, self.PINF): self.PINF,
118 |                 }
119 | 
120 |         if (self.pt, n) in sadd:
121 |             res = sadd[(self.pt, n)]
122 |             if res is None: raise ValueError(f"Addition not defined on {self.pt} and {n}")
123 |         else:
124 |             if isinstance(self.pt, int) and isinstance(n, int):
125 |                 res = self.pt + n
126 |             else:
127 |                 # at least one of them is an infinity
128 |                 if self.pt != self.NINF and self.pt != self.PINF:
129 |                     res = n    # n + -inf = -inf, n + +inf = +inf
130 |                 else:
131 |                     res = self.pt # -inf - n = -inf, +inf - n = +inf
132 | 
133 |         return IntervalPoint(res)
134 | 
135 |     def __sub__(self, o):
136 |         if isinstance(o, IntervalPoint):
137 |             n = o.pt
138 |         elif isinstance(o, int):
139 |             n = o
140 |         else:
141 |             raise NotImplementedError
142 | 
143 |         sadd = {(self.NINF, self.NINF): None,  # undefined -inf - -inf = -inf + +inf
144 |                 (self.NINF, self.PINF): self.NINF,  # -inf - +inf
145 |                 (self.PINF, self.NINF): self.PINF,  # +inf - -inf
146 |                 (self.PINF, self.PINF): None,   # undefined +inf - +inf
147 |         }
148 | 
149 |         if (self.pt, n) in sadd:
150 |             res = sadd[(self.pt, n)]
151 |             if res is None: raise ValueError(f"Subtraction not defined on {self.pt} and {n}")
152 |         else:
153 |             if isinstance(self.pt, int) and isinstance(n, int):
154 |                 res = self.pt - n
155 |             else:
156 |                 # at least one of them is an infinity
157 |                 if self.pt != self.NINF and self.pt != self.PINF:
158 |                     res = n    # n - -inf = -inf, n - +inf = +inf
159 |                 else:
160 |                     res = self.pt # -inf - n = -inf, +inf - n = +inf
161 |         return IntervalPoint(res)
162 | 
163 |     def __str__(self):
164 |         return f"{self.pt}"
165 | 
166 |     __repr__ = __str__
167 | 
168 | # TODO: Define an Interval type
169 | 
class IntervalsDomain(object):
    """The (integer) intervals value abstraction.

    An abstract element is either ``BOT`` (the empty set) or a 2-tuple
    ``(low, high)`` of IntervalPoints denoting the closed interval
    [low, high]. ``TOP`` is (-inf, +inf).
    """
    PINF = IntervalPoint(IntervalPoint.PINF)
    NINF = IntervalPoint(IntervalPoint.NINF)
    BOT = "BOT"
    TOP = (NINF, PINF)
    # intervals admit infinite ascending chains, so iteration needs widen()
    finite_height = False

    def phi(self, v: int):
        """Returns an abstract element for a concrete element"""
        return (IntervalPoint(v), IntervalPoint(v)) # this is the math interval [v, v]

    # a best abstraction exists and is equal to phi
    alpha = phi

    def _norm(self, av):
        """Return BOT for any tuple that denotes an empty interval, else av."""
        if isinstance(av, tuple):
            if av[1] == self.NINF: return self.BOT #  ..., -inf)
            if av[0] == self.PINF: return self.BOT #  (+inf, ...

            if av[0] > av[1]: return self.BOT

        return av

    def refine(self, l, r):
        """Return the intersection of l and r (BOT if they do not overlap)."""
        l = self._norm(l)
        r = self._norm(r)

        # intersecting with the empty set always yields the empty set
        if l == self.BOT or r == self.BOT:
            return self.BOT

        new_start = max(l[0], r[0])
        new_end = min(l[1], r[1])

        return self._norm((new_start, new_end))

    # it helps to think of abstract elements as sets, with lte
    # denoting set inclusion. So we're asking, is x included in y?
    def lte(self, x, y):
        """Return True iff interval x is included in interval y."""
        x = self._norm(x)
        y = self._norm(y)

        # bot is always less than everything else
        # empty set {} is always included
        if x == self.BOT: return True
        if y == self.BOT: return False

        # x is included in y iff y's bounds enclose x's bounds; this also
        # covers TOP, which is only included in itself
        return x[0] >= y[0] and x[1] <= y[1]

    def lub(self, x, y):
        '''Least upper bound, the smallest set that includes both x and y'''
        x = self._norm(x)
        y = self._norm(y)

        if self.lte(x, y): return y # y includes x
        if self.lte(y, x): return x # x includes y

        # note neither x nor y can be BOT at this point
        return (min(x[0], y[0]), max(x[1], y[1]))

    def widen(self, x, y):
        """Return the widening of x (previous iterate) by y (current iterate).

        Any bound of x that is not stable in the union of x and y is pushed
        to the corresponding infinity, so a bound can change at most once.
        """
        logger.debug(f"widen({x}, {y})")

        x = self._norm(x)
        y = self._norm(y)

        # compute union
        u = self.lub(x, y)
        logger.debug(f"widen: u: {u}")

        # with an empty previous iterate there are no bounds to
        # extrapolate from; u is then just y (or BOT if both are empty)
        if x == self.BOT or u == self.BOT:
            return u

        left = u[0] if u[0] == x[0] else self.NINF    # unstable left -> -inf
        right = u[1] if u[1] == x[1] else self.PINF   # unstable right -> +inf

        return (left, right)

    def f_binop(self, op, left, right):
        """Abstract version of `left op right`; only '+' and '-' are supported.

        Returns BOT if either operand is BOT. Raises NotImplementedError
        for any other operator.
        """
        def add(x, y):
            return (x[0] + y[0], x[1] + y[1])

        def sub(x, y):
            a = x[0] - y[1]   # smallest of first interval - largest of second interval
            b = x[1] - y[0]   # largest of first interval - smallest of second interval

            assert a <= b, f"{a}, {b}"
            return (a, b)

        operations = {'+': add, '-': sub}
        if op not in operations:
            raise NotImplementedError(f'Operator {op}')

        left = self._norm(left)
        right = self._norm(right)

        if left == self.BOT or right == self.BOT:
            return self.BOT

        return operations[op](left, right)

    def f_cmpop(self, op, left, c):
        """Return (true_interval, false_interval) for the test `v op c`.

        The first interval holds the values for which the comparison is
        true, the second those for which it is false. Only the lower bound
        of c is used, and `left` is not consulted: callers intersect the
        results with the variable's current value via refine().
        Raises NotImplementedError for unsupported operators.
        """
        c = self._norm(c)

        # assume integers
        if op == '<':
            return (self.NINF, c[0] - 1), (c[0], self.PINF)
        elif op == '<=':
            return (self.NINF, c[0]), (c[0] + 1, self.PINF)
        elif op == '>':
            return (c[0] + 1, self.PINF), (self.NINF, c[0])
        elif op == '>=':
            return (c[0], self.PINF), (self.NINF, c[0] - 1)
        else:
            raise NotImplementedError(f'Operator {op}')
304 | 
def test_IntervalPoint():
    """Sanity checks for the ordering of finite and infinite points."""
    five, six = IntervalPoint(5), IntervalPoint(6)
    plus_inf = IntervalPoint(IntervalPoint.PINF)
    minus_inf = IntervalPoint(IntervalPoint.NINF)

    # less-than (and less-or-equal) around the infinities
    assert not plus_inf < six
    assert minus_inf < five
    assert minus_inf < plus_inf
    assert minus_inf <= minus_inf
    assert six < plus_inf

    # greater-than
    assert plus_inf > five
    assert six > five

    # the builtins rely on the comparison operators above
    assert min(minus_inf, five) == minus_inf
    assert max(six, plus_inf) == plus_inf
322 | 
323 | 
# When run as a script, execute the IntervalPoint sanity checks.
if __name__ == "__main__":
    test_IntervalPoint()
326 | 


--------------------------------------------------------------------------------
/dom_signs.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | #
  3 | # dom_signs.py
  4 | #
  5 | # Implementation of a signs value abstraction.
  6 | #
  7 | # Author: Sreepathi Pai
  8 | #
  9 | # To the extent possible under law, Sreepathi Pai has waived all
 10 | # copyright and related or neighboring rights to dom_signs.py. This
 11 | # work is published from: United States.
 12 | #
 13 | # Note: This is still incomplete, but should throw NotImplementedErrors
 14 | #
 15 | import logging
 16 | 
 17 | logger = logging.getLogger(__name__)
 18 | 
 19 | #TODO: this is really a static class/enum?
 20 | class SignsDomain(object):
 21 |     LTZ = "[<= 0]"
 22 |     GTZ = "[>= 0]"
 23 |     EQZ = "[= 0]"
 24 |     TOP = "TOP"
 25 |     BOT = "BOT"
 26 |     finite_height = True
 27 | 
 28 |     def phi(self, v: int):
 29 |         """Returns an abstract element for a concrete element"""
 30 |         if v == 0:
 31 |             return self.EQZ
 32 |         elif v > 0:
 33 |             return self.GTZ
 34 |         elif v < 0:
 35 |             return self.LTZ
 36 |         else:
 37 |             raise ValueError(f"Unknown value for signs abstraction {v}")
 38 | 
 39 |     # a best abstraction exists and is equal to phi
 40 |     alpha = phi
 41 | 
 42 |     # it helps to think of abstract elements as sets, with lte
 43 |     # denoting set inclusion. So we're asking, is x included in y?
 44 |     def lte(self, x, y):
 45 |         # bot is always less than everything else
 46 |         # empty set {} is always included
 47 |         if x == self.BOT: return True
 48 | 
 49 |         # top is only lte
 50 |         # top is all possible values, so it is only included in itself
 51 |         if x == self.TOP:
 52 |             if y != self.TOP: return False
 53 |             return True
 54 | 
 55 |         # eqz is the set {0}, which is included in all sets (>=0, <=0) except {}
 56 |         if x == self.EQZ:
 57 |             if y == self.BOT: return False
 58 |             return True
 59 | 
 60 |         if x == self.LTZ or x == self.GTZ:
 61 |             if y == x: return True
 62 |             if y == self.TOP: return True
 63 | 
 64 |             # these sets are not included in {0} or {} or {>=0} [resp. {<=0}]
 65 |             return False
 66 | 
 67 |     def lub(self, x, y):
 68 |         '''Least upper bound, the smallest set that includes both x and y'''
 69 | 
 70 |         if self.lte(x, y): return y # y includes x
 71 |         if self.lte(y, x): return x # x includes y
 72 | 
 73 |         # if incomparable, then we return T
 74 |         return self.TOP
 75 | 
 76 |     def f_binop(self, op, left, right):
 77 |         if op == '+':
 78 |             return self.lub(left, right)
 79 |         elif op == '*':
 80 |             if left != right:
 81 |                 return self.lub(left, right)
 82 |             elif left == self.LTZ:
 83 |                 return self.GTZ  # - * - = +
 84 |             elif left == self.GTZ:
 85 |                 return self.GTZ  # + * + = +
 86 |         elif op == '-':
 87 |             if left == right:
 88 |                 if left != self.EQZ and left != self.BOT:
 89 |                     return self.TOP
 90 | 
 91 |                 return left # {0} - {0} => {0}, {} - {} => {}
 92 |             else:
 93 |                 return left   # {+ve} - {-ve} => positive, {-ve} - {+ve} => {-ve}
 94 | 
 95 |         else:
 96 |             raise NotImplementedError(f'Operator {op}')
 97 | 
 98 |     def refine(self, l, r):
 99 |         if self.lte(l, r): return l
100 |         if self.lte(r, l): return r
101 | 
102 |         return self.TOP
103 | 
104 |     def f_cmpop(self, op, left, c):
105 |         # (abst of c, op) : (variable's true domain, variables false domain)
106 |         abs_results = {(self.EQZ, '<'): (self.LTZ, self.GTZ),
107 |                        (self.EQZ, '<='): (self.LTZ, self.GTZ),
108 |                        (self.EQZ, '>'): (self.GTZ, self.LTZ),
109 |                        (self.EQZ, '>='): (self.GTZ, self.LTZ),
110 |                        (self.EQZ, '!='): (self.TOP, self.EQZ),
111 | 
112 |                        (self.GTZ, '>'): (self.GTZ, self.TOP),
113 |                        (self.GTZ, '<'): (self.TOP, self.GTZ),
114 |                        (self.GTZ, '<='): (self.TOP, self.GTZ),
115 |                        (self.GTZ, '>='): (self.GTZ, self.TOP),
116 |                        }
117 | 
118 |         key = (c, op)
119 |         if key not in abs_results:
120 |             raise NotImplementedError(f"{key} not implemented")
121 | 
122 |         return abs_results[key]
123 | 


--------------------------------------------------------------------------------
/sem.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | #
  3 | # sem.py
  4 | #
  5 | # An implementation of the concrete semantics, including an
  6 | # interpreter
  7 | #
  8 | # Author: Sreepathi Pai
  9 | #
 10 | # Written for CSC2/455 Spring 2020
 11 | #
 12 | # To the extent possible under law, Sreepathi Pai has waived all
 13 | # copyright and related or neighboring rights to sem.py. This work
 14 | # is published from: United States.
 15 | 
 16 | from typing import Dict, List
 17 | from tinyast import *
 18 | import random
 19 | import logging
 20 | 
 21 | logger = logging.getLogger(__name__)
 22 | 
 23 | # map of variables (here str, instead of Var) -> values
 24 | #TODO: we could use var if we defined hash to be on the name of Var?
 25 | Memory = Dict[str, int]
 26 | 
 27 | def f_binop(op: BinaryOps, left: Scalar, right: Scalar) -> Scalar:
 28 |     if op == '+':
 29 |         return left + right
 30 |     elif op == '-':
 31 |         return left - right
 32 |     elif op == '*':
 33 |         return left * right
 34 |     elif op == '/':
 35 |         return left // right
 36 |     else:
 37 |         raise NotImplementedError(f"Unknown operator: {op}")
 38 | 
 39 | def f_cmpop(op: ComparisonOps, left: Scalar, right: Scalar) -> bool:
 40 |     if op == '<':
 41 |         return left < right
 42 |     elif op == '>':
 43 |         return left > right
 44 |     elif op == '<=':
 45 |         return left <= right
 46 |     elif op == '>=':
 47 |         return left >= right
 48 |     elif op == '!=':
 49 |         return left != right
 50 |     else:
 51 |         raise NotImplementedError(f"Unknown comparison operator: {op}")
 52 | 
 53 | def evaluate_Expr(E: Expr, m: Memory) -> Scalar:
 54 |     if isinstance(E, Scalar):
 55 |         return E
 56 |     elif isinstance(E, Var):
 57 |         return m[E.name]
 58 |     elif isinstance(E, BinOp):
 59 |         return f_binop(E.op,
 60 |                        evaluate_Expr(E.left, m),
 61 |                        evaluate_Expr(E.right, m))
 62 | 
 63 | 
 64 | def evaluate_BoolExpr(B: BoolExpr, m: Memory) -> bool:
 65 |     return f_cmpop(B.op, m[B.left.name], B.right)
 66 | 
 67 | def filter_memory(B: BoolExpr, M: List[Memory], res = True) -> List[Memory]:
 68 |     out = [m for m in M if evaluate_BoolExpr(B, m) == res]
 69 |     return list(out) #TODO: why materialize this generator?
 70 | 
 71 | 
 72 | def union_memories(M0: List[Memory], M1: List[Memory]) -> List[Memory]:
 73 |     # this is, of course, ridiculous
 74 | 
 75 |     # convert everything to sets
 76 |     M0_set = set([frozenset(m.items()) for m in M0])
 77 |     M1_set = set([frozenset(m.items()) for m in M1])
 78 | 
 79 |     M_set = M0_set.union(M1_set)
 80 | 
 81 |     # convert back to lists of dicts
 82 |     return list([dict(m) for m in M_set])
 83 | 
 84 | # M is a set of memory states, it belongs to Powerset(Memory)
 85 | # We're using List, because set would choke on Dict and we don't have a frozendict type...
 86 | def evaluate_Cmd(C: Cmd, M: List[Memory]) -> List[Memory]:
 87 |     def update_memories(var, value_lambda):
 88 |         out = []
 89 |         for m in M:
 90 |             # not sure using dicts is gaining us anything when we're copying dicts around...
 91 |             m_out = dict(m)
 92 |             m_out[var] = value_lambda(m)
 93 |             out.append(m_out)
 94 | 
 95 |         return out
 96 | 
 97 |     if isinstance(C, Skip):
 98 |         return M
 99 |     elif isinstance(C, Program):
100 |         return evaluate_Cmd(C.program, M)
101 |     elif isinstance(C, Assign):
102 |         return update_memories(C.left.name, lambda m: evaluate_Expr(C.right, m))
103 |     elif isinstance(C, Input):
104 |         n = random.randint(0, 100) # could be anything, actually
105 |         return update_memories(C.var.name, lambda _: n)
106 |     elif isinstance(C, Seq):
107 |         return evaluate_Cmd(C.cmd1, evaluate_Cmd(C.cmd0, M))
108 |     elif isinstance(C, IfThenElse):
109 |         then_memory = evaluate_Cmd(C.then_, filter_memory(C.cond, M))
110 |         else_memory = evaluate_Cmd(C.else_, filter_memory(C.cond, M, res = False))
111 | 
112 |         return union_memories(then_memory, else_memory)
113 |     elif isinstance(C, While):
114 |         # L0 but we apply filter at the end
115 |         out = [m for m in M] # copy all input states
116 | 
117 |         # the next loop computes L1, L2, L3, ....
118 |         # identify those memories where condition is true
119 | 
120 |         pre_iter_memories = filter_memory(C.cond, out)
121 |         accum: List[Memory] = []
122 |         while len(pre_iter_memories):
123 |             logger.debug(f"pre_iter_memories: {pre_iter_memories}")
124 |             after_iter_memories = evaluate_Cmd(C.body, pre_iter_memories)
125 |             logger.debug(f"after_iter_memories: {after_iter_memories}")
126 |             accum = union_memories(accum, after_iter_memories)
127 |             logger.debug(f"accum: {accum}")
128 | 
129 |             # only keep memories where the condition is true for the next iteration
130 |             pre_iter_memories = filter_memory(C.cond, after_iter_memories)
131 | 
132 |         # This computes L0 U (L1 U L2...) and retains only those memory states where the loop has
133 |         # terminated.
134 |         #
135 |         # we have exited the loop, so only keep those memories where condition is false
136 |         out = filter_memory(C.cond, union_memories(out, accum), res=False)
137 | 
138 |         return out
139 |     else:
140 |         raise NotImplementedError(f"Don't know how to interpret {type(C).__name__}({C})")
141 | 
def test_evaluate_Expr():
    """Check that x + y evaluates to 11 in the memory {x: 5, y: 6}."""
    memory = {'x': 5, 'y': 6}
    total = BinOp('+', Var('x'), Var('y'))

    result = evaluate_Expr(total, memory)
    assert result == 11, result
151 | 
def test_evaluate_BoolExpr():
    """Check that x < 6 holds in the memory {x: 5, y: 6}."""
    x, y = Var('x'), Var('y')
    memory = {'x': 5, 'y': 6}

    outcome = evaluate_BoolExpr(BoolExpr('<', x, 6), memory)
    assert outcome == True, outcome
161 | 
def test_evaluate_Cmd():
    """Run one sample program per command kind and print the results."""
    #TODO: actually put in asserts for testing. Right now, rely on visual inspection...

    x = Var('x')
    y = Var('y')

    M_in = [{'x': 5, 'y': 6}, {'x': 8, 'y': 7}]

    # skip, assignment, input, sequence and if-then-else, in that order
    programs = [
        Program(Skip()),
        Program(Assign(x, 9)),
        Program(Input(y)),
        Program(sequence([Assign(x, 10), Assign(y, 11)])),
        Program(IfThenElse(BoolExpr('>', x, 7),
                           Assign(y, BinOp('-', x, 7)),
                           Assign(y, BinOp('-', 7, x)))),
    ]
    for program in programs:
        print(evaluate_Cmd(program, M_in))

    # the loop case also prints the input, for comparison
    ploop = Program(While(BoolExpr('<', x, 7),
                          Seq(Assign(y, BinOp('-', y, 1)),
                              Assign(x, BinOp('+', x, 1)))))
    M_out = evaluate_Cmd(ploop, M_in)
    print(M_in, M_out)
203 | 
def test_While():
    """Print the input memories, a counting loop, and the loop's result."""
    x = Var('x')
    y = Var('y')

    # three starting values for x; y starts at 0 in every memory
    M_in = [{x.name: start, y.name: 0} for start in (4, 8, 5)]
    print(M_in)

    body = Seq(Assign(y, BinOp('+', y, 1)),
               Assign(x, BinOp('+', x, 1)))
    p = Program(While(BoolExpr('<', x, 7), body))
    print(p)

    print(evaluate_Cmd(p, M_in))
220 | 
# When run as a script, enable debug logging and run every test in order.
if __name__ == "__main__":
    logging.basicConfig(level = logging.DEBUG)
    for run_test in (test_evaluate_Expr,
                     test_evaluate_BoolExpr,
                     test_evaluate_Cmd,
                     test_While):
        run_test()
227 | 


--------------------------------------------------------------------------------
/sem_abs.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | #
  3 | # sem_abs.py
  4 | #
  5 | # An implementation of the abstract semantics, including an abstract
  6 | # interpreter.
  7 | #
  8 | # Author: Sreepathi Pai
  9 | #
 10 | # Written for CSC2/455 Spring 2020
 11 | #
 12 | # To the extent possible under law, Sreepathi Pai has waived all
 13 | # copyright and related or neighboring rights to sem_abs.py. This work
 14 | # is published from: United States.
 15 | 
 16 | from typing import List, Dict, Union, Tuple
 17 | from tinyast import *
 18 | import random
 19 | import abstractions
 20 | import logging
 21 | 
 22 | from sem import evaluate_Cmd # for testing
 23 | 
 24 | Abstraction = Union[abstractions.NonRelationalAbstraction]
 25 | AbstractMemory = Dict[str, Abstraction]
 26 | 
 27 | logger = logging.getLogger(__name__)
 28 | 
 29 | def evaluate_Expr_abs(E: Expr, m: AbstractMemory, vabs):
 30 |     if isinstance(E, Scalar):
 31 |         return vabs.phi(E)
 32 |     elif isinstance(E, Var):
 33 |         return m[E.name]
 34 |     elif isinstance(E, BinOp):
 35 |         return vabs.f_binop(E.op,
 36 |                             evaluate_Expr_abs(E.left, m, vabs),
 37 |                             evaluate_Expr_abs(E.right, m, vabs))
 38 | 
 39 | def evaluate_BoolExpr_abs(B: BoolExpr, m: AbstractMemory, vabs):
 40 |     return vabs.f_cmpop(B.op, m[B.left.name], vabs.phi(B.right))
 41 | 
 42 | def filter_memory_abs(B: BoolExpr, M_abs: AbstractMemory, vabs) -> Tuple[AbstractMemory, AbstractMemory]:
 43 |     true_abs, false_abs = evaluate_BoolExpr_abs(B, M_abs, vabs)
 44 |     var_abs = M_abs[B.left.name]
 45 |     logger.debug(f"true: {true_abs}, false: {false_abs}, value: {var_abs}")
 46 | 
 47 |     true_abs = vabs.refine(var_abs, true_abs)
 48 |     logger.debug(f"refined true: {true_abs}")
 49 |     if true_abs != vabs.BOT:
 50 |         # may enter true part
 51 |         M_abs_true = dict(M_abs)
 52 |         M_abs_true[B.left.name] = true_abs
 53 |     else:
 54 |         M_abs_true = dict([(m, vabs.BOT) for m in M_abs])
 55 | 
 56 |     false_abs =  vabs.refine(var_abs, false_abs)
 57 |     logger.debug(f"refined false: {false_abs}")
 58 | 
 59 |     if false_abs != vabs.BOT:
 60 |         # may enter false part
 61 |         M_abs_false = dict(M_abs)
 62 |         M_abs_false[B.left.name] = false_abs
 63 |     else:
 64 |         M_abs_false = dict([(m, vabs.BOT) for m in M_abs])
 65 | 
 66 |     return M_abs_true, M_abs_false
 67 | 
 68 | def abs_iter(F_abs, M_abs, abstraction):
 69 |     R = M_abs
 70 |     logger.debug(f'M0: {R}')
 71 |     k = 1
 72 |     while True:
 73 |         T = R
 74 |         if abstraction.dom.finite_height:
 75 |             R = abstraction.union(R, F_abs(R))
 76 |         else:
 77 |             R = abstraction.widen(R, F_abs(R))
 78 | 
 79 |         logger.debug(f'M{k}: {R}')
 80 |         if R == T: break
 81 |         k = k + 1
 82 |         if k > 5: break
 83 | 
 84 |     return T
 85 | 
 86 | # M_abs is the abstract set of memory states
 87 | def evaluate_Cmd_abs(C: Cmd, M_abs: AbstractMemory, abstraction) -> AbstractMemory:
 88 |     def update_abs_memories(var, value_lambda):
 89 |         out = dict(M_abs)
 90 |         out[var] = value_lambda(M_abs)
 91 |         return out
 92 | 
 93 |     # C[BOT] -> BOT
 94 |     if M_abs == abstraction.BOT:
 95 |         return M_abs
 96 | 
 97 |     # the value abstraction
 98 |     v_abs = abstraction.dom
 99 | 
100 |     if isinstance(C, Skip):
101 |         return M_abs
102 |     elif isinstance(C, Program):
103 |         return evaluate_Cmd_abs(C.program, M_abs, abstraction)
104 |     elif isinstance(C, Assign):
105 |         return update_abs_memories(C.left.name, lambda m: evaluate_Expr_abs(C.right, m, v_abs))
106 |     elif isinstance(C, Input):
107 |         return update_abs_memories(C.var.name, lambda _: v_abs.TOP)
108 |     elif isinstance(C, Seq):
109 |         return evaluate_Cmd_abs(C.cmd1, evaluate_Cmd_abs(C.cmd0, M_abs, abstraction), abstraction)
110 |     elif isinstance(C, IfThenElse):
111 |         then_memory, else_memory = filter_memory_abs(C.cond, M_abs, v_abs)
112 |         logger.debug(f"ite: part-wise precondition: then: {then_memory}, else: {else_memory}")
113 |         then_memory = evaluate_Cmd_abs(C.then_, then_memory, abstraction)
114 |         else_memory = evaluate_Cmd_abs(C.else_, else_memory, abstraction)
115 | 
116 |         logger.debug(f"ite: part-wise postcondition: then: {then_memory}, else: {else_memory}")
117 |         ite_memory = abstraction.union(then_memory, else_memory)
118 | 
119 |         logger.debug(f"ite: postcondition: {ite_memory}")
120 |         return ite_memory
121 |     elif isinstance(C, While):
122 |         def F_abs(MM_abs):
123 |             pre_memory, _ = filter_memory_abs(C.cond, MM_abs, v_abs)
124 |             post_memory = evaluate_Cmd_abs(C.body, pre_memory, abstraction)
125 |             return post_memory
126 | 
127 |         _, out = filter_memory_abs(C.cond, abs_iter(F_abs, M_abs, abstraction), v_abs)
128 |         return out
129 |     else:
130 |         raise NotImplementedError(f"Don't know how to interpret {type(C).__name__}({C})")
131 | 
def test_evaluate_Cmd_abs():
    """Run several small programs concretely and abstractly and compare.

    For each program the concrete result (`evaluate_Cmd`) and the abstract
    result (`evaluate_Cmd_abs`) are printed, and
    `nra_abs.included(M_out, M_out_abs)` is asserted.
    """
    #TODO: actually put in asserts for testing. Right now, rely on visual inspection...

    x = Var('x')
    y = Var('y')

    # Two concrete input memories and their abstraction.
    m1 = {'x': 5, 'y': 6}
    m2 = {'x': 8, 'y': 7}

    nra_abs = abstractions.NonRelationalAbstraction(abstractions.IntervalsDomain())
    M_in = [m1, m2]
    M_in_abs = nra_abs.phi(M_in)

    # skip
    s = Program(Skip())

    M_out_abs = evaluate_Cmd_abs(s, M_in_abs, nra_abs)
    M_out = evaluate_Cmd(s, M_in)

    print(M_out, M_out_abs)
    assert nra_abs.included(M_out, M_out_abs)

    # assignment of a constant
    pasgn = Program(Assign(x, 9))
    M_out = evaluate_Cmd(pasgn, M_in)
    M_out_abs = evaluate_Cmd_abs(pasgn, M_in_abs, nra_abs)
    print(M_out, M_out_abs)
    assert nra_abs.included(M_out, M_out_abs)

    # input
    pinput = Program(Input(y))
    M_out = evaluate_Cmd(pinput, M_in)
    M_out_abs = evaluate_Cmd_abs(pinput, M_in_abs, nra_abs)
    print(M_out, M_out_abs)
    assert nra_abs.included(M_out, M_out_abs)

    # sequence of two assignments, the first with a binary operation
    pseq = Program(sequence([Assign(x, BinOp('+', 10, 11)), Assign(y, 11)]))
    M_out = evaluate_Cmd(pseq, M_in)
    M_out_abs = evaluate_Cmd_abs(pseq, M_in_abs, nra_abs)
    print(M_out, M_out_abs)
    assert nra_abs.included(M_out, M_out_abs)

    # if-then-else: y := |x - 7| written as two branches
    print(M_in)
    #M_in_abs = {'x': nra_abs.dom.TOP, 'y': nra_abs.dom.TOP}
    print(M_in_abs)
    pite = Program(IfThenElse(BoolExpr('>', x, 7),
                           Assign(y, BinOp('-', x, 7)),
                           Assign(y, BinOp('-', 7, x)),
                           )
                )
    M_out = evaluate_Cmd(pite, M_in)
    M_out_abs = evaluate_Cmd_abs(pite, M_in_abs, nra_abs)
    print(M_out, M_out_abs)
    assert nra_abs.included(M_out, M_out_abs)

    # while loop: count x up to 7 while decrementing y
    print("loop_start")
    print(M_in_abs)
    ploop = Program(While(BoolExpr('<', x, 7),
                          Seq(Assign(y, BinOp('-', y, 1)),
                              Assign(x, BinOp('+', x, 1)))
                    ))
    print(ploop)
    M_out = evaluate_Cmd(ploop, M_in)
    M_out_abs = evaluate_Cmd_abs(ploop, M_in_abs, nra_abs)
    print(M_in, M_out)
    print(M_out, M_out_abs)
    assert nra_abs.included(M_out, M_out_abs)
196 | 
def test_ite_bot_abs():
    """Print concrete and abstract results of an if whose test never holds.

    The condition is `x > 9` while x is 5 or 8 in the two input memories.
    The inclusion check result is printed, not asserted.
    """
    x, y = Var('x'), Var('y')

    nra_abs = abstractions.NonRelationalAbstraction(abstractions.IntervalsDomain())
    M_in = [{'x': 5, 'y': 6}, {'x': 8, 'y': 7}]
    M_in_abs = nra_abs.phi(M_in)

    cond = BoolExpr('>', x, 9)
    then_cmd = Assign(x, 10)
    else_cmd = Assign(y, BinOp('-', 7, x))
    pite = Program(IfThenElse(cond, then_cmd, else_cmd))

    M_out = evaluate_Cmd(pite, M_in)
    M_out_abs = evaluate_Cmd_abs(pite, M_in_abs, nra_abs)

    print(M_out, M_out_abs)
    print(nra_abs.included(M_out, M_out_abs))
219 | 
def test_infinite_loop_abs():
    """Print the abstract result of `x := 0; while (x >= 0) { x := x + 1 }`.

    Only the abstract interpreter is run; the result is printed for visual
    inspection.
    """
    x = Var('x')

    nra_abs = abstractions.NonRelationalAbstraction(abstractions.IntervalsDomain())
    M_in_abs = {'x': nra_abs.dom.TOP}
    # The memory-level BOT is set by hand for the single variable used here.
    nra_abs.BOT = {'x': nra_abs.dom.BOT}

    init = Assign(x, 0)
    loop = While(BoolExpr('>=', x, 0), Assign(x, BinOp('+', x, 1)))
    ploop2 = Program(Seq(init, loop))
    print(ploop2)

    M_out_abs = evaluate_Cmd_abs(ploop2, M_in_abs, nra_abs)
    print(M_out_abs)
234 | 
def test_infinite_loop_abs_2():
    """Print the abstract result of a loop that resets x once it reaches 50.

    Program: `x := 0; while (x <= 100) { if (x >= 50) x := 10 else x := x + 1 }`.
    Only the abstract interpreter is run; the result is printed for visual
    inspection.
    """
    x = Var('x')

    nra_abs = abstractions.NonRelationalAbstraction(abstractions.IntervalsDomain())
    M_in_abs = {'x': nra_abs.dom.TOP}
    # The memory-level BOT is set by hand for the single variable used here.
    nra_abs.BOT = {'x': nra_abs.dom.BOT}

    init = Assign(x, 0)
    body = IfThenElse(BoolExpr('>=', x, 50),
                      Assign(x, 10),
                      Assign(x, BinOp('+', x, 1)))
    loop = While(BoolExpr('<=', x, 100), body)
    ploop3 = Program(Seq(init, loop))
    print(ploop3)

    M_out_abs = evaluate_Cmd_abs(ploop3, M_in_abs, nra_abs)
    print(M_out_abs)
252 | 
if __name__ == "__main__":
    # Show the DEBUG traces emitted by the abstract interpreter.
    logging.basicConfig(level=logging.DEBUG)
    for run_test in (test_ite_bot_abs,
                     test_infinite_loop_abs,
                     test_infinite_loop_abs_2,
                     test_evaluate_Cmd_abs):
        run_test()
259 | 


--------------------------------------------------------------------------------
/tinyast.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | #
  3 | # tinyast.py
  4 | #
  5 | # An AST for the tiny language presented in Chapter 3 of Rival and Yi.
  6 | #
  7 | # Author: Sreepathi Pai
  8 | #
  9 | # Written for CSC2/455 Spring 2020
 10 | #
 11 | # To the extent possible under law, Sreepathi Pai has waived all
 12 | # copyright and related or neighboring rights to tinyast.py. This work
 13 | # is published from: United States.
 14 | 
 15 | from typing import Union
 16 | from typing_extensions import Literal
 17 | 
# Operator symbols used to annotate the `op` argument of BinOp.
BinaryOps = Literal['+', '-', '*', '/']
# Operator symbols used to annotate the `op` argument of BoolExpr.
ComparisonOps = Literal['<', '>', '==', '<=', '>=', '!=']
 20 | 
 21 | class Node(object):
 22 |     pass
 23 | 
 24 | class Var(Node):
 25 |     def __init__(self, name: str):
 26 |         self.name = name
 27 | 
 28 |     def __str__(self):
 29 |         return self.name
 30 | 
Scalar = int # restrict Scalars to ints in this implementation
# An expression is a scalar, a variable, or a binary operation. 'BinOp' is
# written as a string because the class is only defined further down.
Expr = Union[Scalar, Var, 'BinOp']
 33 | 
 34 | class BinOp(Node):
 35 |     def __init__(self, op: BinaryOps, left: Expr, right: Expr):
 36 |         self.op = op
 37 |         self.left = left
 38 |         self.right = right
 39 | 
 40 |     def __str__(self):
 41 |         return f"({str(self.left)} {self.op} {str(self.right)})"
 42 | 
 43 |     __repr__ = __str__
 44 | 
 45 | class BoolExpr(Node):
 46 |     def __init__(self, op: ComparisonOps, left: Var, right: Scalar):
 47 |         self.op = op
 48 |         self.left = left
 49 |         self.right = right
 50 | 
 51 |     def __str__(self):
 52 |         return f"{str(self.left)} {self.op} {str(self.right)}"
 53 | 
 54 |     __repr__ = __str__
 55 | 
 56 | class Cmd(Node):
 57 |     pass
 58 | 
 59 | class Skip(Cmd):
 60 |     def __init__(self):
 61 |         pass
 62 | 
 63 |     def __str__(self):
 64 |         return "skip"
 65 | 
 66 | class Seq(Cmd):
 67 |     def __init__(self, cmd0: Cmd, cmd1: Cmd):
 68 |         self.cmd0 = cmd0
 69 |         self.cmd1 = cmd1
 70 | 
 71 |     def __str__(self):
 72 |         return f"{str(self.cmd0)}; {str(self.cmd1)}"
 73 | 
 74 | class Assign(Cmd):
 75 |     def __init__(self, left: Var, right: Expr):
 76 |         self.left = left
 77 |         self.right = right
 78 | 
 79 |     def __str__(self):
 80 |         return f"{str(self.left)} := {str(self.right)}"
 81 | 
 82 | class Input(Cmd):
 83 |     def __init__(self, var: Var):
 84 |         self.var = var
 85 | 
 86 |     def __str__(self):
 87 |         return f"input({self.var})"
 88 | 
 89 | class IfThenElse(Cmd):
 90 |     def __init__(self, cond: BoolExpr, then_: Cmd, else_: Cmd):
 91 |         self.cond = cond
 92 |         self.then_ = then_
 93 |         self.else_ = else_
 94 | 
 95 |     def __str__(self):
 96 |         return f"if({str(self.cond)}) {{ {str(self.then_)} }} else {{ { str(self.else_) } }}"
 97 | 
 98 | class While(Cmd):
 99 |     def __init__(self, cond: BoolExpr, body: Cmd):
100 |         self.cond = cond
101 |         self.body = body
102 | 
103 |     def __str__(self):
104 |         return f"while({str(self.cond)}) {{ {str(self.body)} }}"
105 | 
class Program(Node):
    """Root of an AST: wraps a single command, stored as `program`."""

    def __init__(self, cmd: Cmd):
        self.program = cmd

    def __str__(self):
        return str(self.program)
112 | 
# convenience function to turn a list into a sequence of cmds
def sequence(l: list) -> Seq:
    """Fold a non-empty list of commands into right-nested Seq nodes.

    A single command `c` becomes `Seq(c, Skip())`; `[a, b]` becomes
    `Seq(a, b)`; `[a, b, c]` becomes `Seq(a, Seq(b, c))`, and so on.

    The tree is built iteratively from the right, so construction neither
    copies the list tail at every step nor depends on the recursion limit.

    Raises:
        ValueError: if `l` is empty.
    """
    if len(l) == 0: raise ValueError("Can't convert an empty list into a Seq")

    if len(l) == 1: return Seq(l[0], Skip())

    # Start from the innermost pair and wrap the earlier commands around it.
    result = Seq(l[-2], l[-1])
    for cmd in reversed(l[:-2]):
        result = Seq(cmd, result)

    return result
122 | 
def test_Program():
    """Build a small if-then-else program and print its textual form."""
    x, y = Var('x'), Var('y')

    cond = BoolExpr('>', x, 7)
    then_cmd = Assign(y, BinOp('-', x, 7))
    else_cmd = Assign(y, BinOp('-', 7, x))

    t = Program(IfThenElse(cond, then_cmd, else_cmd))
    print(t)
133 | 
# When executed as a script, run the printing self-test.
if __name__ == "__main__":
    test_Program()
136 | 
137 | 


--------------------------------------------------------------------------------