├── README.md ├── clr.py ├── firstFollow.java └── firstfollow.py /README.md: -------------------------------------------------------------------------------- 1 | # CLR-Parser 2 | Run the file clr.py for making CLR table for grammar you provide and then parse a string for the same. 3 | 4 | Sample input : 5 |      6 | 7 | Example Grammar -\ 8 |          S->AA\ 9 |          A->aA\ 10 |          A->b\ 11 |          end 12 | 13 | Example Strings -\ 14 |          aaabab\ 15 |          bb\ 16 |          abb\ 17 |          bab 18 | 19 | # Running the parser 20 | 21 | 1.$python3 clr.py 22 | 23 | 2.Then enter the grammer, the grammer will be parsed and r-r, s-r conflicts will indicate that the grammar is not CLR. 24 | 25 | 3.Then enter the string to be parsed. 26 | -------------------------------------------------------------------------------- /clr.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | from collections import OrderedDict 3 | from pprint import pprint 4 | import firstfollow 5 | from firstfollow import production_list, nt_list as ntl, t_list as tl 6 | nt_list, t_list=[], [] 7 | 8 | class State: 9 | 10 | _id=0 11 | def __init__(self, closure): 12 | self.closure=closure 13 | self.no=State._id 14 | State._id+=1 15 | 16 | class Item(str): 17 | def __new__(cls, item, lookahead=list()): 18 | self=str.__new__(cls, item) 19 | self.lookahead=lookahead 20 | return self 21 | 22 | def __str__(self): 23 | return super(Item, self).__str__()+", "+'|'.join(self.lookahead) 24 | 25 | 26 | def closure(items): 27 | 28 | def exists(newitem, items): 29 | 30 | for i in items: 31 | if i==newitem and sorted(set(i.lookahead))==sorted(set(newitem.lookahead)): 32 | return True 33 | return False 34 | 35 | 36 | global production_list 37 | 38 | while True: 39 | flag=0 40 | for i in items: 41 | 42 | if i.index('.')==len(i)-1: continue 43 | 44 | Y=i.split('->')[1].split('.')[1][0] 45 | 46 | if i.index('.')+1') 54 | 55 | if 
head!=Y: continue 56 | 57 | newitem=Item(Y+'->.'+body, lastr) 58 | 59 | if not exists(newitem, items): 60 | items.append(newitem) 61 | flag=1 62 | if flag==0: break 63 | 64 | return items 65 | 66 | def goto(items, symbol): 67 | 68 | global production_list 69 | initial=[] 70 | 71 | for i in items: 72 | if i.index('.')==len(i)-1: continue 73 | 74 | head, body=i.split('->') 75 | seen, unseen=body.split('.') 76 | 77 | 78 | if unseen[0]==symbol and len(unseen) >= 1: 79 | initial.append(Item(head+'->'+seen+unseen[0]+'.'+unseen[1:], i.lookahead)) 80 | 81 | return closure(initial) 82 | 83 | 84 | def calc_states(): 85 | 86 | def contains(states, t): 87 | 88 | for s in states: 89 | if len(s) != len(t): continue 90 | 91 | if sorted(s)==sorted(t): 92 | for i in range(len(s)): 93 | if s[i].lookahead!=t[i].lookahead: break 94 | else: return True 95 | 96 | return False 97 | 98 | global production_list, nt_list, t_list 99 | 100 | head, body=production_list[0].split('->') 101 | 102 | 103 | states=[closure([Item(head+'->.'+body, ['$'])])] 104 | 105 | while True: 106 | flag=0 107 | for s in states: 108 | 109 | for e in nt_list+t_list: 110 | 111 | t=goto(s, e) 112 | if t == [] or contains(states, t): continue 113 | 114 | states.append(t) 115 | flag=1 116 | 117 | if not flag: break 118 | 119 | return states 120 | 121 | 122 | def make_table(states): 123 | 124 | global nt_list, t_list 125 | 126 | def getstateno(t): 127 | 128 | for s in states: 129 | if len(s.closure) != len(t): continue 130 | 131 | if sorted(s.closure)==sorted(t): 132 | for i in range(len(s.closure)): 133 | if s.closure[i].lookahead!=t[i].lookahead: break 134 | else: return s.no 135 | 136 | return -1 137 | 138 | def getprodno(closure): 139 | 140 | closure=''.join(closure).replace('.', '') 141 | return production_list.index(closure) 142 | 143 | SLR_Table=OrderedDict() 144 | 145 | for i in range(len(states)): 146 | states[i]=State(states[i]) 147 | 148 | for s in states: 149 | SLR_Table[s.no]=OrderedDict() 150 | 151 | for 
item in s.closure: 152 | head, body=item.split('->') 153 | if body=='.': 154 | for term in item.lookahead: 155 | if term not in SLR_Table[s.no].keys(): 156 | SLR_Table[s.no][term]={'r'+str(getprodno(item))} 157 | else: SLR_Table[s.no][term] |= {'r'+str(getprodno(item))} 158 | continue 159 | 160 | nextsym=body.split('.')[1] 161 | if nextsym=='': 162 | if getprodno(item)==0: 163 | SLR_Table[s.no]['$']='accept' 164 | else: 165 | for term in item.lookahead: 166 | if term not in SLR_Table[s.no].keys(): 167 | SLR_Table[s.no][term]={'r'+str(getprodno(item))} 168 | else: SLR_Table[s.no][term] |= {'r'+str(getprodno(item))} 169 | continue 170 | 171 | nextsym=nextsym[0] 172 | t=goto(s.closure, nextsym) 173 | if t != []: 174 | if nextsym in t_list: 175 | if nextsym not in SLR_Table[s.no].keys(): 176 | SLR_Table[s.no][nextsym]={'s'+str(getstateno(t))} 177 | else: SLR_Table[s.no][nextsym] |= {'s'+str(getstateno(t))} 178 | 179 | else: SLR_Table[s.no][nextsym] = str(getstateno(t)) 180 | 181 | return SLR_Table 182 | 183 | def augment_grammar(): 184 | 185 | for i in range(ord('Z'), ord('A')-1, -1): 186 | if chr(i) not in nt_list: 187 | start_prod=production_list[0] 188 | production_list.insert(0, chr(i)+'->'+start_prod.split('->')[0]) 189 | return 190 | 191 | def main(): 192 | 193 | global production_list, ntl, nt_list, tl, t_list 194 | 195 | firstfollow.main() 196 | 197 | print("\tFIRST AND FOLLOW OF NON-TERMINALS") 198 | for nt in ntl: 199 | firstfollow.compute_first(nt) 200 | firstfollow.compute_follow(nt) 201 | print(nt) 202 | print("\tFirst:\t", firstfollow.get_first(nt)) 203 | print("\tFollow:\t", firstfollow.get_follow(nt), "\n") 204 | 205 | 206 | augment_grammar() 207 | nt_list=list(ntl.keys()) 208 | t_list=list(tl.keys()) + ['$'] 209 | 210 | print(nt_list) 211 | print(t_list) 212 | 213 | j=calc_states() 214 | 215 | ctr=0 216 | for s in j: 217 | print("Item{}:".format(ctr)) 218 | for i in s: 219 | print("\t", i) 220 | ctr+=1 221 | 222 | table=make_table(j) 223 | 
print('_____________________________________________________________________') 224 | print("\n\tCLR(1) TABLE\n") 225 | sym_list = nt_list + t_list 226 | sr, rr=0, 0 227 | print('_____________________________________________________________________') 228 | print('\t| ','\t| '.join(sym_list),'\t\t|') 229 | print('_____________________________________________________________________') 230 | for i, j in table.items(): 231 | 232 | print(i, "\t| ", '\t| '.join(list(j.get(sym,' ') if type(j.get(sym))in (str , None) else next(iter(j.get(sym,' '))) for sym in sym_list)),'\t\t|') 233 | s, r=0, 0 234 | 235 | for p in j.values(): 236 | if p!='accept' and len(p)>1: 237 | p=list(p) 238 | if('r' in p[0]): r+=1 239 | else: s+=1 240 | if('r' in p[1]): r+=1 241 | else: s+=1 242 | if r>0 and s>0: sr+=1 243 | elif r>0: rr+=1 244 | print('_____________________________________________________________________') 245 | print("\n", sr, "s/r conflicts |", rr, "r/r conflicts") 246 | print('_____________________________________________________________________') 247 | print("Enter the string to be parsed") 248 | Input=input()+'$' 249 | try: 250 | stack=['0'] 251 | a=list(table.items()) 252 | '''print(a[int(stack[-1])][1][Input[0]]) 253 | b=list(a[int(stack[-1])][1][Input[0]]) 254 | print(b[0][0]) 255 | print(a[0][1]["S"])''' 256 | print("productions\t:",production_list) 257 | print('stack',"\t \t\t \t",'Input') 258 | print(*stack,"\t \t\t \t",*Input,sep="") 259 | while(len(Input)!=0): 260 | b=list(a[int(stack[-1])][1][Input[0]]) 261 | if(b[0][0]=="s" ): 262 | #s=Input[0]+b[0][1:] 263 | stack.append(Input[0]) 264 | stack.append(b[0][1:]) 265 | Input=Input[1:] 266 | print(*stack,"\t \t\t \t",*Input,sep="") 267 | elif(b[0][0]=="r" ): 268 | s=int(b[0][1:]) 269 | #print(len(production_list),s) 270 | l=len(production_list[s])-3 271 | #print(l) 272 | prod=production_list[s] 273 | l*=2 274 | l=len(stack)-l 275 | stack=stack[:l] 276 | s=a[int(stack[-1])][1][prod[0]] 277 | #print(s,b) 278 | 
stack+=list(prod[0]) 279 | stack.append(s) 280 | print(*stack,"\t \t\t \t",*Input,sep="") 281 | elif(b[0][0]=="a"): 282 | print("\n\tString Accepted\n") 283 | break 284 | except: 285 | print('\n\tString INCORRECT for given Grammar!\n') 286 | return 287 | 288 | if __name__=="__main__": 289 | main() 290 | 291 | 292 | 293 | 294 | 295 | -------------------------------------------------------------------------------- /firstFollow.java: -------------------------------------------------------------------------------- 1 | import java.util.regex.*; 2 | import java.util.*; 3 | 4 | /* 5 | Sample input--> 6 | S->aA 7 | A->aA 8 | A->b 9 | end 10 | */ 11 | //Wherever there is need of char use Character instead 12 | //Symbol everywhere is a char, never a string 13 | //------------------------------------------------------ 14 | public class firstFollow { 15 | static LinkedHashMap terminal_list = new LinkedHashMap<>(); 16 | static LinkedHashMap nonterminal_list = new LinkedHashMap<>(); 17 | static List productions = new ArrayList(); 18 | public static void main(String[] args) { 19 | Scanner sc = new Scanner(System.in); 20 | String input; 21 | while(true) { 22 | input = sc.nextLine(); 23 | input = input.replace(" ", ""); 24 | System.out.println(input); 25 | if(input.toLowerCase().equals("end") || input.toLowerCase().equals(""))break; 26 | productions.add(input); 27 | } 28 | //When calling wrappedMAin anywhere scan productions into 'productions' first 29 | wrappedMain(); 30 | for(nonTerminal x : nonterminal_list.values()) { 31 | printFirstFollow(x); 32 | } 33 | } 34 | //-------------------------FIRST FUNCTION -------------------------------// 35 | public static Set compute_first(Character symbol) { 36 | if(terminal_list.keySet().contains(symbol)) 37 | return new HashSet<>(Arrays.asList(symbol)); 38 | for(String x : productions) { 39 | String[] prod = x.split("->",2); 40 | Character head = prod[0].charAt(0); 41 | String body = prod[1]; 42 | 43 | if(head!=symbol) continue; 44 | 
if(body=="" || body=="^"){ 45 | nonterminal_list.get(symbol).add_first(new Character[]{(char)94}); 46 | //If S-> ^ or ''(blank) then first of S will contain ^ (char)94 ! 47 | continue; 48 | } 49 | for(int i=0;i nxt = compute_first(body.charAt(i)); 52 | boolean flag = nxt.remove('^'); 53 | nonterminal_list.get(symbol).add_first(nxt.toArray(new Character[nxt.size()])); 54 | 55 | if(!flag){ break;} 56 | if(i==body.length()-1) 57 | nonterminal_list.get(symbol).add_first(new Character[]{(char)94}); 58 | } 59 | } 60 | return nonterminal_list.get(symbol).first; 61 | } 62 | //-------------------------FOllOW FUNCTION -------------------------------// 63 | public static void compute_follow(Character symbol) { 64 | if(symbol==nonterminal_list.keySet().toArray()[0]) 65 | //if symbol is the first character ie everything is derived from symbol then 66 | nonterminal_list.get(symbol).add_follow(new Character[] {'$'}); 67 | for(String x : productions) { 68 | String[] prod = x.split("->",2); 69 | Character head = prod[0].charAt(0); 70 | String body = prod[1]; 71 | for(int i=0;i nxt = compute_first(body.charAt(i+1)); 76 | nxt.remove('^'); 77 | nonterminal_list.get(symbol).add_follow(nxt.toArray(new Character[nxt.size()])); 78 | } 79 | if(i==body.length()-1 || compute_first(body.charAt(i+1)).contains('^') && B!=head) { 80 | Set nxt = get_follow(head); 81 | nonterminal_list.get(symbol).add_follow(nxt.toArray(new Character[nxt.size()])); 82 | } 83 | } 84 | } 85 | } 86 | public static Set get_follow(Character symbol){ 87 | if(terminal_list.containsKey(symbol)) 88 | return null; 89 | return nonterminal_list.get(symbol).follow; 90 | } 91 | //-------------------------MAIN FUNCTION -------------------------------// 92 | public static void wrappedMain() { 93 | Pattern term =Pattern.compile("[a-z ^]"); 94 | Pattern nonterm =Pattern.compile("[A-Z]"); 95 | for(int i=0;i",2); 102 | Character head = prod[0].charAt(0); 103 | String body = prod[1]; 104 | if(!nonterminal_list.containsKey(head)) 105 | 
nonterminal_list.put(head, new nonTerminal(head)); 106 | Matcher tm = term.matcher(body); 107 | //for all terminals in the body of the production 108 | while(tm.find()) { 109 | Character s = body.charAt(tm.start()); 110 | if(!terminal_list.containsKey(s)) 111 | terminal_list.put(s,new terminal(s)); 112 | } 113 | //for all non-terminals in the body of the production 114 | Matcher ntm = nonterm.matcher(body); 115 | while(ntm.find()) { 116 | Character s = body.charAt(ntm.start()); 117 | if(!terminal_list.containsKey(s)) 118 | nonterminal_list.put(s,new nonTerminal(s)); 119 | } 120 | 121 | } 122 | } 123 | //-------------------------PRINT FUNCTION -------------------------------// 124 | /* 125 | ---DONT USE THIS FUNCTION--- 126 | */ 127 | public static void printFirstFollow(nonTerminal nt) { 128 | 129 | compute_first(nt.symbol); 130 | compute_follow(nt.symbol); 131 | get_follow(nt.symbol); 132 | System.out.print("First is of "+nt.symbol+" is :"); 133 | nt.first.forEach(System.out::print); 134 | System.out.println(); 135 | System.out.print("Follow is of "+nt.symbol+" is :"); 136 | nt.follow.forEach(System.out::print); 137 | System.out.println(); 138 | } 139 | } 140 | //------------------------------------------------------ 141 | class terminal{ 142 | public char symbol; 143 | public terminal(char sym) { //constructor 144 | this.symbol=sym; 145 | } 146 | } 147 | //------------------------------------------------------ 148 | 149 | class nonTerminal{ 150 | public char symbol; 151 | Set first = new HashSet(); 152 | Set follow = new HashSet(); 153 | 154 | public nonTerminal(char sym ) { 155 | this.symbol= sym; 156 | } 157 | 158 | public void add_first(Character[] symbols) { 159 | List symbols_list = Arrays.asList(symbols ); 160 | first.addAll(new HashSet(symbols_list)); 161 | } 162 | public void add_follow(Character[] symbols) { 163 | List symbols_list = Arrays.asList(symbols); 164 | follow.addAll(new HashSet(symbols_list)); 165 | } 166 | 167 | } 
# /firstfollow.py
"""FIRST and FOLLOW set computation for a context-free grammar.

Productions are strings of the form ``"A->Y1Y2..Yn"`` in which every grammar
symbol is a single character; ``"A->"`` denotes an epsilon production.
Characters ``A``-``Z`` found in a production body are registered as
non-terminals, every other character as a terminal.  Epsilon is represented
in the computed sets by ``chr(1013)``.

The grammar lives in three module-level containers that client code imports
and shares: ``production_list``, ``nt_list`` and ``t_list``.
"""
# The wildcard import is kept only so that names this module has always
# re-exported stay importable; nothing below depends on it.
from re import *  # noqa: F401,F403
from collections import OrderedDict

t_list = OrderedDict()    # terminal symbol -> Terminal, in order of first appearance
nt_list = OrderedDict()   # non-terminal symbol -> NonTerminal; the first key is the start symbol
production_list = []      # productions as "HEAD->BODY" strings, in input order

_EPSILON = chr(1013)      # marker stored in FIRST sets for the empty string

# ------------------------------------------------------------------


class Terminal:
    """A terminal grammar symbol."""

    def __init__(self, symbol):
        self.symbol = symbol

    def __str__(self):
        return self.symbol

# ------------------------------------------------------------------


class NonTerminal:
    """A non-terminal grammar symbol together with its FIRST and FOLLOW sets."""

    def __init__(self, symbol):
        self.symbol = symbol
        self.first = set()
        self.follow = set()

    def __str__(self):
        return self.symbol

    def add_first(self, symbols):
        """Union every element of the iterable *symbols* into the FIRST set."""
        self.first |= set(symbols)

    def add_follow(self, symbols):
        """Union every element of the iterable *symbols* into the FOLLOW set."""
        self.follow |= set(symbols)

# ------------------------------------------------------------------


def _first_of_sequence(symbols):
    """Return FIRST of the symbol string *symbols* from the sets known so far.

    A symbol that is a registered terminal, or that is not a registered
    non-terminal at all, contributes itself.  The result contains epsilon
    only when every symbol can derive epsilon (always true for ``""``).
    """
    result = set()
    for sym in symbols:
        if sym in t_list or sym not in nt_list:
            sym_first = {sym}
        else:
            sym_first = nt_list[sym].first
        result |= sym_first - {_EPSILON}
        if _EPSILON not in sym_first:
            break
    else:
        # Ran off the end: the whole sequence is nullable.
        result.add(_EPSILON)
    return result


def _saturate_first_sets():
    """Grow the FIRST set of every registered non-terminal to a fixed point.

    Iterating until nothing changes (instead of recursing into each body
    symbol) gives correct results for left-recursive grammars and cannot
    recurse without bound on indirect left recursion.
    """
    changed = True
    while changed:
        changed = False
        for prod in production_list:
            head, body = prod.split('->')
            entry = nt_list.get(head)
            if entry is None:
                # Head was never registered (e.g. a start production that a
                # client inserted into production_list on its own).
                continue
            addition = _first_of_sequence(body)
            if not addition <= entry.first:
                entry.add_first(addition)
                changed = True


def compute_first(symbol):
    """Compute and return FIRST(*symbol*).

    For a terminal the result is ``{symbol}``.  For a non-terminal the FIRST
    sets of all registered non-terminals are brought up to date and the live
    ``first`` set stored on the ``NonTerminal`` is returned; copy it before
    mutating.

    Raises:
        KeyError: *symbol* is neither a registered terminal nor a registered
            non-terminal.
    """
    # if X is a terminal then first(X) = X
    if symbol in t_list:
        return {symbol}

    _saturate_first_sets()
    return nt_list[symbol].first

# ------------------------------------------------------------------


def get_first(symbol):
    """Wrapper around :func:`compute_first`."""
    return compute_first(symbol)

# ------------------------------------------------------------------


def _saturate_follow_sets():
    """Grow the FOLLOW set of every registered non-terminal to a fixed point."""
    _saturate_first_sets()

    # follow(start symbol) contains $; the start symbol is the first
    # non-terminal that was registered (nt_list is ordered).
    start = next(iter(nt_list), None)
    if start is not None:
        nt_list[start].add_follow('$')

    changed = True
    while changed:
        changed = False
        for prod in production_list:
            head, body = prod.split('->')
            head_entry = nt_list.get(head)
            head_follow = head_entry.follow if head_entry is not None else set()

            for i, sym in enumerate(body):
                entry = nt_list.get(sym)
                if entry is None:
                    continue

                # for A -> aBb, follow(B) gets the non-epsilon symbols of
                # first(b), where b is EVERYTHING after B, not just the
                # next symbol.
                trailer_first = _first_of_sequence(body[i + 1:])
                addition = trailer_first - {_EPSILON}

                # if b is empty or can vanish, follow(B) also gets follow(A)
                if _EPSILON in trailer_first:
                    addition |= head_follow

                if not addition <= entry.follow:
                    entry.add_follow(addition)
                    changed = True


def compute_follow(symbol):
    """Bring the FOLLOW sets up to date so that FOLLOW(*symbol*) is complete.

    FOLLOW sets depend on one another, so the sets of all registered
    non-terminals are computed together; read the result with
    :func:`get_follow`.  Returns ``None``.
    """
    _saturate_follow_sets()

# ------------------------------------------------------------------


def get_follow(symbol):
    """Return the stored FOLLOW set of *symbol*, or ``None`` for a terminal.

    Nothing is computed here; call :func:`compute_follow` first.

    Raises:
        KeyError: *symbol* is not a registered grammar symbol.
    """
    if symbol in t_list:
        return None

    return nt_list[symbol].follow

# ------------------------------------------------------------------


def _register_production(prod):
    """Validate *prod*, append it to the grammar and register its symbols.

    Raises:
        ValueError: *prod* does not contain exactly one ``->``.
    """
    parts = prod.split('->')
    if len(parts) != 2:
        raise ValueError(
            "malformed production {!r}: expected the form 'A->Y1Y2..Yn'".format(prod))
    head, body = parts

    production_list.append(prod)

    if head not in nt_list:
        nt_list[head] = NonTerminal(head)

    for ch in body:
        if not 'A' <= ch <= 'Z':
            # anything that is not an upper-case ASCII letter is a terminal
            if ch not in t_list:
                t_list[ch] = Terminal(ch)
        elif ch not in nt_list:
            nt_list[ch] = NonTerminal(ch)


def main(pl=None):
    """Read grammar productions from standard input into the module state.

    Reading stops at an empty line or at a line equal to ``end`` (any case).
    Spaces are stripped from every line before it is stored.

    Args:
        pl: when not ``None`` nothing is read and no grammar state is
            changed; the argument is handed back untouched.
            NOTE(review): handling of a caller-supplied production list only
            ever existed as disabled code - confirm the intended contract
            before relying on this parameter.

    Returns:
        *pl*, unchanged.

    Raises:
        ValueError: an entered production does not contain exactly one ``->``.
    """
    print("Enter the grammar productions (enter 'end' or return to stop)\n"
          '#(Format: "A->Y1Y2..Yn" {Yi - single char} OR "A->" {epsilon})')

    if pl is None:
        while True:
            line = input().replace(' ', '')
            if line.lower() in ('end', ''):
                break
            # Register the line that was just read, so the function also
            # works when production_list already holds earlier entries.
            _register_production(line)

    return pl

# ------------------------------------------------------------------


if __name__ == '__main__':

    main()