├── input.txt
├── data.txt
├── items.txt
├── rules.txt
├── README.md
└── eclat.py


/input.txt:
--------------------------------------------------------------------------------
1 | 5 a b c d e
2 | 5 f g c h i
3 | 5 j g c h k
4 | 5 j l c d m
5 | 5 n l c d m
6 | 4 o g c h
7 | 5 o p q d k
8 | 


--------------------------------------------------------------------------------
/data.txt:
--------------------------------------------------------------------------------
1 | 5 1 2 3 4 5
2 | 5 6 7 3 8 9
3 | 5 10 7 3 8 11
4 | 5 10 12 3 4 13
5 | 5 14 12 3 4 13
6 | 4 15 7 3 8
7 | 5 15 16 17 4 11
8 | 


--------------------------------------------------------------------------------
/items.txt:
--------------------------------------------------------------------------------
 1 |  ['4', '5'] : 4 
 2 |  ['3', '5'] : 5 
 3 |  ['4'] : 5 
 4 |  ['4', '3'] : 4 
 5 |  ['5'] : 6 
 6 |  ['4', '3', '5'] : 3 
 7 |  ['3'] : 6 
 8 |  ['7'] : 3 
 9 |  ['7', '3'] : 3 
10 | 


--------------------------------------------------------------------------------
/rules.txt:
--------------------------------------------------------------------------------
1 | Rule: ['3'] ==> ['5'] : 5 : 0.8333 
2 | Rule: ['4'] ==> ['3'] : 4 : 0.8 
3 | Rule: ['4'] ==> ['5'] : 4 : 0.8 
4 | Rule: ['4', '3'] ==> ['5'] : 3 : 0.75 
5 | Rule: ['7'] ==> ['3'] : 3 : 1.0 
6 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Eclat-Python-Implementation
 2 | This is a fast implementation of Eclat, a pattern-mining algorithm for finding frequent itemsets in a transactional database.
 3 | Extraction of association rules is implemented as well.
 4 | 
 5 | The algorithm is run as follows:
 6 | 
 7 | python3 eclat.py input.txt support confidence output_FreqItems Output_Rules.txt 
 8 | 
 9 | Example: python3 eclat.py input.txt 3 0.4 items.txt rules.txt
10 | 
11 | Requirements:
12 | Python >=3.n
13 | 
14 | If you provide your own input file, make sure its delimiter matches the one used by the script. You can change it in the code at the `Read_Data` function call.
15 | 
16 | 


--------------------------------------------------------------------------------
/eclat.py:
--------------------------------------------------------------------------------
  1 | import numpy as np, itertools
  2 | import pandas as pd
  3 | np.random.seed(1)
  4 | kot = 0
  5 | FreqItems = dict()
  6 | support = dict()
  7 | 
  8 | 
  9 | def eclat(prefix, items, dict_id):
 10 |     while items:
 11 |         i,itids = items.pop()
 12 |         isupp = len(itids)
 13 |         if isupp >= minsup:
 14 | 
 15 |             FreqItems[frozenset(prefix + [i])] = isupp
 16 |             suffix = []
 17 |             for j, ojtids in items:
 18 |                 jtids = itids & ojtids
 19 |                 if len(jtids) >= minsup:
 20 |                     suffix.append((j,jtids))
 21 | 
 22 |             dict_id += 1
 23 |             eclat(prefix+[i], sorted(suffix, key=lambda item: len(item[1]), reverse=True), dict_id)
 24 | 
 25 | def rules(FreqItems, confidence):
 26 |     Rules = []
 27 |     cnt = 0
 28 | 
 29 |     for items, support in FreqItems.items():
 30 |         if (len(items) > 1):
 31 |             all_perms = list(itertools.permutations(items, len(items)))
 32 |             for lst in all_perms:
 33 |                 antecedent = lst[:len(lst) - 1]
 34 |                 consequent = lst[-1:]
 35 | 
 36 |                 conf = float(FreqItems[frozenset(items)]/FreqItems[frozenset(antecedent)]*100)
 37 |                 if (conf >= confidence):
 38 |                     cnt += 1
 39 |                     lift = float(conf/FreqItems[frozenset(consequent)])
 40 |                     if lift >= 1:
 41 |                         Rules.append((antecedent, consequent, support, conf, lift))
 42 | 
 43 | 
 44 |     print('Found %d Rules ' % (cnt))
 45 |     return Rules
 46 | 
 47 | 
 48 | def getantecendent(FreqItems, confidence):
 49 |     ant = []
 50 |     cnt = 0
 51 | 
 52 |     for items, support in FreqItems.items():
 53 |         if(len(items) > 1):
 54 |             all_perms = list(itertools.permutations(items, len(items)))
 55 |             for lst in all_perms:
 56 |                 antecedent = lst[:len(lst) - 1]
 57 |                 consequent = lst[-1:]
 58 | 
 59 |                 conf = float(FreqItems[frozenset(items)]/FreqItems[frozenset(antecedent)]*100)
 60 |                 if (conf >= confidence):
 61 |                     cnt += 1
 62 |                     lift = float(conf/FreqItems[frozenset(consequent)])
 63 |                     if lift >= 1:
 64 |                         ant.append((antecedent))
 65 | 
 66 |     print('Print %d attributes' % (cnt))
 67 |     return ant
 68 | 
 69 | def print_Frequent_Itemsets(output_FreqItems, FreqItems):
 70 |     file = open(output_FreqItems, 'w+')
 71 |     for item, support in FreqItems.items():
 72 |         file.write(" {} : {} \n".format(list(item), round(support,4)))
 73 | 
 74 | def print_Rules(output_Rules, Rules):
 75 |     file = open(output_Rules, 'w+')
 76 |     for a, b,supp, conf, lift in sorted(Rules):
 77 |         file.write("{} ==> {} support: {} confidence: {} \n".format((a), (b), round(supp, 4),round(conf, 4),round(lift, 4)))
 78 |     file.close()
 79 |     
 80 | def print_Antecendent(ant):
 81 |     file = open('output_antecendent.csv', 'w+')
 82 |     for a in sorted(ant):
 83 |         file.write("[] \n".format((a)))
 84 |     file.close()
 85 |     
 86 | def Read_Data(filename, delimiter=','):
 87 |     data = {}
 88 |     trans = 0
 89 |     f = open(filename, 'r', encoding="utf8")
 90 |     for row in f:
 91 |         trans += 1
 92 |         for item in row.split(delimiter):
 93 |             if item not in data:
 94 |                 data[item] = set()
 95 |             data[item].add(trans)
 96 |     f.close()
 97 |     return data
 98 | 
 99 | if __name__ == "__main__":
100 |     minsup   = 10
101 |     confidence = 75
102 |     output_FreqItems = 'output_freqitems.csv'
103 |     output_Rules = 'output_rule.csv'
104 |     dict_id = 0
105 |     data = Read_Data('input.txt', ',') #change the delimiter based on your input file
106 |     data.pop("\n",None)
107 |     data.pop("",None)
108 |     print('finished reading data..... \n Starting mining .....')
109 |     eclat([], sorted(data.items(), key=lambda item: len(item[1]), reverse=True), dict_id)
110 |     print('found %d Frequent items' % len(FreqItems))
111 |     Rules = rules(FreqItems, confidence)
112 |     print('Writing Rules .....')
113 | 
114 | 
115 | 
116 |     print_Frequent_Itemsets(output_FreqItems, FreqItems)
117 |     print_Rules(output_Rules, Rules)
118 |     Antecendent = getantecendent(FreqItems, confidence)
119 |     print_Antecendent(Antecendent)
120 |     
121 |     Ant1d = np.hstack(Antecendent)
122 |     
123 |     count = np.array(Ant1d)
124 |     unique, counts = np.unique(count, return_counts=True)
125 |     dict(zip(unique, counts))
126 |     counted = np.stack((unique, counts), axis=1)
127 |     appendFile = open('candidate.csv','w')
128 |     for i in range(0,len(counted)):
129 |         appendFile.write(str(unique[i])+";"+str(counts[i])+","+"\n")
130 |     appendFile.close()
131 |     
132 |     df = pd.DataFrame(counted, columns=['word','counter'])
133 |     df["counter"] = pd.to_numeric(df["counter"])
134 |     sortcounted = df.sort_values(["counter"], axis=0, 
135 |                      ascending=[False]) 
136 |     elimcounted = sortcounted.drop(sortcounted[sortcounted['counter']<2].index)
137 |     
138 |     listfrequent = list(elimcounted.iloc[:, 0].values)
139 |     
140 | 
141 | 


--------------------------------------------------------------------------------