├── .gitignore
├── LICENSE.txt
├── README.md
├── dataset
├── data.csv
├── data2.csv
├── data3.csv
├── data4.csv
├── data5.csv
├── data6.csv
├── data7.csv
├── kaggle.csv
├── tesco.csv
└── tesco2.csv
├── fpgrowth.PNG
├── fpgrowth_py
├── __init__.py
├── fpgrowth.py
└── utils.py
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | build
3 | apriori_py.egg-info
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 | Copyright (c) 2020 Chonyy
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 | The above copyright notice and this permission notice shall be included in all
10 | copies or substantial portions of the Software.
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | ## How to use
15 |
16 | ### Install the Pypi package using pip
17 |
18 | ```
19 | pip install fpgrowth_py
20 | ```
21 |
22 | Then use it like
23 |
24 | ```python
25 | from fpgrowth_py import fpgrowth
26 | itemSetList = [['eggs', 'bacon', 'soup'],
27 | ['eggs', 'bacon', 'apple'],
28 | ['soup', 'bacon', 'banana']]
29 | freqItemSet, rules = fpgrowth(itemSetList, minSupRatio=0.5, minConf=0.5)
30 | print(freqItemSet)
31 | print(rules)
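32 | # Example `rules` output (this sample corresponds to dataset/tesco2.csv, not the list above): [[{'beer'}, {'rice'}, 0.6666666666666666], [{'rice'}, {'beer'}, 1.0]]
33 | # Each rule is [antecedent, consequent, confidence], i.e. rule[0] --> rule[1] with confidence = rule[2]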
34 | ```
35 |
36 | ### Clone the repo
37 |
38 | Get a copy of this repo using git clone
39 | ```
40 | git clone https://github.com/chonyy/fpgrowth_py.git
41 | ```
42 |
43 | Run the program with a provided dataset and the **default** values *minSupport* = 0.5 and *minConfidence* = 0.5
44 |
45 | ```
46 | python fpgrowth.py -f dataset.csv
47 | ```
48 |
49 | Run the program with a dataset and explicit minimum support and minimum confidence values
50 |
51 | ```
52 | python fpgrowth.py -f tesco2.csv -s 0.5 -c 0.5
53 | ```
54 |
--------------------------------------------------------------------------------
/dataset/data.csv:
--------------------------------------------------------------------------------
1 | 8585,948,1125,1289,1544,1651
2 | 656,783,1017,1273,1318,1429,1516
3 | 112,1034
4 | 647,704,929,1040,1094,1182,1194,1214,1250
5 | 11,89,870,1094,1605,1940
--------------------------------------------------------------------------------
/dataset/data2.csv:
--------------------------------------------------------------------------------
1 | 1040,2372,2872,3159,4750
2 | 1040,1280,2495,2872,3186,4750,4995
3 | 1040,2872,4750
4 | 1280,2872,3186,4995
5 | 1040,1280,2872,3186,4995
6 | 1280,3186,4995
7 | 1040,1280,2872,3186,4750,4995
8 | 1040,1280,2495,2872,3186,4750,4995
9 | 1040,1280,2495,2872,3186,4750,4995
10 | 1040,2872,3186,4750,4995
11 | 1040,2495,4750
12 | 1040,1280,2872,3186,4750,4995
13 | 1040,1280,2872,3186,4750,4995
14 | 1040,1280,2872,4750,4995
15 | 1040,2372,2495,3159,4750
16 | 1280,2872,3186,4995
17 | 1040,4750
18 | 1280,2872,3186,4995
19 | 1040,1280,2495,2872,4750
20 | 1040,1280,2372,2872,3186,4995
21 | 1280,2872,3186,4750,4995
22 | 1040,2872,4750
23 | 1040,2872,4750
24 | 1280,2872,3186,4750,4995
25 | 1040,2495,4750
26 | 1040,1280,2872,3186,4750,4995
27 | 2372,3159
28 | 1040,1280,2872,3186,4750,4995
29 | 1040,1280,2372,2495,2872,3159,3186,4750,4995
30 | 1040,2495,2872,4750
31 | 1280,2495,2872,3186,4995
32 | 1040,4750
33 | 1040,2872,3186,4750,4995
34 | 1040,1280,2872,4750,4995
35 | 1280,2872,3186,4750,4995
36 | 1040,1280,2495,2872,3186,4750,4995
37 | 1280,2872,3186,4995
38 | 1040,1280,2872,3186,4750,4995
39 | 1280,2872,3186,4995
40 | 1040,2495,2872,4750
41 | 1040,1280,2495,2872,3186,4750,4995
42 | 1040,1280,2495,2872,3186,4750,4995
43 | 1040,1280,2495,2872,4750,4995
44 | 1040,1280,3186,4750,4995
45 | 1040,1280,2372,2495,2872,3186,4750,4995
46 | 1280,2872,3186,4995
47 | 1040,2872,4750
48 | 1040,1280,2495,2872,3186,4750,4995
49 | 1040,1280,2872,4750
50 | 1040,2495,2872,4750
51 | 1280,2495,2872,3186,4995
52 | 1040,1280,2495,2872,4750,4995
53 | 1280,2872,3186,4995
54 | 1040,2495,4750
55 | 1040,2372,2872,3159,4750
56 | 1280,2872,3186,4995
57 | 1280,2495,3186,4995
58 | 1040,2872,4750
59 | 1040,2872,4750
60 | 1040,1280,2495,2872,3186,4750,4995
61 | 2495,2872,4750
62 | 1040,2372,2872,3159,4750
63 | 1040,2495,2872,4750
64 | 1280,2872,3186,4995
65 | 1280,2495,2872,3186,4995
66 | 2495,2872
67 | 1280,2872,4995
68 | 2872,3186,4995
69 | 1040,2372,4750
70 | 1280,2872,3186,4995
71 | 1040,2372,3159,3186,4750
72 | 1280,2872,3186,4995
73 | 1040,2372,2872,4750
74 | 1040,2495,2872,4750
75 | 1280,2872,3186,4995
76 | 1040,2495,4750
77 | 2495
78 | 1040,1280,2495,2872,4750,4995
79 | 1040,2872,4750
80 | 1040,2495,4750
81 | 1040,4750
82 | 2872,3186,4995
83 | 1040,2872,4750
84 | 1280,2872,3186,4995
85 | 1040,1280,2872,3186,4750,4995
86 | 2495,2872
87 | 1280,2872,3186
88 | 1280,2872,3186,4995
89 | 1040,2372,2872,3159
90 | 1040,1280,2495,2872,3186,4750
91 | 1040,1280,2872,3186,4750,4995
92 | 1040,1280,2495,2872,3186,4750,4995
93 | 1040,1280,2372,2872,3159,3186,4750,4995
94 | 1280,2495,2872,3186,4995
95 | 1040,1280,2872,4750,4995
96 | 1040,2372,2495,3159,4750
97 | 1280,2872,3186,4995
98 | 1040,1280,2495,2872,3159,3186,4750,4995
99 | 1040,2495,2872,4750,4995
100 | 1040,1280,2872,3186,4750,4995
101 | 1040,1280,2872,3186,4750
102 | 1040,4750
103 | 1040,1280,2495,2872,3186,4750,4995
104 | 1040,1280,2495,2872,3186,4750
105 | 1040,2495,2872,3186,4750
106 | 1040,1280,2872,3186,4750,4995
107 | 1040,1280,2872,3186,4995
108 | 1040,1280,2872,3186,4750
109 | 1040,1280,2872,4750,4995
110 | 1040,4750
111 | 1280,3186,4995
112 | 1040,1280,2872,3159,3186,4750,4995
113 | 1040,1280,2872,3186,4750,4995
114 | 1040,1280,2495,2872,3186,4750,4995
115 | 1040,1280,2372,2495,2872,3159,3186,4750
116 | 1040,1280,2872,3186,4750,4995
117 | 1040,4750
118 | 1040,1280,2495,2872,4750,4995
119 | 1280,2872,3186
120 | 1040,1280,2872,3186,4750,4995
121 | 1040,1280,2872,3186,4750,4995
122 | 1040,2872,4750
123 | 1040,1280,2872,3186,4750,4995
124 | 1040,1280,2495,2872,3186,4750
125 | 1280,2872,3186,4995
126 | 1040,1280,2872,3186,4750,4995
127 | 1280,2872,3186,4750,4995
128 | 1040,1280,2872,3186,4750,4995
129 | 1040,1280,2495,2872,3186,4750
130 | 1040,1280,3186,4750,4995
131 | 1040,1280,2872,3186,4750,4995
132 | 1040,1280,2872,4750,4995
133 | 1280,2872,3186
134 | 1040,2495,2872,4750
135 | 1280,2872,3186,4995
136 | 1040,1280,2495,2872,3186,4750,4995
137 | 1040,2872,4750
138 | 4995
139 | 1040,4750
140 | 1040,1280,2872,3186,4750
141 | 1040,1280,2872,3186,4750,4995
142 | 1280,2872,3186
143 | 1280,2872,3186,4995
144 | 1040,2372,2872,3159,3186,4750,4995
145 | 1040,1280,2872,3186,4750
146 | 1040,2495,2872,4750
147 | 1040,1280,2872,3186,4750,4995
148 | 1040,2495,4750,4995
149 | 1040,1280,2872,3186,4750
150 | 1040,1280,2872,3186,4750,4995
151 | 1040,2495,2872,4750
152 | 1040,2372,2872,3159,4750
153 | 1280,2872,3186,4995
154 | 1040,1280,2872,3186,4750,4995
155 | 1280,2872
156 | 1040,1280,2495,2872,3186,4750,4995
157 | 1040,1280,3186,4750,4995
158 | 1040,2372,2495,3159,4750
159 | 1040,2495,2872,4750
160 | 1280,2872,3159,3186,4995
161 | 1280,2872,3186,4995
162 | 1040,1280,2495,2872,3186,4750
163 | 1040,2495,2872,4750
164 | 1040,1280,2495,2872,3186,4750,4995
165 | 1040,2872,3186,4750,4995
166 | 1280,2872,3186
167 | 1040,1280,2495,2872,3186,4995
168 | 1040,2872,4750
169 | 1040,1280,2372,2495,2872,3159,3186,4750,4995
170 | 1040,1280,2872,3186,4750,4995
171 | 1040,2495,2872,4750
172 | 1040,1280,2372,2495,2872,3159,3186,4750,4995
173 | 1040,1280,2495,2872,3186,4995
174 | 1040,1280,2372,2495,2872,3186,4750,4995
175 | 1280,3186,4995
176 | 1040,2495,2872,4750
177 | 1040,1280,2495,2872,4750
178 | 1040,4750
179 | 1040,1280,2872,3186,4750,4995
180 | 1040,1280,2872,3186,4750
181 | 1040,1280,2872,4750,4995
182 | 1040,1280,2372,2872,3159,3186,4750,4995
183 | 1040,1280,2872,3186,4750,4995
184 | 1040,2495,2872,4750
185 | 2872
186 | 1040,1280,2495,2872,3186,4750,4995
187 | 1280,2872,3186,4750,4995
188 | 1040,1280,3186,4750,4995
189 | 1040,1280,2495,2872,3186,4995
190 | 1040,1280,2495,2872
191 | 1040,2872
192 | 1040,1280,2495,2872,3186,4750,4995
193 | 1040,1280,2872,3186,4750,4995
194 | 1040,1280,2872,3186,4750
195 | 1040,2372,2495,2872,4750
196 | 1040,1280,2495,2872,3186,4750,4995
197 | 1040,1280,2495,2872,4750,4995
198 | 1040,2495,2872,4750
199 | 1040,1280,2495,2872,3186,4750,4995
200 | 1280,2872,3186,4995
201 | 1040,1280,2872,3186,4750,4995
202 | 1040,1280,2495,2872,3186,4750,4995
203 | 4750
204 | 1280,2872,3186,4995
205 | 1040,2495,4750
206 | 1040,3159,4750
207 | 1040,1280,2872,4750,4995
208 | 1040,2495,2872,4750
209 | 1040,1280,2372,2495,2872,3159,3186,4750,4995
210 | 1040,2372,2495,2872,3159,4750
211 | 1040,1280,2372,2872,3159,3186,4750,4995
212 | 1040,1280,2372,2495,2872,3159,3186,4750,4995
213 | 1040,2372,2872,4750
214 | 1040,1280,2872,3186,4750,4995
215 | 1040,2872
216 | 1040,2872,4750
217 | 1280,2872,3186,4995
218 | 1040,1280,2372,2872,4750,4995
219 | 1040,1280,2495,3186,4750
220 | 1040,2872,4750
221 | 1280,2872,4995
222 | 1040,2495,2872,4750
223 | 1040,4750
224 | 1040,1280,2872,3186,4750,4995
225 | 1040,2372,3159,4750
226 | 1040,1280,2872,3186,4750,4995
227 | 1040,2495,2872,4750
228 | 1040,1280,2872,3186,4750,4995
229 | 1040,1280,2495,2872,4750,4995
230 | 1040,4750
231 | 1280,2872,3186,4995
232 | 1040,4750
233 | 1040,1280,2495,2872,3186,4750,4995
234 | 1040,1280,2872,3186,4750,4995
235 | 2872,3186
236 | 1280,2872,3186,4995
237 | 1040,1280,2872,3159,3186,4750,4995
238 | 1040,4750
239 | 1040,1280,2872,3186,4750,4995
240 | 1040,4750
241 | 1280,2872,3186,4995
242 | 1040,1280,2872,3186,4750
243 | 1040,2372,2495,3159,4750
244 | 1040,1280,2872,3186,4750,4995
245 | 1040,2495,4750
246 | 1280,2872,3186,4995
247 | 1040,1280,2372,2495,2872,3159,3186,4750,4995
248 | 1040,2495,2872,4750
249 | 1280,2495,2872,3186
250 | 1040,2372,2495,2872,3159,4750
251 | 1040,2495,2872,3186,4750
252 | 1040,2495,2872,4750
253 | 1040,1280,2872,3186,4750,4995
254 | 1040,1280,2872,3186,4750,4995
255 | 1280,2872,3186,4995
256 | 1280,2872,3186,4995
257 | 1040,2372,3159,4750
258 | 1040,1280,2872,3186,4750,4995
259 | 1040,4750
260 | 1280,3186,4995
261 | 1280,2872,3186,4995
262 | 1040,1280,2872,3186,4750,4995
263 | 1280,2872,3186,4995
264 | 1040,1280,2372,2495,2872,3159,3186,4750
265 | 1280,2872,3186,4995
266 | 1040,1280,2495,2872,4750,4995
267 | 1040,4750
268 | 1040,1280,2495,2872,3186,4750,4995
269 | 1280,2872,3186,4995
270 | 1040,1280,2372,2872,3159,3186,4750,4995
271 | 1040,2495,2872,4750,4995
272 | 1280,2872,3186,4995
273 | 1040,2872,4750
274 | 1040,1280
--------------------------------------------------------------------------------
/dataset/data3.csv:
--------------------------------------------------------------------------------
1 | 2076,4785,5793,6338,9484
2 | 2076,2564,5793,6395,9484,9994
3 | 5793,9484
4 | 2076,2564,5793,6395,9994
5 | 2076,9484
6 | 2564,5793,6395,9994
7 | 2076,2564,5793,6395,9484,9994
8 | 2076,2564,5016,5793,6395,9484,9994
9 | 2076,2564,5793,6395,9484,9994
10 | 2076,2564,9484,9994
11 | 2564,5793,6395,9994
12 | 2076,4785,5793,6338,9484
13 | 2076,5016,9484
14 | 2076,2564,5793,6395,9484,9994
15 | 2076,2564,5016,5793,6338,6395,9484,9994
16 | 2564,5793,6395,9994
17 | 2564,5793,6395,9994
18 | 5793,6338,9484
19 | 2076,2564,5016,5793,6395,9484,9994
20 | 2564,4785,5793,6338,6395,9484,9994
21 | 2076,5793,6338,9484,9994
22 | 2076,2564,5793,9484,9994
23 | 2076,9484
24 | 2076,2564,5793,6395,9484,9994
25 | 2076,5016,9484
26 | 2564,5793,6395,9484,9994
27 | 2076,5793,9484
28 | 2076,2564,5793,6395,9484,9994
29 | 2076,2564,5793,6395,9484,9994
30 | 2076,5793,6338,6395,9484
31 | 2564,5793,6395,9994
32 | 2076,5016,5793,9484
33 | 2076,5016,5793,9484
34 | 2076,5016,9484
35 | 2076,5793,6395,9484,9994
36 | 2076,2564,5016,5793,6395,9484,9994
37 | 2076,2564,5793,6395,9484,9994
38 | 2076,2564,5016,5793,6395,9484
39 | 2564,5793,6395,9994
40 | 2076,2564,5793,6395,9484
41 | 2076,4785,5016,5793,6338,9484
42 | 2564,5793,9994
43 | 2076,2564,5016,5793,6395,9484,9994
44 | 2076,2564,5793,6395,9484,9994
45 | 2076,2564,4785,5016,5793,6338,6395,9484,9994
46 | 2076,9484,9994
47 | 5793
48 | 2076,2564,4785,5016,5793,6338,6395,9484
49 | 5793,6395
50 | 2564,5793,9994
51 | 2076,2564,5793,6395,9484,9994
52 | 2076,4785,5793,6338,9484
53 | 2564,4785,5793,6338,6395,9994
54 | 2076,9484
55 | 2076,2564,5016,5793,6395,9484,9994
56 | 2564,5016,5793,6395
57 | 2076,9484
58 | 2076,9484
59 | 2076,2564,5793,6395,9484,9994
60 | 2076,2564,5016,5793,6395,9484,9994
61 | 2564,5793,6395,9994
62 | 2076,5016,6338,9484
63 | 2076,2564,6395,9484
64 | 5016,5793,9484
65 | 2076,5793,9484
66 | 2564,5793,6395,9484
67 | 2076,9484
68 | 2076,9484
69 | 2076,2564,5016,5793,6395,9484,9994
70 | 9484
71 | 2076,2564,4785,5016,5793
72 | 2564,5793,9484,9994
73 | 2076,4785,6338,9484
74 | 2076,5793,6395,9484,9994
75 | 2076,2564,6395
76 | 2076,9484
77 | 2076,9484
78 | 2076,4785,5016,5793,6338,9484
79 | 2076,9484
80 | 2076,5793,9484
81 | 2076,5016,5793,9484
82 | 2076,5016,9484
83 | 2564,5016,5793,6395,9994
84 | 2564,5793,6395,9994
85 | 2076,9484
86 | 2076,2564,5793,6395,9484,9994
87 | 2076,5016,5793,9484
88 | 2076,5016,9484
89 | 2076,9484
90 | 2076,2564,5793,6395,9484,9994
91 | 2076,9484
92 | 2076,2564,5016,5793,6395,9484,9994
93 | 2076,5793,6395,9484
94 | 2076,2564,5016,5793,6395,9484,9994
95 | 2076,2564,5016,5793,6395,9484,9994
96 | 2076,9484
97 | 2076,4785,5016,5793,6338,9484
98 | 2076,2564,5793,6395,9994
99 | 2076,2564,5793,6395,9994
100 | 2076,9484
101 | 2076,2564,5016,5793,6395,9484,9994
102 | 2076,2564,5793,6395,9484,9994
103 | 2076,2564,5793,6395,9484,9994
104 | 2564,5793,6395,9994
105 | 2564,5793,6395,9994
106 | 2076,5793,9484
107 | 2076,2564,5793,6395,9484,9994
108 | 2564,5016,5793,6395,9484,9994
109 | 2076,5793,6395,9484
110 | 2076,2564,5793,9484
111 | 5793,9484
112 | 2076,2564,4785,6338,6395,9484,9994
113 | 2076,5016,5793,9484,9994
114 | 2076,2564,5793,6395,9484
115 | 2564,6395,9994
116 | 2076,2564,5793,9484,9994
117 | 2076,2564,5793,6395,9484,9994
118 | 2076,4785,5016,5793,6338,9484
119 | 2076,2564,5793,6395,9484,9994
120 | 2076,2564,5793,6395,9484,9994
121 | 2564,6395,9994
122 | 2076,2564,5016,5793,6338,6395,9484,9994
123 | 2076,2564,5793,9484,9994
124 | 2076,2564,5016,5793,6395,9484
125 | 2076,5016,5793,9484
126 | 2564,5793,6395,9994
127 | 2076,2564,5793,6395,9484,9994
128 | 2564,5016,5793,6395,9994
129 | 2076,5793,9484
130 | 2076,2564,4785,5793,9484,9994
131 | 2076,2564,5016,5793,9484
132 | 2076,2564,4785,5016,5793,6338,6395,9484,9994
133 | 2076,4785,5793,6338,9484,9994
134 | 2564,5793,6395,9484,9994
135 | 2076,2564,5016,5793,6395,9484,9994
136 | 2076,5016,9484
137 | 2564,5793,6395
138 | 2076,5016,5793,6395,9484,9994
139 | 2564,5793,6395,9994
140 | 2076,6395,9484,9994
141 | 2076,2564,4785,5793,6395,9484,9994
142 | 2076,2564,5016,5793,9484
143 | 5793
144 | 2076,9484
145 | 2076,2564,5016,5793,6395,9484,9994
146 | 2076,2564,5016,5793,6395,9484,9994
147 | 2076,5016,5793,6395,9994
148 | 2076,2564,5793,6395
149 | 2076,2564,5016,5793,6395,9484,9994
150 | 2076,2564,5016,5793,6395,9484
151 | 2076,2564,5793,9484,9994
152 | 2076,5793,9484,9994
153 | 2076,2564,5793,6395,9484,9994
154 | 2076,2564,5016,5793,6395,9484,9994
155 | 2076,2564,5016,6395,9484
156 | 2564,5793,6395,9994
157 | 2076,2564,5793,6395,9484,9994
158 | 2076,5793,9484
159 | 2076,5016,5793,6395,9484,9994
160 | 2076,9484
161 | 2076,2564,4785,5793,6338,9484
162 | 2076,4785,5016,6338,9484
163 | 2076,2564,5793,6395,9484,9994
164 | 2076,4785,5016,6338,9484
165 | 2076,2564,5793,6395,9484,9994
166 | 2076,4785,6338,9484
167 | 2076,2564,5793,9484,9994
168 | 2076,5016,5793,9484
169 | 2076,5793,9484
170 | 2564,5793,6395,9994
171 | 2076,9484
172 | 2076,5793,6338,9484
173 | 2564,5793,6395,9994
174 | 2076,2564,4785,5016,5793,6338,6395,9484,9994
175 | 2076,2564,5793,9484
176 | 2076,2564,5793,6395,9484,9994
177 | 2076,2564,5016,5793,6395,9484,9994
178 | 2076,2564,5793,6395,9484,9994
179 | 2076,2564,5016,5793,6395,9484,9994
180 | 2564,5793,6395
181 | 2076,2564,4785,5793,6338,6395,9484
182 | 2076,2564,5793,6395,9484,9994
183 | 2076,2564,5793,6395,9484,9994
184 | 2076,2564,5016,5793,6395,9484,9994
185 | 2076,5016,5793,6395,9484
186 | 2076,2564,5793,6395,9484,9994
187 | 2076,2564,5016,5793,6395,9484,9994
188 | 2076,4785,5016,6338,9484,9994
189 | 4785,6338
190 | 2564,5793,6395,9994
191 | 2076,2564,5793,6395,9484,9994
192 | 2564,5016,5793,6395,9994
193 | 4785,5016,6338
194 | 2076,4785,5793,9484
195 | 2564,5793,6395
196 | 2564,5793,6395,9994
197 | 2076,5016,5793,9484
198 | 2076,2564,5793,6395,9484,9994
199 | 5016
200 | 2076,2564,5793,6395,9484
201 | 2076,2564,6395,9484,9994
202 | 2076,4785,5793,6338,9484
203 | 2076,2564,5793,6395,9484,9994
204 | 2076,5793,6338,9484
205 | 2076,2564,5016,5793,6395,9484,9994
206 | 2076,2564,6395,9484
207 | 2076,2564,5793,9484
208 | 2076,5793,9484
209 | 2564,5793,6395,9994
210 | 2076,2564,5793,9484,9994
211 | 2076
212 | 2076,5016,9994
213 | 2564,5793,6395
214 | 2076,2564,5016,5793,6395,9994
215 | 2076,6395,9484,9994
216 | 2076,2564,5793,6395,9484,9994
217 | 2076,5016,5793,6395,9484
218 | 2076,2564,5016,5793,6395,9484,9994
219 | 2076,2564,5793,6395,9484,9994
220 | 2076,5016,5793,6338,9484
221 | 2076,2564,5793,6395,9484,9994
222 | 5016,9484
223 | 2076,2564,5793,9484,9994
224 | 2076,2564,5016,5793,6395,9484,9994
225 | 2076,5793,9484
226 | 2076,2564,5793,6395,9484
227 | 5793,6395,9994
228 | 2076,9484
229 | 2076,5793,9484
230 | 2076,5793,6395,9484,9994
231 | 2076,2564,5016,5793,6338,9484
232 | 2564,4785,5793,6338,6395,9994
233 | 2564,5793,6395,9484,9994
234 | 2076,2564,5793,6395,9484,9994
235 | 2564,5016,5793,6395,9994
236 | 2076,2564,5016,5793,6395,9484,9994
237 | 2076,2564,5016,5793,6395,9484,9994
238 | 2076,5016,5793,9484
239 | 5016,5793
240 | 2076,2564,5016,5793,6395,9484,9994
241 | 2076,2564,5016,5793,6395
242 | 2076,9484
243 | 2076,5793,6395,9484,9994
244 | 2076,2564,5016,5793,6395,9484,9994
245 | 5016,9484
246 | 2076,2564,5793,6395,9484,9994
247 | 2076,9484
248 | 2076,5016,5793,9484,9994
249 | 2564,5793,6395,9484,9994
250 | 2564,5016,5793,6395,9484,9994
251 | 2076,2564,5016,5793,6395,9484
252 | 5793,6338
253 | 2076,2564,5793
254 | 2076,2564,5016,5793,6395,9484,9994
255 | 2564,5793,6395,9994
256 | 2564,5016,5793,6395,9994
257 | 2076,4785,5793,6338,9484,9994
258 | 2564,6395,9994
259 | 2076,2564,5793,6395,9484,9994
260 | 2076,9484
261 | 2076,2564,5016,5793,6395,9484,9994
262 | 2564,5793,6395,9994
263 | 2076,9484
264 | 2076,5016,6395,9484,9994
265 | 2076,2564,4785,5793,6338
266 | 2564,5793,6395,9994
267 | 2076,2564,5793,6395,9484,9994
268 | 2076,4785,6338,9484
269 | 2076,2564,5016,5793,6338,6395,9484
270 | 2076,5016,9484
271 | 2076,2564,5016,5793,9484
272 | 2076,2564,5016,5793,6395,9484,9994
273 | 2076,4785,6338,9484
274 | 2076,2564,5793,9484,9994
275 | 2076,2564,5793,6395,9484,9994
276 | 2076,2564,5793,6395,9484
277 | 2076,4785,5016,5793,9484
278 | 2564,5016,5793,6395,9994
279 | 2076,5016,5793,9484
280 | 2076,5793,9484
281 | 2564,5793,6395,9994
282 | 2076,4785,5016,5793,6338
283 | 2076,2564,5016,5793,6395,9484,9994
284 | 2076,5016,5793
285 | 2076,4785,5016,9484
286 | 2076,5793,9484
287 | 2076,2564,5793,9484,9994
288 | 2564,5016,5793,9994
289 | 2564,5016,5793,6395,9994
290 | 5793,6395,9994
291 | 2076,2564,5016,5793,9484
292 | 2076,9484
293 | 2076,2564,5793,6395,9484,9994
294 | 2076,2564,5793,6395,9484,9994
295 | 4785,5793
296 | 2076,2564,5016,6395,9484
297 | 2076
298 | 2076,4785,6338,9484
299 | 2076,2564,5793,6338,9484,9994
300 | 2076,5016,5793,9484
301 | 2076,6395,9484
302 | 2076,2564,5793,9484,9994
303 | 2076,2564,5016,5793,9484
304 | 2076,5016,5793,6395,9484,9994
305 | 2076,5016,5793,9484
306 | 2076,2564,5016,5793,9484,9994
307 | 2076,2564,5016,5793,6395,9484,9994
308 | 2076,2564,5016,5793,9994
309 | 2076,2564,5016,5793,6395,9484,9994
310 | 2564,5793,6395,9994
311 | 2076,2564,5793,6395,9484
312 | 2076,9484
313 | 2076,5016,5793,9484,9994
314 | 2076,4785,9484
315 | 2076,2564,4785,5793,6338,6395,9484,9994
316 | 2564,5793,6395,9994
317 | 2076,2564,5793,6395,9994
318 | 5016,5793
319 | 2564,5016,5793,6338,6395,9994
320 | 2076,2564,5793,6395,9484,9994
321 | 2076,5016,5793,6395,9484,9994
322 | 5793
323 | 2076,9484
324 | 2076,2564,5793,6395,9484
325 | 2564,5016,5793,6395,9994
326 | 2076,5016,5793,6395,9484
327 | 2076,2564,5016,5793,6395,9484,9994
328 | 2076,2564,5793,6395,9484,9994
329 | 2076,2564,5016,5793,6395,9484,9994
330 | 2564,5793,9994
331 | 2076,5016,9484
332 | 2076,2564,5016,5793,6395,9484,9994
333 | 2076,2564,5016,5793,6395,9484,9994
334 | 2564,5016,5793,6395,9484,9994
335 | 2076,2564,5793,6395,9484,9994
336 | 2076,2564,5016,5793,6395,9484,9994
337 | 2564,5793,6395,9484,9994
338 | 2076,5793,6395,9484
339 | 2076,2564,5793,6395,9484
340 | 2564,5793,6395,9994
341 | 2076,2564,5793,6338,6395,9994
342 | 2076,2564,5016,5793,6395,9484,9994
343 | 2076,5793,6395,9484,9994
344 | 2076,2564,5016,5793,6395,9484,9994
345 | 2076,2564,5793,6395,9994
346 | 2076,4785,5016,5793,6338,9484
347 | 2076,6395,9484
348 | 2076,9484
349 | 2076,2564,5793,9484,9994
350 | 2564,5793,9994
351 | 2564,4785,5793,6338,6395,9994
352 | 2076,2564,5016,5793,6395,9484
353 | 2076,9484
354 | 2076,2564,5793,6395,9484,9994
355 | 2076,5793,9484
356 | 2564,5793
357 | 2076,2564,5793,6395,9484,9994
358 | 2076,9484
359 | 2076,5016,5793,9484
360 | 2076,2564,5016,5793,6395
361 | 2076,4785,5016,6338,9484
362 | 2076,5016,9484
363 | 2564,6395,9994
364 | 2076,2564,5016,5793,6395,9484
365 | 2564,5793,6395,9484,9994
366 | 2076,5016,5793
367 | 2564,6395,9994
368 | 2564,4785,5793,6395,9994
369 | 2076,2564,5016,5793,6395,9484,9994
370 | 9484
371 | 2076,2564,5793,6395,9484,9994
372 | 2076,2564,5016,5793,6395,9484,9994
373 | 2076,9484
374 | 2564,4785,5016,5793,6338,9994
375 | 2076,2564,5793,6395,9484,9994
376 | 2564,5793,6395
377 | 2076,2564,5793,6395,9484,9994
378 | 2076,2564,5793,6395,9484,9994
379 | 2076,2564,5793,9484
380 | 2076,2564,5793,6395,9484,9994
381 | 2076,5016,5793,9484
382 | 2076,2564,5016,9484
383 | 2564,5016,5793,6395,9994
384 | 2076,2564,5793,6395,9484,9994
385 | 2564,4785,5793,6338,6395,9994
386 | 2076,2564,5793,9484,9994
387 | 2076,5016,9484
388 | 2076,5016,5793,9484
389 | 2076,2564,5793,9484
390 | 2076,5016,9484
391 | 2076,4785,5016,5793,6338,6395,9484,9994
392 | 2076,2564,5793,6395,9484,9994
393 | 2076,2564,5016,5793,9484,9994
394 | 2076,2564,5793,9484
395 | 2076,5793,9484
396 | 2076,4785,5793,6338,9484
397 | 2564,5793,6395,9994
398 | 2076,5016,5793,6395,9484,9994
399 | 2076,2564,5793,6395,9484,9994
400 | 2076,9484
401 | 2076,2564,5793,6395,9484,9994
402 | 2076,4785,5793,6338,9484,9994
403 | 2076,6338,9484
404 | 2076,9484
405 | 5793,6395,9994
406 | 2076,5016,5793,9484
407 | 2076,2564,5016,5793,6395,9484,9994
408 | 5793
409 | 5793,6395,9994
410 | 2076,2564,5793,6395,9484,9994
411 | 2076,2564,4785,9484,9994
412 | 2564,5793,6395,9484,9994
413 | 2076,5016,6395,9484,9994
414 | 2076,5016,5793,6395,9484,9994
415 | 2076,5016,5793,9484
416 | 2076,2564,5016,5793,6395,9484,9994
417 | 2076,2564,5793,6395,9484
418 | 2076,2564,5793,6395,9484,9994
419 | 2076,2564,4785,6338,6395,9484
420 | 2076,5793,9484
421 | 2076,5016,5793,9484
422 | 2564,4785,5793,6338,6395,9994
423 | 2076,2564,5016,5793,6395,9484,9994
424 | 2076,5016,5793,9484
425 | 5793,6395,9484,9994
426 | 2076,5016,6395,9484,9994
427 | 2076,2564,5793,9484
428 | 2564,5793
429 | 2564,5793,6395,9994
430 | 2564,5793,6395,9994
431 | 2076,5016,9484
432 | 2076,4785,5793,6338,9484
433 | 2076,4785,5793,6338,6395,9484,9994
434 | 2076,5016,9484
435 | 5016
436 | 2076,9484
437 | 2076,2564,5016,5793,6395,9484,9994
438 | 2076,2564,4785,5016,5793,6338,6395,9484,9994
439 | 2076,2564,5793,6395,9484,9994
440 | 2076,9484
441 | 2076,9484
442 | 2076,2564,5793,6395,9484,9994
443 | 2076,2564,5793,6395,9484,9994
444 | 2076,2564,9484,9994
445 | 4785,5016,5793,6338,6395,9994
446 | 2076,2564,5016,5793,6395,9484,9994
447 | 2076,2564,5016,5793,6395,9484
448 | 2564,6395,9994
449 | 2076,2564,5793,6395,9484
450 | 5793,9484
451 | 2564,5793,6395,9994
452 | 6395,9994
453 | 2076,2564,5016,5793,6395,9484,9994
454 | 2564,5016,5793,9994
455 | 2076,2564,5793,6395,9484,9994
456 | 2076,5016,9484
457 | 2076,4785,5016,5793,6338,6395,9484,9994
458 | 2076,2564,4785,5016,5793,6395,9484,9994
459 | 2564,5016,5793,6395,9994
460 | 2076,5016,9484
461 | 2076,5016,9484
462 | 2076,2564,5016,5793,9484
463 | 2076,2564,5793,6395,9484,9994
464 | 2076,4785,6338,9484
465 | 2076,2564,5016,5793,6395,9484
466 | 2564,5793,6395,9484
467 | 2076,5016,5793,6395,9484
468 | 2076,5793,9484
469 | 2564,5793,6395,9484,9994
470 | 2076,4785,5793,6338,9484
471 | 2076,2564,9484
472 | 2076,2564,5793,6395,9484
473 | 2076,4785,5016,6338,9484
474 | 2076,5793,6395,9484,9994
475 | 2564,5016,5793,6395,9994
476 | 2076,2564,5016,6395,9484,9994
477 | 2076,9484
478 | 2564,5793,6395,9994
479 | 2076,5016,5793,6395,9484,9994
480 | 2076,2564,5793,6395,9484,9994
481 | 2076,2564,5016,5793,6395,9484,9994
482 | 2076,2564,5793,6395,9484,9994
483 | 2076,2564,5793,6395,9484
484 | 2564,5016,5793,6395
485 | 2076,5793,6395,9484,9994
486 | 2076,9484
487 | 5793,6395,9994
488 | 2076,2564,5793,6395,9484,9994
489 | 2076,2564,5016,5793,6395,9484,9994
490 | 2564,4785,5793,6395,9994
491 | 2564,5016,5793,6395,9994
492 | 2076,2564,5793,9484
493 | 2076,4785,6338,9484
494 | 2076,2564,5793,6395,9484,9994
495 | 2076,2564,5793,6395,9484,9994
496 | 2076,2564,6395,9484,9994
497 | 2076,9484
498 | 2076,9484
499 | 2564,5793,6395,9994
500 | 2564,5793,6395
501 | 2076,2564,9484
502 | 2564,5793,9994
--------------------------------------------------------------------------------
/dataset/tesco.csv:
--------------------------------------------------------------------------------
1 | a,c,d,f,g,i,m,p
2 | a,b,c,f,i,m,o
3 | b,f,h,j,o
4 | b,c,k,s,p
5 | a,c,e,f,l,m,n,p
--------------------------------------------------------------------------------
/dataset/tesco2.csv:
--------------------------------------------------------------------------------
1 | apple,beer,rice,chicken
2 | apple,beer,rice
3 | apple,beer
4 | apple,mango
5 | milk,beer,rice,chicken
6 | milk,beer,rice
7 | milk,beer
8 | milk,mango
--------------------------------------------------------------------------------
/fpgrowth.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chonyy/fpgrowth_py/2457a5301486c920eb9277b37ae79a3866fe0bcc/fpgrowth.PNG
--------------------------------------------------------------------------------
/fpgrowth_py/__init__.py:
--------------------------------------------------------------------------------
1 | from fpgrowth_py.fpgrowth import *
2 | from fpgrowth_py.utils import *
--------------------------------------------------------------------------------
/fpgrowth_py/fpgrowth.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict, OrderedDict
2 | from csv import reader
3 | from itertools import chain, combinations
4 | from optparse import OptionParser
5 | from fpgrowth_py.utils import *
6 |
def fpgrowth(itemSetList, minSupRatio, minConf):
    """Mine frequent item sets and association rules from a list of item sets.

    Args:
        itemSetList: List of transactions, each an iterable of hashable items.
        minSupRatio: Minimum support expressed as a fraction of
            ``len(itemSetList)``.
        minConf: Minimum confidence, forwarded to ``associationRule``.

    Returns:
        A ``(freqItems, rules)`` tuple. When no item reaches the minimum
        support, ``'No frequent item set'`` is printed and ``([], [])`` is
        returned so that callers can always unpack the result.
    """
    frequency = getFrequencyFromList(itemSetList)
    # Convert the support ratio into an absolute count threshold.
    minSup = len(itemSetList) * minSupRatio
    fpTree, headerTable = constructTree(itemSetList, frequency, minSup)
    if fpTree is None:
        print('No frequent item set')
        # Return an empty result instead of an implicit None, which would
        # break ``freqItems, rules = fpgrowth(...)``.
        return [], []

    freqItems = []
    # mineTree fills freqItems in place.
    mineTree(headerTable, minSup, set(), freqItems)
    rules = associationRule(freqItems, itemSetList, minConf)
    return freqItems, rules
18 |
def fpgrowthFromFile(fname, minSupRatio, minConf):
    """Mine frequent item sets and association rules from a CSV file.

    Args:
        fname: Path of the CSV file; it is read with ``getFromFile``.
        minSupRatio: Minimum support expressed as a fraction of the number
            of transactions read.
        minConf: Minimum confidence, forwarded to ``associationRule``.

    Returns:
        A ``(freqItems, rules)`` tuple. When no item reaches the minimum
        support, ``'No frequent item set'`` is printed and ``([], [])`` is
        returned so that callers can always unpack the result.
    """
    itemSetList, frequency = getFromFile(fname)
    # Convert the support ratio into an absolute count threshold.
    minSup = len(itemSetList) * minSupRatio
    fpTree, headerTable = constructTree(itemSetList, frequency, minSup)
    if fpTree is None:
        print('No frequent item set')
        # Return an empty result instead of an implicit None, which would
        # break ``freqItems, rules = fpgrowthFromFile(...)``.
        return [], []

    freqItems = []
    # mineTree fills freqItems in place.
    mineTree(headerTable, minSup, set(), freqItems)
    rules = associationRule(freqItems, itemSetList, minConf)
    return freqItems, rules
30 |
if __name__ == "__main__":
    # Command-line entry point: read a CSV file, mine it, print the results.
    optparser = OptionParser()
    optparser.add_option('-f', '--inputFile',
                         dest='inputFile',
                         help='CSV filename',
                         default=None)
    optparser.add_option('-s', '--minSupport',
                         dest='minSup',
                         help='Min support (float)',
                         default=0.5,
                         type='float')
    optparser.add_option('-c', '--minConfidence',
                         dest='minConf',
                         help='Min confidence (float)',
                         default=0.5,
                         type='float')

    (options, args) = optparser.parse_args()

    # Without -f there is nothing to read; report a usage error instead of
    # letting open(None) fail with an obscure TypeError later on.
    if options.inputFile is None:
        optparser.error('an input CSV file is required (use -f/--inputFile)')

    result = fpgrowthFromFile(
        options.inputFile, options.minSup, options.minConf)

    # Only unpack when a result was actually produced.
    if result is not None:
        freqItemSet, rules = result
        print(freqItemSet)
        print(rules)
55 |
--------------------------------------------------------------------------------
/fpgrowth_py/utils.py:
--------------------------------------------------------------------------------
1 | from csv import reader
2 | from collections import defaultdict
3 | from itertools import chain, combinations
4 |
class Node:
    """One node of the FP-tree.

    Attributes are set in ``__init__``: the item name, its running count,
    a link to the parent node, a dict of child nodes keyed by item, and a
    ``next`` link that starts out as ``None``.
    """

    def __init__(self, itemName, frequency, parentNode):
        """Create a node for ``itemName`` with an initial count."""
        self.itemName = itemName
        self.count = frequency
        self.parent = parentNode
        # Child nodes keyed by their item.
        self.children = {}
        # Link to another node; None until one is attached.
        self.next = None

    def increment(self, frequency):
        """Add ``frequency`` to this node's count."""
        self.count += frequency

    def display(self, ind=1):
        """Print this node and, recursively, its subtree.

        ``ind`` is the nesting depth; each level adds one more repetition
        of the indent string.
        """
        indent = ' ' * ind
        print(indent, self.itemName, ' ', self.count)
        deeper = ind + 1
        for key in self.children:
            self.children[key].display(deeper)
20 |
def getFromFile(fname):
    """Read transactions from a CSV file.

    Each CSV row becomes one item set; empty fields are dropped. Every row
    is given a frequency of 1.

    Args:
        fname: Path of the CSV file to read.

    Returns:
        ``(itemSetList, frequency)`` where ``itemSetList`` is a list of
        lists of strings and ``frequency`` is a list of ``1`` values of the
        same length.
    """
    itemSetList = []
    frequency = []

    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for files passed to csv.reader.
    with open(fname, 'r', newline='') as file:
        for row in reader(file):
            # Drop empty fields (e.g. from trailing or doubled commas).
            itemSetList.append([field for field in row if field])
            frequency.append(1)

    return itemSetList, frequency
33 |
def constructTree(itemSetList, frequency, minSup):
    """Build an FP-tree and its header table from weighted item sets.

    Args:
        itemSetList: Sequence of item sets, each an iterable of hashable
            items.
        frequency: Sequence parallel to ``itemSetList``; ``frequency[i]`` is
            the count contributed by ``itemSetList[i]``.
        minSup: Minimum total count an item needs in order to be kept.

    Returns:
        ``(fpTree, headerTable)`` where ``fpTree`` is the root ``Node`` and
        ``headerTable`` maps each kept item to ``[count, headNode]``, or
        ``(None, None)`` when no item reaches ``minSup``.
    """
    # Count the weighted occurrences of every item.
    itemCount = defaultdict(int)
    for idx, itemSet in enumerate(itemSetList):
        for item in itemSet:
            itemCount[item] += frequency[idx]

    # Keep only items reaching minSup.
    # HeaderTable column [Item: [frequency, headNode]]
    headerTable = {item: [sup, None]
                   for item, sup in itemCount.items() if sup >= minSup}
    if not headerTable:
        return None, None

    # Fix ONE global item order: descending count, ties broken by position
    # in the header table (sorted() is stable). Sorting each item set by
    # count alone leaves tied items in per-transaction order, so the same
    # items could form differently ordered branches and split the support
    # of an item set across them.
    rank = {item: pos for pos, item in enumerate(
        sorted(headerTable, key=lambda item: headerTable[item][0], reverse=True))}

    # Init Null head node
    fpTree = Node('Null', 1, None)
    # Insert each cleaned and globally ordered item set into the tree.
    for idx, itemSet in enumerate(itemSetList):
        ordered = sorted((item for item in itemSet if item in headerTable),
                         key=rank.__getitem__)
        # Traverse from root to leaf, updating the tree with each item.
        currentNode = fpTree
        for item in ordered:
            currentNode = updateTree(item, currentNode, headerTable, frequency[idx])

    return fpTree, headerTable
62 |
def updateHeaderTable(item, targetNode, headerTable):
    """Append ``targetNode`` to the chain of nodes recorded for ``item``.

    ``headerTable[item]`` is a two-element list whose second slot holds the
    first node of the chain (or ``None``). Later nodes are reached through
    each node's ``next`` attribute.

    Args:
        item: Key into ``headerTable``.
        targetNode: Node to attach at the end of the chain.
        headerTable: Mapping of item to ``[count, headNode]``; modified in
            place.
    """
    entry = headerTable[item]
    if entry[1] is None:
        # First node for this item becomes the head of the chain.
        entry[1] = targetNode
        return

    # Walk to the last node, then link the target behind it.
    tail = entry[1]
    while tail.next is not None:
        tail = tail.next
    tail.next = targetNode
72 |
def updateTree(item, treeNode, headerTable, frequency):
    """Step from ``treeNode`` to its child for ``item``, creating it if needed.

    An existing child has its count raised by ``frequency``. Otherwise a new
    child node is created with that count, attached to ``treeNode`` and
    linked into the header table.

    Returns:
        The child node of ``treeNode`` that holds ``item``.
    """
    children = treeNode.children
    if item not in children:
        # Open a new branch and register it in the header table
        branch = Node(item, frequency, treeNode)
        children[item] = branch
        updateHeaderTable(item, branch, headerTable)
    else:
        # Known path: only the count changes
        children[item].increment(frequency)

    return children[item]
85 |
def ascendFPtree(node, prefixPath):
    """Append the item names from ``node`` up to, but excluding, the root.

    The walk is iterative so that very long paths cannot exceed the
    interpreter's recursion limit.

    Args:
        node: Node to start from; its own item name is appended first.
        prefixPath: List extended in place. A node whose ``parent`` is
            ``None`` (the root) contributes nothing.
    """
    while node.parent is not None:
        prefixPath.append(node.itemName)
        node = node.parent
90 |
def findPrefixPath(basePat, headerTable):
    """Collect the conditional pattern base of ``basePat``.

    Follows the node-link chain stored for ``basePat`` in ``headerTable``
    and, for every node on it, records the item names above that node
    together with the node's count. Nodes with nothing above them are
    skipped.

    Returns:
        ``(condPats, frequency)``: the recorded paths and their counts, as
        two parallel lists.
    """
    paths, counts = [], []
    # Head of the linked list for this item
    linkedNode = headerTable[basePat][1]
    while linkedNode is not None:
        ascent = []
        ascendFPtree(linkedNode, ascent)
        # The first entry is the node's own item; the rest is its prefix path
        ancestors = ascent[1:]
        if ancestors:
            paths.append(ancestors)
            counts.append(linkedNode.count)
        linkedNode = linkedNode.next
    return paths, counts
108 |
def mineTree(headerTable, minSup, preFix, freqItemList):
    """Recursively grow frequent itemsets from a header table.

    Args:
        headerTable: Mapping of item to ``[support, headNode]``.
        minSup: Minimum support passed on when building conditional trees.
        preFix: Set of items already fixed for this branch of the recursion.
        freqItemList: Output list; every itemset found is appended to it.
    """
    # Visit the items from the lowest support to the highest
    bySupport = sorted(headerTable.items(), key=lambda entry: entry[1][0])
    for item, _ in bySupport:
        # The grown pattern is the current prefix plus this item
        grownPattern = preFix.copy()
        grownPattern.add(item)
        freqItemList.append(grownPattern)

        # Conditional pattern base: every prefix path leading to the item
        condPatternBase, condFrequency = findPrefixPath(item, headerTable)
        # Conditional FP-tree built from that pattern base
        _, condHeaderTable = constructTree(condPatternBase, condFrequency, minSup)
        if condHeaderTable is None:
            continue
        # Keep mining on the conditional tree
        mineTree(condHeaderTable, minSup, grownPattern, freqItemList)
126 |
def powerset(s):
    """Lazily produce every non-empty proper subset of ``s`` as a tuple.

    Subsets come out by increasing size, from 1 up to ``len(s) - 1``; the
    empty subset and ``s`` itself are not produced.
    """
    subsetSizes = range(1, len(s))
    return chain.from_iterable(map(lambda size: combinations(s, size), subsetSizes))
129 |
def getSupport(testSet, itemSetList):
    """Count the transactions that contain every item of ``testSet``.

    Args:
        testSet: Iterable of items to look for.
        itemSetList: Iterable of transactions (each an iterable of items).

    Returns:
        The number of transactions of which ``testSet`` is a subset.
    """
    # Build the set once: it is loop-invariant, and a one-shot iterator would
    # otherwise be empty from the second transaction on.
    wanted = set(testSet)
    return sum(1 for itemSet in itemSetList if wanted.issubset(itemSet))
136 |
def associationRule(freqItemSet, itemSetList, minConf):
    """Derive association rules from frequent itemsets.

    For every itemset and every non-empty proper subset of it (the
    antecedent), the confidence is ``support(itemset) / support(antecedent)``.

    Args:
        freqItemSet: Iterable of frequent itemsets, each a ``set``.
        itemSetList: The transactions used to count supports.
        minConf: Minimum confidence; a rule is kept when its confidence is
            greater than or equal to this value.

    Returns:
        A list of ``[antecedent, consequent, confidence]`` entries, where
        antecedent and consequent are sets.
    """
    rules = []
    for itemSet in freqItemSet:
        itemSetSup = getSupport(itemSet, itemSetList)
        for antecedent in powerset(itemSet):
            antecedentSup = getSupport(antecedent, itemSetList)
            if antecedentSup == 0:
                # Confidence is undefined when the antecedent never occurs
                continue
            confidence = itemSetSup / antecedentSup
            # Inclusive threshold, matching the `sup >= minSup` convention, so
            # that e.g. minConf=1.0 still returns the rules that always hold
            if confidence >= minConf:
                rules.append([set(antecedent), set(itemSet.difference(antecedent)), confidence])
    return rules
147 |
def getFrequencyFromList(itemSetList):
    """Return a list holding one ``1`` for every entry of ``itemSetList``."""
    return [1] * len(itemSetList)
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
"""Packaging script for the fpgrowth_py distribution."""
import setuptools

# Read the README explicitly as UTF-8 so the build does not depend on the
# locale's default encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="fpgrowth_py",
    version="1.0.0",
    author="Chonyy",
    author_email="tcheon8788@gmail.com",
    description="Python implementation of FP Growth algorithm",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/chonyy/fpgrowth_py",
    packages=setuptools.find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.6',
)
23 |
--------------------------------------------------------------------------------