├── .gitignore ├── LICENSE.txt ├── README.md ├── dataset ├── data.csv ├── data2.csv ├── data3.csv ├── data4.csv ├── data5.csv ├── data6.csv ├── data7.csv ├── kaggle.csv ├── tesco.csv └── tesco2.csv ├── fpgrowth.PNG ├── fpgrowth_py ├── __init__.py ├── fpgrowth.py └── utils.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | build 3 | apriori_py.egg-info -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | Copyright (c) 2020 Chonyy 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in all 10 | copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 |

6 | 7 | 8 | 9 | 10 | 11 | 12 |

13 | 14 | ## How to use 15 | 16 | ### Install the PyPI package using pip 17 | 18 | ``` 19 | pip install fpgrowth_py 20 | ``` 21 | 22 | Then use it like 23 | 24 | ```python 25 | from fpgrowth_py import fpgrowth 26 | itemSetList = [['eggs', 'bacon', 'soup'], 27 | ['eggs', 'bacon', 'apple'], 28 | ['soup', 'bacon', 'banana']] 29 | freqItemSet, rules = fpgrowth(itemSetList, minSupRatio=0.5, minConf=0.5) 30 | print(freqItemSet) 31 | print(rules) 32 | # e.g. [[{'eggs'}, {'bacon'}, 1.0], [{'bacon'}, {'eggs'}, 0.6666666666666666], [{'soup'}, {'bacon'}, 1.0], [{'bacon'}, {'soup'}, 0.6666666666666666]] (order may vary) 33 | # each rule is [antecedent, consequent, confidence]: rule[0] --> rule[1], confidence = rule[2] 34 | ``` 35 | 36 | ### Clone the repo 37 | 38 | Get a copy of this repo using git clone 39 | ``` 40 | git clone https://github.com/chonyy/fpgrowth_py.git 41 | ``` 42 | 43 | Run the program with dataset provided and **default** values for *minSupport* = 0.5 and *minConfidence* = 0.5 44 | 45 | ``` 46 | python fpgrowth.py -f dataset.csv 47 | ``` 48 | 49 | Run the program with a dataset and explicit minimum support and minimum confidence 50 | 51 | ``` 52 | python fpgrowth.py -f tesco2.csv -s 0.5 -c 0.5 53 | ``` 54 | -------------------------------------------------------------------------------- /dataset/data.csv: -------------------------------------------------------------------------------- 1 | 8585,948,1125,1289,1544,1651 2 | 656,783,1017,1273,1318,1429,1516 3 | 112,1034 4 | 647,704,929,1040,1094,1182,1194,1214,1250 5 | 11,89,870,1094,1605,1940 -------------------------------------------------------------------------------- /dataset/data2.csv: -------------------------------------------------------------------------------- 1 | 1040,2372,2872,3159,4750 2 | 1040,1280,2495,2872,3186,4750,4995 3 | 1040,2872,4750 4 | 1280,2872,3186,4995 5 | 1040,1280,2872,3186,4995 6 | 1280,3186,4995 7 | 1040,1280,2872,3186,4750,4995 8 | 1040,1280,2495,2872,3186,4750,4995 9 | 1040,1280,2495,2872,3186,4750,4995 10 | 1040,2872,3186,4750,4995 11 | 1040,2495,4750 12 | 1040,1280,2872,3186,4750,4995 13 | 1040,1280,2872,3186,4750,4995 14 | 
1040,1280,2872,4750,4995 15 | 1040,2372,2495,3159,4750 16 | 1280,2872,3186,4995 17 | 1040,4750 18 | 1280,2872,3186,4995 19 | 1040,1280,2495,2872,4750 20 | 1040,1280,2372,2872,3186,4995 21 | 1280,2872,3186,4750,4995 22 | 1040,2872,4750 23 | 1040,2872,4750 24 | 1280,2872,3186,4750,4995 25 | 1040,2495,4750 26 | 1040,1280,2872,3186,4750,4995 27 | 2372,3159 28 | 1040,1280,2872,3186,4750,4995 29 | 1040,1280,2372,2495,2872,3159,3186,4750,4995 30 | 1040,2495,2872,4750 31 | 1280,2495,2872,3186,4995 32 | 1040,4750 33 | 1040,2872,3186,4750,4995 34 | 1040,1280,2872,4750,4995 35 | 1280,2872,3186,4750,4995 36 | 1040,1280,2495,2872,3186,4750,4995 37 | 1280,2872,3186,4995 38 | 1040,1280,2872,3186,4750,4995 39 | 1280,2872,3186,4995 40 | 1040,2495,2872,4750 41 | 1040,1280,2495,2872,3186,4750,4995 42 | 1040,1280,2495,2872,3186,4750,4995 43 | 1040,1280,2495,2872,4750,4995 44 | 1040,1280,3186,4750,4995 45 | 1040,1280,2372,2495,2872,3186,4750,4995 46 | 1280,2872,3186,4995 47 | 1040,2872,4750 48 | 1040,1280,2495,2872,3186,4750,4995 49 | 1040,1280,2872,4750 50 | 1040,2495,2872,4750 51 | 1280,2495,2872,3186,4995 52 | 1040,1280,2495,2872,4750,4995 53 | 1280,2872,3186,4995 54 | 1040,2495,4750 55 | 1040,2372,2872,3159,4750 56 | 1280,2872,3186,4995 57 | 1280,2495,3186,4995 58 | 1040,2872,4750 59 | 1040,2872,4750 60 | 1040,1280,2495,2872,3186,4750,4995 61 | 2495,2872,4750 62 | 1040,2372,2872,3159,4750 63 | 1040,2495,2872,4750 64 | 1280,2872,3186,4995 65 | 1280,2495,2872,3186,4995 66 | 2495,2872 67 | 1280,2872,4995 68 | 2872,3186,4995 69 | 1040,2372,4750 70 | 1280,2872,3186,4995 71 | 1040,2372,3159,3186,4750 72 | 1280,2872,3186,4995 73 | 1040,2372,2872,4750 74 | 1040,2495,2872,4750 75 | 1280,2872,3186,4995 76 | 1040,2495,4750 77 | 2495 78 | 1040,1280,2495,2872,4750,4995 79 | 1040,2872,4750 80 | 1040,2495,4750 81 | 1040,4750 82 | 2872,3186,4995 83 | 1040,2872,4750 84 | 1280,2872,3186,4995 85 | 1040,1280,2872,3186,4750,4995 86 | 2495,2872 87 | 1280,2872,3186 88 | 1280,2872,3186,4995 89 | 
1040,2372,2872,3159 90 | 1040,1280,2495,2872,3186,4750 91 | 1040,1280,2872,3186,4750,4995 92 | 1040,1280,2495,2872,3186,4750,4995 93 | 1040,1280,2372,2872,3159,3186,4750,4995 94 | 1280,2495,2872,3186,4995 95 | 1040,1280,2872,4750,4995 96 | 1040,2372,2495,3159,4750 97 | 1280,2872,3186,4995 98 | 1040,1280,2495,2872,3159,3186,4750,4995 99 | 1040,2495,2872,4750,4995 100 | 1040,1280,2872,3186,4750,4995 101 | 1040,1280,2872,3186,4750 102 | 1040,4750 103 | 1040,1280,2495,2872,3186,4750,4995 104 | 1040,1280,2495,2872,3186,4750 105 | 1040,2495,2872,3186,4750 106 | 1040,1280,2872,3186,4750,4995 107 | 1040,1280,2872,3186,4995 108 | 1040,1280,2872,3186,4750 109 | 1040,1280,2872,4750,4995 110 | 1040,4750 111 | 1280,3186,4995 112 | 1040,1280,2872,3159,3186,4750,4995 113 | 1040,1280,2872,3186,4750,4995 114 | 1040,1280,2495,2872,3186,4750,4995 115 | 1040,1280,2372,2495,2872,3159,3186,4750 116 | 1040,1280,2872,3186,4750,4995 117 | 1040,4750 118 | 1040,1280,2495,2872,4750,4995 119 | 1280,2872,3186 120 | 1040,1280,2872,3186,4750,4995 121 | 1040,1280,2872,3186,4750,4995 122 | 1040,2872,4750 123 | 1040,1280,2872,3186,4750,4995 124 | 1040,1280,2495,2872,3186,4750 125 | 1280,2872,3186,4995 126 | 1040,1280,2872,3186,4750,4995 127 | 1280,2872,3186,4750,4995 128 | 1040,1280,2872,3186,4750,4995 129 | 1040,1280,2495,2872,3186,4750 130 | 1040,1280,3186,4750,4995 131 | 1040,1280,2872,3186,4750,4995 132 | 1040,1280,2872,4750,4995 133 | 1280,2872,3186 134 | 1040,2495,2872,4750 135 | 1280,2872,3186,4995 136 | 1040,1280,2495,2872,3186,4750,4995 137 | 1040,2872,4750 138 | 4995 139 | 1040,4750 140 | 1040,1280,2872,3186,4750 141 | 1040,1280,2872,3186,4750,4995 142 | 1280,2872,3186 143 | 1280,2872,3186,4995 144 | 1040,2372,2872,3159,3186,4750,4995 145 | 1040,1280,2872,3186,4750 146 | 1040,2495,2872,4750 147 | 1040,1280,2872,3186,4750,4995 148 | 1040,2495,4750,4995 149 | 1040,1280,2872,3186,4750 150 | 1040,1280,2872,3186,4750,4995 151 | 1040,2495,2872,4750 152 | 1040,2372,2872,3159,4750 153 | 
1280,2872,3186,4995 154 | 1040,1280,2872,3186,4750,4995 155 | 1280,2872 156 | 1040,1280,2495,2872,3186,4750,4995 157 | 1040,1280,3186,4750,4995 158 | 1040,2372,2495,3159,4750 159 | 1040,2495,2872,4750 160 | 1280,2872,3159,3186,4995 161 | 1280,2872,3186,4995 162 | 1040,1280,2495,2872,3186,4750 163 | 1040,2495,2872,4750 164 | 1040,1280,2495,2872,3186,4750,4995 165 | 1040,2872,3186,4750,4995 166 | 1280,2872,3186 167 | 1040,1280,2495,2872,3186,4995 168 | 1040,2872,4750 169 | 1040,1280,2372,2495,2872,3159,3186,4750,4995 170 | 1040,1280,2872,3186,4750,4995 171 | 1040,2495,2872,4750 172 | 1040,1280,2372,2495,2872,3159,3186,4750,4995 173 | 1040,1280,2495,2872,3186,4995 174 | 1040,1280,2372,2495,2872,3186,4750,4995 175 | 1280,3186,4995 176 | 1040,2495,2872,4750 177 | 1040,1280,2495,2872,4750 178 | 1040,4750 179 | 1040,1280,2872,3186,4750,4995 180 | 1040,1280,2872,3186,4750 181 | 1040,1280,2872,4750,4995 182 | 1040,1280,2372,2872,3159,3186,4750,4995 183 | 1040,1280,2872,3186,4750,4995 184 | 1040,2495,2872,4750 185 | 2872 186 | 1040,1280,2495,2872,3186,4750,4995 187 | 1280,2872,3186,4750,4995 188 | 1040,1280,3186,4750,4995 189 | 1040,1280,2495,2872,3186,4995 190 | 1040,1280,2495,2872 191 | 1040,2872 192 | 1040,1280,2495,2872,3186,4750,4995 193 | 1040,1280,2872,3186,4750,4995 194 | 1040,1280,2872,3186,4750 195 | 1040,2372,2495,2872,4750 196 | 1040,1280,2495,2872,3186,4750,4995 197 | 1040,1280,2495,2872,4750,4995 198 | 1040,2495,2872,4750 199 | 1040,1280,2495,2872,3186,4750,4995 200 | 1280,2872,3186,4995 201 | 1040,1280,2872,3186,4750,4995 202 | 1040,1280,2495,2872,3186,4750,4995 203 | 4750 204 | 1280,2872,3186,4995 205 | 1040,2495,4750 206 | 1040,3159,4750 207 | 1040,1280,2872,4750,4995 208 | 1040,2495,2872,4750 209 | 1040,1280,2372,2495,2872,3159,3186,4750,4995 210 | 1040,2372,2495,2872,3159,4750 211 | 1040,1280,2372,2872,3159,3186,4750,4995 212 | 1040,1280,2372,2495,2872,3159,3186,4750,4995 213 | 1040,2372,2872,4750 214 | 1040,1280,2872,3186,4750,4995 215 | 1040,2872 216 | 
1040,2872,4750 217 | 1280,2872,3186,4995 218 | 1040,1280,2372,2872,4750,4995 219 | 1040,1280,2495,3186,4750 220 | 1040,2872,4750 221 | 1280,2872,4995 222 | 1040,2495,2872,4750 223 | 1040,4750 224 | 1040,1280,2872,3186,4750,4995 225 | 1040,2372,3159,4750 226 | 1040,1280,2872,3186,4750,4995 227 | 1040,2495,2872,4750 228 | 1040,1280,2872,3186,4750,4995 229 | 1040,1280,2495,2872,4750,4995 230 | 1040,4750 231 | 1280,2872,3186,4995 232 | 1040,4750 233 | 1040,1280,2495,2872,3186,4750,4995 234 | 1040,1280,2872,3186,4750,4995 235 | 2872,3186 236 | 1280,2872,3186,4995 237 | 1040,1280,2872,3159,3186,4750,4995 238 | 1040,4750 239 | 1040,1280,2872,3186,4750,4995 240 | 1040,4750 241 | 1280,2872,3186,4995 242 | 1040,1280,2872,3186,4750 243 | 1040,2372,2495,3159,4750 244 | 1040,1280,2872,3186,4750,4995 245 | 1040,2495,4750 246 | 1280,2872,3186,4995 247 | 1040,1280,2372,2495,2872,3159,3186,4750,4995 248 | 1040,2495,2872,4750 249 | 1280,2495,2872,3186 250 | 1040,2372,2495,2872,3159,4750 251 | 1040,2495,2872,3186,4750 252 | 1040,2495,2872,4750 253 | 1040,1280,2872,3186,4750,4995 254 | 1040,1280,2872,3186,4750,4995 255 | 1280,2872,3186,4995 256 | 1280,2872,3186,4995 257 | 1040,2372,3159,4750 258 | 1040,1280,2872,3186,4750,4995 259 | 1040,4750 260 | 1280,3186,4995 261 | 1280,2872,3186,4995 262 | 1040,1280,2872,3186,4750,4995 263 | 1280,2872,3186,4995 264 | 1040,1280,2372,2495,2872,3159,3186,4750 265 | 1280,2872,3186,4995 266 | 1040,1280,2495,2872,4750,4995 267 | 1040,4750 268 | 1040,1280,2495,2872,3186,4750,4995 269 | 1280,2872,3186,4995 270 | 1040,1280,2372,2872,3159,3186,4750,4995 271 | 1040,2495,2872,4750,4995 272 | 1280,2872,3186,4995 273 | 1040,2872,4750 274 | 1040,1280 -------------------------------------------------------------------------------- /dataset/data3.csv: -------------------------------------------------------------------------------- 1 | 2076,4785,5793,6338,9484 2 | 2076,2564,5793,6395,9484,9994 3 | 5793,9484 4 | 2076,2564,5793,6395,9994 5 | 2076,9484 6 | 
2564,5793,6395,9994 7 | 2076,2564,5793,6395,9484,9994 8 | 2076,2564,5016,5793,6395,9484,9994 9 | 2076,2564,5793,6395,9484,9994 10 | 2076,2564,9484,9994 11 | 2564,5793,6395,9994 12 | 2076,4785,5793,6338,9484 13 | 2076,5016,9484 14 | 2076,2564,5793,6395,9484,9994 15 | 2076,2564,5016,5793,6338,6395,9484,9994 16 | 2564,5793,6395,9994 17 | 2564,5793,6395,9994 18 | 5793,6338,9484 19 | 2076,2564,5016,5793,6395,9484,9994 20 | 2564,4785,5793,6338,6395,9484,9994 21 | 2076,5793,6338,9484,9994 22 | 2076,2564,5793,9484,9994 23 | 2076,9484 24 | 2076,2564,5793,6395,9484,9994 25 | 2076,5016,9484 26 | 2564,5793,6395,9484,9994 27 | 2076,5793,9484 28 | 2076,2564,5793,6395,9484,9994 29 | 2076,2564,5793,6395,9484,9994 30 | 2076,5793,6338,6395,9484 31 | 2564,5793,6395,9994 32 | 2076,5016,5793,9484 33 | 2076,5016,5793,9484 34 | 2076,5016,9484 35 | 2076,5793,6395,9484,9994 36 | 2076,2564,5016,5793,6395,9484,9994 37 | 2076,2564,5793,6395,9484,9994 38 | 2076,2564,5016,5793,6395,9484 39 | 2564,5793,6395,9994 40 | 2076,2564,5793,6395,9484 41 | 2076,4785,5016,5793,6338,9484 42 | 2564,5793,9994 43 | 2076,2564,5016,5793,6395,9484,9994 44 | 2076,2564,5793,6395,9484,9994 45 | 2076,2564,4785,5016,5793,6338,6395,9484,9994 46 | 2076,9484,9994 47 | 5793 48 | 2076,2564,4785,5016,5793,6338,6395,9484 49 | 5793,6395 50 | 2564,5793,9994 51 | 2076,2564,5793,6395,9484,9994 52 | 2076,4785,5793,6338,9484 53 | 2564,4785,5793,6338,6395,9994 54 | 2076,9484 55 | 2076,2564,5016,5793,6395,9484,9994 56 | 2564,5016,5793,6395 57 | 2076,9484 58 | 2076,9484 59 | 2076,2564,5793,6395,9484,9994 60 | 2076,2564,5016,5793,6395,9484,9994 61 | 2564,5793,6395,9994 62 | 2076,5016,6338,9484 63 | 2076,2564,6395,9484 64 | 5016,5793,9484 65 | 2076,5793,9484 66 | 2564,5793,6395,9484 67 | 2076,9484 68 | 2076,9484 69 | 2076,2564,5016,5793,6395,9484,9994 70 | 9484 71 | 2076,2564,4785,5016,5793 72 | 2564,5793,9484,9994 73 | 2076,4785,6338,9484 74 | 2076,5793,6395,9484,9994 75 | 2076,2564,6395 76 | 2076,9484 77 | 2076,9484 78 | 
2076,4785,5016,5793,6338,9484 79 | 2076,9484 80 | 2076,5793,9484 81 | 2076,5016,5793,9484 82 | 2076,5016,9484 83 | 2564,5016,5793,6395,9994 84 | 2564,5793,6395,9994 85 | 2076,9484 86 | 2076,2564,5793,6395,9484,9994 87 | 2076,5016,5793,9484 88 | 2076,5016,9484 89 | 2076,9484 90 | 2076,2564,5793,6395,9484,9994 91 | 2076,9484 92 | 2076,2564,5016,5793,6395,9484,9994 93 | 2076,5793,6395,9484 94 | 2076,2564,5016,5793,6395,9484,9994 95 | 2076,2564,5016,5793,6395,9484,9994 96 | 2076,9484 97 | 2076,4785,5016,5793,6338,9484 98 | 2076,2564,5793,6395,9994 99 | 2076,2564,5793,6395,9994 100 | 2076,9484 101 | 2076,2564,5016,5793,6395,9484,9994 102 | 2076,2564,5793,6395,9484,9994 103 | 2076,2564,5793,6395,9484,9994 104 | 2564,5793,6395,9994 105 | 2564,5793,6395,9994 106 | 2076,5793,9484 107 | 2076,2564,5793,6395,9484,9994 108 | 2564,5016,5793,6395,9484,9994 109 | 2076,5793,6395,9484 110 | 2076,2564,5793,9484 111 | 5793,9484 112 | 2076,2564,4785,6338,6395,9484,9994 113 | 2076,5016,5793,9484,9994 114 | 2076,2564,5793,6395,9484 115 | 2564,6395,9994 116 | 2076,2564,5793,9484,9994 117 | 2076,2564,5793,6395,9484,9994 118 | 2076,4785,5016,5793,6338,9484 119 | 2076,2564,5793,6395,9484,9994 120 | 2076,2564,5793,6395,9484,9994 121 | 2564,6395,9994 122 | 2076,2564,5016,5793,6338,6395,9484,9994 123 | 2076,2564,5793,9484,9994 124 | 2076,2564,5016,5793,6395,9484 125 | 2076,5016,5793,9484 126 | 2564,5793,6395,9994 127 | 2076,2564,5793,6395,9484,9994 128 | 2564,5016,5793,6395,9994 129 | 2076,5793,9484 130 | 2076,2564,4785,5793,9484,9994 131 | 2076,2564,5016,5793,9484 132 | 2076,2564,4785,5016,5793,6338,6395,9484,9994 133 | 2076,4785,5793,6338,9484,9994 134 | 2564,5793,6395,9484,9994 135 | 2076,2564,5016,5793,6395,9484,9994 136 | 2076,5016,9484 137 | 2564,5793,6395 138 | 2076,5016,5793,6395,9484,9994 139 | 2564,5793,6395,9994 140 | 2076,6395,9484,9994 141 | 2076,2564,4785,5793,6395,9484,9994 142 | 2076,2564,5016,5793,9484 143 | 5793 144 | 2076,9484 145 | 2076,2564,5016,5793,6395,9484,9994 146 | 
2076,2564,5016,5793,6395,9484,9994 147 | 2076,5016,5793,6395,9994 148 | 2076,2564,5793,6395 149 | 2076,2564,5016,5793,6395,9484,9994 150 | 2076,2564,5016,5793,6395,9484 151 | 2076,2564,5793,9484,9994 152 | 2076,5793,9484,9994 153 | 2076,2564,5793,6395,9484,9994 154 | 2076,2564,5016,5793,6395,9484,9994 155 | 2076,2564,5016,6395,9484 156 | 2564,5793,6395,9994 157 | 2076,2564,5793,6395,9484,9994 158 | 2076,5793,9484 159 | 2076,5016,5793,6395,9484,9994 160 | 2076,9484 161 | 2076,2564,4785,5793,6338,9484 162 | 2076,4785,5016,6338,9484 163 | 2076,2564,5793,6395,9484,9994 164 | 2076,4785,5016,6338,9484 165 | 2076,2564,5793,6395,9484,9994 166 | 2076,4785,6338,9484 167 | 2076,2564,5793,9484,9994 168 | 2076,5016,5793,9484 169 | 2076,5793,9484 170 | 2564,5793,6395,9994 171 | 2076,9484 172 | 2076,5793,6338,9484 173 | 2564,5793,6395,9994 174 | 2076,2564,4785,5016,5793,6338,6395,9484,9994 175 | 2076,2564,5793,9484 176 | 2076,2564,5793,6395,9484,9994 177 | 2076,2564,5016,5793,6395,9484,9994 178 | 2076,2564,5793,6395,9484,9994 179 | 2076,2564,5016,5793,6395,9484,9994 180 | 2564,5793,6395 181 | 2076,2564,4785,5793,6338,6395,9484 182 | 2076,2564,5793,6395,9484,9994 183 | 2076,2564,5793,6395,9484,9994 184 | 2076,2564,5016,5793,6395,9484,9994 185 | 2076,5016,5793,6395,9484 186 | 2076,2564,5793,6395,9484,9994 187 | 2076,2564,5016,5793,6395,9484,9994 188 | 2076,4785,5016,6338,9484,9994 189 | 4785,6338 190 | 2564,5793,6395,9994 191 | 2076,2564,5793,6395,9484,9994 192 | 2564,5016,5793,6395,9994 193 | 4785,5016,6338 194 | 2076,4785,5793,9484 195 | 2564,5793,6395 196 | 2564,5793,6395,9994 197 | 2076,5016,5793,9484 198 | 2076,2564,5793,6395,9484,9994 199 | 5016 200 | 2076,2564,5793,6395,9484 201 | 2076,2564,6395,9484,9994 202 | 2076,4785,5793,6338,9484 203 | 2076,2564,5793,6395,9484,9994 204 | 2076,5793,6338,9484 205 | 2076,2564,5016,5793,6395,9484,9994 206 | 2076,2564,6395,9484 207 | 2076,2564,5793,9484 208 | 2076,5793,9484 209 | 2564,5793,6395,9994 210 | 2076,2564,5793,9484,9994 211 | 2076 
212 | 2076,5016,9994 213 | 2564,5793,6395 214 | 2076,2564,5016,5793,6395,9994 215 | 2076,6395,9484,9994 216 | 2076,2564,5793,6395,9484,9994 217 | 2076,5016,5793,6395,9484 218 | 2076,2564,5016,5793,6395,9484,9994 219 | 2076,2564,5793,6395,9484,9994 220 | 2076,5016,5793,6338,9484 221 | 2076,2564,5793,6395,9484,9994 222 | 5016,9484 223 | 2076,2564,5793,9484,9994 224 | 2076,2564,5016,5793,6395,9484,9994 225 | 2076,5793,9484 226 | 2076,2564,5793,6395,9484 227 | 5793,6395,9994 228 | 2076,9484 229 | 2076,5793,9484 230 | 2076,5793,6395,9484,9994 231 | 2076,2564,5016,5793,6338,9484 232 | 2564,4785,5793,6338,6395,9994 233 | 2564,5793,6395,9484,9994 234 | 2076,2564,5793,6395,9484,9994 235 | 2564,5016,5793,6395,9994 236 | 2076,2564,5016,5793,6395,9484,9994 237 | 2076,2564,5016,5793,6395,9484,9994 238 | 2076,5016,5793,9484 239 | 5016,5793 240 | 2076,2564,5016,5793,6395,9484,9994 241 | 2076,2564,5016,5793,6395 242 | 2076,9484 243 | 2076,5793,6395,9484,9994 244 | 2076,2564,5016,5793,6395,9484,9994 245 | 5016,9484 246 | 2076,2564,5793,6395,9484,9994 247 | 2076,9484 248 | 2076,5016,5793,9484,9994 249 | 2564,5793,6395,9484,9994 250 | 2564,5016,5793,6395,9484,9994 251 | 2076,2564,5016,5793,6395,9484 252 | 5793,6338 253 | 2076,2564,5793 254 | 2076,2564,5016,5793,6395,9484,9994 255 | 2564,5793,6395,9994 256 | 2564,5016,5793,6395,9994 257 | 2076,4785,5793,6338,9484,9994 258 | 2564,6395,9994 259 | 2076,2564,5793,6395,9484,9994 260 | 2076,9484 261 | 2076,2564,5016,5793,6395,9484,9994 262 | 2564,5793,6395,9994 263 | 2076,9484 264 | 2076,5016,6395,9484,9994 265 | 2076,2564,4785,5793,6338 266 | 2564,5793,6395,9994 267 | 2076,2564,5793,6395,9484,9994 268 | 2076,4785,6338,9484 269 | 2076,2564,5016,5793,6338,6395,9484 270 | 2076,5016,9484 271 | 2076,2564,5016,5793,9484 272 | 2076,2564,5016,5793,6395,9484,9994 273 | 2076,4785,6338,9484 274 | 2076,2564,5793,9484,9994 275 | 2076,2564,5793,6395,9484,9994 276 | 2076,2564,5793,6395,9484 277 | 2076,4785,5016,5793,9484 278 | 2564,5016,5793,6395,9994 
279 | 2076,5016,5793,9484 280 | 2076,5793,9484 281 | 2564,5793,6395,9994 282 | 2076,4785,5016,5793,6338 283 | 2076,2564,5016,5793,6395,9484,9994 284 | 2076,5016,5793 285 | 2076,4785,5016,9484 286 | 2076,5793,9484 287 | 2076,2564,5793,9484,9994 288 | 2564,5016,5793,9994 289 | 2564,5016,5793,6395,9994 290 | 5793,6395,9994 291 | 2076,2564,5016,5793,9484 292 | 2076,9484 293 | 2076,2564,5793,6395,9484,9994 294 | 2076,2564,5793,6395,9484,9994 295 | 4785,5793 296 | 2076,2564,5016,6395,9484 297 | 2076 298 | 2076,4785,6338,9484 299 | 2076,2564,5793,6338,9484,9994 300 | 2076,5016,5793,9484 301 | 2076,6395,9484 302 | 2076,2564,5793,9484,9994 303 | 2076,2564,5016,5793,9484 304 | 2076,5016,5793,6395,9484,9994 305 | 2076,5016,5793,9484 306 | 2076,2564,5016,5793,9484,9994 307 | 2076,2564,5016,5793,6395,9484,9994 308 | 2076,2564,5016,5793,9994 309 | 2076,2564,5016,5793,6395,9484,9994 310 | 2564,5793,6395,9994 311 | 2076,2564,5793,6395,9484 312 | 2076,9484 313 | 2076,5016,5793,9484,9994 314 | 2076,4785,9484 315 | 2076,2564,4785,5793,6338,6395,9484,9994 316 | 2564,5793,6395,9994 317 | 2076,2564,5793,6395,9994 318 | 5016,5793 319 | 2564,5016,5793,6338,6395,9994 320 | 2076,2564,5793,6395,9484,9994 321 | 2076,5016,5793,6395,9484,9994 322 | 5793 323 | 2076,9484 324 | 2076,2564,5793,6395,9484 325 | 2564,5016,5793,6395,9994 326 | 2076,5016,5793,6395,9484 327 | 2076,2564,5016,5793,6395,9484,9994 328 | 2076,2564,5793,6395,9484,9994 329 | 2076,2564,5016,5793,6395,9484,9994 330 | 2564,5793,9994 331 | 2076,5016,9484 332 | 2076,2564,5016,5793,6395,9484,9994 333 | 2076,2564,5016,5793,6395,9484,9994 334 | 2564,5016,5793,6395,9484,9994 335 | 2076,2564,5793,6395,9484,9994 336 | 2076,2564,5016,5793,6395,9484,9994 337 | 2564,5793,6395,9484,9994 338 | 2076,5793,6395,9484 339 | 2076,2564,5793,6395,9484 340 | 2564,5793,6395,9994 341 | 2076,2564,5793,6338,6395,9994 342 | 2076,2564,5016,5793,6395,9484,9994 343 | 2076,5793,6395,9484,9994 344 | 2076,2564,5016,5793,6395,9484,9994 345 | 
2076,2564,5793,6395,9994 346 | 2076,4785,5016,5793,6338,9484 347 | 2076,6395,9484 348 | 2076,9484 349 | 2076,2564,5793,9484,9994 350 | 2564,5793,9994 351 | 2564,4785,5793,6338,6395,9994 352 | 2076,2564,5016,5793,6395,9484 353 | 2076,9484 354 | 2076,2564,5793,6395,9484,9994 355 | 2076,5793,9484 356 | 2564,5793 357 | 2076,2564,5793,6395,9484,9994 358 | 2076,9484 359 | 2076,5016,5793,9484 360 | 2076,2564,5016,5793,6395 361 | 2076,4785,5016,6338,9484 362 | 2076,5016,9484 363 | 2564,6395,9994 364 | 2076,2564,5016,5793,6395,9484 365 | 2564,5793,6395,9484,9994 366 | 2076,5016,5793 367 | 2564,6395,9994 368 | 2564,4785,5793,6395,9994 369 | 2076,2564,5016,5793,6395,9484,9994 370 | 9484 371 | 2076,2564,5793,6395,9484,9994 372 | 2076,2564,5016,5793,6395,9484,9994 373 | 2076,9484 374 | 2564,4785,5016,5793,6338,9994 375 | 2076,2564,5793,6395,9484,9994 376 | 2564,5793,6395 377 | 2076,2564,5793,6395,9484,9994 378 | 2076,2564,5793,6395,9484,9994 379 | 2076,2564,5793,9484 380 | 2076,2564,5793,6395,9484,9994 381 | 2076,5016,5793,9484 382 | 2076,2564,5016,9484 383 | 2564,5016,5793,6395,9994 384 | 2076,2564,5793,6395,9484,9994 385 | 2564,4785,5793,6338,6395,9994 386 | 2076,2564,5793,9484,9994 387 | 2076,5016,9484 388 | 2076,5016,5793,9484 389 | 2076,2564,5793,9484 390 | 2076,5016,9484 391 | 2076,4785,5016,5793,6338,6395,9484,9994 392 | 2076,2564,5793,6395,9484,9994 393 | 2076,2564,5016,5793,9484,9994 394 | 2076,2564,5793,9484 395 | 2076,5793,9484 396 | 2076,4785,5793,6338,9484 397 | 2564,5793,6395,9994 398 | 2076,5016,5793,6395,9484,9994 399 | 2076,2564,5793,6395,9484,9994 400 | 2076,9484 401 | 2076,2564,5793,6395,9484,9994 402 | 2076,4785,5793,6338,9484,9994 403 | 2076,6338,9484 404 | 2076,9484 405 | 5793,6395,9994 406 | 2076,5016,5793,9484 407 | 2076,2564,5016,5793,6395,9484,9994 408 | 5793 409 | 5793,6395,9994 410 | 2076,2564,5793,6395,9484,9994 411 | 2076,2564,4785,9484,9994 412 | 2564,5793,6395,9484,9994 413 | 2076,5016,6395,9484,9994 414 | 2076,5016,5793,6395,9484,9994 415 | 
2076,5016,5793,9484 416 | 2076,2564,5016,5793,6395,9484,9994 417 | 2076,2564,5793,6395,9484 418 | 2076,2564,5793,6395,9484,9994 419 | 2076,2564,4785,6338,6395,9484 420 | 2076,5793,9484 421 | 2076,5016,5793,9484 422 | 2564,4785,5793,6338,6395,9994 423 | 2076,2564,5016,5793,6395,9484,9994 424 | 2076,5016,5793,9484 425 | 5793,6395,9484,9994 426 | 2076,5016,6395,9484,9994 427 | 2076,2564,5793,9484 428 | 2564,5793 429 | 2564,5793,6395,9994 430 | 2564,5793,6395,9994 431 | 2076,5016,9484 432 | 2076,4785,5793,6338,9484 433 | 2076,4785,5793,6338,6395,9484,9994 434 | 2076,5016,9484 435 | 5016 436 | 2076,9484 437 | 2076,2564,5016,5793,6395,9484,9994 438 | 2076,2564,4785,5016,5793,6338,6395,9484,9994 439 | 2076,2564,5793,6395,9484,9994 440 | 2076,9484 441 | 2076,9484 442 | 2076,2564,5793,6395,9484,9994 443 | 2076,2564,5793,6395,9484,9994 444 | 2076,2564,9484,9994 445 | 4785,5016,5793,6338,6395,9994 446 | 2076,2564,5016,5793,6395,9484,9994 447 | 2076,2564,5016,5793,6395,9484 448 | 2564,6395,9994 449 | 2076,2564,5793,6395,9484 450 | 5793,9484 451 | 2564,5793,6395,9994 452 | 6395,9994 453 | 2076,2564,5016,5793,6395,9484,9994 454 | 2564,5016,5793,9994 455 | 2076,2564,5793,6395,9484,9994 456 | 2076,5016,9484 457 | 2076,4785,5016,5793,6338,6395,9484,9994 458 | 2076,2564,4785,5016,5793,6395,9484,9994 459 | 2564,5016,5793,6395,9994 460 | 2076,5016,9484 461 | 2076,5016,9484 462 | 2076,2564,5016,5793,9484 463 | 2076,2564,5793,6395,9484,9994 464 | 2076,4785,6338,9484 465 | 2076,2564,5016,5793,6395,9484 466 | 2564,5793,6395,9484 467 | 2076,5016,5793,6395,9484 468 | 2076,5793,9484 469 | 2564,5793,6395,9484,9994 470 | 2076,4785,5793,6338,9484 471 | 2076,2564,9484 472 | 2076,2564,5793,6395,9484 473 | 2076,4785,5016,6338,9484 474 | 2076,5793,6395,9484,9994 475 | 2564,5016,5793,6395,9994 476 | 2076,2564,5016,6395,9484,9994 477 | 2076,9484 478 | 2564,5793,6395,9994 479 | 2076,5016,5793,6395,9484,9994 480 | 2076,2564,5793,6395,9484,9994 481 | 2076,2564,5016,5793,6395,9484,9994 482 | 
2076,2564,5793,6395,9484,9994 483 | 2076,2564,5793,6395,9484 484 | 2564,5016,5793,6395 485 | 2076,5793,6395,9484,9994 486 | 2076,9484 487 | 5793,6395,9994 488 | 2076,2564,5793,6395,9484,9994 489 | 2076,2564,5016,5793,6395,9484,9994 490 | 2564,4785,5793,6395,9994 491 | 2564,5016,5793,6395,9994 492 | 2076,2564,5793,9484 493 | 2076,4785,6338,9484 494 | 2076,2564,5793,6395,9484,9994 495 | 2076,2564,5793,6395,9484,9994 496 | 2076,2564,6395,9484,9994 497 | 2076,9484 498 | 2076,9484 499 | 2564,5793,6395,9994 500 | 2564,5793,6395 501 | 2076,2564,9484 502 | 2564,5793,9994 -------------------------------------------------------------------------------- /dataset/tesco.csv: -------------------------------------------------------------------------------- 1 | a,c,d,f,g,i,m,p 2 | a,b,c,f,i,m,o 3 | b,f,h,j,o 4 | b,c,k,s,p 5 | a,c,e,f,l,m,n,p -------------------------------------------------------------------------------- /dataset/tesco2.csv: -------------------------------------------------------------------------------- 1 | apple,beer,rice,chicken 2 | apple,beer,rice 3 | apple,beer 4 | apple,mango 5 | milk,beer,rice,chicken 6 | milk,beer,rice 7 | milk,beer 8 | milk,mango -------------------------------------------------------------------------------- /fpgrowth.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chonyy/fpgrowth_py/2457a5301486c920eb9277b37ae79a3866fe0bcc/fpgrowth.PNG -------------------------------------------------------------------------------- /fpgrowth_py/__init__.py: -------------------------------------------------------------------------------- 1 | from fpgrowth_py.fpgrowth import * 2 | from fpgrowth_py.utils import * -------------------------------------------------------------------------------- /fpgrowth_py/fpgrowth.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict, OrderedDict 2 | from csv import reader 3 | from 
def _mineFrequentPatterns(itemSetList, frequency, minSupRatio, minConf):
    """Run the FP-growth pipeline shared by both public entry points.

    Args:
        itemSetList: list of transactions, each a list of hashable items.
        frequency: per-transaction weights, parallel to ``itemSetList``.
        minSupRatio: minimum support as a fraction of the transaction count.
        minConf: minimum confidence a rule needs to be reported.

    Returns:
        A ``(freqItems, rules)`` tuple. Both are empty lists when no item
        reaches the minimum support, so callers can always unpack the result.
    """
    # Convert the relative threshold into an absolute transaction count.
    minSup = len(itemSetList) * minSupRatio
    fpTree, headerTable = constructTree(itemSetList, frequency, minSup)
    if fpTree is None:
        # Keep the original diagnostic, but return an unpackable result
        # instead of implicitly returning None.
        print('No frequent item set')
        return [], []

    freqItems = []
    mineTree(headerTable, minSup, set(), freqItems)
    rules = associationRule(freqItems, itemSetList, minConf)
    return freqItems, rules


def fpgrowth(itemSetList, minSupRatio, minConf):
    """Mine frequent itemsets and association rules from in-memory data.

    Args:
        itemSetList: list of transactions, each a list of hashable items.
        minSupRatio: minimum support as a fraction of the transaction count.
        minConf: minimum confidence a rule needs to be reported.

    Returns:
        ``(freqItems, rules)``: a list of frequent itemsets (sets) and a list
        of ``[antecedent, consequent, confidence]`` rules. Both lists are
        empty when nothing is frequent.
    """
    frequency = getFrequencyFromList(itemSetList)
    return _mineFrequentPatterns(itemSetList, frequency, minSupRatio, minConf)


def fpgrowthFromFile(fname, minSupRatio, minConf):
    """Mine frequent itemsets and association rules from a CSV file.

    Each CSV row is read as one transaction via ``getFromFile``.

    Args:
        fname: path of the CSV file to read.
        minSupRatio: minimum support as a fraction of the transaction count.
        minConf: minimum confidence a rule needs to be reported.

    Returns:
        ``(freqItems, rules)``, with the same layout as ``fpgrowth``.
    """
    itemSetList, frequency = getFromFile(fname)
    return _mineFrequentPatterns(itemSetList, frequency, minSupRatio, minConf)


if __name__ == "__main__":
    optparser = OptionParser()
    optparser.add_option('-f', '--inputFile',
                         dest='inputFile',
                         help='CSV filename',
                         default=None)
    optparser.add_option('-s', '--minSupport',
                         dest='minSup',
                         help='Min support (float)',
                         default=0.5,
                         type='float')
    optparser.add_option('-c', '--minConfidence',
                         dest='minConf',
                         help='Min confidence (float)',
                         default=0.5,
                         type='float')

    (options, args) = optparser.parse_args()

    # Without -f the filename stays None and open() would fail with an
    # unhelpful TypeError; report a usage error instead.
    if options.inputFile is None:
        optparser.error('an input CSV file is required (-f/--inputFile)')

    freqItemSet, rules = fpgrowthFromFile(
        options.inputFile, options.minSup, options.minConf)

    print(freqItemSet)
    print(rules)
class Node:
    """One node of an FP-tree.

    Attributes are documented inline; ``next`` chains together all nodes
    that carry the same item so the header table can reach every occurrence.
    """

    def __init__(self, itemName, frequency, parentNode):
        self.itemName = itemName
        self.count = frequency      # weight of the transactions passing through
        self.parent = parentNode    # None only for the root
        self.children = {}          # item name -> child Node
        self.next = None            # next node holding the same item

    def increment(self, frequency):
        """Add ``frequency`` to this node's count."""
        self.count += frequency

    def display(self, ind=1):
        """Print this subtree, indenting each level by one more unit."""
        print(' ' * ind, self.itemName, ' ', self.count)
        for child in self.children.values():
            child.display(ind + 1)


def getFromFile(fname):
    """Read transactions from a CSV file, one transaction per row.

    Empty fields are dropped. Rows left with no items (blank lines, rows of
    only separators) are skipped so they do not inflate the transaction
    count that the minimum support is computed from.

    Returns:
        ``(itemSetList, frequency)`` where ``frequency`` holds a weight of 1
        for every transaction.
    """
    itemSetList = []

    # newline='' is what the csv module expects so it can handle line
    # endings (and quoted newlines) itself.
    with open(fname, 'r', newline='') as file:
        for row in reader(file):
            items = [item for item in row if item]
            if items:
                itemSetList.append(items)

    return itemSetList, [1] * len(itemSetList)


def constructTree(itemSetList, frequency, minSup):
    """Build an FP-tree and its header table from weighted transactions.

    Args:
        itemSetList: list of transactions (iterables of hashable items).
        frequency: weight of each transaction, parallel to ``itemSetList``.
        minSup: absolute minimum support an item needs to be kept.

    Returns:
        ``(fpTree, headerTable)`` where ``headerTable`` maps each frequent
        item to ``[support, firstNode]``; ``(None, None)`` when no item
        reaches ``minSup``.
    """
    # Count item support. dict.fromkeys() removes repeats inside a single
    # transaction so an item is counted at most once per transaction.
    itemCounts = defaultdict(int)
    for idx, itemSet in enumerate(itemSetList):
        for item in dict.fromkeys(itemSet):
            itemCounts[item] += frequency[idx]

    # HeaderTable column [Item: [frequency, headNode]], frequent items only
    headerTable = {item: [sup, None]
                   for item, sup in itemCounts.items() if sup >= minSup}
    if not headerTable:
        return None, None

    # FP-growth needs ONE global item order shared by every transaction.
    # Sorting each transaction by frequency alone leaves ties in whatever
    # order the transaction listed them, which splits identical itemsets
    # over different branches and hides frequent patterns. Ranking the
    # items once fixes the tie-break (first-seen order among equals).
    ordered = sorted(headerTable,
                     key=lambda item: headerTable[item][0], reverse=True)
    rank = {item: position for position, item in enumerate(ordered)}

    # Init Null head node
    fpTree = Node('Null', 1, None)
    for idx, itemSet in enumerate(itemSetList):
        kept = sorted(
            (item for item in dict.fromkeys(itemSet) if item in headerTable),
            key=rank.__getitem__)
        # Traverse from root to leaf, update tree with given item
        currentNode = fpTree
        for item in kept:
            currentNode = updateTree(item, currentNode, headerTable,
                                     frequency[idx])

    return fpTree, headerTable


def updateHeaderTable(item, targetNode, headerTable):
    """Append ``targetNode`` to the chain of nodes holding ``item``."""
    head = headerTable[item][1]
    if head is None:
        headerTable[item][1] = targetNode
        return
    # Traverse to the last node then link it to the target
    while head.next is not None:
        head = head.next
    head.next = targetNode


def updateTree(item, treeNode, headerTable, frequency):
    """Add ``item`` below ``treeNode`` and return the child node for it.

    An existing child has its count increased by ``frequency``; otherwise a
    new child is created and linked into the header table chain.
    """
    child = treeNode.children.get(item)
    if child is not None:
        child.increment(frequency)
    else:
        child = Node(item, frequency, treeNode)
        treeNode.children[item] = child
        updateHeaderTable(item, child, headerTable)
    return child


def ascendFPtree(node, prefixPath):
    """Append the item names from ``node`` up to (not including) the root.

    Implemented as a loop so that very long transactions (deep trees) cannot
    exhaust the interpreter's recursion limit.
    """
    while node.parent is not None:
        prefixPath.append(node.itemName)
        node = node.parent


def findPrefixPath(basePat, headerTable):
    """Collect the conditional pattern base of ``basePat``.

    Returns:
        ``(condPats, frequency)``: for every tree node holding ``basePat``
        that has at least one ancestor item, the list of ancestor items
        (nearest first) and that node's count.
    """
    condPats = []
    frequency = []
    # First node in linked list
    treeNode = headerTable[basePat][1]
    while treeNode is not None:
        prefixPath = []
        ascendFPtree(treeNode, prefixPath)
        # prefixPath[0] is basePat itself; only real ancestors are a prefix.
        if len(prefixPath) > 1:
            condPats.append(prefixPath[1:])
            frequency.append(treeNode.count)
        treeNode = treeNode.next
    return condPats, frequency


def mineTree(headerTable, minSup, preFix, freqItemList):
    """Recursively mine frequent itemsets and append them to ``freqItemList``.

    Args:
        headerTable: header table of the (conditional) FP-tree to mine.
        minSup: absolute minimum support.
        preFix: set of items the current conditional tree is conditioned on.
        freqItemList: output list; every frequent itemset found is appended
            to it as a new set.
    """
    # Start with the lowest frequency
    sortedItemList = sorted(headerTable,
                            key=lambda item: headerTable[item][0])
    for item in sortedItemList:
        # Grow the pattern: current suffix plus this item
        newFreqSet = preFix.copy()
        newFreqSet.add(item)
        freqItemList.append(newFreqSet)
        # Find all prefix paths, construct the conditional pattern base
        conditionalPattBase, frequency = findPrefixPath(item, headerTable)
        # Construct the conditional FP-tree from the conditional pattern base
        _, newHeaderTable = constructTree(conditionalPattBase, frequency,
                                          minSup)
        if newHeaderTable is not None:
            # Mining recursively on the tree
            mineTree(newHeaderTable, minSup, newFreqSet, freqItemList)


def powerset(s):
    """Yield every non-empty proper subset of ``s`` as a tuple."""
    return chain.from_iterable(combinations(s, r) for r in range(1, len(s)))


def getSupport(testSet, itemSetList):
    """Count the transactions in ``itemSetList`` that contain all of ``testSet``."""
    target = set(testSet)  # built once instead of once per transaction
    return sum(1 for itemSet in itemSetList if target.issubset(itemSet))


def associationRule(freqItemSet, itemSetList, minConf):
    """Derive association rules from frequent itemsets.

    Args:
        freqItemSet: iterable of frequent itemsets.
        itemSetList: the original transactions, used to count support.
        minConf: minimum confidence; rules whose confidence is at least
            this value are reported (inclusive, like the support threshold).

    Returns:
        List of ``[antecedent, consequent, confidence]`` with both sides as
        sets and ``confidence = support(itemset) / support(antecedent)``.
    """
    # Convert transactions to sets once so subset tests are not repeated
    # list scans, and remember supports already counted.
    transactions = [set(itemSet) for itemSet in itemSetList]
    supportCache = {}

    def support(items):
        key = frozenset(items)
        if key not in supportCache:
            supportCache[key] = getSupport(key, transactions)
        return supportCache[key]

    rules = []
    for itemSet in freqItemSet:
        itemSetSup = support(itemSet)
        for s in powerset(itemSet):
            antecedentSup = support(s)
            if antecedentSup == 0:
                # Confidence is undefined when the antecedent never occurs.
                continue
            confidence = itemSetSup / antecedentSup
            if confidence >= minConf:
                rules.append([set(s), set(itemSet) - set(s), confidence])
    return rules


def getFrequencyFromList(itemSetList):
    """Return a weight of 1 for every transaction in ``itemSetList``."""
    return [1] * len(itemSetList)
Approved :: MIT License", 19 | "Operating System :: OS Independent", 20 | ], 21 | python_requires='>=3.6', 22 | ) 23 | --------------------------------------------------------------------------------