├── LICENSE ├── README.md ├── fp_tree.py └── input.txt /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Ray Pan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pyFP-Tree 2 | ========= 3 | 4 | Frequent pattern tree algorithm using Python

# coding=utf-8
"""pyFP-Tree: frequent pattern tree (FP-tree) demo.

Algorithm from Han J, Pei J, Yin Y. "Mining frequent patterns without
candidate generation", ACM SIGMOD Record, 2000, 29(2): 1-12.
(https://www.cs.sfu.ca/~jpei/publications/sigmod00.pdf)

Run as a script, the program asks for a minimum support threshold and then
prints, in order:

a. how many times each item was purchased;
b. the items that meet the minimum support, sorted by purchase count;
c. the (ordered) frequent items of every transaction;
d. the tree-building progress;
e. the resulting tree;
f. a prompt for two item letters;
g. the number of transactions that contain both of those items.

Transactions are read from ``input.txt``, one per line, every item being a
single lowercase letter.  Sample ``input.txt``::

    facdgimp
    abcflmo
    bfhjo
    bcksp
    afcelpmn

LICENSE: MIT License
"""
import pdb
import string

# The interactive prompts must work on both Python 2 and Python 3.
try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3

try:
    _STRING_TYPES = (basestring,)  # Python 2: str and unicode
except NameError:
    _STRING_TYPES = (str,)

_LETTERS = frozenset(string.ascii_lowercase)

# Frequent items in tree order.  main() rebinds this; Query() falls back to
# it when no explicit order is passed.
sample_list = [0] * 26


class Node:  # tree structure
    """One FP-tree node.

    Attributes:
        item: the item letter stored in this node ('' for the root).
        times: number of transactions whose path runs through this node.
        parent: the parent Node, or None for the root.
        children: child nodes in insertion order.
        num_children: number of children added through BuildTree.
    """

    def __init__(self, item='', parent=None):
        self.num_children = 0
        self.children = []
        self.parent = parent
        self.times = 0
        self.item = item

    def __repr__(self):
        return "Node(item=%r, times=%d)" % (self.item, self.times)


def stats(filename):  # calculate purchased times of every item
    """Count the occurrences of each item 'a'..'z'.

    Args:
        filename: despite its name, an iterable of transaction strings
            (the script passes the lines of the input file).

    Returns:
        A new list of 26 ints; index 0 is the count of 'a', index 25 of 'z'.
        Characters other than lowercase ASCII letters are ignored.
    """
    counts = [0] * 26
    for line in filename:
        for element in line:
            if element in _LETTERS:
                counts[ord(element) - 97] += 1
    return counts


def swapping(sample_list, minimum_support):
    """Return the frequent items, most frequent first.

    NOTE(review): the comparison lines of this function were garbled in the
    reviewed copy (everything between a '<' and a '>' was missing).  The
    behaviour here -- repeatedly pick the largest remaining count and keep it
    if it reaches the threshold -- is a reconstruction; confirm against the
    upstream file.

    Args:
        sample_list: per-letter counts as returned by stats().
        minimum_support: smallest count an item needs to be kept.

    Returns:
        A list of item letters with count >= minimum_support, sorted by
        descending count; ties are broken alphabetically.  Items that never
        occur are dropped even when minimum_support <= 0.  The input list is
        left untouched.
    """
    threshold = max(minimum_support, 1)
    indices = range(min(len(sample_list), 26))
    ranked = sorted(indices, key=lambda idx: (-sample_list[idx], idx))
    return [chr(idx + 97) for idx in ranked if sample_list[idx] >= threshold]


def _read_lines(source):
    """Return the transactions in *source* as a list of newline-free strings."""
    if source is None:
        source = 'input.txt'  # historical default of this script
    if isinstance(source, _STRING_TYPES):
        with open(source) as handle:
            return handle.read().splitlines()
    if hasattr(source, "seek"):
        # An open file may already have been consumed (e.g. by stats()).
        source.seek(0)
    return [line.rstrip("\r\n") for line in source]


def ordered_frequent_items(filename, listname):
    """Project every transaction onto the frequent items, in tree order.

    Args:
        filename: a path, an open file, or any iterable of transaction
            strings.  None falls back to 'input.txt'.
        listname: frequent items in the order produced by swapping().

    Returns:
        One list per transaction holding the frequent items it contains,
        ordered as in *listname*.  A transaction without frequent items
        yields an empty list.  Progress is printed along the way.
    """
    lines = _read_lines(filename)
    print("num of lines : %d" % len(lines))
    OFI_list = []
    for line in lines:
        print(line)
        # Membership test: an item repeated inside one transaction still
        # counts once.
        ordered = [character for character in listname if character in line]
        print(ordered)
        OFI_list.append(ordered)
    return OFI_list


def BuildTree(node, list, element=None):  # build a fp-tree
    """Insert one ordered transaction below *node*.

    Args:
        node: the node to insert under (normally the root).
        list: the ordered frequent items of one transaction.  It is not
            modified.
        element: the first item to insert; defaults to list[0].

    Nodes on the path are created on demand and the ``times`` counter of
    every node on the path is incremented.  An empty transaction is ignored.
    """
    path = [item for item in list]  # work on a copy, keep the caller's list
    if not path:
        return
    if element is not None:
        path[0] = element
    for item in path:
        match = None
        for child in node.children:
            if child.item == item:
                match = child
                break
        if match is None:
            match = Node(item, node)
            node.children.append(match)
            node.num_children += 1
        match.times += 1
        node = match


def _count_common(tree, a, b, order):
    """Return how many transactions in *tree* contain both *a* and *b*."""
    # The item that comes first in the tree order must be searched first,
    # because it sits closer to the root.
    for element in order:
        if element == a:
            break
        elif element == b:
            a, b = b, a
            break
    A_list = []
    B_list = []
    FindA(tree, a, A_list)
    for node in A_list:
        FindB(node, b, B_list)
    return sum(node.times for node in B_list)


def Query(tree, order=None, pair=None):  # main function of finding common buyers
    """Count the transactions that contain two given items.

    Args:
        tree: root of the FP-tree.
        order: frequent items in tree order; defaults to the module-level
            ``sample_list``.
        pair: a string holding the two item letters (whitespace is ignored).
            When omitted the user is prompted for it.

    Returns:
        The number of transactions containing both items.

    Raises:
        ValueError: if fewer than two item letters were supplied.
    """
    if pair is None:
        pair = _read_line("Please input two items: ")
    if order is None:
        order = sample_list
    picked = [character for character in pair if not character.isspace()]
    if len(picked) < 2:
        raise ValueError("expected two item letters, got %r" % (pair,))
    return _count_common(tree, picked[0], picked[1], order)


def FindA(node, a, list):
    """Append to *list* the topmost nodes at or below *node* holding *a*.

    The search does not continue below a match.  Leaf nodes are checked too,
    so the result is complete even when both queried items are the same.
    """
    if node.item == a:
        list.append(node)
        return
    for child in node.children:
        FindA(child, a, list)


def FindB(node, b, list):
    """Append to *list* the topmost nodes at or below *node* holding *b*."""
    if node.item == b:
        list.append(node)
        return
    for child in node.children:
        FindB(child, b, list)


def _tree_lines(node, prefix, isTail):
    """Return the text lines that draw *node* and its subtree."""
    lines = [prefix + ("└── " if isTail else "├── ") + node.item]
    # NOTE(review): the continuation prefixes are restored to four columns to
    # line up with the branch markers; the reviewed copy had its whitespace
    # collapsed, so confirm the exact width against the upstream file.
    child_prefix = prefix + ("    " if isTail else "│   ")
    last = len(node.children) - 1
    for index, child in enumerate(node.children):
        lines.extend(_tree_lines(child, child_prefix, index == last))
    return lines


def Draw_tree(node, prefix, isTail):  # Draw a tree
    """Print *node* and its subtree as an indented tree.

    Args:
        node: subtree root to draw.
        prefix: text printed before the branch marker of *node*.
        isTail: True when *node* is the last child of its parent.
    """
    for line in _tree_lines(node, prefix, isTail):
        print(line)


def main():
    """Run the interactive demo on 'input.txt'."""
    global sample_list
    with open('input.txt') as handle:
        transactions = handle.read().splitlines()
    minimum_support = int(_read_line("Please input a minimum support: "))
    counts = stats(transactions)
    print("original sequence is : %s" % counts)

    sample_list = swapping(counts, minimum_support)
    print("after sorting (which meet the minimum_support): %s" % sample_list)

    OFI_list = ordered_frequent_items(transactions, sample_list)
    root = Node()
    print("Tree Building...... Wait please....")
    for n in OFI_list:
        if n:  # a transaction may contain no frequent item at all
            BuildTree(root, n, n[0])
    print(OFI_list)
    print("Your tree has been succefully built, shown as follow: ")
    Draw_tree(root, "", True)
    result = Query(root, sample_list)
    print(result)


if __name__ == "__main__":
    main()