├── LICENSE
├── README.md
├── fp_tree.py
└── input.txt
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Ray Pan
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | pyFP-Tree
2 | =========
3 |
4 | Frequent pattern tree algorithm using Python
5 | Algorithm from Han J, Pei J, Yin Y. Mining frequent patterns without candidate generation[C]//ACM SIGMOD Record. ACM, 2000, 29(2): 1-12. (https://www.cs.sfu.ca/~jpei/publications/sigmod00.pdf)
6 |
7 |
8 | 用python运行程序后,会要求输入minimum support threshold,输入后程序会先后给出:a. 每个商品被购买的次数;b. 经过minimum support threshold筛选并排序后的商品顺序;c.每次交易的(ordered) frequent items; d.建树的过程; e. 最后生成的树;f.要求输入两个商品代号; g. 程序给出同时购买过这两个商品的总人数
9 |
10 |
11 | LICENSE: MIT License
12 |
--------------------------------------------------------------------------------
/fp_tree.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import pdb
3 | import string
4 |
class Node(object):
    """One node of the FP-tree (tree structure).

    Inherits from ``object`` so the class is new-style on Python 2 as well.
    All fields are plain public attributes that the tree-building and query
    functions read and write directly.
    """

    def __init__(self):
        self.num_children = 0  # bumped each time a child is appended
        self.children = []     # child nodes, in insertion order
        self.parent = None     # node this one hangs under; None until attached
        self.times = 0         # how many inserted transactions reached this node
        self.item = ''         # item label; stays '' on a freshly created node

    def __repr__(self):
        return "Node(item=%r, times=%d, children=%d)" % (
            self.item, self.times, len(self.children))
12 |
def stats(filename):
    """Calculate how many times every item was purchased.

    Args:
        filename: Despite the name, an iterable of text lines (for example an
            open file object). Every character of every line is inspected.

    Returns:
        A new list of 26 ints: index 0 is the count for 'a', index 25 the
        count for 'z'. Characters outside 'a'..'z' (newlines, digits,
        upper-case letters, ...) are ignored.
    """
    # A fresh list per call: counting must not accumulate across calls or
    # depend on a module-level variable being initialised beforehand.
    counts = [0] * 26
    for line in filename:
        for element in line:
            # Explicit ASCII range check: works on Python 2 and 3 and is not
            # affected by the locale.
            if 'a' <= element <= 'z':
                counts[ord(element) - 97] += 1
    return counts
21 |
def swapping(sample_list, minimum_support):
    """Sort the items by frequency and drop those below the minimum support.

    Args:
        sample_list: Per-letter counts; index 0 belongs to 'a', index 1 to
            'b', and so on. The list is only read, never modified.
        minimum_support: Smallest count an item needs in order to be kept.

    Returns:
        A list of single-letter strings ordered by descending count. Items
        with equal counts keep alphabetical order. Items that never occur
        (count 0) are never returned, whatever the threshold is.
    """
    frequent = [
        index for index, count in enumerate(sample_list)
        if count > 0 and count >= minimum_support
    ]
    # Highest count first; the index as secondary key makes ties come out in
    # alphabetical order, like a repeated "first strict maximum" scan would.
    frequent.sort(key=lambda index: (-sample_list[index], index))
    return [chr(index + 97) for index in frequent]
40 |
def ordered_frequent_items(filename, listname):
    """Find the (ordered) frequent items of every transaction.

    Args:
        filename: Where the transactions come from. May be a path string, an
            open file object, or any iterable of text lines. ``None`` falls
            back to ``'input.txt'``. A file object is rewound first, because
            the caller may already have read it to the end.
        listname: The frequent items, most frequent first.

    Returns:
        One list per input line, holding the characters of that line that
        appear in ``listname``, arranged in ``listname`` order. A character
        repeated within a line is repeated in the result.

    Side effects:
        Prints the line count, then every line followed by its result list.
    """
    try:
        text_types = basestring  # Python 2: covers str and unicode
    except NameError:
        text_types = str  # Python 3

    if filename is None:
        filename = 'input.txt'
    if isinstance(filename, text_types):
        # A path: read it once and make sure the handle gets closed.
        with open(filename) as handle:
            lines = handle.readlines()
    else:
        rewind = getattr(filename, 'seek', None)
        if rewind is not None:
            rewind(0)
        lines = list(filename)

    print("num of lines :  %d" % len(lines))
    OFI_list = []
    for line in lines:
        print(line)
        ordered = [
            character
            for character in listname
            for element in line
            if character == element
        ]
        print(ordered)
        OFI_list.append(ordered)
    return OFI_list
57 |
def BuildTree(node, list, element):
    """Insert one transaction into the FP-tree below ``node``.

    Args:
        node: Node under which the transaction path is inserted.
        list: The transaction's ordered frequent items. It is only read; the
            caller's list is left intact.
        element: Label used for the first step of the path (callers pass
            ``list[0]``). The remaining steps are ``list[1:]``.

    For every step the child carrying that label gets its ``times`` counter
    incremented; if no such child exists a new ``Node`` is created, attached
    (``parent``, ``children``, ``num_children``) and counted once. An empty
    ``list`` is a no-op.
    """
    if not list:
        return

    path = [element]
    path.extend(list[1:])

    # Iterative walk: same tree as the recursive formulation, without
    # consuming the input list.
    current = node
    for label in path:
        match = None
        for child in current.children:
            if child.item == label:
                match = child
                break
        if match is None:
            match = Node()
            match.item = label
            match.parent = current
            current.children.append(match)
            current.num_children += 1
        match.times += 1
        current = match
81 |
def Query(tree):
    """Ask for two items and return how many buyers purchased both.

    Args:
        tree: Root node of the FP-tree to search.

    Returns:
        The sum of ``times`` over every node found by ``FindB`` beneath the
        nodes found by ``FindA``.

    The prompt is repeated until at least two non-blank characters are
    entered; whitespace is ignored, so both ``ab`` and ``a b`` work. Reads
    the module-level ``sample_list`` to decide which of the two items is
    searched first.
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    while True:
        picked = "".join(read_line("Please input two items: ").split())
        if len(picked) >= 2:
            break
    a, b = picked[0], picked[1]

    # Swap the sequence of the user's input so it fits the OFI sequence:
    # whichever item comes first in sample_list is looked up first.
    for element in sample_list:
        if element == a:
            break
        if element == b:
            a, b = b, a
            break

    A_list = []
    FindA(tree, a, A_list)
    B_list = []
    for node in A_list:
        FindB(node, b, B_list)
    return sum(node.times for node in B_list)
103 |
def FindA(node, a, list):
    """Collect the nodes labelled ``a`` in the subtree rooted at ``node``.

    Args:
        node: Node where the search starts.
        a: Item label to look for.
        list: Output list; matching nodes are appended to it.

    Nodes without children are never collected, and the search does not
    continue below a node that matched.
    """
    if not node.children:
        return
    if node.item == a:
        list.append(node)
        return
    for child in node.children:
        FindA(child, a, list)
112 |
def FindB(node, b, list):
    """Collect the nodes labelled ``b`` in the subtree rooted at ``node``.

    Args:
        node: Node where the search starts; it is itself a candidate.
        b: Item label to look for.
        list: Output list; matching nodes are appended to it.

    Leaves are collected too. The search does not continue below a node
    that matched.
    """
    if node.item == b:
        list.append(node)
        return
    for child in node.children:
        FindB(child, b, list)
121 |
def Draw_tree(node, prefix, isTail):
    """Draw the subtree rooted at ``node`` on standard output.

    Args:
        node: Node to print; its ``item`` is the label and its ``children``
            are printed recursively beneath it.
        prefix: Text printed in front of this node's branch marker.
        isTail: True when ``node`` is the last child of its parent. Selects
            the corner marker and leaves the column below it blank.
    """
    print(prefix + ("└── " if isTail else "├── ") + node.item)

    # Continuation prefixes are four columns wide, the same width as the
    # branch markers, so deeper levels line up under their parent.
    child_prefix = prefix + ("    " if isTail else "│   ")
    children = node.children
    for child in children[:-1]:
        Draw_tree(child, child_prefix, False)
    if children:
        Draw_tree(children[-1], child_prefix, True)
139 |
# Script body: count items, build the FP-tree from input.txt, draw it and
# answer one "bought together" query.
try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3

# Module-level on purpose: other functions in this file read ``sample_list``.
sample_list = [0] * 26
# Parse the threshold with int() rather than Python 2's input(), which would
# evaluate whatever the user types as an expression.
minimum_support = int(_read_line("Please input a minimum support: "))
with open('input.txt') as transactions:
    sample_list = stats(transactions)
print("original sequence is : %s" % (sample_list,))

sample_list = swapping(sample_list, minimum_support)
print("after sorting (which meet the minimum_support): %s" % (sample_list,))

OFI_list = ordered_frequent_items('input.txt', sample_list)
root = Node()
print("Tree Building...... Wait please....")
for n in OFI_list:
    # A transaction without any frequent item contributes nothing to the
    # tree and has no n[0] to start from.
    if n:
        BuildTree(root, n, n[0])
print(OFI_list)
print("Your tree has been succefully built, shown as follow: ")
Draw_tree(root, "", True)
result = Query(root)
print(result)
163 |
164 |
165 |
--------------------------------------------------------------------------------
/input.txt:
--------------------------------------------------------------------------------
1 | facdgimp
2 | abcflmo
3 | bfhjo
4 | bcksp
5 | afcelpmn
--------------------------------------------------------------------------------