# coding=utf-8
class HeaderTable:
    """Builds the frequency-ordered header table from a flat list of items."""

    def __init__(self):
        pass

    def create(self, dat, minsup):
        """Count item occurrences and keep the items that reach ``minsup``.

        Args:
            dat: Flat iterable of hashable items (every transaction
                concatenated into one sequence).
            minsup: Minimum absolute occurrence count. Items that occur
                fewer than ``minsup`` times are dropped.

        Returns:
            A list of ``(item, count)`` tuples sorted by descending count.
            Items with equal counts keep the order in which they first
            appear in ``dat`` (on Python 3.7+, where dicts are ordered).
        """
        from collections import Counter

        # One pass over the data instead of one dat.count() call per key.
        counts = Counter(dat)
        frequent = [(item, n) for item, n in counts.items() if n >= minsup]
        # sorted() is stable even with reverse=True, so ties stay in
        # first-appearance order rather than in arbitrary set order.
        headerTable = sorted(frequent, key=lambda pair: pair[1], reverse=True)
        # Kept from the original implementation: echoes the table to stdout.
        print(headerTable)
        return headerTable
class FPTreeItem:
    """A single node of the prefix tree.

    Attributes are plain public fields; ``supprate`` and ``confrate`` start
    at 0 and are expected to be filled in by the caller afterwards.
    """

    def __init__(self, key, name, numOccur, parent):
        """Store the node's identity and its initial occurrence count.

        Args:
            key: Identifier of the node (BuildTree.update passes the full
                item path, including this item, as a tuple).
            name: The item this node represents.
            numOccur: Initial occurrence count.
            parent: Identifier of the parent node (BuildTree.update passes
                the path without this item, as a tuple).
        """
        self.key = key
        self.name = name
        self.numOccur = numOccur
        self.parent = parent
        self.supprate = 0
        self.confrate = 0

    def __repr__(self):
        return 'FPTreeItem(key=%r, name=%r, numOccur=%r, parent=%r)' % (
            self.key, self.name, self.numOccur, self.parent)
class BuildTree:
    """Converts raw transactions into a flat list of prefix-tree nodes."""

    def refactor(self, inDat, headerTable):
        """Filter and reorder every transaction to follow the header table.

        Args:
            inDat: List of transactions, each a list of hashable items
                (``[[...], [...]]``).
            headerTable: Sequence of ``(item, count)`` pairs; only the item
                and the position of each pair are used.

        Returns:
            A list with one list per transaction. Each inner list holds the
            header-table items present in that transaction, in header-table
            order; each header-table entry contributes at most once.
        """
        ordered_items = [entry[0] for entry in headerTable]
        dat = []
        for transaction in inDat:
            # Build the membership set once per transaction.
            present = set(transaction)
            dat.append([item for item in ordered_items if item in present])
        return dat

    def update(self, updDat):
        """Build the prefix nodes for the given ordered transactions.

        Every distinct prefix path across all transactions gets exactly one
        FPTreeItem; its ``numOccur`` is the number of transactions that
        start with that path.

        Args:
            updDat: List of transactions (``[[...], [...]]``), normally the
                output of ``refactor``.

        Returns:
            A list of FPTreeItem objects in order of first appearance.
        """
        fpTree = []
        # Maps a path tuple to its node, so an existing node is found
        # directly instead of scanning the whole node list for every item.
        node_by_path = {}
        for line in updDat:
            path = ()
            for item in line:
                parent = path
                path = parent + (item,)
                node = node_by_path.get(path)
                if node is None:
                    # First time this path is seen: create its node.
                    node = FPTreeItem(path, item, 1, parent)
                    node_by_path[path] = node
                    fpTree.append(node)
                else:
                    node.numOccur += 1
        return fpTree
# Flat list of all items from every transaction; it is extended by the
# script code later in this file and passed to HeaderTable.create.
oneDimList = []
def loadSimpDat():
    """Return the built-in sample data set.

    The result is a list of seven transactions, each a list of item label
    strings. A new list of lists is built on every call.
    """
    rows = (
        't1 t2 t3 t4 t5 t6 t7 t8 t9 t10 t11 t12 t13 t14 t15 '
        'tr1 tr2 tr3 tr4 tr5 tr6 tr7',
        't1 t2 t8 t9 t10 t11 t12 t13 t14 t15 tr1 tr2 tr6 tr7',
        't1 t2 t3 t4 t5 t6 t7 t8 t9 t10 t11 t12 tr1 tr2 tr6',
        't1 t2 t3 t4 t5 t11 t12 t13 t14 t15 tr1 tr6 tr7',
        't1 t2 t3 t4 t5 t6 t7 t14 t15 tr1 tr7',
        't1 t14 t15 tr1 tr2 tr6 tr7',
        't1 t2 t3 t13 t14 t15 tr1 tr2 tr3 tr7',
    )
    # Each row is a whitespace-separated list of labels.
    return [row.split() for row in rows]
# Load the sample transactions. This assignment was commented out, which
# left simpDat undefined for everything that follows.
simpDat = loadSimpDat()
# Flatten all transactions into one item list for frequency counting.
for line in simpDat:
    oneDimList.extend(line)
# header table: (item, count) pairs, most frequent first (minsup = 1)
headerTable = HeaderTable().create(oneDimList, 1)
# tree: reorder each transaction by the header table, then build the nodes
buildTree = BuildTree()
updDat = buildTree.refactor(simpDat, headerTable)
fpTree = buildTree.update(updDat)
# Occurrence count of every node, keyed by its full path (parent path plus
# the node's own item), so a node's parent count can be looked up by path.
countmap = {}
for fpTreeItem in fpTree:
    countmap[tuple(fpTreeItem.parent) + (fpTreeItem.name,)] = fpTreeItem.numOccur
for fpTreeItem in fpTree:
    # Support: share of all transactions that contain this path.
    fpTreeItem.supprate = 1.0 * fpTreeItem.numOccur / len(simpDat)
    # Confidence is only defined for nodes with a non-empty parent path;
    # first-level nodes keep confrate == 0.
    if fpTreeItem.parent:
        parentcount = countmap[tuple(fpTreeItem.parent)]
        fpTreeItem.confrate = 1.0 * fpTreeItem.numOccur / parentcount
# Column header for the listing below. The original literal was an
# unterminated, unrecoverable string; this header matches the row format.
print('parent\titem\tsupport\tconfidence')
for fpTreeItem in fpTree:
    # Only report entries with confidence of at least 0.8.
    if fpTreeItem.confrate < 0.8:
        continue
    print('%s\t%s\t%.4f\t%.4f' % (','.join(fpTreeItem.parent), fpTreeItem.name, fpTreeItem.supprate, fpTreeItem.confrate))