0
点赞
收藏
分享

微信扫一扫

Python爬虫(二)——豆瓣图书决策树构建


Matplotlib绘制决策树代码:

1 # coding=utf-8
2 import matplotlib.pyplot as plt
3
# Box and arrow styles shared by every node drawn with plotNode().
# 'fc' is a Matplotlib grayscale string and must lie in the 0-1 range;
# '1.0' is white (the out-of-range value '10' is rejected by Matplotlib).
decisionNode = dict(boxstyle='sawtooth', fc='1.0')
leafNode = dict(boxstyle='round4', fc='0.8')
arrow_args = dict(arrowstyle='<-')
7
8
9
def plotNode(nodeTxt, centerPt, parentPt, nodeType):
    """Draw one boxed node with an arrow linking it to its parent.

    Args:
        nodeTxt: Text shown inside the node box.
        centerPt: (x, y) position of the node text, in axes-fraction
            coordinates.
        parentPt: (x, y) position of the parent end of the arrow, in
            axes-fraction coordinates.
        nodeType: Dict of bbox properties for the box (for example
            ``decisionNode`` or ``leafNode``).

    The node is drawn on ``createPlot.ax1``, so ``createPlot`` must have
    created that axes before this function is called.
    """
    createPlot.ax1.annotate(
        nodeTxt,
        xy=parentPt,
        xycoords='axes fraction',
        xytext=centerPt,
        textcoords='axes fraction',
        va='center',
        ha='center',
        bbox=nodeType,
        arrowprops=arrow_args,
    )
15
16
def getNumLeafs(myTree):
    """Return the number of leaf nodes in a nested-dict decision tree.

    The tree has the shape ``{label: {branch: child, ...}}``. Only the
    first key of ``myTree`` is used as the node label. A child that is a
    dict (or dict subclass) is treated as a subtree and counted
    recursively; any other child counts as one leaf.

    Raises:
        IndexError: If ``myTree`` is empty.
    """
    firstStr = list(myTree.keys())[0]
    secondDict = myTree[firstStr]
    numLeafs = 0
    for child in secondDict.values():
        # isinstance (rather than comparing the type name) also accepts
        # dict subclasses such as OrderedDict as subtrees.
        if isinstance(child, dict):
            numLeafs += getNumLeafs(child)
        else:
            numLeafs += 1
    return numLeafs
27
def getTreeDepth(myTree):
    """Return the depth of a nested-dict decision tree.

    The tree has the shape ``{label: {branch: child, ...}}``. Only the
    first key of ``myTree`` is used as the node label. A leaf child
    contributes a depth of 1; a dict child (or dict subclass) contributes
    1 plus the depth of that subtree. The result is the maximum over all
    children, or 0 when the node has no children.

    Raises:
        IndexError: If ``myTree`` is empty.
    """
    firstStr = list(myTree.keys())[0]
    secondDict = myTree[firstStr]
    maxDepth = 0
    for child in secondDict.values():
        # isinstance (rather than comparing the type name) also accepts
        # dict subclasses such as OrderedDict as subtrees.
        if isinstance(child, dict):
            thisDepth = 1 + getTreeDepth(child)
        else:
            thisDepth = 1
        if thisDepth > maxDepth:
            maxDepth = thisDepth
    return maxDepth
39
def retrieveTree(i):
    """Return one of the predefined sample decision trees.

    Args:
        i: Index into the list of built-in trees (0, 1 or 2; negative
            indices follow normal list indexing).

    Returns:
        A freshly built nested dict of the form
        ``{label: {branch: subtree_or_leaf, ...}}``.

    Raises:
        IndexError: If ``i`` is outside the list of built-in trees.
    """
    # Preset tree definitions; the list is rebuilt on every call, so
    # callers receive an independent copy.
    listOfTree = [
        {'no surfacing': {0: 'no', 1: {'flipper': {0: 'no', 1: 'yes'}}}},
        {'no surfacing': {0: 'no', 1: {'flipper': {0: {'head': {0: 'no', 1: 'yes'}}, 1: 'no'}}}},
        {'Comment score greater than 8.0': {
            0: {'Comment score greater than 9.5': {
                0: 'Yes',
                1: {'More than 45,000 people commented': {0: 'Yes', 1: 'No'}},
            }},
            1: 'No',
        }},
    ]
    return listOfTree[i]
47
def createPlot(inTree):
    """Draw the whole decision tree in a Matplotlib window and show it.

    Args:
        inTree: Nested-dict decision tree of the form
            ``{label: {branch: subtree_or_leaf, ...}}``.

    The axes and layout state are stored as function attributes
    (``createPlot.ax1``, ``plotTree.totalW``, ``plotTree.totalD``,
    ``plotTree.xOff``, ``plotTree.yOff``) which ``plotNode``,
    ``plotMidText`` and ``plotTree`` read while drawing.
    """
    fig = plt.figure(1, facecolor='white')
    fig.clf()
    # No ticks and no frame: only the tree itself is drawn.
    axprops = dict(xticks=[], yticks=[])
    createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
    # Total leaf count and depth set the horizontal/vertical step sizes.
    plotTree.totalW = float(getNumLeafs(inTree))
    plotTree.totalD = float(getTreeDepth(inTree))
    # Start half a leaf-slot left of the axes so the first leaf, placed one
    # full slot further right, lands in the middle of its slot.
    plotTree.xOff = -0.5 / plotTree.totalW
    plotTree.yOff = 1.0
    plotTree(inTree, (0.5, 1.0), '')
    plt.title('Douban reading Decision Tree\n')
    plt.show()
59
def plotMidText(cntrPt, parentPt, txtString):
    """Write a label at the midpoint between a node and its parent.

    Args:
        cntrPt: (x, y) position of the child node.
        parentPt: (x, y) position of the parent node.
        txtString: Text to place halfway along the connecting edge.

    The text is drawn on ``createPlot.ax1``, so ``createPlot`` must have
    created that axes before this function is called.
    """
    xMid = (parentPt[0] - cntrPt[0]) / 2.0 + cntrPt[0]
    yMid = (parentPt[1] - cntrPt[1]) / 2.0 + cntrPt[1]
    createPlot.ax1.text(xMid, yMid, txtString)
64
def plotTree(myTree, parentPt, nodeTxt):
    """Recursively draw a subtree below its parent node.

    Args:
        myTree: Nested-dict (sub)tree of the form
            ``{label: {branch: subtree_or_leaf, ...}}``.
        parentPt: (x, y) position of the parent node; the arrow to this
            subtree's root is attached there.
        nodeTxt: Label written on the edge between the parent and this
            subtree's root (the branch key, or '' for the tree root).

    Layout state lives in the function attributes ``plotTree.xOff``,
    ``plotTree.yOff``, ``plotTree.totalW`` and ``plotTree.totalD``, which
    ``createPlot`` initialises before the first call.
    """
    numLeafs = getNumLeafs(myTree)
    firstStr = list(myTree.keys())[0]
    # Centre this decision node horizontally over the leaves beneath it.
    cntrPt = (
        plotTree.xOff + (1.0 + float(numLeafs)) / 2.0 / plotTree.totalW,
        plotTree.yOff,
    )
    plotMidText(cntrPt, parentPt, nodeTxt)
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    # Step one level down for the children of this node.
    plotTree.yOff = plotTree.yOff - 1.0 / plotTree.totalD
    for key, child in secondDict.items():
        if isinstance(child, dict):
            plotTree(child, cntrPt, str(key))
        else:
            # Each leaf takes the next horizontal slot, left to right.
            plotTree.xOff = plotTree.xOff + 1.0 / plotTree.totalW
            leafPt = (plotTree.xOff, plotTree.yOff)
            plotNode(child, leafPt, cntrPt, leafNode)
            plotMidText(leafPt, cntrPt, str(key))
    # Restore the level so siblings of this subtree are drawn at the
    # correct height.
    plotTree.yOff = plotTree.yOff + 1.0 / plotTree.totalD
84
if __name__ == '__main__':
    # Tree 2 is the Douban book-rating decision tree.
    myTree = retrieveTree(2)
    # Render the tree; without this call the script only builds the dict
    # and exits without drawing anything.
    createPlot(myTree)
87

 

运行结果:

 

Python爬虫(二)——豆瓣图书决策树构建_python

 

作者: AntzUhl

公众号

Python爬虫(二)——豆瓣图书决策树构建_爬虫_02

举报

相关推荐

0 条评论