0
点赞
收藏
分享

微信扫一扫

【慕课网】如何统计序列中元素的出现频度


from random import randint
import requests
from collections import Counter
from lxml import etree
import re

'如何统计序列中元素的出现频度'


# Find the three most frequent elements in a list of random integers

def main():
    """Demonstrate two ways of counting element frequencies in a list.

    Builds a list of 30 random integers in the range 0..20, tallies the
    occurrences once with a plain dict and once with ``collections.Counter``,
    and prints the data, the hand-made tally, the count of the value ``1``
    and the three most common values.
    """
    data = [randint(0, 20) for _ in range(30)]
    print(data)

    # Approach 1: seed a dict with a zero count per distinct value, then
    # tally by hand.
    manual_counts = dict.fromkeys(data, 0)
    for value in data:
        manual_counts[value] += 1
    print("排序方案1:%s" % manual_counts)

    # Approach 2: Counter does the tallying itself and provides
    # most_common() for the top-N query. Missing keys count as 0, so
    # looking up 1 is safe even when 1 is not in the data.
    counter = Counter(data)
    print("排序方案2:%s出现的次数:%s" % (1, counter[1]))
    print("频数最高的3个元素%s" % counter.most_common(3))


# 对文本进行词频统计
def _count_words(text):
    """Return a Counter of the word tokens (runs of ``\\w`` characters) in text."""
    # findall never yields empty strings, unlike re.split(r'\W+', ...), which
    # produces '' tokens when the text starts or ends with a separator.
    return Counter(re.findall(r'\w+', text))


def main2(url="http://www.en8848.com.cn/article/life/parenting/71258.html",
          top_n=3, timeout=10):
    """Fetch a web page and print the word frequencies of its article body.

    The text is taken from the leading text of every ``<p>`` directly under
    the element whose id is ``articlebody``; paragraphs without leading text
    are skipped. Counting is case-sensitive.

    A local file can be used instead of a web page by reading it and passing
    its contents through the same counting step.

    Args:
        url: Page to download. Defaults to the article used by the tutorial.
        top_n: How many of the most frequent words to print.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests.get`` can block indefinitely.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of counting words of an error page.
    response.raise_for_status()

    tree = etree.HTML(response.text)
    paragraphs = tree.xpath('//*[@id="articlebody"]/p')
    # Join once instead of growing a string inside the loop.
    all_text = "\n".join(p.text for p in paragraphs if p.text is not None)

    word_counts = _count_words(all_text)
    print(word_counts)
    print(word_counts.most_common(top_n))


# Run the word-frequency demo only when executed as a script, so that merely
# importing this module does not trigger a network request.
if __name__ == "__main__":
    main2()


举报

相关推荐

0 条评论