【慕课网】如何统计序列中元素的出现频度-CFANZ编程社区

【慕课网】如何统计序列中元素的出现频度

from random import randint
import requests
from collections import Counter
from lxml import etree
import re

'如何统计序列中元素的出现频度'


# Find the three most frequent elements in a sequence of random integers

def main():
    """Demonstrate two ways of counting element frequencies in a sequence.

    Generates 30 random integers in the range 0..20 (inclusive), prints them,
    then prints:

    1. a frequency table built by hand with a plain dict,
    2. how many times the value ``1`` occurs according to ``Counter``,
    3. the three most common values according to ``Counter.most_common``.

    Returns nothing; all results go to standard output.
    """
    samples = [randint(0, 20) for _ in range(30)]
    print(samples)

    # Approach 1: manual tally. Keys are inserted in order of first
    # appearance, so the printed dict follows the order of the sample list.
    manual_tally = {}
    for value in samples:
        manual_tally[value] = manual_tally.get(value, 0) + 1

    # Approach 2: let Counter do the bookkeeping; missing keys read as 0.
    counter = Counter(samples)

    print("排序方案1：%s" % manual_tally)
    print("排序方案2：%s出现的次数：%s" % (1, counter[1]))
    print("频数最高的3个元素%s" % counter.most_common(3))


# 对文本进行词频统计
def main2():
    """Print word frequencies for the paragraphs of an online article.

    Downloads a fixed web page, collects the leading text of every ``<p>``
    that is a direct child of the element with id ``articlebody``, splits
    that text into words, and prints the full ``Counter`` followed by the
    three most common words.

    Returns nothing; results go to standard output. Network errors raised
    by ``requests`` (including a timeout) propagate to the caller.
    """
    # A local file works just as well as input, e.g.:
    #   txt = open('/test05.py').read()
    # Here the text is fetched from the web instead. The timeout keeps the
    # call from blocking forever if the server never answers.
    txt = requests.get(
        "http://www.en8848.com.cn/article/life/parenting/71258.html",
        timeout=10,
    ).text
    text = etree.HTML(txt)
    paragraphs = text.xpath('//*[@id="articlebody"]/p')

    # ``.text`` is None when a paragraph has no text before its first child
    # element, so those paragraphs are skipped.
    allText = "\n".join(p.text for p in paragraphs if p.text is not None)

    # Extract runs of word characters directly. Splitting on r'\W+' would
    # produce empty strings at the edges of the text, and those would be
    # counted as a bogus "word".
    allText2 = re.findall(r'\w+', allText)
    c3 = Counter(allText2)
    print(c3)
    print(c3.most_common(3))


# Run the word-frequency demo only when this file is executed as a script,
# so that importing the module does not trigger a network request.
if __name__ == "__main__":
    main2()

0 条评论