#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date : 2021/10/28
# @Author : 80006339
# @description : Extract keywords by segmenting the 'comp' column with jieba POS tagging
# @File : 提取关键词.py
# @Software: PyCharm+
# 提取关键词
import pandas as pd
import jieba.posseg as pseg
from datetime import datetime
# Location of the input file: directory prefix plus file name.
path = r'F:/'
file = 'xxxxxx.csv'

# Open the tab-separated input as an iterator of DataFrames of at most
# 100000 rows each, so the whole file is never loaded at once.
# Column names are supplied explicitly via `names`.
chunks = pd.read_csv(
    path + file,
    sep='\t',
    chunksize=100000,
    encoding='utf-8',
    names=['comp', 'cate3', 'type'],
)

# Number of chunks handled so far (incremented once per chunk in the loop below).
count = 0
# Starts empty; NOTE(review): presumably accumulates the per-chunk results,
# but its use is not visible in this part of the file - confirm.
df_rslt = pd.DataFrame()
for df in chunks:
count=count+1
print('开始',count*100000,datetime.now())
df=df[df['cate3'].notnull()&(df['cate3']!='null')]
print(df)
segments=[]
for index, row in df.iterrows():
words = pseg.cut(row['comp'])
# print(words)
for t in words:
segments.append({'word': t.word, 'flag': t.flag, 'cate':