为每个中文句子添加语义类别，标签为数字形式，从 1 开始编号
import os
def add_type_label(from_list, to_list):
    """Append a numeric semantic-category label to every sentence line.

    Each regular file in ``from_list`` is treated as one category: files are
    visited in sorted name order and numbered 1, 2, 3, ...  Every line of a
    file is written to ``to_list/<stem>.txt`` with its newline replaced by
    the file's label followed by a newline (no separator is inserted between
    the sentence and the label).

    NOTE(review): the indentation of the original was lost, so whether the
    label advances per file or per line could not be read off the code; the
    per-file reading is assumed here because the label is described as a
    semantic category -- confirm against the expected output.

    Args:
        from_list: Path of the directory holding the input text files.
        to_list: Path of the output directory; created if it is missing.

    Returns:
        None. Output files are opened in append mode, so running the function
        twice on the same directories duplicates the labelled lines.
    """
    os.makedirs(to_list, exist_ok=True)

    label = 1
    # os.listdir() returns names in arbitrary order; sorting keeps the
    # file -> label mapping reproducible across runs and platforms.
    for file_name in sorted(os.listdir(from_list)):
        source_path = os.path.join(from_list, file_name)
        if not os.path.isfile(source_path):
            # Sub-directories cannot be read as sentence files; skip them
            # without consuming a label.
            continue

        # Cut the name at the first '.txt'. str.find() returns -1 when the
        # marker is absent, and slicing with -1 would silently drop the last
        # character of the name, so in that case the name is kept whole.
        marker = file_name.find('.txt')
        stem = file_name if marker == -1 else file_name[:marker]
        target_path = os.path.join(to_list, stem + '.txt')

        # Read-only access is enough for the input, and the output is opened
        # once per input file rather than once per line.
        with open(source_path, "r", encoding="utf-8", errors='ignore') as src, \
                open(target_path, "a", encoding="utf-8", errors='ignore') as dst:
            for line in src:
                dst.write(line.replace('\n', '') + str(label) + '\n')
        label += 1
if __name__ == '__main__':
    # The input and output directories share the target word as their final
    # path component.
    target_word = '使'
    source_dir = './word/{}'.format(target_word)
    labelled_dir = './word_label/{}'.format(target_word)
    add_type_label(source_dir, labelled_dir)
运行结果：
中文句子数据：
处理后的结果：