将统计好的中文词、语义、词性依次合并到txt中
def count_char_merge(word_list, semantics_list, pos_list, wps_list):
    """Append the lines of three count files, in order, to one text file.

    The word, semantics and part-of-speech files are read in that order.
    Every line is stripped of surrounding whitespace; the stripped lines
    are joined with newlines and appended to ``wps_list``. No trailing
    newline is written after the last line.

    Args:
        word_list: Path of the text file holding the counted words.
        semantics_list: Path of the text file holding the counted semantics.
        pos_list: Path of the text file holding the counted parts of speech.
        wps_list: Path of the output file. It is opened in append mode, so
            any existing content is kept and the merged text is added
            after it.

    Raises:
        OSError: If an input file cannot be opened for reading or the
            output file cannot be opened for writing.
    """
    merged_lines = []
    # Concatenate the files directly instead of looking positions up with
    # list.index(): that lookup is quadratic, returns the first match for
    # duplicated lines (corrupting the offsets of the following files), and
    # left the offset variable unbound when a file was empty.
    for path in (word_list, semantics_list, pos_list):
        # Read-only access is sufficient; undecodable bytes are skipped.
        with open(path, "r", encoding="utf-8", errors="ignore") as src:
            merged_lines.extend(line.strip() for line in src)

    # The context manager closes the file; no explicit close() is needed.
    with open(wps_list, "a", encoding="utf-8", errors="ignore") as dst:
        dst.write("\n".join(merged_lines))
if __name__ == '__main__':
    # Character whose word / semantics / part-of-speech count files are
    # merged into a single output file.
    target_char = '使'
    word_path = './word_count_word/{}.txt'.format(target_char)
    semantics_path = './word_count_semantics/{}.txt'.format(target_char)
    pos_path = './word_count_pos/{}.txt'.format(target_char)
    merged_path = './word_count_wps/{}.txt'.format(target_char)
    count_char_merge(word_path, semantics_path, pos_path, merged_path)
运行结果:
合并后的中文词、语义、词性: