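# Currently sentences are built by substituting words from fixed lists;
# segmenting the words by hand is tedious.
# Later on, sentences still need to be crawled and segmented automatically.
# The number of words produced by jieba segmentation is hard to control, sigh.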
import random
import sys

# Pool of two-character words used to fill "xx" (and lone "x") placeholders.
# "烟火" and "红颜" each appear twice, so random.choice picks them twice as
# often as the other entries; the duplicates are kept to preserve that
# weighting.
two_chars_words = [
    "朱砂", "天下", "杀伐", "人家", "韶华", "风华", "繁华", "血染", "墨染", "白衣",
    "素衣", "嫁衣", "倾城", "孤城", "空城", "旧城", "旧人", "伊人", "心疼", "春风",
    "古琴", "无情", "迷离", "奈何", "断弦", "焚尽", "散乱", "陌路", "乱世", "笑靥",
    "浅笑", "明眸", "轻叹", "烟火", "一生", "三生", "浮生", "桃花", "梨花", "落花",
    "烟花", "离殇", "情殇", "爱殇", "剑殇", "灼伤", "仓皇", "匆忙", "陌上", "清商",
    "焚香", "墨香", "微凉", "断肠", "痴狂", "凄凉", "黄梁", "未央", "成双", "无恙",
    "虚妄", "凝霜", "洛阳", "长安", "江南", "忘川", "千年", "纸伞", "烟雨", "回眸",
    "公子", "红尘", "红颜", "红衣", "红豆", "红线", "青丝", "青史", "青冢", "白发",
    "白首", "白骨", "黄土", "黄泉", "碧落", "紫陌", "如花", "相思", "沧笙", "亘古",
    "殷红", "揉碎", "年少", "烟火", "泪痕", "红颜", "踏歌", "梦醒", "美人", "戏子",
]

# Pool of four-character phrases used to fill "xxxx" placeholders.
four_chars_words = [
    "情深缘浅", "情深不寿", "莫失莫忘", "阴阳相隔", "如花美眷", "似水流年", "眉目如画",
    "曲终人散", "繁华落尽", "不诉离殇", "一世长安", "百媚千红", "情之所钟", "阴晴圆缺",
    "悲欢离合", "此去经年", "天涯海角", "相濡以沫", "静水流深", "悠悠琴声", "秋锁寒烟",
]

# Sentence templates. Each run of "x" characters is a placeholder that
# get_sentence() replaces with randomly chosen words.
sentence_model = [
    "xx,xx,xx了xx。",
    "xxxx,xxxx,不过是一场xxxx。",
    "你说xxxx,我说xxxx,最后不过xxxx。",
    "xx,xx,许我一场xxxx。",
    "一x一x一xx,半x半x半xx。",
    "你说xxxxxxxx,后来xxxxxxxx。",
    "xxxx,xxxx,终不敌xxxx。",
    "为君沦xx,乱舞在xx。",
    "用我xxxx,换你xxxx。",
    "我自是xx,xxxx。",
    "终是谁使xx,xxxx,xxxx。",
    "多少xx,多少xx,唯留xxxx。",
    "任他xxxx,为你一笑xxxx。",
    "谁将xxxx,散了xxxx。",
    "xxxx,xxxx。三生xxxx,一朝xxxx。",
    "风华是xxxx,苍老是xxxx。",
    "几段唏嘘xxxx,可笑我命xxxx。",
    "xx,xx,我终于看见xxxx,那是xxxx。",
    "谁应了谁的xx,又变成了谁的xx。",
]


def get_sentence() -> str:
    """Return one randomly generated sentence.

    A template is picked at random from ``sentence_model`` and its
    placeholders are filled in left to right, one random word per
    occurrence: ``"xxxx"`` from ``four_chars_words``, then ``"xx"`` and
    finally any remaining single ``"x"`` from ``two_chars_words``.
    """
    model = random.choice(sentence_model)
    # Longest placeholder first: "xx" and "x" also match inside "xxxx", so
    # the longer runs have to be consumed before the shorter ones.
    # NOTE(review): a lone "x" is filled with a two-character word because
    # no single-character list exists in this file -- confirm that this is
    # the intended output for templates such as "一x一x一xx,...".
    fillers = (
        ("xxxx", four_chars_words),
        ("xx", two_chars_words),
        ("x", two_chars_words),
    )
    for placeholder, pool in fillers:
        while placeholder in model:
            # count=1 so that every occurrence gets its own random word.
            model = model.replace(placeholder, random.choice(pool), 1)
    return model


# Write 50 generated sentences, each followed by a space, to demo.txt.
# NOTE: this runs at import time, as in the original script.
with open('demo.txt', 'w', encoding="utf-8") as f:
    f.writelines(get_sentence() + " " for _ in range(50))
print("Done!")