• python批量替代


    import re
    import jieba.analyse
    import codecs
    import pandas as pd
    def word_replace(xianbingshi, hospital1, output_path=None):
        """Tag hospital names in a text file with ``[hospital]``.

        The vocabulary in ``hospital1`` is de-duplicated and applied
        longest-first, so a short term never splits a longer term that
        contains it.  Each source line is tagged, stripped of surrounding
        whitespace, printed, and written to ``output_path`` followed by a
        newline.  The source file itself is only read.

        Args:
            xianbingshi: Path of the UTF-8 text file to read.
            hospital1: Path of the UTF-8 vocabulary file, one term per line.
            output_path: Path of the UTF-8 file to write.  When omitted, the
                fixed ``xianbingshi_write_sub.txt`` location of the original
                script is used.
        """
        if output_path is None:
            # NOTE(review): the published listing had lost its backslashes;
            # the directory boundaries below are reconstructed - confirm.
            output_path = (
                r'C:\Users\Administrator.SC-201812211013\PycharmProjects'
                r'\词表工作代码\yiwoqu\code\xianbingshi_write_sub.txt'
            )

        with codecs.open(hospital1, 'r', 'utf8') as f:
            # dict.fromkeys de-duplicates in linear time and keeps first-seen
            # order.  Blank entries are dropped: str.replace('', tag) would
            # insert the tag between every character of the line.
            terms = dict.fromkeys(t for t in (raw.strip() for raw in f) if t)
        hospital = sorted(terms, key=len, reverse=True)

        # Tag priority stated by the original author:
        # hospital, operation, test, symptom, disease, organ, time.
        data = []
        with codecs.open(xianbingshi, 'r', 'utf8') as f:
            for line in f:
                for ho in hospital:
                    # replace() is a no-op when the term is absent, so no
                    # membership test is needed.
                    line = line.replace(ho, '[hospital]')
                line = line.strip()
                data.append(line)
                print(line)

        with codecs.open(output_path, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    
    def word_replace3(xianbingshi2, operation1):
        """Tag operation terms in a text file with ``[operation]``, in place.

        The vocabulary in ``operation1`` is de-duplicated and applied
        longest-first.  Every line of ``xianbingshi2`` is tagged, stripped of
        surrounding whitespace, printed, and the file is then rewritten with
        one such line per row.

        Args:
            xianbingshi2: Path of the UTF-8 text file to rewrite in place.
            operation1: Path of the UTF-8 vocabulary file, one term per line.
        """
        with codecs.open(operation1, 'r', 'utf8') as f:
            # Linear-time, order-preserving de-duplication.  Blank entries are
            # dropped: str.replace('', tag) would insert the tag between every
            # character of the line.
            terms = dict.fromkeys(t for t in (raw.strip() for raw in f) if t)
        operation = sorted(terms, key=len, reverse=True)

        # Tag priority stated by the original author:
        # hospital, operation, test, symptom, disease, organ, time.
        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            for line in f:
                for op in operation:
                    line = line.replace(op, '[operation]')
                line = line.strip()
                data.append(line)
                print(line)

        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace1(xianbingshi2, disease1):
        """Tag multi-character disease terms with ``[disease]``, in place.

        Only vocabulary terms longer than one character are used.  Terms are
        de-duplicated and applied longest-first.  Every line of
        ``xianbingshi2`` is tagged, stripped of surrounding whitespace,
        printed, and the file is then rewritten with one such line per row.

        Args:
            xianbingshi2: Path of the UTF-8 text file to rewrite in place.
            disease1: Path of the UTF-8 vocabulary file, one term per line.
        """
        with codecs.open(disease1, 'r', 'utf8') as f:
            # Filter once while loading instead of re-checking the length for
            # every term on every line; dict.fromkeys de-duplicates in linear
            # time and keeps first-seen order.
            terms = dict.fromkeys(
                t for t in (raw.strip() for raw in f) if len(t) > 1
            )
        disease = sorted(terms, key=len, reverse=True)

        # Tag priority stated by the original author:
        # hospital, operation, test, symptom, disease, organ, time.
        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            for line in f:
                for di in disease:
                    line = line.replace(di, '[disease]')
                line = line.strip()
                data.append(line)
                print(line)

        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace2(xianbingshi2, symptom1):
        """Tag multi-character symptom terms with ``[symptom]``, in place.

        Only vocabulary terms longer than one character are used.  Terms are
        de-duplicated and applied longest-first.  Every line of
        ``xianbingshi2`` is tagged, stripped of surrounding whitespace,
        printed, and the file is then rewritten with one such line per row.

        Args:
            xianbingshi2: Path of the UTF-8 text file to rewrite in place.
            symptom1: Path of the UTF-8 vocabulary file, one term per line.
        """
        with codecs.open(symptom1, 'r', 'utf8') as f:
            # Filter once while loading instead of re-checking the length for
            # every term on every line; dict.fromkeys de-duplicates in linear
            # time and keeps first-seen order.
            terms = dict.fromkeys(
                t for t in (raw.strip() for raw in f) if len(t) > 1
            )
        symptom = sorted(terms, key=len, reverse=True)

        # Tag priority stated by the original author:
        # hospital, operation, test, symptom, disease, organ, time.
        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            for line in f:
                for sy in symptom:
                    line = line.replace(sy, '[symptom]')
                line = line.strip()
                data.append(line)
                print(line)

        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    
    def word_replace4(xianbingshi2, test1):
        """Tag examination/test terms in a text file with ``[test]``, in place.

        The vocabulary in ``test1`` is de-duplicated and applied
        longest-first.  Every line of ``xianbingshi2`` is tagged, stripped of
        surrounding whitespace, printed, and the file is then rewritten with
        one such line per row.

        Args:
            xianbingshi2: Path of the UTF-8 text file to rewrite in place.
            test1: Path of the UTF-8 vocabulary file, one term per line.
        """
        with codecs.open(test1, 'r', 'utf8') as f:
            # Linear-time, order-preserving de-duplication.  Blank entries are
            # dropped: str.replace('', tag) would insert the tag between every
            # character of the line.
            terms = dict.fromkeys(t for t in (raw.strip() for raw in f) if t)
        test = sorted(terms, key=len, reverse=True)

        # Tag priority stated by the original author:
        # hospital, operation, test, symptom, disease, organ, time.
        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            for line in f:
                for te in test:
                    line = line.replace(te, '[test]')
                line = line.strip()
                data.append(line)
                print(line)

        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace5(xianbingshi2, time1):
        """Tag time expressions in a text file with ``[time]``, in place.

        The vocabulary in ``time1`` is de-duplicated and applied
        longest-first.  Every line of ``xianbingshi2`` is tagged, stripped of
        surrounding whitespace, printed, and the file is then rewritten with
        one such line per row.

        Args:
            xianbingshi2: Path of the UTF-8 text file to rewrite in place.
            time1: Path of the UTF-8 vocabulary file, one term per line.
        """
        with codecs.open(time1, 'r', 'utf8') as f:
            # Linear-time, order-preserving de-duplication.  Blank entries are
            # dropped: str.replace('', tag) would insert the tag between every
            # character of the line.
            terms = dict.fromkeys(t for t in (raw.strip() for raw in f) if t)
        time_terms = sorted(terms, key=len, reverse=True)

        # Tag priority stated by the original author:
        # hospital, operation, test, symptom, disease, organ, time.
        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            for line in f:
                for t in time_terms:
                    line = line.replace(t, '[time]')
                line = line.strip()
                data.append(line)
                print(line)

        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace6(xianbingshi2, organ1):
        """Tag multi-character organ terms with ``[organ]``, in place.

        Only vocabulary terms longer than one character are used.  Terms are
        de-duplicated and applied longest-first.  Every line of
        ``xianbingshi2`` is tagged, stripped of surrounding whitespace,
        printed, and the file is then rewritten with one such line per row.

        Args:
            xianbingshi2: Path of the UTF-8 text file to rewrite in place.
            organ1: Path of the UTF-8 vocabulary file, one term per line.
        """
        with codecs.open(organ1, 'r', 'utf8') as f:
            # Filter once while loading instead of re-checking the length for
            # every term on every line; dict.fromkeys de-duplicates in linear
            # time and keeps first-seen order.
            terms = dict.fromkeys(
                t for t in (raw.strip() for raw in f) if len(t) > 1
            )
        organ = sorted(terms, key=len, reverse=True)

        # Tag priority stated by the original author:
        # hospital, operation, test, symptom, disease, organ, time.
        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            for line in f:
                for o in organ:
                    line = line.replace(o, '[organ]')
                line = line.strip()
                data.append(line)
                print(line)

        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace7(xianbingshi2, symptom1):
        """Tag single-character symptom terms with ``[symptom]``, in place.

        Only vocabulary terms exactly one character long are used; each
        accepted term is printed once.  Every line of ``xianbingshi2`` is
        tagged, stripped of surrounding whitespace, printed, and the file is
        then rewritten with one such line per row.

        Args:
            xianbingshi2: Path of the UTF-8 text file to rewrite in place.
            symptom1: Path of the UTF-8 vocabulary file, one term per line.
        """
        with codecs.open(symptom1, 'r', 'utf8') as f:
            # All kept terms have length 1, so no length sort is needed;
            # dict.fromkeys de-duplicates in linear time, keeping file order.
            symptom = list(dict.fromkeys(
                t for t in (raw.strip() for raw in f) if len(t) == 1
            ))
        for term in symptom:
            print(term)

        # Tag priority stated by the original author:
        # hospital, operation, test, symptom, disease, organ, time.
        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            for line in f:
                for sy in symptom:
                    line = line.replace(sy, '[symptom]')
                line = line.strip()
                data.append(line)
                print(line)

        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace8(xianbingshi2, disease1):
        """Tag single-character disease terms with ``[disease]``, in place.

        Only vocabulary terms exactly one character long are used.  Every
        line of ``xianbingshi2`` is tagged, stripped of surrounding
        whitespace, printed, and the file is then rewritten with one such
        line per row.

        Args:
            xianbingshi2: Path of the UTF-8 text file to rewrite in place.
            disease1: Path of the UTF-8 vocabulary file, one term per line.
        """
        with codecs.open(disease1, 'r', 'utf8') as f:
            # The filter must test the term's length: comparing the string
            # itself with the integer 1 is always False and would leave the
            # vocabulary empty.  All kept terms have length 1, so no length
            # sort is needed.
            disease = list(dict.fromkeys(
                t for t in (raw.strip() for raw in f) if len(t) == 1
            ))

        # Tag priority stated by the original author:
        # hospital, operation, test, symptom, disease, organ, time.
        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            for line in f:
                for di in disease:
                    line = line.replace(di, '[disease]')
                line = line.strip()
                data.append(line)
                print(line)

        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    def word_replace9(xianbingshi2, organ1):
        """Tag single-character organ terms with ``[organ]``, in place.

        Only vocabulary terms exactly one character long are used.  Every
        line of ``xianbingshi2`` is tagged, stripped of surrounding
        whitespace, printed, and the file is then rewritten with one such
        line per row.

        Args:
            xianbingshi2: Path of the UTF-8 text file to rewrite in place.
            organ1: Path of the UTF-8 vocabulary file, one term per line.
        """
        with codecs.open(organ1, 'r', 'utf8') as f:
            # The filter must test the term's length: comparing the string
            # itself with the integer 1 is always False and would leave the
            # vocabulary empty.  All kept terms have length 1, so no length
            # sort is needed.
            organ = list(dict.fromkeys(
                t for t in (raw.strip() for raw in f) if len(t) == 1
            ))

        # Tag priority stated by the original author:
        # hospital, operation, test, symptom, disease, organ, time.
        data = []
        with codecs.open(xianbingshi2, 'r', 'utf8') as f:
            for line in f:
                for o in organ:
                    line = line.replace(o, '[organ]')
                line = line.strip()
                data.append(line)
                print(line)

        with codecs.open(xianbingshi2, 'w', 'utf8') as f:
            f.write(''.join(line + '\n' for line in data))
    if __name__ == '__main__':
        # NOTE(review): the published listing had lost every backslash in
        # these Windows paths (a tab character stood where "\t" used to be).
        # The directory boundaries below are reconstructed - confirm them
        # against the real project layout.
        base = (
            r'C:\Users\Administrator.SC-201812211013\PycharmProjects'
            r'\词表工作代码\yiwoqu'
        )

        # Vocabulary files, one term per line.
        disease1 = base + r'\TXT\disease_0903.txt'
        organ1 = base + r'\TXT\organ_0903.txt'
        test1 = base + r'\TXT\test_0903.txt'
        time1 = base + r'\time1.txt'
        operation1 = base + r'\TXT\operation_0903.txt'
        symptom1 = base + r'\code\症状.txt'
        hospital1 = base + r'\TXT\hospital_0903.txt'

        # Source text and the working copy that every later stage rewrites.
        xianbingshi = base + r'\code\xianbingshi_write.txt'
        xianbingshi2 = base + r'\code\xianbingshi_write_sub.txt'

        # Stage 1 reads the source text and creates the working copy; the
        # remaining stages rewrite that copy in place.
        # NOTE(review): the in-function comments state the priority as
        # hospital, operation, test, symptom, disease, organ, time, but the
        # calls below run disease and symptom before test - confirm intent.
        word_replace(xianbingshi, hospital1)
        word_replace3(xianbingshi2, operation1)
        word_replace1(xianbingshi2, disease1)
        word_replace2(xianbingshi2, symptom1)
        word_replace4(xianbingshi2, test1)
        # Time tagging is disabled in the original script.
        # word_replace5(xianbingshi2, time1)
        word_replace6(xianbingshi2, organ1)
        # Single-character terms go last, after all multi-character ones.
        word_replace7(xianbingshi2, symptom1)
        word_replace8(xianbingshi2, disease1)
        word_replace9(xianbingshi2, organ1)
  • 相关阅读:
    StrToInt && StrToHex && IntToString && 编辑框格式 .
    DAO简介(MFC) .
    RedrawWindow, UpdateWindow,InvalidateRect 用法
    在MFC中获取窗口\视图句柄 &获取当前活动的CView .
    DAO在MFC中的应用实例 .
    捕获异常 .
    ubuntu下chm和PDF阅读器 « 小居
    QQ登陆功能的实现2 rolends1986 博客园
    【原创】说说JSON和JSONP,也许你会豁然开朗,含jQuery用例 随它去吧 博客园
    PDF Ubuntu中文
  • 原文地址:https://www.cnblogs.com/yiwoqu/p/11542059.html
Copyright © 2020-2023  润新知