Python 字串处理

#!/usr/bin/python
# -*- coding: utf-8 -*-

import os
import sys
import re
import shutil
import xlrd
import xlwt
import getopt
import math 
from xlutils.copy import copy


'''
脚本使用：
    设置strUiPorject ui项目名称，取值如下 "mstar"/"mstar_atv"/"formal"/"formal_grey"/"haier"/"videocon"/"bbk"/"atv_project"
    删除无用字串：             ./genstr -d
    特殊标记的字串优先排序：    ./genstr -p
    给已整理好优先级高的字串添加strMark：    ./genstr -a
    读取 优先级字串整理.h 中的字串进行比对添加标记（未整理）./genstr -c 

脚本功能：
    1、根据strUiPorject设置的UI名称，迭代过滤UI目录的所有源文件和头文件，获取项目使用字串总数，并删除UIL多余字串
    2、对某一种语言做特殊标记strMark，标记的字串会放在翻译的最后面
    3、mstar优先级字串整理在mstar优先字串整理.h，已使用字串整理在mstar已使用字串整理.h中,
    formal优先级字串整理在formal优先字串整理.h
执行过程：
    1、设置UI项目名称
    2、根据UI名称，配置过滤路径和UIL删除路径(filterPath/strUilPath)
    3、再根据配置的路径执行过滤和删除动作

注意：
    SourceCode中，有一些是 TV_IDS_String_ID+Offset方式获取新字串的，这些字串要手动加到脚本，以防误删
        如：TV_IDS_String_GMT_0 字串
'''

# ======= Select the UI project here ===================================== #
# One of: "mstar" / "formal" / "formal_grey" / "mstar_atv" / "haier" /
#         "videocon" / "bbk" / "atv_project"
strUiPorject = "formal_grey"
# ========================================================================= #

# Mode flags, all off by default; fun_parse_InputParam() switches them on.
g_deleteMode = g_priorityMode = g_AddmarkMode = g_CompareMode = 0

# Shared state used by the processing functions.
setStr = set()
tupleStr = ()
strMark = "aaaa"

# ======= Nothing below needs to be edited ================================ #
# Projects whose source directory or UI project directory differs from the
# default "<project>" / "TV_UIProject" layout: name -> (app dir, UI dir).
_SPECIAL_LAYOUTS = {
    "bbk": ("formal", "TV_UIProject"),
    "formal": ("formal", "TV_UIProject_new"),
    "formal_grey": ("formal", "TV_UIProject_grey"),
    "mstar_atv": ("mstar", "TV_UIProject_atv"),
}
_appDir, _uiDir = _SPECIAL_LAYOUTS.get(strUiPorject, (strUiPorject, "TV_UIProject"))

filterPath = "aps/application/radisson/%s" % _appDir
strUilPath = "aps/application/radisson/%s/UI_Project/%s/Languages" % (_appDir, _uiDir)
gamePath = "aps/game"

# Directories walked when collecting the string IDs used by the sources.
listPath = [filterPath, gamePath]

def filterUsefulString():
    """Collect the string IDs referenced by the project sources.

    Walks every directory in ``listPath`` and scans each file whose name
    contains ``.c`` or ``.h`` (``TV_strid.h`` is skipped). Every
    ``TV_IDS_String*`` identifier found is added to the module-level
    ``setStr`` without its ``TV_`` prefix; a line mentioning
    ``TV_IDS_Game_Menu_OSD_String`` adds ``IDS_Game_Menu_OSD_String``.

    The collected IDs are then written, sorted and one per line, to
    ``<strUiPorject>已使用字串整理.h`` in the current directory, and a
    summary is printed.
    """
    idPattern = re.compile(r"TV_(IDS_String\w*)")
    # A dot followed by "c" or "h" anywhere in the name (.c, .cpp, .h, .hpp, ...).
    sourcePattern = re.compile(r"\.[ch]")

    for path in listPath:
        for dirPath, dirNames, fileNames in os.walk(path):
            for sourceFile in fileNames:
                if sourceFile == "TV_strid.h" or not sourcePattern.search(sourceFile):
                    continue
                with open(os.path.join(dirPath, sourceFile), "r") as srcFile:
                    for line in srcFile:
                        # findall picks up every ID on the line, so lines with
                        # three or more IDs are no longer skipped (a skipped ID
                        # would be treated as unused by deleteString()).
                        setStr.update(idPattern.findall(line))
                        if "TV_IDS_Game_Menu_OSD_String" in line:
                            setStr.add("IDS_Game_Menu_OSD_String")

    UsedStrfilename = strUiPorject + "已使用字串整理.h"
    print("\n\n程序中共使用 %d 个字串。\n保存在当前目录 %s-文件中\n\n" % (len(setStr), UsedStrfilename))
    with open(UsedStrfilename, "w") as outFile:
        outFile.writelines([strId + "\n" for strId in sorted(setStr)])

'''
    #读EXCEL到映射表
def excelSetting()
    setElStr = set()
    mapStr = {}
    listFirst = []
    setDiff = set()
    book = xlrd.open_workbook(r'Languages.xls')
    sheet = book.sheet_by_index(0)

    listFirst = sheet.row_values(0)

    for row in range(sheet.nrows):
        cellStr = str(sheet.cell(row,0).value)
        cellStr.rstrip()
        if cellStr in setStr:
            mapStr[cellStr] = sheet.row_values(row)

    #setElStr = set(mapStr.keys())
    #setDiff = setElStr - setStr

    #写EXCEL
    wboot = xlwt.Workbook()
    sheet = wboot.add_sheet("Language")
    #操作第一行，抬头
    for col in range(len(listFirst)):
        sheet.write(0,col,listFirst[col])

    #其它行
    row = 1
    for (k,v) in mapStr.items():
        for col in range(len(v)):
            sheet.write(row,col,v[col])
        row = row + 1
    wboot.save(r'Language_.xls')
    
        
'''
def deleteString():
    """Rewrite every file under ``strUilPath`` without the unused strings.

    A ``<String ID="...">`` line is dropped when its ID is not in the
    module-level ``setStr``. The following lines are always kept:

    * lines containing ``IDS_String_GMT_`` or ``IDS_String_LNB``, and lines
      whose ID ends in ``IDS_String_`` plus one or two digits -- these
      strings are reached in code through an ID offset, so they never show
      up by name in the sources;
    * lines that are not ``<String ID=`` entries;
    * ``<String ID=`` lines whose ID cannot be parsed (kept rather than
      crashing or deleting something unrecognised).

    Prints the number of removed lines for each file.
    """
    idPattern = re.compile(r'<String ID="(\w*)"')
    offsetIdPattern = re.compile(r'IDS_String_\d{1,2}"')

    for dirPath, dirNames, fileNames in os.walk(strUilPath):
        for sourceFile in fileNames:
            filePath = os.path.join(dirPath, sourceFile)
            lanList = []
            delCount = 0
            with open(filePath, "r") as uilFile:
                for line in uilFile:
                    usedByOffset = (
                        "IDS_String_GMT_" in line
                        or "IDS_String_LNB" in line
                        or offsetIdPattern.search(line)
                    )
                    if usedByOffset or "<String ID=" not in line:
                        lanList.append(line)
                        continue
                    found = idPattern.search(line)
                    if found is None or found.group(1) in setStr:
                        lanList.append(line)
                    else:
                        delCount += 1
            with open(filePath, "w") as uilFile:
                uilFile.writelines(lanList)
            print(sourceFile + "删除 %s" % delCount)
            
def priorityString():
    """Move the strings marked in English.uil to the end of every UIL file.

    1. Reads ``<strUilPath>/English.uil`` and collects the ID of every line
       that contains ``strMark`` and a quoted ``IDS_String*`` ID.
    2. Appends those IDs, one per line, to ``<strUiPorject>优先字串整理.h``
       in the current directory and prints them.
    3. For each file under ``strUilPath``: lines whose ID was collected have
       ``strMark`` removed and are placed, after any
       ``IDS_String_spliteLine`` lines, just before the file's last
       remaining line. A file is only rewritten when it has at least one
       collected line and at least three other lines.
    """
    pat = re.compile(r'.*"(IDS_String\w*)".*')

    lanFist = []
    with open(strUilPath + "/English.uil", "r") as englishFile:
        for line in englishFile:
            found = pat.search(line)
            if found and strMark in line:
                lanFist.append(found.group(1))

    PrStrfilename = strUiPorject + "优先字串整理.h"
    with open(PrStrfilename, "a") as priorityFile:
        priorityFile.writelines([x + "\n" for x in lanFist])
    print("优先级字串共%d，如下：" % len(lanFist))
    print(lanFist)
    print("\n\n优先字串共 %d 个。\n保存在当前目录 %s-文件中\n\n" % (len(lanFist), PrStrfilename))

    markedIds = set(lanFist)  # constant-time membership tests in the loop below
    for dirPath, dirNames, fileNames in os.walk(strUilPath):
        for sourceFile in fileNames:
            filePath = os.path.join(dirPath, sourceFile)
            # Fresh lists per file, so split lines from a file that is not
            # rewritten cannot leak into the next one.
            markedLines, splitLines, otherLines = [], [], []
            with open(filePath, "r") as uilFile:
                for line in uilFile:
                    found = pat.search(line)
                    if found and found.group(1) in markedIds:
                        markedLines.append(line.replace(strMark, ""))
                    elif "IDS_String_spliteLine" in line:
                        splitLines.append(line)
                    else:
                        otherLines.append(line)
            if markedLines and len(otherLines) >= 3:
                # Keep the last remaining line (presumably the closing tag --
                # confirm against the UIL format) at the very end.
                reordered = otherLines[:-1] + splitLines + markedLines + otherLines[-1:]
                with open(filePath, "w") as uilFile:
                    uilFile.writelines(reordered)

def AddMark():
    """Prefix with ``strMark`` every string value that follows the split line.

    Rewrites ``<strUilPath>/English.uil`` in place: each ``<String ID=`` line
    that comes after the ``IDS_String_spliteLine`` entry gets ``strMark``
    inserted right after ``Value="``. Lines before the split line, the split
    line itself, and non-string lines are left unchanged.
    """
    englishPath = strUilPath + "/English.uil"
    StringList = []
    afterSplitLine = False
    with open(englishPath, "r") as uilFile:
        for line in uilFile:
            if "<String ID=" in line:
                if "IDS_String_spliteLine" in line:
                    afterSplitLine = True
                elif afterSplitLine:
                    line = line.replace('Value="', 'Value="' + strMark)
            StringList.append(line)
    with open(englishPath, "w") as uilFile:
        uilFile.writelines(StringList)
    print("\n添加StrMark完成\n")

def CompareAddMark():
    """Mark the English.uil strings listed in ``formal优先字串整理.h``.

    Reads string IDs (``IDS_String_*``) from ``formal优先字串整理.h`` in the
    current directory, then rewrites ``<strUilPath>/English.uil`` in place,
    inserting ``strMark`` right after ``Value="`` on every ``<String ID=``
    line whose ID is in that list. Lines of the list file without an ID
    (e.g. blank lines) and ``<String ID=`` lines whose ID cannot be parsed
    are skipped instead of raising.

    NOTE(review): the list file name is fixed to the "formal" project, while
    priorityString() writes ``<strUiPorject>优先字串整理.h`` -- confirm
    whether this should follow ``strUiPorject`` as well.
    """
    listIdPattern = re.compile(r"IDS_String_\w*")
    uilIdPattern = re.compile(r'<String ID="(\w*)"')

    strSet = set()
    with open('formal优先字串整理.h', "r") as listFile:
        for line in listFile:
            found = listIdPattern.search(line)
            if found:
                strSet.add(found.group(0))
    print(strSet)

    englishPath = strUilPath + "/English.uil"
    strList = []
    with open(englishPath, "r") as uilFile:
        for line in uilFile:
            if "<String ID=" in line:
                found = uilIdPattern.search(line)
                if found and found.group(1) in strSet:
                    line = line.replace('Value="', 'Value="' + strMark)
            strList.append(line)
    with open(englishPath, "w") as uilFile:
        uilFile.writelines(strList)
    
def fun_parse_InputParam():
    """Set the module-level mode flags from the command-line options.

    Recognised options (any combination):
        -d  sets g_deleteMode
        -p  sets g_priorityMode
        -a  sets g_AddmarkMode
        -c  sets g_CompareMode

    Positional arguments are ignored.

    Raises:
        SystemExit: with status 2 when an unknown option is given; the
            getopt error message is written to stderr first.
    """
    global g_deleteMode
    global g_priorityMode
    global g_AddmarkMode
    global g_CompareMode
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'dpac')
    except getopt.GetoptError as err:
        # Report the bad option and exit non-zero instead of exiting silently.
        sys.stderr.write("%s\n" % err)
        sys.exit(2)

    for op, value in opts:
        if op == "-d":
            g_deleteMode = 1
        elif op == "-p":
            g_priorityMode = 1
        elif op == "-a":
            g_AddmarkMode = 1
        elif op == "-c":
            g_CompareMode = 1
        else:
            # getopt only returns declared options; kept as a safeguard.
            print("unhandled option")
            sys.exit(2)

            
if __name__ == "__main__":
    # Parse the command line first so the mode flags are up to date.
    fun_parse_InputParam()

    # Each mode flag paired with the steps it enables; modes run in this
    # fixed order and several may be combined in one invocation.
    modeSteps = (
        (g_deleteMode, (filterUsefulString, deleteString)),
        (g_priorityMode, (priorityString,)),
        (g_AddmarkMode, (AddMark,)),
        (g_CompareMode, (CompareAddMark,)),
    )
    for modeEnabled, steps in modeSteps:
        if not modeEnabled:
            continue
        for step in steps:
            step()
相关阅读:
ImageLoader
Matrix(单点移动，多点缩放)
自定义各种图形
 自定义圆形图片
 将博客搬至CSDN
拉格朗日乘子法的证明
 周志华《机器学习》课后答案——第4章.决策树
 [转] 现实•理论•证据──谈如何做研究和写论文
 高维度下的数据科学——线性空间（下）
线性模型——异方差、序列相关、多重共线性与内生性的处理
原文地址：https://www.cnblogs.com/jiangzhaowei/p/9278403.html