• 5章代码


    5.3  获得汉字长度和分解

    #coding=utf8
    import arcpy
    
    import os
    import sys
    import math
    import string
    from collections import namedtuple
    
    #判断字符串是否为汉字
    def isChineseWord(string):
        """Report whether ``string`` consists solely of Chinese (CJK) characters.

        A short classification message is printed (kept from the original for
        tracing) before the verdict is returned.

        Args:
            string: Text to classify, normally a single character. Pass a
                unicode string; the individual bytes of an encoded string can
                never fall inside the CJK code-point ranges.

        Returns:
            True when the input is non-empty and every character is a CJK
            ideograph; False for letters of other scripts, digits,
            punctuation, whitespace and empty input.
        """
        if not string.isalpha():
            print("不是汉字也不是字母")
            return False
        # ord() only accepts one character, so inspect the characters one by
        # one; this also lets multi-character input be classified instead of
        # raising TypeError.
        for ch in string:
            code = ord(ch)
            # CJK Unified Ideographs, Extension A and Compatibility Ideographs.
            is_cjk = (0x4E00 <= code <= 0x9FFF
                      or 0x3400 <= code <= 0x4DBF
                      or 0xF900 <= code <= 0xFAFF)
            if not is_cjk:
                # Alphabetic but not Chinese (ASCII, accented, Cyrillic, ...).
                print("是字母")
                return False
        print("是汉字")
        return True
    #返回汉字数量和非汉字数量
    def getChinesenum(s):
        """Count the Chinese and the non-Chinese characters of ``s``.

        Args:
            s: The text to examine; each character is passed to
                ``isChineseWord``.

        Returns:
            A ``(chinese, other)`` tuple where ``other`` is ``len(s)`` minus
            the number of characters accepted by ``isChineseWord``.
        """
        total = len(s)
        chinese = sum(1 for ch in s if isChineseWord(ch))
        return chinese, total - chinese
    
    
    def str_count(s):
        """Count the character classes found in ``s``.

        Every character is assigned to exactly one class, tested in this
        order: ASCII letter, digit, whitespace, any other alphabetic character
        (reported as Chinese), and finally everything else (punctuation).

        Args:
            s: The text to analyse. Under Python 2 pass a unicode string so
                that iteration yields characters rather than bytes.

        Returns:
            A ``Count`` named tuple with the fields
            ``(total, zh, en, space, digit, punc)``.
        """
        Count = namedtuple('Count', ['total', 'zh', 'en', 'space', 'digit', 'punc'])
        count_en = count_dg = count_sp = count_zh = count_pu = 0
        for c in s:
            if c in string.ascii_letters:  # English letter
                count_en += 1
            elif c.isdigit():  # digit
                count_dg += 1
            elif c.isspace():  # whitespace
                count_sp += 1
            elif c.isalpha():  # remaining alphabetic characters: counted as Chinese
                count_zh += 1
            else:  # punctuation and anything else
                count_pu += 1
        # Each character lands in exactly one bucket, so the buckets always sum
        # to len(s); the former sanity check and its fall-through returns could
        # never be reached and were removed.
        return Count(len(s), count_zh, count_en, count_sp, count_dg, count_pu)
    
    #汉字长度
    def getcharlength(mystr):
        """Return the display width of ``mystr``, counting a Chinese character as 2.

        The width is derived from the UTF-8 byte length and the character
        count, so a 3-byte character (such as a Chinese ideograph) is worth
        two columns and an ASCII character one.

        Args:
            mystr: Either UTF-8 encoded bytes (a plain Python 2 ``str``) or a
                unicode/text string. Both forms of the same text give the same
                result; previously text input went through an implicit
                re-encoding that could fail or yield a different measure.

        Returns:
            The width as an int.

        Raises:
            UnicodeDecodeError: If byte input is not valid UTF-8.
        """
        if isinstance(mystr, bytes):
            # Encoded input: decode to learn the number of characters.
            byte_len = len(mystr)
            char_len = len(mystr.decode('utf-8'))
        else:
            # Text input: encode to learn the number of UTF-8 bytes.
            char_len = len(mystr)
            byte_len = len(mystr.encode('utf-8'))
        # For one 3-byte character: (1 - 3) // 2 + 3 == 2 columns.
        # Floor division keeps the result an int on Python 2 and 3 alike.
        return (char_len - byte_len) // 2 + byte_len
    
    def getlength():
        """Demonstrate how string lengths and slices differ with and without the u prefix.

        Each step builds a plain or a u-prefixed literal of the same text,
        measures or slices it, and reports the outcome via arcpy.AddMessage.
        The per-line remarks below are the tutorial author's observations
        (translated); they describe the Python 2 / ArcGIS runtime the tutorial
        targets and have not been re-verified here.
        """
        ss="我们的gis"
        num=len(ss)
        # Author's note: prefixing the format string with u makes no difference
        # here; for the un-prefixed literal len() counted 3 per Chinese character.
        arcpy.AddMessage(u"{1} length={0}".format(num,ss))
        arcpy.AddMessage("{1} length={0}".format(num,ss))
        # Author's note: with the u prefix each Chinese character counts as 1.
        ss=u"我们的gis"
        num=len(ss)
        arcpy.AddMessage(u"ss={1} length={0}".format(num,ss))
        arcpy.AddMessage("ss={1} length={0}".format(num,ss))
    
        # Same comparison through getcharlength().
        # Author's note: with the u prefix a Chinese character counted as 1 --
        # this depends on how getcharlength handles unicode input; verify.
        ss=u"我们的gis"
        num=getcharlength(ss)
        arcpy.AddMessage("{1} length={0}".format(num,ss))
        # Author's note: without the u prefix a Chinese character counted as 2.
        ss="我们的gis"
        num=getcharlength(ss)
        arcpy.AddMessage("{1} length={0}".format(num,ss))
    
        # Author's note: the u prefix is required here (the counting helpers
        # classify the string character by character).
        s = u'我们的gis gisoracle 2019.11.07'
        count = str_count(s)
        arcpy.AddMessage(s)
        arcpy.AddMessage('该字符串共有 {} 个字符,其中有 {} 个汉字,{} 个英文,{} 个空格,{} 个数字,{} 个标点符号。'.format(count.total, count.zh, count.en, count.space, count.digit, count.punc))
        cnum,enum=getChinesenum(s)
        arcpy.AddMessage('该字符串共有 {} 汉字字符,非汉字{}'.format(cnum,enum))
        # Take the leftmost 4 items of the u-prefixed literal.
        s=u"我们的gis"
        arcpy.AddMessage('该字符串 {} 取左边4个{}'.format(s,s[0:4]))
        # Author's note: without the u prefix the slice below does not extract
        # the expected characters, so the prefix is required.
        s="我们的gis"
        arcpy.AddMessage('该字符串 {} 取左边4个{}'.format(s,s[0:4]))
    
    def printinfo():
        """Demonstrate padded formatting of Chinese text with and without the u prefix.

        The function must not be named ``print`` (author's note). The per-line
        remarks are the tutorial author's observations (translated) and have
        not been re-verified here.
        """
        # Author's note: the output is the same with or without the u prefix.
        arcpy.AddMessage("====================我们都是Python程序员=============")
        arcpy.AddMessage(u"====================我们都是Python程序员=============")
        # Left-align the text in a 20-wide field using str.format.
        arcpy.AddMessage("{0:<20s}1".format(u"我们都是程序员"))
        # Author's note: this variant needs the u prefix on the argument.
        arcpy.AddMessage("{0:<20s}12345678901234567890".format("我们都是程序员"))
        # The same left-alignment using %-formatting.
        arcpy.AddMessage("%-20s12345678901234567890"%(u"我们都是程序员"))
        # Author's note: this variant needs the u prefix on the argument.
        arcpy.AddMessage("%-20s12345678901234567890"%("我们都是程序员"))
        # Author's workaround for garbled Chinese output: encode to GBK first.
        arcpy.AddMessage(u"没有对应数据".encode('gbk'))
    #把字符串按汉字固定2位截取
    def gettrim(s,n):
        """Split ``s`` where its accumulated display width exceeds ``n``.

        Characters accepted by ``isChineseWord`` occupy 2 columns, all others
        occupy 1.

        Args:
            s: The text to split.
            n: The maximum width of the leading part.

        Returns:
            A ``(first, after)`` tuple: the characters seen while the running
            width was still within ``n``, and the remaining characters.
        """
        width = 0
        head = []
        tail = []
        for ch in s:
            width += 2 if isChineseWord(ch) else 1
            # The running width only grows, so once it passes n every later
            # character goes to the tail.
            (head if width <= n else tail).append(ch)
        return "".join(head), "".join(tail)
    def strtrim():
        """Show ``gettrim`` at width 6 on a u-prefixed and on a plain literal."""
        # The same text twice: first as a unicode literal, then as a plain
        # literal (author's note: without u a Chinese character takes 3 positions).
        for s in (u"我们1的gis1", "我们1的gis1"):
            first, after = gettrim(s, 6)
            arcpy.AddMessage('{} 取6位,前{} 后{}'.format(s, first, after))
    
    def Main():
        """Run the length, trimming and formatting demonstrations in order."""
        for demo in (getlength, strtrim, printinfo):
            demo()
    # Executed as soon as the script is loaded.
    Main()

    5.4  表中读写汉字

    #coding=utf8
    import arcpy
    import os
    import sys
    import math
    def readHZ():
        """Report every value of column ``inField`` in ``inTable``.

        Reads the module-level ``inTable`` and ``inField`` and emits one
        message per row, numbered from 1.
        """
        with arcpy.da.SearchCursor(inTable, [inField]) as rows:
            for index, record in enumerate(rows, 1):
                arcpy.AddMessage(u"序号{0}, 字段={1}, 值={2}".format(index, inField, record[0]))
    
    def updateHZ():
        """Append a Chinese label and a running number to every ``inField`` value.

        Reads the module-level ``inTable`` and ``inField``. Each value gets the
        label followed by the 1-based row number and is cut to 50 characters
        before being written back (assumes the field can hold 50 characters --
        not checked here).
        """
        with arcpy.da.UpdateCursor(inTable, [inField]) as cursor:
            for i, row in enumerate(cursor, 1):
                # A NULL cell is returned as None; treat it as empty text
                # instead of failing on None + str.
                value = row[0] if row[0] is not None else u""
                # Unicode literal: avoids relying on implicit decoding of a
                # non-ASCII byte string when it is joined to the cursor's text.
                row[0] = (value + u" 长度" + str(i))[0:50]
                cursor.updateRow(row)
    def insertHZ():
        """Insert two sample rows with Chinese text into ``inField`` of ``inTable``.

        Reads the module-level ``inTable`` and ``inField``. The inserted values
        are the numbers 0 and 100, each followed by the Chinese sample text.
        """
        cursor = arcpy.da.InsertCursor(inTable, [inField])
        try:
            for x in range(0, 2):
                # The row must be a sequence, hence the one-element tuple.
                # A unicode literal keeps the text independent of the default
                # codec (author's note: output is the same with or without u).
                cursor.insertRow((str(x*100) + u"我爱你",))
        finally:
            # Always release the cursor, even when an insert fails.
            del cursor
    def main():
        """Run the read, update and insert demonstrations in that order."""
        for step in (readHZ, updateHZ, insertHZ):
            step()

    # Script parameters: 0 is stored as the table, 1 as the field name.
    inTable = arcpy.GetParameterAsText(0)
    inField = arcpy.GetParameterAsText(1)
    main()

    5.5  读写文本文件中汉字

    #coding=utf8
    import arcpy
    
    import os
    import sys
    import math
    import codecs
    def ReadTXTNEW(txtFile, encoding='gbk'):
        """Read a text file and return its lines as decoded (unicode) strings.

        Args:
            txtFile: Path of the file to read.
            encoding: Codec used to decode the file; defaults to ``'gbk'`` as
                before, pass ``'utf-8'`` for UTF-8 files.

        Returns:
            A list of lines, each still carrying its line ending.
        """
        # The with-block closes the file even when decoding fails.
        with codecs.open(txtFile, 'r', encoding) as f:
            return f.readlines()
    
    def ReadTXT(txtFile):
        """Return the whole content of ``txtFile`` as one string.

        Args:
            txtFile: Path of the file to read.

        Returns:
            The file content exactly as ``read()`` delivers it.
        """
        # The with-block closes the file even when reading fails.
        with open(txtFile) as f:
            return f.read()
    
    #返回数组
    def ReadTXTList(txtFile):
        """Read ``txtFile`` and return its lines without newline characters.

        Args:
            txtFile: Path of the file to read.

        Returns:
            A list with one string per line; empty lines become empty strings.
        """
        # The newline literal had been split across two physical lines, which
        # is a syntax error; it is restored as the '\n' escape here.
        with open(txtFile, "r") as f:
            return [line.replace('\n', '') for line in f]
    
    def WriteTXT(mylist,txtFile):
        """Write every item of ``mylist`` to ``txtFile``, one per line.

        Args:
            mylist: Sequence of strings; a newline is appended to each item.
            txtFile: Path of the file to create or overwrite.
        """
        # The newline literal had been split across two physical lines, which
        # is a syntax error; it is restored as the '\n' escape here.
        # The with-block closes the file even when a write fails.
        with open(txtFile, 'w') as wfiles:
            for item in mylist:
                wfiles.write(item + '\n')
    
    def main(inFile):
        """Echo the lines of ``inFile`` and write them plus one Chinese line to a copy.

        The copy is stored next to the input with ``1`` inserted before the
        extension (``data.txt`` becomes ``data1.txt``).

        Args:
            inFile: Path of the text file to read.
        """
        pList = ReadTXTList(inFile)
        # Plain format string: the author notes that a u prefix fails here.
        for i, line in enumerate(pList):
            arcpy.AddMessage("{0}={1}".format(i, line))
        # Build the output name from the real extension. The former
        # lower().replace('.txt', '1.txt') lowercased the whole path, touched
        # every '.txt' occurrence and, for other extensions, returned the input
        # path itself so the input got overwritten.
        root, ext = os.path.splitext(inFile)
        txtFile = root + "1" + ext
        mystr = u"我爱你 gisoracle"
        # Encode to GBK before writing; the author reports that appending the
        # unicode string directly produces garbled output.
        pList.append(mystr.encode("GBK"))
        WriteTXT(pList, txtFile)
    # Script parameter 0 is passed to main() as the input file.
    inFile = arcpy.GetParameterAsText(0)
    main(inFile)

    5.6  元组和列表中汉字使用

    #coding=utf8
    import arcpy
    
    import os
    import sys
    import math
    
    ##将列表转换为元组。
    def usetuple():
        """Show how Chinese text stored in a tuple is reported by index and by slice."""
        # Author's note: do not add the u prefix to these literals, otherwise
        # the slice output shows escapes such as u'\u6211\u7231\u4f60 gisorcle'.
        tup1 = ('我爱你 qq', '我爱你 gisorcle', 2019, 2000)
        for index, item in enumerate(tup1):
            arcpy.AddMessage("{0}={1}".format(index, item))

        # Items 2 and 3 (the end index 4 is excluded).
        arcpy.AddMessage("{0}".format(str(tup1[2:4])))

        # Item 0 only (the end index 1 is excluded). Author's note: without
        # the string-escape decode the output is \xe6\x88\x91\xe7\x88\xb1\xe4\xbd\xa0.
        text = str(tup1[0:1]).decode('string-escape')
        arcpy.AddMessage("{0}".format(text))
    
    #列表
    def useList():
        """Show how Chinese text stored in a list is reported by index and by slice."""
        # Author's note: do not add the u prefix to these literals, otherwise
        # the slice output shows escapes such as u'\u6211\u7231\u4f60 gisorcle'.
        items = ['我爱你 qq', '我爱你 gisorcle', "我爱"]
        for index, item in enumerate(items):
            arcpy.AddMessage("{0}={1}".format(index, item))
        # Items 0 to 2 (the end index 3 is excluded). Author's note: without
        # the string-escape decode the output is \xe6\x88\x91\xe7\x88\xb1\xe4\xbd\xa0.
        text = str(items[0:3]).decode('string-escape')
        arcpy.AddMessage("{0}".format(text))
    
    def main():
        """Run the list demonstration, then the tuple demonstration."""
        for demo in (useList, usetuple):
            demo()
    # Executed as soon as the script is loaded.
    main()

    5.7  字典使用

    #coding=utf8
    import arcpy
    
    import os
    import sys
    import math
    def main():
        """Report the name and alias of every field of ``inFeature``.

        Builds a name-to-alias dictionary from ``arcpy.ListFields`` and emits
        it twice: once as ``name=alias`` lines and once as the text form of
        each ``(name, alias)`` pair.
        """
        aliases = {field.name: field.aliasName for field in arcpy.ListFields(inFeature)}
        for name, alias in aliases.items():
            arcpy.AddMessage("{0}={1}".format(name, alias))
        for pair in aliases.items():
            # The unicode_escape decode is applied to the text form of the
            # pair before it is reported.
            text = str(pair).decode("unicode_escape")
            arcpy.AddMessage("===={0}".format(text))

    # Script parameter 0 is stored as inFeature, which main() reads.
    inFeature = arcpy.GetParameterAsText(0)
    main()
  • 相关阅读:
    ie9\fckeditor无法上传图片、弹出浮层内容不显示
    HOW TO:使用Osql工具管理SQL Server桌面引擎(MSDE 2000)
    系统安装SATA模式(Mode)与PQMagic#105硬盘错误
    建立唯一、准确页面标题
    CSS属性分类
    模型
    写作技巧
    在旧工程ObjectiveC中使用SwiftUI开发
    一次大数据量导出优化借助xml导出xls、xlsx文件
    排序算法之 Slow Sort
  • 原文地址:https://www.cnblogs.com/gisoracle/p/13636102.html
Copyright © 2020-2023  润新知