5.3 获得汉字长度和分解
#coding=utf8
import arcpy
import os
import sys
import math
import string
from collections import namedtuple
#判断字符串是否为汉字
def isChineseWord(string):
    """Return True when ``string`` consists only of Chinese characters.

    A character counts as Chinese when its code point lies in the CJK
    Unified Ideographs block (U+4E00-U+9FFF) or in Extension A
    (U+3400-U+4DBF).  The previous check treated every non-ASCII letter
    (for example an accented Latin letter) as Chinese and raised TypeError
    for input longer than one character.

    A short diagnostic line is printed for each call, as before.

    Args:
        string: the text to classify (normally a single unicode character).
            The parameter name shadows the ``string`` module inside this
            function only; it is kept for backward compatibility.

    Returns:
        True if ``string`` is non-empty and every character is a CJK
        ideograph, otherwise False.
    """
    def _is_cjk(ch):
        code = ord(ch)
        return 0x4E00 <= code <= 0x9FFF or 0x3400 <= code <= 0x4DBF

    if string and all(_is_cjk(ch) for ch in string):
        print("是汉字")
        return True
    if string.isalpha():
        # Alphabetic but not Chinese (ASCII or other scripts).
        print("是字母")
        return False
    print("不是汉字也不是字母")
    return False
#返回汉字和英文数量
def getChinesenum(s):
    """Count the Chinese and non-Chinese characters of ``s``.

    Each character is classified with isChineseWord().

    Returns:
        A ``(chinese, other)`` tuple whose two counts add up to ``len(s)``.
    """
    chinese = sum(1 for ch in s if isChineseWord(ch))
    return chinese, len(s) - chinese
# Result record returned by str_count(); built once instead of on every call.
_StrCount = namedtuple('Count', ['total', 'zh', 'en', 'space', 'digit', 'punc'])


def str_count(s):
    """Count the character categories found in ``s``.

    Every character falls into exactly one category, tested in this order:
    ASCII letter (``en``), digit (``digit``), whitespace (``space``), any
    other alphabetic character (``zh`` - this is where Chinese characters
    land, along with any other non-ASCII letters), and everything else
    (``punc``).

    Because the categories are exhaustive and mutually exclusive the counts
    always add up to ``len(s)``; the former sanity check and its
    ``return None`` paths could never run and have been removed.

    Args:
        s: the text to analyse; pass a unicode string so that each Chinese
            character is a single element.

    Returns:
        A named tuple ``(total, zh, en, space, digit, punc)``.
    """
    count_en = count_dg = count_sp = count_zh = count_pu = 0
    for c in s:
        if c in string.ascii_letters:
            count_en += 1
        elif c.isdigit():
            count_dg += 1
        elif c.isspace():
            count_sp += 1
        elif c.isalpha():
            # Alphabetic but not an ASCII letter.
            count_zh += 1
        else:
            count_pu += 1
    return _StrCount(len(s), count_zh, count_en, count_sp, count_dg, count_pu)
#汉字长度
def getcharlength(mystr):
    """Return a display-width style length for ``mystr``.

    The result is ``bytes + (chars - bytes) // 2`` where ``bytes`` is the
    UTF-8 encoded length and ``chars`` the number of characters.  An ASCII
    character therefore counts as 1 and a character that takes three bytes
    in UTF-8 (which covers the common Chinese characters) counts as 2.

    Both UTF-8 encoded byte strings and unicode strings are accepted.  The
    previous version called ``.decode('utf-8')`` unconditionally, which
    fails for unicode input.

    Args:
        mystr: a UTF-8 encoded byte string or a unicode string.

    Returns:
        The computed length as an int (0 for empty input).
    """
    if isinstance(mystr, bytes):
        raw = mystr
        text = mystr.decode('utf-8')
    else:
        text = mystr
        raw = mystr.encode('utf-8')
    byte_len = len(raw)
    char_len = len(text)
    # Floor division gives an integer result on Python 2 and 3 alike.
    return byte_len + (char_len - byte_len) // 2
def getlength():
    """Demonstrate length, counting and slicing of Chinese text.

    Every result is reported through arcpy.AddMessage.  The inline remarks
    are translated from the original author's notes and describe what the
    author observed; they are not verified here.
    """
    report = arcpy.AddMessage

    # Plain literal: per the original note one Chinese character has length 3,
    # and a u prefix on the message template makes no difference.
    plain = "我们的gis"
    plain_len = len(plain)
    report(u"{1} length={0}".format(plain_len, plain))
    report("{1} length={0}".format(plain_len, plain))

    # Unicode literal: per the original note one Chinese character has length 1.
    wide = u"我们的gis"
    wide_len = len(wide)
    report(u"ss={1} length={0}".format(wide_len, wide))
    report("ss={1} length={0}".format(wide_len, wide))

    # getcharlength() on the unicode literal first, then on the plain literal
    # (original note for the plain case: one Chinese character counts as 2).
    for sample in (u"我们的gis", "我们的gis"):
        width = getcharlength(sample)
        report("{1} length={0}".format(width, sample))

    # Original note: the u prefix is required for the per-category counts.
    sentence = u'我们的gis gisoracle 2019.11.07'
    count = str_count(sentence)
    report(sentence)
    report('该字符串共有 {} 个字符,其中有 {} 个汉字,{} 个英文,{} 个空格,{} 个数字,{} 个标点符号。'.format(count.total, count.zh, count.en, count.space, count.digit, count.punc))
    cnum, enum = getChinesenum(sentence)
    report('该字符串共有 {} 汉字字符,非汉字{}'.format(cnum, enum))

    # Take the leftmost four elements; per the original note this only yields
    # whole characters when the literal carries the u prefix.
    for sample in (u"我们的gis", "我们的gis"):
        report('该字符串 {} 取左边4个{}'.format(sample, sample[0:4]))
def printinfo():
    """Demonstrate padded formatting of Chinese text via arcpy.AddMessage.

    Original note: the function must not be named ``print``.  The remaining
    remarks are translated from the original author's notes.
    """
    report = arcpy.AddMessage

    # With or without the u prefix the banner is reported the same way.
    report("====================我们都是Python程序员=============")
    report(u"====================我们都是Python程序员=============")

    # str.format padding: unicode argument first, then a plain literal
    # (original note: the plain one needs the u prefix).
    report("{0:<20s}1".format(u"我们都是程序员"))
    report("{0:<20s}12345678901234567890".format("我们都是程序员"))

    # %-style padding, same pair of cases.
    report("%-20s12345678901234567890" % (u"我们都是程序员"))
    report("%-20s12345678901234567890" % ("我们都是程序员"))

    # Original note: encoding to GBK is the workaround for garbled Chinese.
    report(u"没有对应数据".encode('gbk'))
# Split a string at a fixed width, counting each Chinese character as 2 columns
def gettrim(s, n):
    """Split ``s`` where its running width first exceeds ``n``.

    The width grows by 2 for every character isChineseWord() accepts and by
    1 for every other character.  Characters seen while the running width
    is still at most ``n`` go to the first part; all later characters go to
    the second part.

    Returns:
        A ``(first, after)`` tuple of strings.
    """
    width = 0
    head = []
    tail = []
    for ch in s:
        width += 2 if isChineseWord(ch) else 1
        bucket = head if width <= n else tail
        bucket.append(ch)
    return "".join(head), "".join(tail)
def strtrim():
    """Demonstrate gettrim() with a width of 6 on two literals.

    The unicode literal is processed first, then the plain one (original
    note: without the u prefix one Chinese character occupies 3 positions).
    """
    for sample in (u"我们1的gis1", "我们1的gis1"):
        first, after = gettrim(sample, 6)
        arcpy.AddMessage('{} 取6位,前{} 后{}'.format(sample, first, after))
def Main():
    """Run the length, trimming and formatting demonstrations in order."""
    for demo in (getlength, strtrim, printinfo):
        demo()


Main()
5.4 表中读写汉字
#coding=utf8
import arcpy
import os
import sys
import math
def readHZ():
    """Report the value of field ``inField`` for every row of ``inTable``.

    Rows are numbered from 1 and each one is reported with
    arcpy.AddMessage.  ``inTable`` and ``inField`` are module-level values.
    """
    with arcpy.da.SearchCursor(inTable, [inField]) as cursor:
        for index, row in enumerate(cursor, 1):
            arcpy.AddMessage(u"序号{0}, 字段={1}, 值={2}".format(index, inField, row[0]))
def updateHZ():
    """Append a Chinese suffix and the row number to field ``inField``.

    For each row of ``inTable`` (numbered from 1) the field value gets the
    suffix " 长度<n>" appended and is then cut to at most 50 characters
    before being written back.  ``inTable`` and ``inField`` are
    module-level values.

    A NULL field value (None) is treated as an empty string; previously it
    raised TypeError on concatenation and aborted the update.
    """
    with arcpy.da.UpdateCursor(inTable, [inField]) as cursor:
        for index, row in enumerate(cursor, 1):
            current = row[0] if row[0] is not None else ""
            # Original note: the suffix works with or without a u prefix.
            row[0] = (current + " 长度" + str(index))[0:50]
            cursor.updateRow(row)
def insertHZ():
    """Insert two rows with Chinese text in field ``inField`` of ``inTable``.

    The inserted values are "0我爱你" and "100我爱你".  ``inTable`` and
    ``inField`` are module-level values.

    The cursor is used as a context manager so it is released even when an
    insert fails; the earlier ``del cursor`` was skipped on error.  The
    field list is an explicit list (the former ``(inField)`` was a plain
    string, not a tuple), and ``range`` replaces the Python-2-only
    ``xrange``.
    """
    with arcpy.da.InsertCursor(inTable, [inField]) as cursor:
        for x in range(2):
            # The row must be a sequence, hence the one-element tuple.
            # Original note: the text works with or without a u prefix.
            cursor.insertRow((str(x * 100) + "我爱你",))
def main():
    """Run the read, update and insert demonstrations in that order."""
    for step in (readHZ, updateHZ, insertHZ):
        step()


# Tool parameters: index 0 is the table, index 1 the field name.
inTable = arcpy.GetParameterAsText(0)
inField = arcpy.GetParameterAsText(1)
main()
5.5 读写文本文件中汉字
#coding=utf8
import arcpy
import os
import sys
import math
import codecs
def ReadTXTNEW(txtFile):
    """Read a GBK-encoded text file and return its lines as unicode strings.

    The file is opened in a ``with`` block so the handle is closed even if
    reading or decoding fails.

    Args:
        txtFile: path of the file to read.

    Returns:
        The list produced by ``readlines()``; line endings are kept.
    """
    # Original note: 'utf-8' is the alternative encoding for this call.
    with codecs.open(txtFile, 'r', 'gbk') as f:
        return f.readlines()
def ReadTXT(txtFile):
    """Return the whole content of ``txtFile`` as one string.

    The file is opened with the default mode and encoding, inside a
    ``with`` block so the handle is closed even if reading fails.
    """
    with open(txtFile) as f:
        return f.read()
#返回数组
def ReadTXTList(txtFile):
    """Read ``txtFile`` and return its lines without newline characters.

    The newline literal in the replace call had been split across two
    source lines, which is a syntax error; it is restored to the '\\n'
    escape.  The file is handled by a ``with`` block.

    Args:
        txtFile: path of the text file to read.

    Returns:
        A list with one string per line of the file, in file order.
    """
    with open(txtFile, "r") as f:
        return [line.replace('\n', '') for line in f]
def WriteTXT(mylist, txtFile):
    """Write every item of ``mylist`` to ``txtFile``, one item per line.

    The file is created or truncated.  The newline literal had been split
    across two source lines, which is a syntax error; it is restored to
    the '\\n' escape.  A ``with`` block replaces the manual close and the
    redundant ``del`` of the file object.

    Args:
        mylist: sequence of strings to write.
        txtFile: path of the output file.
    """
    with open(txtFile, 'w') as wfiles:
        for item in mylist:
            wfiles.write(item + '\n')
def main(inFile):
    """Report the lines of ``inFile`` and write them, plus one Chinese line,
    to a sibling file.

    The output path is the input path with "1" inserted before the
    extension (``data.txt`` -> ``data1.txt``).  It is built with
    os.path.splitext: the previous ``lower().replace('.txt', '1.txt')``
    left the path unchanged when the extension was not ".txt", so the
    output overwrote the input, and it also rewrote any other ".txt"
    occurring inside the path.

    Args:
        inFile: path of the text file to read.
    """
    pList = ReadTXTList(inFile)
    for index, line in enumerate(pList):
        # Original note: no u prefix on this template; adding one caused an
        # error.
        arcpy.AddMessage("{0}={1}".format(index, line))
    root, ext = os.path.splitext(inFile)
    txtFile = root + '1' + ext
    mystr = u"我爱你 gisoracle"
    # Original note: encoding to GBK avoids garbled Chinese in the output;
    # appending the unicode string directly did not work.
    pList.append(mystr.encode("GBK"))
    WriteTXT(pList, txtFile)


# Tool parameter at index 0: the text file to process.
inFile = arcpy.GetParameterAsText(0)
main(inFile)
5.6 元组和列表中汉字使用
#coding=utf8
import arcpy
import os
import sys
import math
## Using Chinese strings inside a tuple
def usetuple():
    """Demonstrate reporting a tuple that contains Chinese strings.

    Each element is reported with its index, followed by the string form
    of two slices.  Remarks are translated from the original author's notes.
    """
    # Original note: do not add the u prefix to these literals, otherwise
    # the str() output further down shows escaped code points.
    tup1 = ('我爱你 qq', '我爱你 gisorcle', 2019, 2000)
    for index, item in enumerate(tup1):
        arcpy.AddMessage("{0}={1}".format(index, item))

    # Slice 2:4 covers indexes 2 and 3 (4 is excluded).
    arcpy.AddMessage("{0}".format(str(tup1[2:4])))

    # Slice 0:1 covers index 0 only.  Original note: without the
    # 'string-escape' decode the message shows escaped bytes instead of the
    # Chinese text.
    first_part = str(tup1[0:1]).decode('string-escape')
    arcpy.AddMessage("{0}".format(first_part))
#列表
def useList():
    """Demonstrate reporting a list that contains Chinese strings.

    Each element is reported with its index, followed by the string form
    of the slice 0:3.  Remarks are translated from the original author's
    notes.
    """
    # Original note: do not add the u prefix to these literals, otherwise
    # the str() output further down shows escaped code points.
    items = ['我爱你 qq', '我爱你 gisorcle']
    items.append("我爱")
    for index, item in enumerate(items):
        arcpy.AddMessage("{0}={1}".format(index, item))

    # Slice 0:3 covers indexes 0 to 2 (3 is excluded).  Original note:
    # without the 'string-escape' decode the message shows escaped bytes.
    listing = str(items[0:3]).decode('string-escape')
    arcpy.AddMessage("{0}".format(listing))
def main():
    """Run the list demonstration, then the tuple demonstration."""
    for demo in (useList, usetuple):
        demo()


main()
5.7 字典使用
#coding=utf8
import arcpy
import os
import sys
import math
def main():
    """Report field names and aliases of ``inFeature`` using a dictionary.

    A name -> alias dictionary is built from arcpy.ListFields.  Each entry
    is reported first as "name=alias", then as the string form of the
    (name, alias) pair decoded with "unicode_escape".
    """
    aliases = {field.name: field.aliasName for field in arcpy.ListFields(inFeature)}
    for name, alias in aliases.items():
        arcpy.AddMessage("{0}={1}".format(name, alias))
    for pair in aliases.items():
        readable = str(pair).decode("unicode_escape")
        arcpy.AddMessage("===={0}".format(readable))


# Tool parameter at index 0: the dataset whose fields are listed.
inFeature = arcpy.GetParameterAsText(0)
main()