Go 中对中文字符串的操作
统计字数
Go 在处理字符串时,经常需要知道字符串的字符数,但 len() 返回的是字符串的字节数(UTF-8 编码下一个中文字符通常占 3 个字节),因此我们可以先把字符串转换为 []rune,自定义一个统计字符个数的函数
// countNum returns the number of characters (Unicode code points) in
// handlerStr, as opposed to len, which reports the size in bytes.
func countNum(handlerStr string) int {
	count := 0
	// Ranging over a string steps one rune at a time, not one byte.
	for range handlerStr {
		count++
	}
	return count
}
截取字符串的前10个字符
先将字符串转换为 []rune 并统计字符个数,当字符数不少于 10 时截取前 10 个字符,否则原样返回
// InterceptString returns the first 10 characters of resStr, counted in
// runes rather than bytes so multi-byte characters are never split.
// A string with fewer than 10 characters is returned unchanged.
func InterceptString(resStr string) string {
	chars := []rune(resStr)
	if len(chars) < 10 {
		// Nothing to cut off: hand back the input as it came in.
		return resStr
	}
	return string(chars[:10])
}
参考博客
如有错误,敬请指正
以下内容与此篇博客无关,纯属个人笔记
场景是需要返回interface{}数据
// parserLock is the package-level mutex that ParserAllMatchRules holds
// while it applies its parse rules. The zero value is an unlocked mutex.
var parserLock sync.Mutex
// ParserAllMatchRules runs every rule in pattern against html and merges the
// entries found under each response's "data" key into one de-duplicated list.
//
// url is only used to label the "html is too long" error and the
// empty-result log entry. Unless strategy is 2, html longer than 800000
// characters (runes) is rejected before any parsing is attempted.
// NOTE(review): what strategy value 2 stands for is not visible in this
// file — confirm with the caller.
//
// On success the result is a map[string]interface{} whose "data" key holds
// the merged []interface{}. An error is returned when html is too long or
// when no rule produced any entry. A rule whose response carries no data, or
// data of an unexpected shape, is logged and skipped.
func ParserAllMatchRules(url, html string, pattern []rules.Rule, strategy int) (interface{}, error) {
	const (
		sizeCheckExemptStrategy = 2      // the one strategy that skips the size limit
		maxHTMLRunes            = 800000 // upper bound on html length, in runes
	)

	// The size check touches no shared state, so it runs before the lock is
	// taken and oversized input never blocks other callers.
	if strategy != sizeCheckExemptStrategy && len([]rune(html)) > maxHTMLRunes {
		return nil, errors.New("html is too long:" + url)
	}

	parserLock.Lock()
	// Deferred so the lock is released on every path, including a panic
	// raised while parsing or de-duplicating.
	defer parserLock.Unlock()

	var assemblyData []interface{}
	for _, rule := range pattern {
		response := graphquery.ParseFromString(html, rule.Patterns)

		// Checked assertions: a nil Data, a non-map Data, or a "data" entry
		// that is missing or not a list all end up with ok == false instead
		// of panicking (indexing a nil map is safe and yields nil).
		resParseData, _ := response.Data.(map[string]interface{})
		allUrlList, ok := resParseData["data"].([]interface{})
		if !ok {
			zap.S().Infow("more rule parse failed", "data", response.Data, "response", response)
			continue
		}
		assemblyData = append(assemblyData, allUrlList...)
	}

	if len(assemblyData) == 0 {
		// Infow takes key/value pairs; a lone key is reported by zap as an
		// error, so the url is logged as the value here.
		zap.S().Infow("response parser length is zero", "url", url)
		return nil, errors.New("data length is zero")
	}

	// Drop entries that repeat an already seen link.
	assemblyData = removeDuplicateElement(assemblyData)
	zap.S().Info("列表长度为:", len(assemblyData))

	return map[string]interface{}{"data": assemblyData}, nil
}
// removeDuplicateElement returns the elements of languages in their original
// order, keeping only the first element seen for each distinct "url" value.
//
// Each element is expected to be a map[string]interface{} holding a string
// under the "url" key. Elements with an empty URL are dropped, and so are
// elements that are not such a map or whose "url" is missing or not a string
// (an unchecked type assertion on those would panic). The input slice is not
// modified; the result is always non-nil.
func removeDuplicateElement(languages []interface{}) []interface{} {
	result := make([]interface{}, 0, len(languages))
	seen := make(map[string]struct{}, len(languages))
	for _, item := range languages {
		entry, ok := item.(map[string]interface{})
		if !ok {
			continue
		}
		// A missing key yields a nil interface, which fails the assertion.
		resURL, ok := entry["url"].(string)
		if !ok || resURL == "" {
			continue
		}
		if _, dup := seen[resURL]; dup {
			continue
		}
		seen[resURL] = struct{}{}
		result = append(result, item)
	}
	return result
}