• 从unmarshal带json字符串字段的json说起


    事情是这样的,有一段json,里面有个字段本应该是obj,但是encode的时候被当成了string,于是就成了这个样子:

    {"body":"{\"sn\":\"aaaa\\\/bbbb\"}"}
    

    json.Unmarshal来解析的话,显然要映射到这样的struct里:

    	// rawStr is the sample payload: the "body" member holds a JSON object that
    	// was encoded a second time, so it arrives as an escaped JSON string.
    	rawStr := `
    {"body":"{\"sn\":\"aaaa\\\/bbbb\"}"}
    `
    	// data receives the outer document; Body keeps the still-encoded inner
    	// JSON text for a second decoding pass.
    	data := struct {
    		Body string `json:"body"`
    	}{}
    	if err := json.Unmarshal([]byte(rawStr), &data); err != nil {
    		panic(err)
    	}
    

    这样的话 我得再定义一个struct,然后把body的string解析出来:

    	// body receives the inner document carried as text in data.Body. There is
    	// no struct tag: encoding/json matches the "sn" key to the Sn field
    	// case-insensitively.
    	body := struct {
    		Sn string
    	}{}
    	// Surface a decoding failure instead of silently leaving body empty.
    	if err := json.Unmarshal([]byte(data.Body), &body); err != nil {
    		panic(err)
    	}
    

    能不能一次到位 定义好结构体一次解析到位呢?

    因为之前有通过实现encoding.TextMarshaler接口来完成结构体里string字段的自定义marshaler,所以理所当然地想到实现encoding.TextUnmarshaler接口来完成自定义的unmarshal

    // dataEx is the outer document; its Body member is decoded through
    // bodyEx's UnmarshalText method.
    type dataEx struct {
    	Body bodyEx
    }
    
    // bodyEx is the target type for the JSON text carried inside "body".
    type bodyEx struct {
    	Sn string
    }
    
    // UnmarshalText is a placeholder that accepts any text and stores nothing.
    // It exists only to confirm that the decoder calls it.
    func (p *bodyEx) UnmarshalText(text []byte) error {
    	return nil
    }
    
    // marshalEx decodes rawStr into a dataEx value and panics if decoding fails.
    func marshalEx(rawStr string) {
    	var data dataEx
    	if err := json.Unmarshal([]byte(rawStr), &data); err != nil {
    		panic(err)
    	}
    }
    
    

    先测试下,在UnmarshalText方法上打上断点,果然停住了。

    接下来实现UnmarshalText。如果不实现它、直接用dataEx结构体去接收,是解析不了的,因为json解析器扫描到body字段的value时,是把它当做json的string来处理的。实现之后,我们在UnmarshalText方法里拿到的就是那段(已去掉外层引号和转义的)字符串,因此只要把这段字符串再解析到bodyEx里就好了。
    本来预想的是这样就ok了:

    // UnmarshalText is the first, naive attempt: it passes the text received
    // from the decoder straight back to json.Unmarshal with p itself as the
    // target.
    //
    // NOTE: this is the version that yields the "cannot unmarshal object" error
    // quoted right after this snippet; it is kept as written for illustration.
    func (p *bodyEx) UnmarshalText(text []byte) error {
    	return json.Unmarshal(text, p)
    }
    

    实际运行发现报错:

    json: cannot unmarshal object into Go struct field dataEx.Body of type *main.bodyEx
    

    实际上 这段json解析到这样的结构体上应该是没问题的,现在报错 只能说是因为扩展了UnmarshalText方法导致的。因此暂时这样处理:

    // dataEx is the outer document; its Body member arrives as a JSON string
    // whose content is itself a JSON object.
    type dataEx struct {
    	Body bodyEx
    }
    
    // bodyEx is the decoded form of the JSON text carried inside "body".
    type bodyEx struct {
    	Sn string
    }
    
    // bodyEx2 has the same fields as bodyEx but, being a distinct defined type,
    // does not carry bodyEx's UnmarshalText method.
    type bodyEx2 bodyEx
    
    // UnmarshalText decodes text as JSON into a bodyEx2 value and, on success,
    // overwrites *p with the result. On failure *p is left untouched and the
    // decoding error is returned.
    func (p *bodyEx) UnmarshalText(text []byte) error {
    	var decoded bodyEx2
    	if err := json.Unmarshal(text, &decoded); err != nil {
    		return err
    	}
    	*p = bodyEx(decoded)
    	return nil
    }
    

    至此,解决了json里被转义的json字符串一次解析到结构体里的问题。

    因为上面使用bodyEx2这样的处理只是自己的猜测和尝试,我想看看到底为啥实现了UnmarshalText后就不能解析了,因此去翻看了json.Unmarshal()的源码(encoding/json包的scanner.go和decode.go)。

    scanner

    要实现对json字符串的解析,实际上就是对这段字符串进行词法分析,解析出json里的 obj、number、array、key、value等
    json包里有一个scanner,它就是一个状态机:

    // A scanner is a JSON scanning state machine.
    // Callers call scan.reset() and then pass bytes in one at a time
    // by calling scan.step(&scan, c) for each byte.
    // The return value, referred to as an opcode, tells the
    // caller about significant parsing events like beginning
    // and ending literals, objects, and arrays, so that the
    // caller can follow along if it wishes.
    // The return value scanEnd indicates that a single top-level
    // JSON value has been completed, *before* the byte that
    // just got passed in.  (The indication must be delayed in order
    // to recognize the end of numbers: is 123 a whole value or
    // the beginning of 12345e+6?).
    

    scanner的结构如下:

    // scanner holds the state of the JSON scanning state machine: the current
    // step function plus the bookkeeping listed below.
    type scanner struct {
    	// step is the transition function called for each input byte; it is
    	// reassigned to a different implementation as the state changes.
    	step func(*scanner, byte) int
    	// Reached end of top-level value.
    	endTop bool
    	// Stack of what we're in the middle of - array values, object keys, object values.
    	parseState []int
    	// Error that happened, if any.
    	err error
    	// total bytes consumed, updated by decoder.Decode
    	bytes int64
    }
    

    简单看一下stateBeginValue状态函数

    
    // stateBeginValue is the state at the beginning of a value. It looks at the
    // byte c, installs the matching next step function on s, and returns the
    // opcode describing what was begun. Space bytes are skipped; any byte that
    // cannot start a value is reported through s.error.
    func stateBeginValue(s *scanner, c byte) int {
    	if c <= ' ' && isSpace(c) {
    		return scanSkipSpace
    	}
    	switch c {
    	case '{':
    		// Object: push parseObjectKey so the parse-state stack records that
    		// we are inside an object.
    		s.step = stateBeginStringOrEmpty
    		s.pushParseState(parseObjectKey)
    		return scanBeginObject
    	case '[':
    		// Array: push parseArrayValue onto the parse-state stack.
    		s.step = stateBeginValueOrEmpty
    		s.pushParseState(parseArrayValue)
    		return scanBeginArray
    	case '"':
    		// A string is a literal: the opcode is the same one used for
    		// numbers, booleans and null below.
    		s.step = stateInString
    		return scanBeginLiteral
    	case '-':
    		s.step = stateNeg
    		return scanBeginLiteral
    	case '0': // beginning of 0.123
    		s.step = state0
    		return scanBeginLiteral
    	case 't': // beginning of true
    		s.step = stateT
    		return scanBeginLiteral
    	case 'f': // beginning of false
    		s.step = stateF
    		return scanBeginLiteral
    	case 'n': // beginning of null
    		s.step = stateN
    		return scanBeginLiteral
    	}
    	if '1' <= c && c <= '9' { // beginning of 1234.5
    		s.step = state1
    		return scanBeginLiteral
    	}
    	return s.error(c, "looking for beginning of value")
    }
    
    

    一段正常的json,开始读取的时候(跳过空格后),如果读到'{'就意味着是一个obj,如果遇到'['就意味着是一个array,如果遇到其他合法的起始字符(引号、负号、数字、t/f/n),都会返回scanBeginLiteral标记,而这个标记就决定着unmarshal的时候如何映射到对应的结构体里。
    decodeState的literalStore方法里,有针对各种字面量的处理:

    
    // literalStore decodes a literal stored in item into v.
    //
    // fromQuoted indicates whether this literal came from unwrapping a
    // string from the ",string" struct tag option. this is used only to
    // produce more helpful error messages.
    //
    // The function first offers the literal to the UnmarshalJSON or
    // UnmarshalText implementation reported by indirect, if any, and only
    // otherwise falls back to the switch on the literal's first byte. Most
    // type mismatches are recorded with d.saveError and the function still
    // returns nil; errors are returned directly only on the paths marked below.
    func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool) error {
    	// Check for unmarshaler.
    	if len(item) == 0 {
    		//Empty string given
    		d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
    		return nil
    	}
    	isNull := item[0] == 'n' // null
    	// indirect is not shown here; as used below, u exposes UnmarshalJSON and
    	// ut exposes UnmarshalText, and pv is the value decoded into otherwise.
    	u, ut, pv := indirect(v, isNull)
    	if u != nil {
    		// UnmarshalJSON takes precedence and receives the literal bytes as-is.
    		return u.UnmarshalJSON(item)
    	}
    	if ut != nil {
    		// UnmarshalText is only ever called for a JSON string; any other
    		// literal is recorded as an error instead.
    		if item[0] != '"' {
    			if fromQuoted {
    				d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
    				return nil
    			}
    			val := "number"
    			switch item[0] {
    			case 'n':
    				val = "null"
    			case 't', 'f':
    				val = "bool"
    			}
    			d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())})
    			return nil
    		}
    		s, ok := unquoteBytes(item)
    		if !ok {
    			if fromQuoted {
    				return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
    			}
    			panic(phasePanicMsg)
    		}
    		// UnmarshalText receives the result of unquoteBytes, not the quoted
    		// literal, and its error is returned to the caller unchanged.
    		return ut.UnmarshalText(s)
    	}
    
    	v = pv
    
    	switch c := item[0]; c {
    	case 'n': // null
    		// A literal starting with 'n' is expected to be exactly null,
    		// but if this was a quoted string input, it could be anything.
    		if fromQuoted && string(item) != "null" {
    			d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
    			break
    		}
    		switch v.Kind() {
    		case reflect.Interface, reflect.Ptr, reflect.Map, reflect.Slice:
    			v.Set(reflect.Zero(v.Type()))
    			// otherwise, ignore null for primitives/string
    		}
    	case 't', 'f': // true, false
    		value := item[0] == 't'
    		// The main parser checks that only true and false can reach here,
    		// but if this was a quoted string input, it could be anything.
    		if fromQuoted && string(item) != "true" && string(item) != "false" {
    			d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
    			break
    		}
    		switch v.Kind() {
    		default:
    			if fromQuoted {
    				d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
    			} else {
    				d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())})
    			}
    		case reflect.Bool:
    			v.SetBool(value)
    		case reflect.Interface:
    			// Only an interface with no methods can hold the decoded value.
    			if v.NumMethod() == 0 {
    				v.Set(reflect.ValueOf(value))
    			} else {
    				d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())})
    			}
    		}
    
    	case '"': // string
    		s, ok := unquoteBytes(item)
    		if !ok {
    			if fromQuoted {
    				return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
    			}
    			panic(phasePanicMsg)
    		}
    		switch v.Kind() {
    		default:
    			// Any kind not listed below (a struct, for example) cannot take a
    			// JSON string.
    			d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())})
    		case reflect.Slice:
    			// Only byte slices accept a string, and the string is
    			// base64-decoded into them.
    			if v.Type().Elem().Kind() != reflect.Uint8 {
    				d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())})
    				break
    			}
    			b := make([]byte, base64.StdEncoding.DecodedLen(len(s)))
    			n, err := base64.StdEncoding.Decode(b, s)
    			if err != nil {
    				d.saveError(err)
    				break
    			}
    			v.SetBytes(b[:n])
    		case reflect.String:
    			v.SetString(string(s))
    		case reflect.Interface:
    			if v.NumMethod() == 0 {
    				v.Set(reflect.ValueOf(string(s)))
    			} else {
    				d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())})
    			}
    		}
    
    	default: // number
    		if c != '-' && (c < '0' || c > '9') {
    			if fromQuoted {
    				return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
    			}
    			panic(phasePanicMsg)
    		}
    		s := string(item)
    		switch v.Kind() {
    		default:
    			// A string-kinded value whose type is numberType stores the
    			// number's text as-is.
    			if v.Kind() == reflect.String && v.Type() == numberType {
    				v.SetString(s)
    				if !isValidNumber(s) {
    					return fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", item)
    				}
    				break
    			}
    			if fromQuoted {
    				return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
    			}
    			d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())})
    		case reflect.Interface:
    			n, err := d.convertNumber(s)
    			if err != nil {
    				d.saveError(err)
    				break
    			}
    			if v.NumMethod() != 0 {
    				d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())})
    				break
    			}
    			v.Set(reflect.ValueOf(n))
    
    		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
    			// Parse at 64 bits, then let OverflowInt reject values that do
    			// not fit the destination's actual width.
    			n, err := strconv.ParseInt(s, 10, 64)
    			if err != nil || v.OverflowInt(n) {
    				d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())})
    				break
    			}
    			v.SetInt(n)
    
    		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
    			n, err := strconv.ParseUint(s, 10, 64)
    			if err != nil || v.OverflowUint(n) {
    				d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())})
    				break
    			}
    			v.SetUint(n)
    
    		case reflect.Float32, reflect.Float64:
    			n, err := strconv.ParseFloat(s, v.Type().Bits())
    			if err != nil || v.OverflowFloat(n) {
    				d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())})
    				break
    			}
    			v.SetFloat(n)
    		}
    	}
    	return nil
    }
    
    

    它会先判断 当前要映射的对象是否实现了 json.Unmarshaler接口和encoding.TextUnmarshaler接口,如果实现了前者,则直接调用前者的方法,否则,如果实现了后者,则针对引号开头的(quotedjson),会调用其UnmarshalText方法,也就是我们之前实现的自定义方法。

    这里看到了为什么我们可以扩展,那为啥开始我们直接把字符串unmarshal到实现了UnmarshalText的对象上会报错呢?

    我们在自定义方法里进行unmarshal的时候,这时候要解析的json是一段正常的json(以'{'开头的obj),而非quotedjson了,因此走的是decodeState的object方法:

    // object consumes an object from d.data[d.off-1:], decoding into v.
    // The first byte ('{') of the object has been read already.
    //
    // Only the unmarshaler checks at the top of the function are shown; the
    // rest of the body is elided in this excerpt.
    func (d *decodeState) object(v reflect.Value) error {
    	// Check for unmarshaler.
    	u, ut, pv := indirect(v, false)
    	if u != nil {
    		// UnmarshalJSON receives the bytes from start up to d.off as left by
    		// d.skip().
    		start := d.readIndex()
    		d.skip()
    		return u.UnmarshalJSON(d.data[start:d.off])
    	}
    	if ut != nil {
    		// A target exposing only UnmarshalText cannot take a JSON object: the
    		// type error is recorded, the object is skipped, and UnmarshalText is
    		// never called.
    		d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)})
    		d.skip()
    		return nil
    	}
        ...// remainder of the function omitted
    }
    

    上面可以看出,针对obj的情况,若是实现了encoding.TextUnmarshaler接口,则直接返回错误了。

  • 相关阅读:
    Python基础-time and datetime
    Python基础-包
    Python基础-常用模块
    第四十七天Python学习记录
    第四十四天Python学习记录
    如何教你在NIPS会议上批量下载历年的pdf文档(另附04~14年NIPS论文下载链接)
    如何用pdfbox-app-1.8.10.jar批处理将pdf文档转换成text文档
    如何在Win10下设置图片的浏览方式为windows照片查看器
    如何不通过系统升级来安装window10正式版?(特别针对Xp用户)
    Mysql统计信息处理及binlog解释
  • 原文地址:https://www.cnblogs.com/clannadxr/p/11314874.html
Copyright © 2020-2023  润新知