cJSON数据解析
数据解析部分本质上是一个自动机:可以通过递归或者显式的解析栈来实现。下面的 cJSON 代码采用的是递归方式(parse_value 与 parse_array 等函数相互调用)。
/* Utility to jump whitespace and cr/lf */
//用于跳过ascii小于32的空白字符 static const char *skip(const char *in) { while (in && *in && (unsigned char)*in <= 32) in++; return in; } /* Parse an object - create a new root, and populate. */ cJSON *cJSON_ParseWithOpts(const char *value, const char **return_parse_end, int require_null_terminated) { const char *end = 0; cJSON *c = cJSON_New_Item(); ep = 0; if (!c) return 0; /* memory fail */ //根据前几个字符设置c类型并更新读取位置为end end = parse_value(c, skip(value)); if (!end) { cJSON_Delete(c); //解析失败,数据不完整 return 0; } /* parse failure. ep is set. */ /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator */ if (require_null_terminated)///?? { end = skip(end); if (*end) { cJSON_Delete(c); ep = end; return 0; } } if (return_parse_end) *return_parse_end = end; return c; } /* Default options for cJSON_Parse */ cJSON *cJSON_Parse(const char *value) { return cJSON_ParseWithOpts(value, 0, 0); }
①重点部分:parse_value —— 根据开头的字符判断值的类型,并分派给对应解析函数
/* Parser core - when encountering text, process appropriately. */
//将输入字符串解析为具体类型cJSON结构 static const char *parse_value(cJSON *item, const char *value) { if (!value) return 0; /* Fail on null. */
//设置结构的具体类型并且返回下一个将要解读数据的位置 if (!strncmp(value, "null", 4)) { item->type = cJSON_NULL; return value + 4; } if (!strncmp(value, "false", 5)) { item->type = cJSON_False; return value + 5; } if (!strncmp(value, "true", 4)) { item->type = cJSON_True; item->valueint = 1; return value + 4; } if (*value == '"') { return parse_string(item, value); } if (*value == '-' || (*value >= '0' && *value <= '9')) { return parse_number(item, value); } if (*value == '[') { return parse_array(item, value); } if (*value == '{') { return parse_object(item, value); } ep = value; return 0; /* failure. */ }
②解析字符串部分
解析字符串时,需要把转义序列还原成对应的字符,比如文本中的 "\n"(反斜杠加字母 n 两个字符)应该转换为换行符 '\n'。
当然,如果只有特殊字符转换的话,代码不会有这么长。对于字符串,还要支持非 ASCII 字符,即以 UTF-8 编码输出的 Unicode 字符。
这些字符在 JSON 字符串中会写成 \uXXXX 形式的转义序列(XXXX 为 4 位十六进制的 UTF-16 码元,超出基本平面的字符用两个 \uXXXX 组成代理对),我们需要把它还原为 1 到 4 个字节的 UTF-8 序列,其中每个字节的取值范围是 0 - 255。
static unsigned parse_hex4(const char *str) { unsigned h = 0; if (*str >= '0' && *str <= '9') h += (*str) - '0'; else if (*str >= 'A' && *str <= 'F') h += 10 + (*str) - 'A'; else if (*str >= 'a' && *str <= 'f') h += 10 + (*str) - 'a'; else return 0; h = h << 4; //*F str++; if (*str >= '0' && *str <= '9') h += (*str) - '0'; else if (*str >= 'A' && *str <= 'F') h += 10 + (*str) - 'A'; else if (*str >= 'a' && *str <= 'f') h += 10 + (*str) - 'a'; else return 0; h = h << 4; str++; if (*str >= '0' && *str <= '9') h += (*str) - '0'; else if (*str >= 'A' && *str <= 'F') h += 10 + (*str) - 'A'; else if (*str >= 'a' && *str <= 'f') h += 10 + (*str) - 'a'; else return 0; h = h << 4; str++; if (*str >= '0' && *str <= '9') h += (*str) - '0'; else if (*str >= 'A' && *str <= 'F') h += 10 + (*str) - 'A'; else if (*str >= 'a' && *str <= 'f') h += 10 + (*str) - 'a'; else return 0; return h; } /* Parse the input text into an unescaped cstring, and populate item. */ static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; static const char *parse_string(cJSON *item, const char *str) { const char *ptr = str + 1; char *ptr2; char *out; int len = 0; unsigned uc, uc2; if (*str != '"') { ep = str; return 0; } /* not a string! */ while(*ptr != '"' && *ptr && ++len) if (*ptr++ == '\') //跳过续行符 ptr++; /* Skip escaped quotes. */ //空间申请 out = (char*)cJSON_malloc(len + 1); /* This is how long we need for the string, roughly. */ if (!out) return 0; ptr = str + 1;//跳过“开始 ptr2 = out; while (*ptr != '"' && *ptr) { if (*ptr != '\') *ptr2++ = *ptr++; else //转义字符处理 { ptr++; switch (*ptr) { case 'b': *ptr2++ = ''; break; case 'f': *ptr2++ = 'f'; break; case 'n': *ptr2++ = ' '; break; case 'r': *ptr2++ = ' '; break; case 't': *ptr2++ = ' '; break; case 'u': /* transcode utf16 to utf8. */ uc = parse_hex4(ptr + 1); ptr += 4; /* get the unicode char. */ if ((uc >= 0xDC00 && uc <= 0xDFFF) || uc == 0) break; /* check for invalid. 
*/ if (uc >= 0xD800 && uc <= 0xDBFF) /* UTF16 surrogate pairs. */ { if (ptr[1] != '\' || ptr[2] != 'u') break; /* missing second-half of surrogate. */ uc2 = parse_hex4(ptr + 3); ptr += 6; if (uc2<0xDC00 || uc2>0xDFFF) break; /* invalid second-half of surrogate. */ uc = 0x10000 + (((uc & 0x3FF) << 10) | (uc2 & 0x3FF)); } len = 4; if (uc<0x80) len = 1; else if (uc<0x800) len = 2; else if (uc<0x10000) len = 3; ptr2 += len; switch (len) { case 4: *--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6; case 3: *--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6; case 2: *--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6; case 1: *--ptr2 = (uc | firstByteMark[len]); } ptr2 += len; break; default: *ptr2++ = *ptr; break; } ptr++; } } *ptr2 = 0; if (*ptr == '"') ptr++; item->valuestring = out; item->type = cJSON_String; return ptr; }
关于具体的字符解析中的编码相关问题,请自行阅读编码相关知识
③数字解析
/*
 * Parse the number at `num` and store it in `item` (valuedouble, valueint,
 * type cJSON_Number).  Returns the position just past the consumed text.
 *
 * Accepted shape: optional '-', integer digits, optional ".digits",
 * optional 'e'/'E' with optional sign and digits.  The value is computed as
 *   sign * digits * 10^(scale + exponent)
 * where `scale` is minus the number of fractional digits.
 */
static const char *parse_number(cJSON *item, const char *num)
{
    double mantissa = 0, sign = 1, scale = 0;
    int exponent = 0, exponent_sign = 1;

    /* Has sign? */
    if (*num == '-') {
        sign = -1;
        num++;
    }

    /* A single leading zero is stepped over. */
    if (*num == '0')
        num++;

    /* Integer digits. */
    if (*num >= '1' && *num <= '9') {
        while (*num >= '0' && *num <= '9') {
            mantissa = (mantissa * 10.0) + (*num - '0');
            num++;
        }
    }

    /* Fractional part: only when at least one digit follows the dot. */
    if (*num == '.' && num[1] >= '0' && num[1] <= '9') {
        num++;
        while (*num >= '0' && *num <= '9') {
            mantissa = (mantissa * 10.0) + (*num - '0');
            num++;
            scale--;
        }
    }

    /* Exponent? */
    if (*num == 'e' || *num == 'E') {
        num++;
        if (*num == '+') {
            num++;
        } else if (*num == '-') {
            exponent_sign = -1;
            num++;
        }
        while (*num >= '0' && *num <= '9') {
            exponent = (exponent * 10) + (*num - '0');
            num++;
        }
    }

    /* number = +/- number.fraction * 10^+/- exponent */
    mantissa = sign * mantissa * pow(10.0, (scale + exponent * exponent_sign));

    item->valuedouble = mantissa;
    item->valueint = (int)mantissa;
    item->type = cJSON_Number;
    return num;
}
④解析数组
解析数组, 需要先遇到 '[' 这个符号, 然后挨个的读取节点内容, 节点使用 ',' 分隔, ',' 前后还可能有空格, 最后以 ']' 结尾。
我们要编写的也是这样。
先创建一个数组对象, 判断是否有儿子, 有的话读取第一个儿子, 然后判断是不是有 逗号, 有的话循环读取后面的儿子。
最后读取 ']' 即可。
/*
 * Build an array from the text at `value`, which must start with '['.
 *
 * Elements are parsed with parse_value and linked as a doubly linked list
 * hanging off item->child.  Returns the position just past the closing ']',
 * or 0 on failure.  `ep` is set here when the text does not start with '['
 * or when an element is followed by something other than ',' or ']'.
 */
static const char *parse_array(cJSON *item, const char *value)
{
    cJSON *tail;

    if (*value != '[') {
        ep = value;
        return 0; /* not an array! */
    }

    item->type = cJSON_Array;
    value = skip(value + 1);
    if (*value == ']')
        return value + 1; /* empty array. */

    /* First element. */
    tail = cJSON_New_Item();
    item->child = tail;
    if (!tail)
        return 0; /* memory fail */

    value = skip(parse_value(tail, skip(value))); /* skip any spacing, get the value. */
    if (!value)
        return 0;

    /* Remaining elements, each introduced by a comma. */
    while (*value == ',') {
        cJSON *element = cJSON_New_Item();
        if (!element)
            return 0; /* memory fail */

        tail->next = element;
        element->prev = tail;
        tail = element;

        value = skip(parse_value(tail, skip(value + 1)));
        if (!value)
            return 0; /* element failed to parse */
    }

    if (*value != ']') {
        ep = value;
        return 0; /* malformed. */
    }
    return value + 1; /* end of array */
}
⑤解析对象
解析对象和解析数组类似, 只不过对象的每个儿子是一个 key - value 对, key 是字符串, value 可能是任何值, key 和 value 用 ":" 分隔。(注意:下面贴出的代码实际上是序列化函数 print_object,而不是解析函数 parse_object。)
/*
 * Render a cJSON object to JSON text.
 *
 * item  - object whose children (item->child list) are the members.
 * depth - current nesting depth, used for indentation when formatting.
 * fmt   - non-zero for formatted output (newline per member, tab indent,
 *         tab after ':'), zero for compact output.
 * p     - when non-NULL, text is appended to this print buffer; when NULL,
 *         a new string is allocated with cJSON_malloc.
 *
 * Returns a pointer to the start of the rendered object, or 0 on failure.
 *
 * NOTE(review): ensure() and update() are defined outside this excerpt.
 * ensure(p, n) is assumed to return a write position at p->offset with room
 * for n bytes, and update(p) the offset just past the text written so far.
 * Confirm against their definitions.
 */
static char *print_object(cJSON *item, int depth, int fmt, printbuffer *p)
{
    char **entries = 0, **names = 0;
    char *out = 0, *ptr, *ret, *str;
    int len = 7, i = 0, j;
    cJSON *child = item->child;
    int numentries = 0, fail = 0;
    size_t tmplen = 0;

    /* Count the number of entries. */
    while (child) {
        numentries++;
        child = child->next;
    }

    /* Explicitly handle empty object case */
    if (!numentries) {
        if (p) out = ensure(p, fmt ? depth + 4 : 3);
        else out = (char*)cJSON_malloc(fmt ? depth + 4 : 3);
        if (!out) return 0;
        ptr = out;
        *ptr++ = '{';
        if (fmt) {
            *ptr++ = '\n';
            for (i = 0; i < depth - 1; i++) *ptr++ = '\t';
        }
        *ptr++ = '}';
        *ptr++ = 0;
        return out;
    }

    if (p) {
        /*
         * Buffered mode.  Remember where this object starts in its own
         * variable: the loop counters below must not overwrite it, because
         * the return value is computed from it at the end.
         */
        int start = p->offset;

        /* Compose the output: */
        len = fmt ? 2 : 1;
        ptr = ensure(p, len + 1);
        if (!ptr) return 0;
        *ptr++ = '{';
        if (fmt) *ptr++ = '\n';
        *ptr = 0;
        p->offset += len;

        child = item->child;
        depth++;
        while (child) {
            if (fmt) {
                ptr = ensure(p, depth);
                if (!ptr) return 0;
                for (j = 0; j < depth; j++) *ptr++ = '\t';
                p->offset += depth;
            }

            /* Member name. */
            print_string_ptr(child->string, p);
            p->offset = update(p);

            len = fmt ? 2 : 1;
            ptr = ensure(p, len);
            if (!ptr) return 0;
            *ptr++ = ':';
            if (fmt) *ptr++ = '\t';
            p->offset += len;

            /* Member value. */
            print_value(child, depth, fmt, p);
            p->offset = update(p);

            len = (fmt ? 1 : 0) + (child->next ? 1 : 0);
            ptr = ensure(p, len + 1);
            if (!ptr) return 0;
            if (child->next) *ptr++ = ',';
            if (fmt) *ptr++ = '\n';
            *ptr = 0;
            p->offset += len;

            child = child->next;
        }

        ptr = ensure(p, fmt ? (depth + 1) : 2);
        if (!ptr) return 0;
        if (fmt)
            for (j = 0; j < depth - 1; j++) *ptr++ = '\t';
        *ptr++ = '}';
        *ptr = 0;
        out = (p->buffer) + start;
    } else {
        /* Allocate space for the names and the objects */
        entries = (char**)cJSON_malloc(numentries * sizeof(char*));
        if (!entries) return 0;
        names = (char**)cJSON_malloc(numentries * sizeof(char*));
        if (!names) {
            cJSON_free(entries);
            return 0;
        }
        memset(entries, 0, sizeof(char*) * numentries);
        memset(names, 0, sizeof(char*) * numentries);

        /* Collect all the results into our arrays: */
        child = item->child;
        depth++;
        if (fmt) len += depth;
        while (child) {
            names[i] = str = print_string_ptr(child->string, 0);
            entries[i++] = ret = print_value(child, depth, fmt, 0);
            if (str && ret) len += strlen(ret) + strlen(str) + 2 + (fmt ? 2 + depth : 0);
            else fail = 1;
            child = child->next;
        }

        /* Try to allocate the output string */
        if (!fail) out = (char*)cJSON_malloc(len);
        if (!out) fail = 1;

        /* Handle failure: release every partial result. */
        if (fail) {
            for (i = 0; i < numentries; i++) {
                if (names[i]) cJSON_free(names[i]);
                if (entries[i]) cJSON_free(entries[i]);
            }
            cJSON_free(names);
            cJSON_free(entries);
            return 0;
        }

        /* Compose the output: */
        *out = '{';
        ptr = out + 1;
        if (fmt) *ptr++ = '\n';
        *ptr = 0;
        for (i = 0; i < numentries; i++) {
            if (fmt)
                for (j = 0; j < depth; j++) *ptr++ = '\t';
            tmplen = strlen(names[i]);
            memcpy(ptr, names[i], tmplen);
            ptr += tmplen;
            *ptr++ = ':';
            if (fmt) *ptr++ = '\t';
            strcpy(ptr, entries[i]);
            ptr += strlen(entries[i]);
            if (i != numentries - 1) *ptr++ = ',';
            if (fmt) *ptr++ = '\n';
            *ptr = 0;
            cJSON_free(names[i]);
            cJSON_free(entries[i]);
        }

        cJSON_free(names);
        cJSON_free(entries);
        if (fmt)
            for (i = 0; i < depth - 1; i++) *ptr++ = '\t';
        *ptr++ = '}';
        *ptr++ = 0;
    }
    return out;
}
这样都实现后, 字符串解析为 json 对象就实现了。
⑥序列化
序列化也就是格式化输出了。
序列化又分为格式化输出和压缩(无格式)输出两种。
/* Render a cJSON item/entity/structure to formatted text in a new string. */
char *cJSON_Print(cJSON *item)
{
    return print_value(item, 0, 1, 0);
}

/* Render a cJSON item to compact (unformatted) text in a new string. */
char *cJSON_PrintUnformatted(cJSON *item)
{
    return print_value(item, 0, 0, 0);
}

/*
 * Render a cJSON item through a print buffer.
 *
 * prebuffer - initial size of the buffer, in bytes.
 * fmt       - non-zero for formatted output, zero for compact output.
 *
 * Returns the rendered text, or 0 when the initial allocation or the
 * rendering fails.
 *
 * NOTE(review): when print_value fails, the fate of the buffer depends on
 * ensure(), which is outside this excerpt, so it is not freed here. Confirm
 * who owns it on that path.
 */
char *cJSON_PrintBuffered(cJSON *item, int prebuffer, int fmt)
{
    printbuffer p;

    p.buffer = (char*)cJSON_malloc(prebuffer);
    if (!p.buffer)
        return 0; /* memory fail: nothing to render into */
    p.length = prebuffer;
    p.offset = 0;
    return print_value(item, 0, fmt, &p);
}

/*
 * Render a value to text, dispatching on the low 8 bits of item->type.
 *
 * With a print buffer `p`, literals are written in place through ensure();
 * without one, they are duplicated with cJSON_strdup.  Numbers, strings,
 * arrays and objects are delegated to their print_* function with the same
 * `p`.  Returns the rendered text, or 0 for a NULL item, an unknown type or
 * a failed allocation.
 */
static char *print_value(cJSON *item, int depth, int fmt, printbuffer *p)
{
    char *out = 0;

    if (!item)
        return 0;

    switch ((item->type) & 255) {
    case cJSON_NULL:
        if (p) {
            out = ensure(p, 5);
            if (out) strcpy(out, "null");
        } else {
            out = cJSON_strdup("null");
        }
        break;
    case cJSON_False:
        if (p) {
            out = ensure(p, 6);
            if (out) strcpy(out, "false");
        } else {
            out = cJSON_strdup("false");
        }
        break;
    case cJSON_True:
        if (p) {
            out = ensure(p, 5);
            if (out) strcpy(out, "true");
        } else {
            out = cJSON_strdup("true");
        }
        break;
    case cJSON_Number:
        out = print_number(item, p);
        break;
    case cJSON_String:
        out = print_string(item, p);
        break;
    case cJSON_Array:
        out = print_array(item, depth, fmt, p);
        break;
    case cJSON_Object:
        out = print_object(item, depth, fmt, p);
        break;
    }
    return out;
}
假设我们要使用格式化输出, 也就是美化输出。
cJSON 的做法不是边遍历 json 边输出, 而是预先将要输出的内容全部以字符串的形式存在内存中, 最后输出整个字符串。
这对于比较大的 json 来说, 内存就是个问题了。
另外,格式化输出的缩进只依赖节点的深度, 这一点也可以优化: 一般当一行的宽度超过 80 个字符时, 就应该换到新的一行继续输出。
/*
 * Render a cJSON object to JSON text.
 *
 * item  - object whose children (item->child list) are the members.
 * depth - current nesting depth, used for indentation when formatting.
 * fmt   - non-zero for formatted output (newline per member, tab indent,
 *         tab after ':'), zero for compact output.
 * p     - when non-NULL, text is appended to this print buffer; when NULL,
 *         a new string is allocated with cJSON_malloc.
 *
 * Returns a pointer to the start of the rendered object, or 0 on failure.
 *
 * NOTE(review): ensure() and update() are defined outside this excerpt.
 * ensure(p, n) is assumed to return a write position at p->offset with room
 * for n bytes, and update(p) the offset just past the text written so far.
 * Confirm against their definitions.
 */
static char *print_object(cJSON *item, int depth, int fmt, printbuffer *p)
{
    char **entries = 0, **names = 0;
    char *out = 0, *ptr, *ret, *str;
    int len = 7, i = 0, j;
    cJSON *child = item->child;
    int numentries = 0, fail = 0;
    size_t tmplen = 0;

    /* Count the number of entries. */
    while (child) {
        numentries++;
        child = child->next;
    }

    /* Explicitly handle empty object case */
    if (!numentries) {
        if (p) out = ensure(p, fmt ? depth + 4 : 3);
        else out = (char*)cJSON_malloc(fmt ? depth + 4 : 3);
        if (!out) return 0;
        ptr = out;
        *ptr++ = '{';
        if (fmt) {
            *ptr++ = '\n';
            for (i = 0; i < depth - 1; i++) *ptr++ = '\t';
        }
        *ptr++ = '}';
        *ptr++ = 0;
        return out;
    }

    if (p) {
        /*
         * Buffered mode.  Remember where this object starts in its own
         * variable: the loop counters below must not overwrite it, because
         * the return value is computed from it at the end.
         */
        int start = p->offset;

        /* Compose the output: */
        len = fmt ? 2 : 1;
        ptr = ensure(p, len + 1);
        if (!ptr) return 0;
        *ptr++ = '{';
        if (fmt) *ptr++ = '\n';
        *ptr = 0;
        p->offset += len;

        child = item->child;
        depth++;
        while (child) {
            if (fmt) {
                ptr = ensure(p, depth);
                if (!ptr) return 0;
                for (j = 0; j < depth; j++) *ptr++ = '\t';
                p->offset += depth;
            }

            /* Member name. */
            print_string_ptr(child->string, p);
            p->offset = update(p);

            len = fmt ? 2 : 1;
            ptr = ensure(p, len);
            if (!ptr) return 0;
            *ptr++ = ':';
            if (fmt) *ptr++ = '\t';
            p->offset += len;

            /* Member value. */
            print_value(child, depth, fmt, p);
            p->offset = update(p);

            len = (fmt ? 1 : 0) + (child->next ? 1 : 0);
            ptr = ensure(p, len + 1);
            if (!ptr) return 0;
            if (child->next) *ptr++ = ',';
            if (fmt) *ptr++ = '\n';
            *ptr = 0;
            p->offset += len;

            child = child->next;
        }

        ptr = ensure(p, fmt ? (depth + 1) : 2);
        if (!ptr) return 0;
        if (fmt)
            for (j = 0; j < depth - 1; j++) *ptr++ = '\t';
        *ptr++ = '}';
        *ptr = 0;
        out = (p->buffer) + start;
    } else {
        /* Allocate space for the names and the objects */
        entries = (char**)cJSON_malloc(numentries * sizeof(char*));
        if (!entries) return 0;
        names = (char**)cJSON_malloc(numentries * sizeof(char*));
        if (!names) {
            cJSON_free(entries);
            return 0;
        }
        memset(entries, 0, sizeof(char*) * numentries);
        memset(names, 0, sizeof(char*) * numentries);

        /* Collect all the results into our arrays: */
        child = item->child;
        depth++;
        if (fmt) len += depth;
        while (child) {
            names[i] = str = print_string_ptr(child->string, 0);
            entries[i++] = ret = print_value(child, depth, fmt, 0);
            if (str && ret) len += strlen(ret) + strlen(str) + 2 + (fmt ? 2 + depth : 0);
            else fail = 1;
            child = child->next;
        }

        /* Try to allocate the output string */
        if (!fail) out = (char*)cJSON_malloc(len);
        if (!out) fail = 1;

        /* Handle failure: release every partial result. */
        if (fail) {
            for (i = 0; i < numentries; i++) {
                if (names[i]) cJSON_free(names[i]);
                if (entries[i]) cJSON_free(entries[i]);
            }
            cJSON_free(names);
            cJSON_free(entries);
            return 0;
        }

        /* Compose the output: */
        *out = '{';
        ptr = out + 1;
        if (fmt) *ptr++ = '\n';
        *ptr = 0;
        for (i = 0; i < numentries; i++) {
            if (fmt)
                for (j = 0; j < depth; j++) *ptr++ = '\t';
            tmplen = strlen(names[i]);
            memcpy(ptr, names[i], tmplen);
            ptr += tmplen;
            *ptr++ = ':';
            if (fmt) *ptr++ = '\t';
            strcpy(ptr, entries[i]);
            ptr += strlen(entries[i]);
            if (i != numentries - 1) *ptr++ = ',';
            if (fmt) *ptr++ = '\n';
            *ptr = 0;
            cJSON_free(names[i]);
            cJSON_free(entries[i]);
        }

        cJSON_free(names);
        cJSON_free(entries);
        if (fmt)
            for (i = 0; i < depth - 1; i++) *ptr++ = '\t';
        *ptr++ = '}';
        *ptr++ = 0;
    }
    return out;
}
/*
 * Render a cJSON array to JSON text.
 *
 * item  - array whose elements are the item->child list.
 * depth - current nesting depth; elements are rendered at depth + 1.
 * fmt   - non-zero adds a space after each separating comma.
 * p     - when non-NULL, text is appended to this print buffer; when NULL,
 *         a new string is allocated with cJSON_malloc.
 *
 * Returns a pointer to the start of the rendered array, or 0 on failure.
 */
static char *print_array(cJSON *item, int depth, int fmt, printbuffer *p)
{
    char **entries;
    char *out = 0, *ptr, *ret;
    int len = 5;
    cJSON *child;
    int numentries = 0, i = 0, fail = 0;
    size_t tmplen = 0;

    /* How many entries in the array? */
    for (child = item->child; child; child = child->next)
        numentries++;

    /* Explicitly handle numentries==0 */
    if (!numentries) {
        out = p ? ensure(p, 3) : (char*)cJSON_malloc(3);
        if (out)
            strcpy(out, "[]");
        return out;
    }

    if (p) {
        /* Buffered mode: append straight into the print buffer. */
        i = p->offset; /* where this array starts */
        ptr = ensure(p, 1);
        if (!ptr)
            return 0;
        *ptr = '[';
        p->offset++;

        for (child = item->child; child; child = child->next) {
            print_value(child, depth + 1, fmt, p);
            p->offset = update(p);
            if (!child->next)
                continue;

            /* Separator between elements. */
            len = fmt ? 2 : 1;
            ptr = ensure(p, len + 1);
            if (!ptr)
                return 0;
            *ptr++ = ',';
            if (fmt)
                *ptr++ = ' ';
            *ptr = 0;
            p->offset += len;
        }

        ptr = ensure(p, 2);
        if (!ptr)
            return 0;
        *ptr++ = ']';
        *ptr = 0;
        out = (p->buffer) + i;
        return out;
    }

    /* Unbuffered mode: render each element separately, then join them. */
    entries = (char**)cJSON_malloc(numentries * sizeof(char*));
    if (!entries)
        return 0;
    memset(entries, 0, numentries * sizeof(char*));

    /* Retrieve all the results; stop at the first element that fails. */
    for (child = item->child; child && !fail; child = child->next) {
        ret = print_value(child, depth + 1, fmt, 0);
        entries[i++] = ret;
        if (ret)
            len += strlen(ret) + 2 + (fmt ? 1 : 0);
        else
            fail = 1;
    }

    /* If we didn't fail, try to malloc the output string */
    if (!fail)
        out = (char*)cJSON_malloc(len);
    /* If that fails, we fail. */
    if (!out)
        fail = 1;

    /* Handle failure: release whatever was rendered so far. */
    if (fail) {
        for (i = 0; i < numentries; i++)
            if (entries[i])
                cJSON_free(entries[i]);
        cJSON_free(entries);
        return 0;
    }

    /* Compose the output array. */
    *out = '[';
    ptr = out + 1;
    *ptr = 0;
    for (i = 0; i < numentries; i++) {
        tmplen = strlen(entries[i]);
        memcpy(ptr, entries[i], tmplen);
        ptr += tmplen;
        if (i != numentries - 1) {
            *ptr++ = ',';
            if (fmt)
                *ptr++ = ' ';
            *ptr = 0;
        }
        cJSON_free(entries[i]);
    }
    cJSON_free(entries);
    *ptr++ = ']';
    *ptr++ = 0;
    return out;
}