<regex.h> 不属于 ISO C 标准库,而是 POSIX 标准定义的头文件,可在 Linux 及其他兼容 POSIX 的系统(如 macOS、BSD)上使用。
相关结构体:
/* Type for byte offsets within the string. POSIX mandates this. */ typedef int regoff_t; typedef struct { regoff_t rm_so; /* Byte offset from string's start to substring's start. */ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ } regmatch_t; #ifndef RE_TRANSLATE_TYPE # define __RE_TRANSLATE_TYPE unsigned char * # ifdef __USE_GNU # define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE # endif #endif #ifdef __USE_GNU # define __REPB_PREFIX(name) name #else # define __REPB_PREFIX(name) __##name #endif struct re_pattern_buffer { /* Space that holds the compiled pattern. It is declared as `unsigned char *' because its elements are sometimes used as array indexes. */ unsigned char *__REPB_PREFIX(buffer); /* Number of bytes to which `buffer' points. */ unsigned long int __REPB_PREFIX(allocated); /* Number of bytes actually used in `buffer'. */ unsigned long int __REPB_PREFIX(used); /* Syntax setting with which the pattern was compiled. */ reg_syntax_t __REPB_PREFIX(syntax); /* Pointer to a fastmap, if any, otherwise zero. re_search uses the fastmap, if there is one, to skip over impossible starting points for matches. */ char *__REPB_PREFIX(fastmap); /* Either a translate table to apply to all characters before comparing them, or zero for no translation. The translation is applied to a pattern when it is compiled and to a string when it is matched. */ __RE_TRANSLATE_TYPE __REPB_PREFIX(translate); /* Number of subexpressions found by the compiler. */ size_t re_nsub; /* Zero if this pattern cannot match the empty string, one else. Well, in truth it's used only in `re_search_2', to see whether or not we should use the fastmap, so we don't set this absolutely perfectly; see `re_compile_fastmap' (the `duplicate' case). */ unsigned __REPB_PREFIX(can_be_null) : 1; /* If REGS_UNALLOCATED, allocate space in the `regs' structure for `max (RE_NREGS, re_nsub + 1)' groups. If REGS_REALLOCATE, reallocate space if necessary. If REGS_FIXED, use what's there. 
*/ #ifdef __USE_GNU # define REGS_UNALLOCATED 0 # define REGS_REALLOCATE 1 # define REGS_FIXED 2 #endif unsigned __REPB_PREFIX(regs_allocated) : 2; /* Set to zero when `regex_compile' compiles a pattern; set to one by `re_compile_fastmap' if it updates the fastmap. */ unsigned __REPB_PREFIX(fastmap_accurate) : 1; /* If set, `re_match_2' does not return information about subexpressions. */ unsigned __REPB_PREFIX(no_sub) : 1; /* If set, a beginning-of-line anchor doesn't match at the beginning of the string. */ unsigned __REPB_PREFIX(not_bol) : 1; /* Similarly for an end-of-line anchor. */ unsigned __REPB_PREFIX(not_eol) : 1; /* If true, an anchor at a newline matches. */ unsigned __REPB_PREFIX(newline_anchor) : 1; }; typedef struct re_pattern_buffer regex_t;
在 Linux C 中使用 regex 的一般步骤:
编译 regcomp()
匹配 regexec()
释放 regfree()
相关API函数:
int regcomp(regex_t *preg, const char *pattern, int cflags); //编译 int regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); //匹配 size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size); void regfree(regex_t *preg); //释放
regex_t regHead;
//编译
regcomp(&regHead, "(.?)xml", REG_EXTENDED);
static CHAR str[MAX_STR_LINE];
regmatch_t pmatch[2];
//匹配
regexec(&regHead, str, 2, pmatch, 0) == 0
regmatch_t 是一个结构体数据类型,在regex.h中定义:成员rm_so 存放匹配文本串在目标串中的开始位置,rm_eo 存放结束位置。
通常我们以数组的形式定义一组这样的结构。因为往往我们的正则表达式中还包含子正则表达式。
str是目标文本串。
2代表数组pmatch的元素个数,数组0单元存放主正则表达式位置,后边的单元依次存放子正则表达式位置,子正则表达式就是用圆括号包起来的部分表达式。
pmatch[0].rm_so和pmatch[0].rm_eo代表主正则表达式的起止位置(从x的前一个字符 到 字符l的后一个字符,即左闭右开区间 [rm_so, rm_eo)),pmatch[1].rm_so和pmatch[1].rm_eo代表子正则表达式的起止位置(从x的前一个字符 到 字符x,同样不包含rm_eo所指的字符)。
//清除
void regfree (regex_t *compiled)
当我们使用完编译好的正则表达式后,或者要重新编译其他正则表达式的时候,我们可以用这个函数清空compiled指向的regex_t结构体的内容,请记住,如果是重新编译的话,一定要先清空regex_t结构体。
/* * return zero if the regular expression matches; otherwise, it returns a nonzero value. * MSGDEF regular one preChar, otherwise regular afterStr in "". * pmatch[0].rm_so, pmatch[0].rm_eo represent all subStr's start and end[close&open rule] without first blank space. * pmatch[n].rm_so, pmatch[n].rm_eo represent one subStr's start and end[close&open rule] if n bigger than zero. */
注: regexec返回的是目标串中最靠左(最先出现)的匹配,同一起点有多种匹配时取最长的一个;如果模式以“.*”这类贪婪前缀开头,后面的部分就会落在最后一个满足条件的str上。在使用strstr实现的时候应注意,strstr总是匹配第一个str。
参考文献: