• Linux C下的正则表达式


    <regex.h>不是ISO C标准库的一部分,而是POSIX标准定义的头文件,可在Linux以及其他符合POSIX的系统(如macOS、BSD)上使用。

    相关结构体:

    /* Type for byte offsets within the string. POSIX mandates this. */
    typedef int regoff_t;
    
    typedef struct
    {
      regoff_t rm_so;   /* Byte offset from string's start to substring's start. */
      regoff_t rm_eo;   /* Byte offset from string's start to substring's end. */
    } regmatch_t;
    
    #ifndef RE_TRANSLATE_TYPE
    # define __RE_TRANSLATE_TYPE unsigned char *
    # ifdef __USE_GNU
    # define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE
    # endif
    #endif
    
    #ifdef __USE_GNU
    # define __REPB_PREFIX(name) name
    #else
    # define __REPB_PREFIX(name) __##name
    #endif
    
    struct re_pattern_buffer
    {
    /* Space that holds the compiled pattern. It is declared as
    `unsigned char *' because its elements are sometimes used as
    array indexes. */
      unsigned char *__REPB_PREFIX(buffer);
    
    /* Number of bytes to which `buffer' points. */
      unsigned long int __REPB_PREFIX(allocated);
    
    /* Number of bytes actually used in `buffer'. */
      unsigned long int __REPB_PREFIX(used);
    
    /* Syntax setting with which the pattern was compiled. */
      reg_syntax_t __REPB_PREFIX(syntax);
    
    /* Pointer to a fastmap, if any, otherwise zero. re_search uses the
    fastmap, if there is one, to skip over impossible starting points
    for matches. */
      char *__REPB_PREFIX(fastmap);
    
    /* Either a translate table to apply to all characters before
    comparing them, or zero for no translation. The translation is
    applied to a pattern when it is compiled and to a string when it
    is matched. */
      __RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
    
    /* Number of subexpressions found by the compiler. */
      size_t re_nsub;
    
    /* Zero if this pattern cannot match the empty string, one else.
    Well, in truth it's used only in `re_search_2', to see whether or
    not we should use the fastmap, so we don't set this absolutely
    perfectly; see `re_compile_fastmap' (the `duplicate' case). */
      unsigned __REPB_PREFIX(can_be_null) : 1;
    
    /* If REGS_UNALLOCATED, allocate space in the `regs' structure
    for `max (RE_NREGS, re_nsub + 1)' groups.
    If REGS_REALLOCATE, reallocate space if necessary.
    If REGS_FIXED, use what's there. */
      #ifdef __USE_GNU
      # define REGS_UNALLOCATED 0
      # define REGS_REALLOCATE 1
      # define REGS_FIXED 2
    #endif
      unsigned __REPB_PREFIX(regs_allocated) : 2;
    
    /* Set to zero when `regex_compile' compiles a pattern; set to one
    by `re_compile_fastmap' if it updates the fastmap. */
      unsigned __REPB_PREFIX(fastmap_accurate) : 1;
    
    /* If set, `re_match_2' does not return information about
    subexpressions. */
      unsigned __REPB_PREFIX(no_sub) : 1;
    
    /* If set, a beginning-of-line anchor doesn't match at the beginning
    of the string. */
      unsigned __REPB_PREFIX(not_bol) : 1;
    
    /* Similarly for an end-of-line anchor. */
      unsigned __REPB_PREFIX(not_eol) : 1;
    
    /* If true, an anchor at a newline matches. */
      unsigned __REPB_PREFIX(newline_anchor) : 1;
    };
    
    typedef struct re_pattern_buffer regex_t;

     Linux C 使用regex的一般步骤:

    编译  regcomp()
    匹配  regexec()
    释放  regfree()

    相关API函数:

    int regcomp(regex_t *preg, const char *pattern, int cflags);             //编译
    int regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);   //匹配
    size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size);
    void regfree(regex_t *preg);        //释放

    regex_t regHead;

    //编译

    regcomp(&regHead, "(.?)xml", REG_EXTENDED);

    static CHAR str[MAX_STR_LINE];

    regmatch_t pmatch[2];

    //匹配

    regexec(&regHead, str, 2, pmatch, 0) == 0

    regmatch_t 是一个结构体数据类型,在regex.h中定义:成员rm_so 存放匹配文本串在目标串中的开始位置,rm_eo 存放结束位置。

    通常我们以数组的形式定义一组这样的结构。因为往往我们的正则表达式中还包含子正则表达式

    str是目标文本串。

    2代表数组pmatch的元素个数。数组0单元存放主正则表达式的位置,后边的单元依次存放各子正则表达式的位置,子正则表达式就是用圆括号包起来的部分表达式。

    pmatch[0].rm_so和pmatch[0].rm_eo代表主正则表达式的起止位置(从x的前一个字符  到  字符l的后一个字符 ),pmatch[1].rm_so和pmatch[1].rm_eo代表子正则表达式的起止位置(从x的前一个字符  到  字符x)。注意rm_eo是匹配结束处下一个字节的偏移,即匹配区间为左闭右开的[rm_so, rm_eo)。

     //清除

    void regfree (regex_t *compiled)

    当我们使用完编译好的正则表达式后,或者要重新编译其他正则表达式的时候,我们可以用这个函数清空compiled指向的regex_t结构体的内容,请记住,如果是重新编译的话,一定要先清空regex_t结构体。

    /*
     * return zero if the regular expression matches; otherwise, it returns a nonzero value.
     * MSGDEF regular one preChar, otherwise regular afterStr in "".
     * pmatch[0].rm_so, pmatch[0].rm_eo represent all subStr's start and end[close&open rule] without first blank space.
     * pmatch[n].rm_so, pmatch[n].rm_eo represent one subStr's start and end[close&open rule] if n bigger than zero.
     */

     注意: regexec按POSIX规则返回最左、最长的匹配;当模式以贪婪量词(如 .* )开头时,匹配结果会延伸到满足条件的最后一个str。而strstr匹配的是第一个str,在使用strstr实现同样功能时应注意这一差别。

     参考文献:

    1. C语言用regcomp、regexec、regfree和regerror函数实现正则表达式校验 

    2. C语言正则表达式详解 regcomp() regexec() regfree()用法详解

  • 相关阅读:
    HDU 1501 Zipper(DFS)
    HDU 2181 哈密顿绕行世界问题(DFS)
    HDU 1254 推箱子(BFS)
    HDU 1045 Fire Net (DFS)
    HDU 2212 DFS
    HDU 1241Oil Deposits (DFS)
    HDU 1312 Red and Black (DFS)
    HDU 1010 Tempter of the Bone(DFS+奇偶剪枝)
    HDU 1022 Train Problem I(栈)
    HDU 1008 u Calculate e
  • 原文地址:https://www.cnblogs.com/Lunais/p/13163138.html
Copyright © 2020-2023  润新知