• Linux C下的正则表达式


    <regex.h>不属于标准C(ISO C)库,而是POSIX规范定义的头文件,可在Linux以及其他符合POSIX的系统(如macOS、BSD等)上使用。

    相关结构体:

    /* Type for byte offsets within the string. POSIX mandates this. */
    typedef int regoff_t;
    
    /* Location of one match inside the searched string.  In the array
       handed to regexec(), element 0 describes the whole match and the
       following elements describe the parenthesised subexpressions, in
       order. */
    typedef struct
    {
      regoff_t rm_so;   /* Byte offset from string's start to substring's start. */
      regoff_t rm_eo;   /* Byte offset from string's start to substring's end. */
    } regmatch_t;
    
    /* Type of the character translate table.  The internal spelling
       __RE_TRANSLATE_TYPE is always defined here; the public name
       RE_TRANSLATE_TYPE is only provided when __USE_GNU is defined. */
    #ifndef RE_TRANSLATE_TYPE
    # define __RE_TRANSLATE_TYPE unsigned char *
    # ifdef __USE_GNU
    # define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE
    # endif
    #endif
    
    /* __REPB_PREFIX(name) expands to `name' when __USE_GNU is defined and
       to `__name' otherwise, so the members of struct re_pattern_buffer
       declared through it only get their plain names under __USE_GNU. */
    #ifdef __USE_GNU
    # define __REPB_PREFIX(name) name
    #else
    # define __REPB_PREFIX(name) __##name
    #endif
    
    /* Holds a compiled regular expression.  It is exposed under the name
       regex_t (see the typedef after the struct), which is the type that
       regcomp(), regexec(), regerror() and regfree() operate on.  Most
       members are declared through __REPB_PREFIX, so their visible names
       depend on whether __USE_GNU is defined. */
    struct re_pattern_buffer
    {
    /* Space that holds the compiled pattern. It is declared as
    `unsigned char *' because its elements are sometimes used as
    array indexes. */
      unsigned char *__REPB_PREFIX(buffer);
    
    /* Number of bytes to which `buffer' points. */
      unsigned long int __REPB_PREFIX(allocated);
    
    /* Number of bytes actually used in `buffer'. */
      unsigned long int __REPB_PREFIX(used);
    
    /* Syntax setting with which the pattern was compiled. */
      reg_syntax_t __REPB_PREFIX(syntax);
    
    /* Pointer to a fastmap, if any, otherwise zero. re_search uses the
    fastmap, if there is one, to skip over impossible starting points
    for matches. */
      char *__REPB_PREFIX(fastmap);
    
    /* Either a translate table to apply to all characters before
    comparing them, or zero for no translation. The translation is
    applied to a pattern when it is compiled and to a string when it
    is matched. */
      __RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
    
    /* Number of subexpressions found by the compiler.  This is the only
    member not declared through __REPB_PREFIX, so its name is the same
    with or without __USE_GNU. */
      size_t re_nsub;
    
    /* Zero if this pattern cannot match the empty string, one else.
    Well, in truth it's used only in `re_search_2', to see whether or
    not we should use the fastmap, so we don't set this absolutely
    perfectly; see `re_compile_fastmap' (the `duplicate' case). */
      unsigned __REPB_PREFIX(can_be_null) : 1;
    
    /* If REGS_UNALLOCATED, allocate space in the `regs' structure
    for `max (RE_NREGS, re_nsub + 1)' groups.
    If REGS_REALLOCATE, reallocate space if necessary.
    If REGS_FIXED, use what's there. */
      #ifdef __USE_GNU
      # define REGS_UNALLOCATED 0
      # define REGS_REALLOCATE 1
      # define REGS_FIXED 2
    #endif
      unsigned __REPB_PREFIX(regs_allocated) : 2;
    
    /* Set to zero when `regex_compile' compiles a pattern; set to one
    by `re_compile_fastmap' if it updates the fastmap. */
      unsigned __REPB_PREFIX(fastmap_accurate) : 1;
    
    /* If set, `re_match_2' does not return information about
    subexpressions. */
      unsigned __REPB_PREFIX(no_sub) : 1;
    
    /* If set, a beginning-of-line anchor doesn't match at the beginning
    of the string. */
      unsigned __REPB_PREFIX(not_bol) : 1;
    
    /* Similarly for an end-of-line anchor. */
      unsigned __REPB_PREFIX(not_eol) : 1;
    
    /* If true, an anchor at a newline matches. */
      unsigned __REPB_PREFIX(newline_anchor) : 1;
    };
    
    /* POSIX-facing name for the compiled-pattern structure. */
    typedef struct re_pattern_buffer regex_t;

     Linux C 中使用正则表达式(regex)的一般步骤:

    编译  regcomp()
    匹配  regexec()
    释放  regfree()

    相关API函数:

    /* Compile `pattern' into `preg'; `cflags' selects options such as REG_EXTENDED. */
    int regcomp(regex_t *preg, const char *pattern, int cflags);             // compile
    /* Match `string' against the compiled `preg'; returns zero on a match and
       fills up to `nmatch' entries of `pmatch' with the match positions. */
    int regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);   // match
    /* Presumably renders `errcode' as a message into `errbuf' (at most
       `errbuf_size' bytes) -- see the POSIX regerror() description to confirm. */
    size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size);
    /* Release the contents of a compiled `preg'. */
    void regfree(regex_t *preg);        // free

    /* Compiled-pattern object. The type declared by <regex.h> is regex_t
       (there is no type called `regex'). */
    regex_t regHead;

    // Compile: "(.?)xml" is "xml" preceded by an optional single character,
    // captured as subexpression 1. regcomp() returns non-zero on failure.

    regcomp(&regHead, "(.?)xml", REG_EXTENDED);

    static CHAR str[MAX_STR_LINE];

    // pmatch[0] receives the whole match, pmatch[1] the "(.?)" group.
    regmatch_t pmatch[2];

    // Match: the expression below is zero-valued (i.e. the comparison is true)
    // when str matches the compiled pattern.

    regexec(&regHead, str, 2, pmatch, 0) == 0

    regmatch_t 是一个结构体数据类型,在regex.h中定义:成员rm_so 存放匹配文本串在目标串中的开始位置,rm_eo 存放结束位置。

    通常我们以数组的形式定义一组这样的结构。因为往往我们的正则表达式中还包含子正则表达式

    str是目标文本串。

    2代表数组pmatch的元素个数。数组的0单元存放主正则表达式的匹配位置,后边的单元依次存放各子正则表达式的匹配位置;子正则表达式就是用圆括号包起来的部分表达式。

    pmatch[0].rm_so和pmatch[0].rm_eo代表主正则表达式的起止位置(从x的前一个字符  到  字符l的后一个字符 ),pmatch[1].rm_so和pmatch[1].rm_eo代表子正则表达式的起止位置(从x的前一个字符  到  字符x)。

     // Cleanup: release the contents of a compiled regex_t, either when it is
     // no longer needed or before compiling another pattern into it.

    void regfree (regex_t *compiled)

    当我们使用完编译好的正则表达式后,或者要重新编译其他正则表达式的时候,我们可以用这个函数清空compiled指向的regex_t结构体的内容,请记住,如果是重新编译的话,一定要先清空regex_t结构体。

    /*
     * return zero if the regular expression matches; otherwise, it returns a nonzero value.
     * MSGDEF regular one preChar, otherwise regular afterStr in "".
     * pmatch[0].rm_so, pmatch[0].rm_eo represent all subStr's start and end[close&open rule] without first blank space.
     * pmatch[n].rm_so, pmatch[n].rm_eo represent one subStr's start and end[close&open rule] if n bigger than zero.
     */

     注意: regexec返回的是从最左边开始的最长匹配;当模式中含有贪婪量词(如 .*)时,匹配会尽量向后扩展,因此看起来像是匹配到了满足条件的最后一个str。而strstr返回的是第一次出现的str,在使用strstr实现同样功能的时候应注意这一差异。

     参考文献:

    1. C语言用regcomp、regexec、regfree和regerror函数实现正则表达式校验 

    2. C语言正则表达式详解 regcomp() regexec() regfree()用法详解

  • 相关阅读:
    归并排序算法
    交换排序算法
    插入排序算法
    DASCTF2021五月赛
    第二届newsctf
    山西省赛
    2021广东省第一届网络安全竞赛
    2021 DozerCTF
    2021-HSCTF re
    buuctf-re (持续更新)
  • 原文地址:https://www.cnblogs.com/Lunais/p/13163138.html
Copyright © 2020-2023  润新知