• 前端编译原理 parser.js源码解读


    前面已经介绍了一个jison的使用,在正常开发中其实已经够用下,下面主要是看了下parser.js代码解读下,作为一些了解。

         下面以最简单的文法产生的parser做一些代码注释

    下面是一些注释,标示了编译过程,能够了解jison产生的编译器有哪些处理

    var a = (function() {
        var o = function(k, v, o, l) {
            for (o = o || {}, l = k.length; l--; o[k[l]] = v);
            return o;
        };
        var parser = {
            trace: function trace() {},
            //编译期共享对象
            yy: {},
            //标示符,$accept: 0, $end: 1,error: 2是默认的符号,E: 3, NAT: 4是分析文法获得的
            symbols_: { error: 2, E: 3, NAT: 4, $accept: 0, $end: 1 },
            //终结符
            terminals_: { 2: 'error', 4: 'NAT' },
            //对应文法规则
            productions_: [
                0,  // e` => E $end
                [
                    3, //规约成符号2(E) 
                    1 //堆栈移除一个状态即移除NAT,压入E
                ] // E => NAT
            ],
            //规约的时候执行的代码
            performAction: function anonymous(
                yytext,
                yyleng,
                yylineno,
                yy,
                yystate /* action[1] */,
                $$ /* vstack */,
                _$ /* lstack */
            ) {
                var $0 = $$.length - 1;
                switch (yystate) {
                    case 1:
                        this.$ = $$[$0] * 1;
                        break;
                }
            },
            //移入规约自动机 lalr(1)分析表,每一值表示一个状态
            table: [
                /*状态0 I0*/{ 3: 1/**goto 遇到符号3(E),跳转到状态1 */, 4: [1, 2] /**移入,遇到符号4(NAT),移入进入状态2 */}, 
                /*状态1 I1*/{ 1: [3] /**接受 遇到符号1($end),结束程序,完成编译 */}, 
                /*状态2 I2*/{ 1: [2, 1] /**规约 遇到符号1($end),按照productions[1],进行规则 */ }
            ],
            defaultActions: { 
                2: [2, 1] //状态2进行规则规约,不用向前看符号是什么(逻辑优化)
            },
            //错误提示
            parseError: function parseError(str, hash) {
                if (hash.recoverable) {
                    this.trace(str);
                } else {
                    var error = new Error(str);
                    error.hash = hash;
                    throw error;
                }
            },
            //编译 input为处理的字符串
            parse: function parse(input) {
                var self = this,
                    stack = [0],
                    tstack = [],
                    vstack = [null],
                    lstack = [],
                    table = this.table,
                    yytext = '',
                    yylineno = 0,
                    yyleng = 0,
                    recovering = 0,
                    TERROR = 2,
                    EOF = 1;
                var args = lstack.slice.call(arguments, 1);
                var lexer = Object.create(this.lexer);
                var sharedState = { yy: {} };
                for (var k in this.yy) {
                    if (Object.prototype.hasOwnProperty.call(this.yy, k)) {
                        sharedState.yy[k] = this.yy[k];
                    }
                }
                lexer.setInput(input, sharedState.yy);
                sharedState.yy.lexer = lexer;
                sharedState.yy.parser = this;
                if (typeof lexer.yylloc === 'undefined') {
                    lexer.yylloc = {};
                }
                var yyloc = lexer.yylloc;
                lstack.push(yyloc);
                //位置信息采用区间的形式显示
                var ranges = lexer.options && lexer.options.ranges;
                if (typeof sharedState.yy.parseError === 'function') {
                    this.parseError = sharedState.yy.parseError;
                } else {
                    this.parseError = Object.getPrototypeOf(this).parseError;
                }
                //规约的时候需要弹出堆栈
                function popStack(n) {
                    //stack [0,标识符,状态,标识符,状态...],所以出栈是2倍
                    stack.length = stack.length - 2 * n;
                    vstack.length = vstack.length - n;
                    lstack.length = lstack.length - n;
                }
                //词法分析,返回token即标示符
                var lex = function() {
                    var token;
                    token = lexer.lex() || EOF;
                    if (typeof token !== 'number') {
                        token = self.symbols_[token] || token;
                    }
                    return token;
                };
                var symbol,
                    preErrorSymbol,
                    state,
                    action,
                    a,
                    r,
                    yyval = {},
                    p,
                    len,
                    newState,
                    expected;
                while (true) {
                    state = stack[stack.length - 1];
                    if (this.defaultActions[state]) {
                        action = this.defaultActions[state];
                    } else {
                        if (symbol === null || typeof symbol === 'undefined') {
                            //通过正则获取下一个标示符
                            symbol = lex();
                        }
                        //action对应的动作
                        action = table[state] && table[state][symbol];
                    }
                    //动作错误,语法分析错误
                    if (typeof action === 'undefined' || !action.length || !action[0]) {
                        var errStr = '';
                        expected = [];
                        for (p in table[state]) {
                            if (this.terminals_[p] && p > TERROR) {
                                expected.push("'" + this.terminals_[p] + "'");
                            }
                        }
                        if (lexer.showPosition) {
                            errStr =
                                'Parse error on line ' +
                                (yylineno + 1) +
                                ':
    ' +
                                lexer.showPosition() +
                                '
    Expecting ' +
                                expected.join(', ') +
                                ", got '" +
                                (this.terminals_[symbol] || symbol) +
                                "'";
                        } else {
                            errStr =
                                'Parse error on line ' +
                                (yylineno + 1) +
                                ': Unexpected ' +
                                (symbol == EOF ? 'end of input' : "'" + (this.terminals_[symbol] || symbol) + "'");
                        }
                        this.parseError(errStr, {
                            text: lexer.match,
                            token: this.terminals_[symbol] || symbol,
                            line: lexer.yylineno,
                            loc: yyloc,
                            expected: expected
                        });
                    }
                    //规约-规约冲突,移入规约冲突
                    if (action[0] instanceof Array && action.length > 1) {
                        throw new Error('Parse Error: multiple actions possible at state: ' + state + ', token: ' + symbol);
                    }
                    switch (action[0]) {
                        //移入
                        case 1:
                            stack.push(symbol);
                            vstack.push(lexer.yytext);
                            lstack.push(lexer.yylloc);
                            stack.push(action[1]);
                            symbol = null;
                            if (!preErrorSymbol) {
                                yyleng = lexer.yyleng;
                                yytext = lexer.yytext;
                                yylineno = lexer.yylineno;
                                yyloc = lexer.yylloc;
                                if (recovering > 0) {
                                    recovering--;
                                }
                            } else {
                                symbol = preErrorSymbol;
                                preErrorSymbol = null;
                            }
                            break;
                        //规约
                        case 2:
                            len = this.productions_[action[1]][1];
                            yyval.$ = vstack[vstack.length - len];
                            yyval._$ = {
                                first_line: lstack[lstack.length - (len || 1)].first_line,
                                last_line: lstack[lstack.length - 1].last_line,
                                first_column: lstack[lstack.length - (len || 1)].first_column,
                                last_column: lstack[lstack.length - 1].last_column
                            };
                            if (ranges) {
                                yyval._$.range = [
                                    lstack[lstack.length - (len || 1)].range[0],
                                    lstack[lstack.length - 1].range[1]
                                ];
                            }
                            //执行文法定义的代码
                            r = this.performAction.apply(
                                yyval,
                                [yytext, yyleng, yylineno, sharedState.yy, action[1], vstack, lstack].concat(args)
                            );
                            if (typeof r !== 'undefined') {
                                return r;
                            }
                            if (len) {
                                stack = stack.slice(0, -1 * len * 2);
                                vstack = vstack.slice(0, -1 * len);
                                lstack = lstack.slice(0, -1 * len);
                            }
                            stack.push(this.productions_[action[1]][0]);
                            vstack.push(yyval.$);
                            lstack.push(yyval._$);
                            newState = table[stack[stack.length - 2]][stack[stack.length - 1]];
                            stack.push(newState);
                            break;
                        //接受
                        case 3:
                            return true;
                    }
                }
                return true;
            }
        };
        /* 词法分析 */
        var lexer = (function() {
            var lexer = {
                EOF: 1,
    
                parseError: function parseError(str, hash) {
                    if (this.yy.parser) {
                        this.yy.parser.parseError(str, hash);
                    } else {
                        throw new Error(str);
                    }
                },
    
                // resets the lexer, sets new input
                setInput: function(input, yy) {
                    this.yy = yy || this.yy || {};
                    this._input = input;
                    this._more = this._backtrack = this.done = false;
                    this.yylineno = this.yyleng = 0;
                    this.yytext = this.matched = this.match = '';
                    this.conditionStack = ['INITIAL'];
                    this.yylloc = {
                        first_line: 1,
                        first_column: 0,
                        last_line: 1,
                        last_column: 0
                    };
                    if (this.options.ranges) {
                        this.yylloc.range = [0, 0];
                    }
                    this.offset = 0;
                    return this;
                },
                input: function() {
                    var ch = this._input[0];
                    this.yytext += ch;
                    this.yyleng++;
                    this.offset++;
                    this.match += ch;
                    this.matched += ch;
                    var lines = ch.match(/(?:
    ?|
    ).*/g);
                    if (lines) {
                        this.yylineno++;
                        this.yylloc.last_line++;
                    } else {
                        this.yylloc.last_column++;
                    }
                    if (this.options.ranges) {
                        this.yylloc.range[1]++;
                    }
    
                    this._input = this._input.slice(1);
                    return ch;
                },
                unput: function(ch) {
                    var len = ch.length;
                    var lines = ch.split(/(?:
    ?|
    )/g);
    
                    this._input = ch + this._input;
                    this.yytext = this.yytext.substr(0, this.yytext.length - len);
                    //this.yyleng -= len;
                    this.offset -= len;
                    var oldLines = this.match.split(/(?:
    ?|
    )/g);
                    this.match = this.match.substr(0, this.match.length - 1);
                    this.matched = this.matched.substr(0, this.matched.length - 1);
    
                    if (lines.length - 1) {
                        this.yylineno -= lines.length - 1;
                    }
                    var r = this.yylloc.range;
    
                    this.yylloc = {
                        first_line: this.yylloc.first_line,
                        last_line: this.yylineno + 1,
                        first_column: this.yylloc.first_column,
                        last_column: lines
                            ? (lines.length === oldLines.length ? this.yylloc.first_column : 0) +
                              oldLines[oldLines.length - lines.length].length -
                              lines[0].length
                            : this.yylloc.first_column - len
                    };
    
                    if (this.options.ranges) {
                        this.yylloc.range = [r[0], r[0] + this.yyleng - len];
                    }
                    this.yyleng = this.yytext.length;
                    return this;
                },
                more: function() {
                    this._more = true;
                    return this;
                },
                reject: function() {
                    if (this.options.backtrack_lexer) {
                        this._backtrack = true;
                    } else {
                        return this.parseError(
                            'Lexical error on line ' +
                                (this.yylineno + 1) +
                                '. You can only invoke reject() in the lexer when the lexer is of the backtracking persuasion (options.backtrack_lexer = true).
    ' +
                                this.showPosition(),
                            {
                                text: '',
                                token: null,
                                line: this.yylineno
                            }
                        );
                    }
                    return this;
                },
                less: function(n) {
                    this.unput(this.match.slice(n));
                },
                pastInput: function() {
                    var past = this.matched.substr(0, this.matched.length - this.match.length);
                    return (past.length > 20 ? '...' : '') + past.substr(-20).replace(/
    /g, '');
                },
                upcomingInput: function() {
                    var next = this.match;
                    if (next.length < 20) {
                        next += this._input.substr(0, 20 - next.length);
                    }
                    return (next.substr(0, 20) + (next.length > 20 ? '...' : '')).replace(/
    /g, '');
                },
                showPosition: function() {
                    var pre = this.pastInput();
                    var c = new Array(pre.length + 1).join('-');
                    return pre + this.upcomingInput() + '
    ' + c + '^';
                },
                test_match: function(match, indexed_rule) {
                    var token, lines, backup;
    
                    if (this.options.backtrack_lexer) {
                        // save context
                        backup = {
                            yylineno: this.yylineno,
                            yylloc: {
                                first_line: this.yylloc.first_line,
                                last_line: this.last_line,
                                first_column: this.yylloc.first_column,
                                last_column: this.yylloc.last_column
                            },
                            yytext: this.yytext,
                            match: this.match,
                            matches: this.matches,
                            matched: this.matched,
                            yyleng: this.yyleng,
                            offset: this.offset,
                            _more: this._more,
                            _input: this._input,
                            yy: this.yy,
                            conditionStack: this.conditionStack.slice(0),
                            done: this.done
                        };
                        if (this.options.ranges) {
                            backup.yylloc.range = this.yylloc.range.slice(0);
                        }
                    }
    
                    lines = match[0].match(/(?:
    ?|
    ).*/g);
                    if (lines) {
                        this.yylineno += lines.length;
                    }
                    this.yylloc = {
                        first_line: this.yylloc.last_line,
                        last_line: this.yylineno + 1,
                        first_column: this.yylloc.last_column,
                        last_column: lines
                            ? lines[lines.length - 1].length - lines[lines.length - 1].match(/
    ?
    ?/)[0].length
                            : this.yylloc.last_column + match[0].length
                    };
                    this.yytext += match[0];
                    this.match += match[0];
                    this.matches = match;
                    this.yyleng = this.yytext.length;
                    if (this.options.ranges) {
                        this.yylloc.range = [this.offset, (this.offset += this.yyleng)];
                    }
                    this._more = false;
                    this._backtrack = false;
                    this._input = this._input.slice(match[0].length);
                    this.matched += match[0];
                    token = this.performAction.call(
                        this,
                        this.yy,
                        this,
                        indexed_rule,
                        this.conditionStack[this.conditionStack.length - 1]
                    );
                    if (this.done && this._input) {
                        this.done = false;
                    }
                    if (token) {
                        return token;
                    } else if (this._backtrack) {
                        for (var k in backup) {
                            this[k] = backup[k];
                        }
                        return false;
                    }
                    return false;
                },
                next: function() {
                    if (this.done) {
                        return this.EOF;
                    }
                    if (!this._input) {
                        this.done = true;
                    }
    
                    var token, match, tempMatch, index;
                    if (!this._more) {
                        this.yytext = '';
                        this.match = '';
                    }
                    //获取词法规则
                    //没看出来conditionStack有什么用,可能跟我的文法定义比较简单,没有使用一些内置的begin,pushState, popState之类的有点关系
                    var rules = this._currentRules();
                    for (var i = 0; i < rules.length; i++) {
                        tempMatch = this._input.match(this.rules[rules[i]]);
                        if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {
                            match = tempMatch;
                            index = i;
                            //backtrack_lexer,会先尝试返回token,如果想尝试下面的规则的话,通过reject()方法
                            if (this.options.backtrack_lexer) {
                                token = this.test_match(tempMatch, rules[i]);
                                if (token !== false) {
                                    return token;
                                } else if (this._backtrack) {
                                    match = false;
                                    continue;
                                } else {
                                    return false;
                                }
                            //flex 设置true 会匹配符合正则多的文本
                            //flex 设置false 根据文法顺序返回结果
                            } else if (!this.options.flex) {
                                break;
                            }
                        }
                    }
                    if (match) {
                        token = this.test_match(match, rules[index]);
                        if (token !== false) {
                            return token;
                        }
                        return false;
                    }
                    if (this._input === '') {
                        return this.EOF;
                    } else {
                        //输入字符串不存在词法对应
                        return this.parseError(
                            'Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.
    ' + this.showPosition(),
                            {
                                text: '',
                                token: null,
                                line: this.yylineno
                            }
                        );
                    }
                },
                lex: function lex() {
                    var r = this.next();
                    if (r) {
                        return r;
                    } else {
                        return this.lex();
                    }
                },
                begin: function begin(condition) {
                    this.conditionStack.push(condition);
                },
                popState: function popState() {
                    var n = this.conditionStack.length - 1;
                    if (n > 0) {
                        return this.conditionStack.pop();
                    } else {
                        return this.conditionStack[0];
                    }
                },
                _currentRules: function _currentRules() {
                    if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) {
                        return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules;
                    } else {
                        return this.conditions['INITIAL'].rules;
                    }
                },
                topState: function topState(n) {
                    n = this.conditionStack.length - 1 - Math.abs(n || 0);
                    if (n >= 0) {
                        return this.conditionStack[n];
                    } else {
                        return 'INITIAL';
                    }
                },
                pushState: function pushState(condition) {
                    this.begin(condition);
                },
                stateStackSize: function stateStackSize() {
                    return this.conditionStack.length;
                },
                options: {},
                performAction: function anonymous(yy, yy_, $avoiding_name_collisions, YY_START) {
                    var YYSTATE = YY_START;
                    switch ($avoiding_name_collisions) {
                        case 0 /* skip whitespace */:
                            break;
                        case 1:
                            return 4;
                            break;
                        case 2:
                            return '+';
                            break;
                    }
                },
                //词法中的规则
                rules: [
                    /^(?:s+)/, 
                    /^(?:[0-9]+)/, 
                    /^(?:+)/
                ],
                conditions: { 
                    INITIAL: { rules: [0, 1, 2], inclusive: true } 
                }
            };
            return lexer;
        })();
        parser.lexer = lexer;
        function Parser() {
            this.yy = {};
        }
        Parser.prototype = parser;
        parser.Parser = Parser;
        return new Parser();
    })();

         

  • 相关阅读:
    oracle常用命令(比较常见好用)
    vim编辑器详解
    对话框和打印控件
    winform弹出唯一的窗体
    ListView
    菜单栏和布局
    窗体属性和公共控件
    ASPCMS和WPF
    MVC
    正则表达式
  • 原文地址:https://www.cnblogs.com/legu/p/11320976.html
Copyright © 2020-2023  润新知