Compilation starts in Driver.compile(). Once the statement has been parsed into an ASTNode, the Driver obtains a semantic analyzer from SemanticAnalyzerFactory and loads any semantic-analyzer hooks configured via hive.semantic.analyzer.hook:
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
List<HiveSemanticAnalyzerHook> saHooks =
    getHooks(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK, HiveSemanticAnalyzerHook.class);
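getHooks() resolves the comma-separated class names stored under hive.semantic.analyzer.hook and instantiates each one reflectively. A minimal sketch of the idea, using only the public HiveConf API (this is not Hive's actual helper, which also handles classloaders and wraps errors properly):

// Sketch only: approximates what Driver.getHooks() does for
// hive.semantic.analyzer.hook; error handling deliberately omitted.
static List<HiveSemanticAnalyzerHook> loadSaHooks(HiveConf conf) throws Exception {
  List<HiveSemanticAnalyzerHook> hooks = new ArrayList<>();
  String csv = conf.getVar(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK);
  if (csv == null || csv.trim().isEmpty()) {
    return hooks;
  }
  for (String clsName : csv.split(",")) {
    hooks.add((HiveSemanticAnalyzerHook) Class.forName(clsName.trim())
        .getDeclaredConstructor().newInstance());
  }
  return hooks;
}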
if (saHooks != null && !saHooks.isEmpty()) {
  HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
  hookCtx.setConf(conf);
  hookCtx.setUserName(userName);
  hookCtx.setIpAddress(SessionState.get().getUserIpAddress());
  hookCtx.setCommand(command);
  for (HiveSemanticAnalyzerHook hook : saHooks) {
    tree = hook.preAnalyze(hookCtx, tree);
  }
  sem.analyze(tree, ctx);
  hookCtx.update(sem);
  for (HiveSemanticAnalyzerHook hook : saHooks) {
    hook.postAnalyze(hookCtx, sem.getAllRootTasks());
  }
} else {
  sem.analyze(tree, ctx);
}
// Record any ACID compliant FileSinkOperators we saw so we can add our transaction ID to
// them later.
acidSinks = sem.getAcidFileSinks();

LOG.info("Semantic Analysis Completed");

// validate the plan
sem.validate();
acidInQuery = sem.hasAcidInQuery();
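The preAnalyze/postAnalyze calls above are the public extension point: a hook may inspect or even rewrite the AST before analysis, and examine the generated root tasks afterwards. A minimal illustrative hook (the class name and logging are invented; the base class and method signature are Hive's):

import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Illustrative hook: logs each command before analysis and returns the tree
// unchanged. Enable it by listing the class under hive.semantic.analyzer.hook.
public class CommandLoggingHook extends AbstractSemanticAnalyzerHook {
  @Override
  public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast)
      throws SemanticException {
    System.err.println("About to analyze: " + context.getCommand());
    return ast; // returning a rewritten ASTNode here would change the query
  }
}

The SemanticAnalyzerFactory.get() call at the top of compile() is where the analyzer itself is chosen; it dispatches on the root token type of the AST: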
public static BaseSemanticAnalyzer get(QueryState queryState, ASTNode tree)
    throws SemanticException {
  if (tree.getToken() == null) {
    throw new RuntimeException("Empty Syntax Tree");
  } else {
    HiveOperation opType = commandType.get(tree.getType());
    queryState.setCommandType(opType);
    switch (tree.getType()) {
    case HiveParser.TOK_EXPLAIN:
      return new ExplainSemanticAnalyzer(queryState);
    case HiveParser.TOK_EXPLAIN_SQ_REWRITE:
      return new ExplainSQRewriteSemanticAnalyzer(queryState);
    case HiveParser.TOK_LOAD:
      return new LoadSemanticAnalyzer(queryState);
    case HiveParser.TOK_EXPORT:
      return new ExportSemanticAnalyzer(queryState);
    case HiveParser.TOK_IMPORT:
      return new ImportSemanticAnalyzer(queryState);
    case HiveParser.TOK_REPL_DUMP:
      return new ReplicationSemanticAnalyzer(queryState);
    case HiveParser.TOK_REPL_LOAD:
      return new ReplicationSemanticAnalyzer(queryState);
    case HiveParser.TOK_REPL_STATUS:
      return new ReplicationSemanticAnalyzer(queryState);
    case HiveParser.TOK_ALTERTABLE: {
      Tree child = tree.getChild(1);
      switch (child.getType()) {
      case HiveParser.TOK_ALTERTABLE_RENAME:
      case HiveParser.TOK_ALTERTABLE_TOUCH:
      case HiveParser.TOK_ALTERTABLE_ARCHIVE:
      case HiveParser.TOK_ALTERTABLE_UNARCHIVE:
      case HiveParser.TOK_ALTERTABLE_ADDCOLS:
      case HiveParser.TOK_ALTERTABLE_RENAMECOL:
      case HiveParser.TOK_ALTERTABLE_REPLACECOLS:
      case HiveParser.TOK_ALTERTABLE_DROPPARTS:
      case HiveParser.TOK_ALTERTABLE_ADDPARTS:
      case HiveParser.TOK_ALTERTABLE_PARTCOLTYPE:
      case HiveParser.TOK_ALTERTABLE_PROPERTIES:
      case HiveParser.TOK_ALTERTABLE_DROPPROPERTIES:
      case HiveParser.TOK_ALTERTABLE_EXCHANGEPARTITION:
      case HiveParser.TOK_ALTERTABLE_SKEWED:
      case HiveParser.TOK_ALTERTABLE_DROPCONSTRAINT:
      case HiveParser.TOK_ALTERTABLE_ADDCONSTRAINT:
        queryState.setCommandType(commandType.get(child.getType()));
        return new DDLSemanticAnalyzer(queryState);
      }
      opType = tablePartitionCommandType.get(child.getType())[tree.getChildCount() > 2 ? 1 : 0];
      queryState.setCommandType(opType);
      return new DDLSemanticAnalyzer(queryState);
    }
    // ... (remaining statement types elided)
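The commandType map consulted at the top of get() is a static lookup table from parser token types to HiveOperation values, populated in a static initializer of SemanticAnalyzerFactory. An abridged excerpt (the full table carries one entry per statement type):

static HashMap<Integer, HiveOperation> commandType = new HashMap<Integer, HiveOperation>();

static {
  commandType.put(HiveParser.TOK_EXPLAIN, HiveOperation.EXPLAIN);
  commandType.put(HiveParser.TOK_LOAD, HiveOperation.LOAD);
  commandType.put(HiveParser.TOK_EXPORT, HiveOperation.EXPORT);
  commandType.put(HiveParser.TOK_IMPORT, HiveOperation.IMPORT);
  commandType.put(HiveParser.TOK_QUERY, HiveOperation.QUERY);
  // ... (many more entries)
}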
For ordinary queries and DML the factory falls through to its default branch (not shown), which returns a CalcitePlanner when hive.cbo.enable is set and a plain SemanticAnalyzer otherwise. Whichever analyzer comes back, Driver.compile() invokes sem.analyze(), a thin template method defined in BaseSemanticAnalyzer:
public void analyze(ASTNode ast, Context ctx) throws SemanticException {
  initCtx(ctx);
  init(true);
  analyzeInternal(ast);
}
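The actual work happens in analyzeInternal(), which BaseSemanticAnalyzer declares abstract and every concrete analyzer overrides:

// From BaseSemanticAnalyzer: each subclass supplies its own analysis logic.
public abstract void analyzeInternal(ASTNode ast) throws SemanticException;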
For queries, SemanticAnalyzer.analyzeInternal() begins by resolving the parse tree. genResolvedParseTree() processes position aliases, peels CREATE TABLE (CTAS) and CREATE VIEW wrappers off the underlying query, and short-circuits transaction statements:
boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
  ASTNode child = ast;
  this.ast = ast;
  viewsExpanded = new ArrayList<String>();
  ctesExpanded = new ArrayList<String>();

  // 1. analyze and process the position alias
  processPositionAlias(ast);

  // 2. analyze create table command
  if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) {
    // if it is not CTAS, we don't need to go further and just return
    if ((child = analyzeCreateTable(ast, qb, plannerCtx)) == null) {
      return false;
    }
  } else {
    queryState.setCommandType(HiveOperation.QUERY);
  }

  // 3. analyze create view command
  if (ast.getToken().getType() == HiveParser.TOK_CREATEVIEW
      || ast.getToken().getType() == HiveParser.TOK_CREATE_MATERIALIZED_VIEW
      || (ast.getToken().getType() == HiveParser.TOK_ALTERVIEW
          && ast.getChild(1).getType() == HiveParser.TOK_QUERY)) {
    child = analyzeCreateView(ast, qb, plannerCtx);
    if (child == null) {
      return false;
    }
    viewSelect = child;
    // prevent view from referencing itself
    viewsExpanded.add(createVwDesc.getViewName());
  }

  switch (ast.getToken().getType()) {
  case HiveParser.TOK_SET_AUTOCOMMIT:
    assert ast.getChildCount() == 1;
    if (ast.getChild(0).getType() == HiveParser.TOK_TRUE) {
      setAutoCommitValue(true);
    } else if (ast.getChild(0).getType() == HiveParser.TOK_FALSE) {
      setAutoCommitValue(false);
    } else {
      assert false : "Unexpected child of TOK_SET_AUTOCOMMIT: " + ast.getChild(0).getType();
    }
    // fall through
  case HiveParser.TOK_START_TRANSACTION:
  case HiveParser.TOK_COMMIT:
  case HiveParser.TOK_ROLLBACK:
    if (!(conf.getBoolVar(ConfVars.HIVE_IN_TEST) || conf.getBoolVar(ConfVars.HIVE_IN_TEZ_TEST))) {
      throw new IllegalStateException(
          SemanticAnalyzerFactory.getOperation(ast.getToken().getType())
          + " is not supported yet.");
    }
    queryState.setCommandType(SemanticAnalyzerFactory.getOperation(ast.getToken().getType()));
    return false;
  }
  // ... (phase 1 traversal and metadata resolution follow)
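To see the kind of tree these methods operate on, the parser can be driven directly. A small standalone sketch (the class name and query are arbitrary; ParseDriver and ASTNode.dump() are Hive's):

import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.ParseException;

public class DumpAst {
  public static void main(String[] args) throws ParseException {
    ParseDriver pd = new ParseDriver();
    // Parse without executing: this is the ASTNode the analyzer receives.
    ASTNode tree = pd.parse("SELECT key, count(1) FROM src GROUP BY key");
    // Prints the nested TOK_QUERY / TOK_FROM / TOK_INSERT structure.
    System.out.println(tree.dump());
  }
}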
With the tree resolved, analysis proper starts with doPhase1(), a single pre-order walk over the AST that records aliases, destinations, aggregations, and clause expressions into the QB and QBParseInfo structures consumed by later stages:
/**
 * Phase 1: (including, but not limited to):
 *
 * 1. Gets all the aliases for all the tables / subqueries and makes the
 *    appropriate mapping in aliasToTabs, aliasToSubq
 * 2. Gets the location of the destination and names the clause "inclause" + i
 * 3. Creates a map from a string representation of an aggregation tree to the
 *    actual aggregation AST
 * 4. Creates a mapping from the clause name to the select expression AST in
 *    destToSelExpr
 * 5. Creates a mapping from a table alias to the lateral view AST's in
 *    aliasToLateralViews
 *
 * @param ast
 * @param qb
 * @param ctx_1
 * @throws SemanticException
 */
@SuppressWarnings({"fallthrough", "nls"})
public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx)
    throws SemanticException {

  boolean phase1Result = true;
  QBParseInfo qbp = qb.getParseInfo();
  boolean skipRecursion = false;

  if (ast.getToken() != null) {
    skipRecursion = true;
    switch (ast.getToken().getType()) {
    case HiveParser.TOK_SELECTDI:
      qb.countSelDi();
      // fall through
    case HiveParser.TOK_SELECT:
      qb.countSel();
      qbp.setSelExprForClause(ctx_1.dest, ast);

      int posn = 0;
      if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
        qbp.setHints((ASTNode) ast.getChild(0));
        posn++;
      }

      if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM))
        queryProperties.setUsesScript(true);

      LinkedHashMap<String, ASTNode> aggregations =
          doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest);
      doPhase1GetColumnAliasesFromSelect(ast, qbp);
      qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
      qbp.setDistinctFuncExprsForClause(ctx_1.dest,
          doPhase1GetDistinctFuncExprs(aggregations));
      break;

    case HiveParser.TOK_WHERE:
      qbp.setWhrExprForClause(ctx_1.dest, ast);
      if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty())
        queryProperties.setFilterWithSubQuery(true);
      break;

    case HiveParser.TOK_INSERT_INTO:
      String currentDatabase = SessionState.get().getCurrentDatabase();
      String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
          currentDatabase);
      qbp.addInsertIntoTable(tab_name, ast);
      // fall through

    case HiveParser.TOK_DESTINATION:
      ctx_1.dest = this.ctx.getDestNamePrefix(ast).toString() + ctx_1.nextNum;
      ctx_1.nextNum++;
      boolean isTmpFileDest = false;
      if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {
        ASTNode ch = (ASTNode) ast.getChild(0);
        if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0
            && ch.getChild(0) instanceof ASTNode) {
          ch = (ASTNode) ch.getChild(0);
          isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE;
        } else {
          if (ast.getToken().getType() == HiveParser.TOK_DESTINATION
              && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
            String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
                SessionState.get().getCurrentDatabase());
            qbp.getInsertOverwriteTables().put(fullTableName, ast);
          }
        }
      }

      // is there a insert in the subquery
      if (qbp.getIsSubQ() && !isTmpFileDest) {
        throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
      }

      qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
      handleInsertStatementSpecPhase1(ast, qbp, ctx_1);

      if (qbp.getClauseNamesForDest().size() == 2) {
        // From the moment that we have two destination clauses,
        // we know that this is a multi-insert query.
        // Thus, set property to right value.
        // Using qbp.getClauseNamesForDest().size() >= 2 would be
        // equivalent, but we use == to avoid setting the property
        // multiple times
        queryProperties.setMultiDestQuery(true);
      }

      if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) {
        plannerCtx.setInsertToken(ast, isTmpFileDest);
      } else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) {
        // For multi-insert query, currently we only optimize the FROM clause.
        // Hence, introduce multi-insert token on top of it.
        // However, first we need to reset existing token (insert).
        // Using qbp.getClauseNamesForDest().size() >= 2 would be
        // equivalent, but we use == to avoid setting the property
        // multiple times
        plannerCtx.resetToken();
        plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0));
      }
      break;

    case HiveParser.TOK_FROM:
      int child_count = ast.getChildCount();
      if (child_count != 1) {
        throw new SemanticException(generateErrorMessage(ast,
            "Multiple Children " + child_count));
      }

      if (!qbp.getIsSubQ()) {
        qbp.setQueryFromExpr(ast);
      }

      // Check if this is a subquery / lateral view
      ASTNode frm = (ASTNode) ast.getChild(0);
      if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
        processTable(qb, frm);
      } else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
        // Create a temp table with the passed values in it then rewrite this portion of the
        // tree to be from that table.
        ASTNode newFrom = genValuesTempTable(frm, qb);
        ast.setChild(0, newFrom);
        processTable(qb, newFrom);
      } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
        processSubQuery(qb, frm);
      } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW
          || frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
        queryProperties.setHasLateralViews(true);
        processLateralView(qb, frm);
      } else if (isJoinToken(frm)) {
        processJoin(qb, frm);
        qbp.setJoinExpr(frm);
      } else if (frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
        queryProperties.setHasPTF(true);
        processPTF(qb, frm);
      }
      break;

    case HiveParser.TOK_CLUSTERBY:
      // Get the clusterby aliases - these are aliased to the entries in the
      // select list
      queryProperties.setHasClusterBy(true);
      qbp.setClusterByExprForClause(ctx_1.dest, ast);
      break;

    case HiveParser.TOK_DISTRIBUTEBY:
      // Get the distribute by aliases - these are aliased to the entries in
      // the select list
      queryProperties.setHasDistributeBy(true);
      qbp.setDistributeByExprForClause(ctx_1.dest, ast);
      if (qbp.getClusterByForClause(ctx_1.dest) != null) {
        throw new SemanticException(generateErrorMessage(ast,
            ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
      } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
        throw new SemanticException(generateErrorMessage(ast,
            ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
      }
      break;

    case HiveParser.TOK_SORTBY:
      // Get the sort by aliases - these are aliased to the entries in the
      // select list
      queryProperties.setHasSortBy(true);
      qbp.setSortByExprForClause(ctx_1.dest, ast);
      if (qbp.getClusterByForClause(ctx_1.dest) != null) {
        throw new SemanticException(generateErrorMessage(ast,
            ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
      } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
        throw new SemanticException(generateErrorMessage(ast,
            ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
      }
      break;

    case HiveParser.TOK_ORDERBY:
      // Get the order by aliases - these are aliased to the entries in the
      // select list
      queryProperties.setHasOrderBy(true);
      qbp.setOrderByExprForClause(ctx_1.dest, ast);
      if (qbp.getClusterByForClause(ctx_1.dest) != null) {
        throw new SemanticException(generateErrorMessage(ast,
            ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
      }
      break;

    case HiveParser.TOK_GROUPBY:
    case HiveParser.TOK_ROLLUP_GROUPBY:
    case HiveParser.TOK_CUBE_GROUPBY:
    case HiveParser.TOK_GROUPING_SETS:
      // Get the groupby aliases - these are aliased to the entries in the
      // select list
      queryProperties.setHasGroupBy(true);
      if (qbp.getJoinExpr() != null) {
        queryProperties.setHasJoinFollowedByGroupBy(true);
      }
      if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
        throw new SemanticException(generateErrorMessage(ast,
            ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
      }
      qbp.setGroupByExprForClause(ctx_1.dest, ast);
      skipRecursion = true;

      // Rollup and Cubes are syntactic sugar on top of grouping sets
      if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
        qbp.getDestRollups().add(ctx_1.dest);
      } else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
        qbp.getDestCubes().add(ctx_1.dest);
      } else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
        qbp.getDestGroupingSets().add(ctx_1.dest);
      }
      break;

    case HiveParser.TOK_HAVING:
      qbp.setHavingExprForClause(ctx_1.dest, ast);
      qbp.addAggregationExprsForClause(ctx_1.dest,
          doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
      break;

    case HiveParser.KW_WINDOW:
      if (!qb.hasWindowingSpec(ctx_1.dest)) {
        throw new SemanticException(generateErrorMessage(ast,
            "Query has no Cluster/Distribute By; but has a Window definition"));
      }
      handleQueryWindowClauses(qb, ctx_1, ast);
      break;

    case HiveParser.TOK_LIMIT:
      if (ast.getChildCount() == 2) {
        qbp.setDestLimit(ctx_1.dest,
            new Integer(ast.getChild(0).getText()),
            new Integer(ast.getChild(1).getText()));
      } else {
        qbp.setDestLimit(ctx_1.dest, new Integer(0),
            new Integer(ast.getChild(0).getText()));
      }
      break;

    case HiveParser.TOK_ANALYZE:
      // Case of analyze command
      String table_name =
          getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();
      qb.setTabAlias(table_name, table_name);
      qb.addAlias(table_name);
      qb.getParseInfo().setIsAnalyzeCommand(true);
      qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
      qb.getParseInfo().setPartialScanAnalyzeCommand(this.partialscan);
      // Allow analyze the whole table and dynamic partitions
      HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
      HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
      break;

    case HiveParser.TOK_UNIONALL:
      if (!qbp.getIsSubQ()) {
        // this shouldn't happen. The parser should have converted the union to be
        // contained in a subquery. Just in case, we keep the error as a fallback.
        throw new SemanticException(generateErrorMessage(ast,
            ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
      }
      skipRecursion = false;
      break;

    case HiveParser.TOK_INSERT:
      ASTNode destination = (ASTNode) ast.getChild(0);
      Tree tab = destination.getChild(0);

      // Proceed if AST contains partition & If Not Exists
      if (destination.getChildCount() == 2 && tab.getChildCount() == 2
          && destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
        String tableName = tab.getChild(0).getChild(0).getText();

        Tree partitions = tab.getChild(1);
        int childCount = partitions.getChildCount();
        HashMap<String, String> partition = new HashMap<String, String>();
        for (int i = 0; i < childCount; i++) {
          String partitionName = partitions.getChild(i).getChild(0).getText();
          Tree pvalue = partitions.getChild(i).getChild(1);
          if (pvalue == null) {
            break;
          }
          String partitionVal = stripQuotes(pvalue.getText());
          partition.put(partitionName, partitionVal);
        }
        // if it is a dynamic partition throw the exception
        if (childCount != partition.size()) {
          throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
              .getMsg(partition.toString()));
        }
        Table table = null;
        try {
          table = this.getTableObjectByName(tableName);
        } catch (HiveException ex) {
          throw new SemanticException(ex);
        }
        try {
          Partition parMetaData = db.getPartition(table, partition, false);
          // Check partition exists if it exists skip the overwrite
          if (parMetaData != null) {
            phase1Result = false;
            skipRecursion = true;
            LOG.info("Partition already exists so insert into overwrite "
                + "skipped for partition : " + parMetaData.toString());
            break;
          }
        } catch (HiveException e) {
          LOG.info("Error while getting metadata : ", e);
        }
        validatePartSpec(table, partition, (ASTNode) tab, conf, false);
      }
      skipRecursion = false;
      break;

    case HiveParser.TOK_LATERAL_VIEW:
    case HiveParser.TOK_LATERAL_VIEW_OUTER:
      // todo: nested LV
      assert ast.getChildCount() == 1;
      qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
      break;

    case HiveParser.TOK_CTE:
      processCTE(qb, ast);
      break;

    default:
      skipRecursion = false;
      break;
    }
  }

  if (!skipRecursion) {
    // Iterate over the rest of the children
    int child_count = ast.getChildCount();
    for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
      // Recurse
      phase1Result = phase1Result &&
          doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1, plannerCtx);
    }
  }
  return phase1Result;
}
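Stripped of the per-token handling, the traversal skeleton is simple: handle the current node, let the handler decide whether the children still need visiting, and stop as soon as any subtree reports failure. A condensed, illustrative restatement (handleToken() is a hypothetical stand-in for the switch above):

// Illustrative skeleton of doPhase1's recursion; handleToken() is a
// hypothetical stand-in for the big switch statement, returning true
// when the subtree has been fully consumed and recursion should stop.
static boolean walk(ASTNode node) throws SemanticException {
  boolean result = true;
  boolean skipRecursion = false;
  if (node.getToken() != null) {
    skipRecursion = handleToken(node);
  }
  if (!skipRecursion) {
    for (int i = 0; i < node.getChildCount() && result; i++) {
      result = result && walk((ASTNode) node.getChild(i));
    }
  }
  return result;
}

A false return propagates all the way up, which is how the INSERT ... IF NOT EXISTS branch above aborts further analysis once it finds that the target partition already exists.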