/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hadoop.hive.ql.optimizer;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.math.DoubleMath;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.Stack;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo;
import org.apache.hadoop.hive.ql.exec.MuxOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.BigTableSelectorForAutoSMJ;
import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor;
import org.apache.hadoop.hive.ql.optimizer.TezBucketJoinProcCtx;
import org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile;
import org.apache.hadoop.hive.ql.parse.GenTezUtils;
import org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc;
import org.apache.hadoop.hive.ql.plan.CustomBucketFunction;
import org.apache.hadoop.hive.ql.plan.DummyStoreDesc;
import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OpTraits;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ConvertJoinMapJoin
implements SemanticNodeProcessor {
    // Class-wide SLF4J logger.
    private static final Logger LOG = LoggerFactory.getLogger((String)ConvertJoinMapJoin.class.getName());
    // NOTE(review): not referenced by name in the visible part of this class; getMaxSlotsPerQuery uses the
    // literal 3 when no LLAP info is available - presumably this constant inlined by the compiler. Confirm.
    private static final int DEFAULT_MAX_EXECUTORS_PER_QUERY_CONTAINER_MODE = 3;
    // Re-read from HIVE_HASHTABLE_LOAD_FACTOR at the start of every process() call; used by
    // computeOnlineDataSizeGeneric when estimating the number of hash table slots.
    public float hashTableLoadFactor;
    // Set in process() from MemoryMonitorInfo.getAdjustedNoConditionalTaskSize(); passed to
    // getMapJoinConversion and compared against the small-table size in selectJoinForLlap.
    private long maxJoinMemory;
    // Set in process() via HashMapDataStructureType.of(joinDesc); selects the estimator in computeOnlineDataSizeFast.
    private HashMapDataStructureType hashMapDataStructure;
    // Re-read from HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED in process(); selects between the
    // "fast" and "optimized" size estimators in computeOnlineDataSize.
    private boolean fastHashTableAvailable;

    /**
     * Decides how a single join operator is executed on Tez and rewrites the operator tree accordingly.
     *
     * <p>The method refreshes the per-call settings held on this instance (load factor, fast hash table
     * availability, join memory limit, hash map data structure), attaches the computed
     * {@link MemoryMonitorInfo} to the join descriptor and then tries, in this order:
     * <ol>
     *   <li>if map-join conversion is disabled, or no map-join conversion can be found: a sort-merge-bucket
     *       join through {@code checkAndConvertSMBJoin}, falling back to a reduce-side join when that call
     *       returns a non-null value;</li>
     *   <li>a bucket map join (or, when LLAP cluster info is present, whatever
     *       {@code selectJoinForLlap} picks) when more than one bucket was estimated;</li>
     *   <li>a plain (non-bucketed) map join;</li>
     *   <li>a reduce-side join when none of the above applies.</li>
     * </ol>
     *
     * @param nd the join operator to process; must be a {@link JoinOperator}
     * @param stack the walker stack (not used here)
     * @param procCtx the processing context; must be an {@link OptimizeTezProcContext}
     * @param nodeOutputs outputs of previously processed nodes (not used here)
     * @return always {@code null}
     * @throws SemanticException if one of the conversion steps fails
     */
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object ... nodeOutputs) throws SemanticException {
        OptimizeTezProcContext context = (OptimizeTezProcContext)procCtx;
        this.hashTableLoadFactor = context.conf.getFloatVar(HiveConf.ConfVars.HIVE_HASHTABLE_LOAD_FACTOR);
        this.fastHashTableAvailable = context.conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
        JoinOperator joinOp = (JoinOperator)nd;

        // Cluster information is only looked up when running in LLAP mode.
        LlapClusterStateForCompile llapInfo = null;
        if ("llap".equalsIgnoreCase(context.conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
            llapInfo = LlapClusterStateForCompile.getClusterInfo((Configuration)context.conf);
            llapInfo.initClusterInfo();
        }

        MemoryMonitorInfo memoryMonitorInfo = this.getMemoryMonitorInfo(context.conf, llapInfo);
        ((JoinDesc)joinOp.getConf()).setMemoryMonitorInfo(memoryMonitorInfo);
        this.maxJoinMemory = memoryMonitorInfo.getAdjustedNoConditionalTaskSize();
        LOG.info("maxJoinMemory: {}", (Object)this.maxJoinMemory);
        this.hashMapDataStructure = HashMapDataStructureType.of((JoinDesc)joinOp.getConf());

        TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
        boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN)
                && !context.parseContext.getDisableMapJoin();
        if (!hiveConvertJoin) {
            // Map join is off: only SMB join is attempted; a non-null result means "not handled".
            if (this.checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx) != null) {
                this.fallbackToReduceSideJoin(joinOp, context);
            }
            return null;
        }

        boolean bucketMapJoinEnabled = context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ);
        int numBuckets = bucketMapJoinEnabled ? ConvertJoinMapJoin.estimateNumBuckets(joinOp, true) : 1;
        LOG.info("Estimated number of buckets {}", (Object)numBuckets);

        MapJoinConversion mapJoinConversion = this.getMapJoinConversion(joinOp, context, numBuckets, false, this.maxJoinMemory, true);
        if (mapJoinConversion == null) {
            // No big table candidate fits: try SMB join, then reduce-side join.
            if (this.checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx) != null) {
                this.fallbackToReduceSideJoin(joinOp, context);
            }
            return null;
        }

        if (numBuckets > 1 && bucketMapJoinEnabled) {
            boolean converted = llapInfo != null
                    ? this.selectJoinForLlap(context, joinOp, tezBucketJoinProcCtx, llapInfo, mapJoinConversion, numBuckets)
                    : this.convertJoinBucketMapJoin(joinOp, context, mapJoinConversion, tezBucketJoinProcCtx);
            if (converted) {
                return null;
            }
        }

        LOG.info("Convert to non-bucketed map join");
        if (numBuckets != 1) {
            // The earlier conversion assumed bucket scaling; recompute it for a single bucket.
            mapJoinConversion = this.getMapJoinConversion(joinOp, context, 1, false, this.maxJoinMemory, true);
        }
        if (mapJoinConversion == null
                || (mapJoinConversion.getIsFullOuterJoin() && !mapJoinConversion.getIsFullOuterEnabledForMapJoin())) {
            this.fallbackToReduceSideJoin(joinOp, context);
            return null;
        }

        MapJoinOperator mapJoinOp = this.convertJoinMapJoin(joinOp, context, mapJoinConversion, true);
        if (mapJoinOp == null) {
            this.fallbackToReduceSideJoin(joinOp, context);
            return null;
        }

        // The plain map join carries no bucketing/sorting traits; only the reduce sink count is kept.
        mapJoinOp.setOpTraits(new OpTraits(null, null, -1, null, joinOp.getOpTraits().getNumReduceSinks()));
        this.preserveOperatorInfos(mapJoinOp, joinOp, context);
        for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
            this.setAllChildrenTraits(childOp, mapJoinOp.getOpTraits());
        }
        return null;
    }

    /**
     * Chooses between bucket map join and dynamically partitioned hash join when LLAP cluster
     * information is available, and performs the chosen conversion.
     *
     * <p>If dynamic partitioned hash join is disabled and more than one bucket was estimated, only the
     * bucket map join is attempted. Otherwise the estimated in-memory size of all small-table inputs is
     * compared against {@code maxJoinMemory}: when it fits, nothing is converted here. When it does not
     * fit, the cheaper of the two network cost estimates decides.
     *
     * @param context optimizer context providing the configuration
     * @param joinOp the join being converted
     * @param tezBucketJoinProcCtx bucket join context handed to the bucket map join conversion
     * @param llapInfo LLAP cluster state; must not be {@code null}
     * @param mapJoinConversion the previously computed conversion (provides the big table position)
     * @param numBuckets estimated number of buckets
     * @return {@code true} if the join was converted by this method, {@code false} if the caller should
     *         continue with a non-bucketed map join
     * @throws SemanticException if a conversion step fails
     */
    private boolean selectJoinForLlap(OptimizeTezProcContext context, JoinOperator joinOp, TezBucketJoinProcCtx tezBucketJoinProcCtx, LlapClusterStateForCompile llapInfo, MapJoinConversion mapJoinConversion, int numBuckets) throws SemanticException {
        if (!context.conf.getBoolVar(HiveConf.ConfVars.HIVE_DYNAMIC_PARTITION_HASHJOIN) && numBuckets > 1) {
            return this.convertJoinBucketMapJoin(joinOp, context, mapJoinConversion, tezBucketJoinProcCtx);
        }

        // Prefer the executor count reported by the cluster; -1 means "unknown", in which case the
        // configured value is used.
        int numExecutorsPerNode = -1;
        if (llapInfo.hasClusterInfo()) {
            numExecutorsPerNode = llapInfo.getNumExecutorsPerNode();
        }
        if (numExecutorsPerNode == -1) {
            numExecutorsPerNode = context.conf.getIntVar(HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS);
        }
        int numNodes = llapInfo.getKnownExecutorCount() / numExecutorsPerNode;
        LOG.debug("Number of nodes = {}. Number of Executors per node = {}", (Object)numNodes, (Object)numExecutorsPerNode);

        // Sum the estimated online size of every input except the big table.
        int mapJoinConversionPos = mapJoinConversion.getBigTablePos();
        long totalSize = 0L;
        for (int pos = 0; pos < joinOp.getParentOperators().size(); ++pos) {
            if (pos == mapJoinConversionPos) {
                continue;
            }
            Operator<? extends OperatorDesc> parentOp = joinOp.getParentOperators().get(pos);
            totalSize += this.computeOnlineDataSize(parentOp.getStatistics());
        }
        long bigTableSize = this.computeOnlineDataSize(joinOp.getParentOperators().get(mapJoinConversionPos).getStatistics());

        long networkCostDPHJ = totalSize + bigTableSize;
        LOG.info("Cost of dynamically partitioned hash join : total small table size = {} bigTableSize = {} networkCostDPHJ = {}",
                new Object[]{totalSize, bigTableSize, networkCostDPHJ});
        long networkCostMJ = (long)numNodes * totalSize;
        LOG.info("Cost of Bucket Map Join : numNodes = {} total small table size = {} networkCostMJ = {}",
                new Object[]{numNodes, totalSize, networkCostMJ});

        if (totalSize <= this.maxJoinMemory) {
            // The small tables fit in memory: leave the decision to the plain map join path.
            return false;
        }
        if (networkCostDPHJ < networkCostMJ) {
            LOG.info("Dynamically partitioned Hash Join chosen");
            return this.convertJoinDynamicPartitionedHashJoin(joinOp, context);
        }
        if (numBuckets > 1) {
            LOG.info("Bucket Map Join chosen");
            return this.convertJoinBucketMapJoin(joinOp, context, mapJoinConversion, tezBucketJoinProcCtx);
        }
        LOG.info("Falling back to mapjoin no bucket scaling");
        return false;
    }

    /**
     * Estimates the in-memory size of a hash table built from the given statistics, using the "fast"
     * estimator when the fast hash table flag was enabled in {@code process} and the "optimized"
     * estimator otherwise.
     *
     * @param statistics statistics of the operator feeding the hash table
     * @return the estimated size as computed by the selected estimator
     */
    public long computeOnlineDataSize(Statistics statistics) {
        return this.fastHashTableAvailable
                ? this.computeOnlineDataSizeFast(statistics)
                : this.computeOnlineDataSizeOptimized(statistics);
    }

    /**
     * Dispatches to the long-keyed or composite-keyed estimator depending on the hash map data
     * structure selected in {@code process}.
     *
     * @param statistics statistics of the operator feeding the hash table
     * @return the estimated size from the matching estimator
     * @throws RuntimeException if the data structure type is neither of the two handled ordinals
     */
    public long computeOnlineDataSizeFast(Statistics statistics) {
        // NOTE(review): dispatching on ordinal() looks like a decompilation artifact; the enum constants
        // are not visible here, so the numeric mapping is kept as-is - confirm against the enum declaration.
        final int structureOrdinal = this.hashMapDataStructure.ordinal();
        if (structureOrdinal == 1) {
            return this.computeOnlineDataSizeFastLongKeyed(statistics);
        }
        if (structureOrdinal == 0) {
            return this.computeOnlineDataSizeFastCompositeKeyed(statistics);
        }
        throw new RuntimeException("invalid mode");
    }

    /**
     * Size estimate for the long-keyed fast hash table: -8 bytes of overhead per row and 16 bytes per slot.
     *
     * @param statistics statistics of the operator feeding the hash table
     * @return the estimated size
     */
    public long computeOnlineDataSizeFastLongKeyed(Statistics statistics) {
        final long perRowOverhead = -8L;
        final long perSlotOverhead = 16L;
        return this.computeOnlineDataSizeGeneric(statistics, perRowOverhead, perSlotOverhead);
    }

    /**
     * Size estimate for the composite-keyed fast hash table: 9 bytes of overhead per row and 8 bytes per slot.
     *
     * @param statistics statistics of the operator feeding the hash table
     * @return the estimated size
     */
    public long computeOnlineDataSizeFastCompositeKeyed(Statistics statistics) {
        final long perRowOverhead = 9L;
        final long perSlotOverhead = 8L;
        return this.computeOnlineDataSizeGeneric(statistics, perRowOverhead, perSlotOverhead);
    }

    /**
     * Size estimate used when the fast hash table is not available: 12 bytes of overhead per row and
     * 8 bytes per slot.
     *
     * @param statistics statistics of the operator feeding the hash table
     * @return the estimated size
     */
    public long computeOnlineDataSizeOptimized(Statistics statistics) {
        final long perRowOverhead = 12L;
        final long perSlotOverhead = 8L;
        return this.computeOnlineDataSizeGeneric(statistics, perRowOverhead, perSlotOverhead);
    }

    /**
     * Generic hash table size estimate: adjusted data size plus per-row and per-slot overheads.
     *
     * <p>The slot count is the smallest power of two that is at least
     * {@code rows / hashTableLoadFactor}, where a non-positive row count is treated as 1.
     *
     * @param statistics statistics of the operator feeding the hash table
     * @param overHeadPerRow bytes added (or removed, if negative) per row
     * @param overHeadPerSlot bytes added per hash table slot
     * @return the estimated size
     */
    public long computeOnlineDataSizeGeneric(Statistics statistics, long overHeadPerRow, long overHeadPerSlot) {
        final long reportedRows = statistics.getNumRows();
        final long effectiveRows = reportedRows > 0L ? reportedRows : 1L;

        final int slotExponent = DoubleMath.log2((double)((float)effectiveRows / this.hashTableLoadFactor), (RoundingMode)RoundingMode.UP);
        final long worstCaseSlots = 1L << slotExponent;

        final long adjustedPayload = statistics.getDataSize()
                - ConvertJoinMapJoin.hashTableDataSizeAdjustment(effectiveRows, statistics.getColumnStats());
        // NOTE(review): the per-row overhead uses the reported (unclamped) row count, unlike the slot
        // computation above - confirm this is intended.
        final long rowOverhead = overHeadPerRow * reportedRows;
        final long slotOverhead = overHeadPerSlot * worstCaseSlots;
        return adjustedPayload + rowOverhead + slotOverhead;
    }

    /**
     * Computes how many bytes of per-value Java object overhead should be subtracted from the raw data
     * size when estimating a hash table's footprint.
     *
     * <p>For each column, an overhead is picked by column type (string-like, binary, or
     * timestamp/decimal/date; all other types contribute 0) and multiplied by the column's non-null
     * count. Products and the running total use the saturating helpers in {@code StatsUtils}.
     *
     * @param numRows number of rows; a non-positive value yields 0
     * @param colStats per-column statistics; {@code null}, empty lists and {@code null} entries are tolerated
     * @return the total adjustment in bytes
     */
    private static long hashTableDataSizeAdjustment(long numRows, List<ColStatistics> colStats) {
        long result = 0L;
        if (numRows <= 0L || colStats == null || colStats.isEmpty()) {
            return result;
        }
        for (ColStatistics cs : colStats) {
            if (cs == null) {
                continue;
            }
            // Locale.ROOT keeps type-name matching independent of the JVM default locale
            // (e.g. the Turkish dotless-i lower-casing of 'I').
            String colTypeLowerCase = cs.getColumnType().toLowerCase(java.util.Locale.ROOT);
            long nonNullCount = cs.getNumNulls() > 0L ? numRows - cs.getNumNulls() + 1L : numRows;
            double overhead = 0.0;
            if (colTypeLowerCase.equals("string") || colTypeLowerCase.startsWith("varchar") || colTypeLowerCase.startsWith("char")) {
                overhead = JavaDataModel.get().lengthForStringOfLength(0);
            } else if (colTypeLowerCase.equals("binary")) {
                overhead = JavaDataModel.get().lengthForByteArrayOfSize(0L);
            } else if (colTypeLowerCase.equals("timestamp") || colTypeLowerCase.equals("timestamp with local time zone") || colTypeLowerCase.startsWith("decimal") || colTypeLowerCase.equals("date")) {
                overhead = JavaDataModel.get().object();
            }
            result = StatsUtils.safeAdd(StatsUtils.safeMult(nonNullCount, overhead), result);
        }
        return result;
    }

    /**
     * Builds the {@link MemoryMonitorInfo} describing map join memory limits for the current query.
     *
     * <p>Without LLAP info the adjusted limit equals the configured no-conditional-task threshold.
     * With LLAP info the limit is raised by {@code threshold * oversubscriptionFactor * slots}, where
     * {@code slots} is the smaller of the per-query slot cap and the executors per node; the adjusted
     * limit is never below the configured threshold.
     *
     * @param conf configuration to read thresholds and factors from
     * @param llapInfo LLAP cluster state, or {@code null} when not running in LLAP mode
     * @return the memory monitor settings (also logged at info level)
     */
    @VisibleForTesting
    public MemoryMonitorInfo getMemoryMonitorInfo(HiveConf conf, LlapClusterStateForCompile llapInfo) {
        final long configuredThreshold = conf.getLongVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_NOCONDITIONAL_TASK_THRESHOLD);
        final double oversubscription = conf.getFloatVar(HiveConf.ConfVars.LLAP_MAPJOIN_MEMORY_OVERSUBSCRIBE_FACTOR);
        final int slotCapPerQuery = this.getMaxSlotsPerQuery(conf, llapInfo);
        final long checkInterval = conf.getLongVar(HiveConf.ConfVars.LLAP_MAPJOIN_MEMORY_MONITOR_CHECK_INTERVAL);
        final float hashTableInflation = conf.getFloatVar(HiveConf.ConfVars.HIVE_HASH_TABLE_INFLATION_FACTOR);

        final MemoryMonitorInfo info;
        if (llapInfo == null) {
            info = new MemoryMonitorInfo(false, 1, slotCapPerQuery, oversubscription, configuredThreshold, configuredThreshold, checkInterval, hashTableInflation);
        } else {
            // Resolve executors per node: cluster value if known, configured value otherwise.
            int executors;
            if (llapInfo.hasClusterInfo()) {
                executors = llapInfo.getNumExecutorsPerNode();
                if (executors == -1) {
                    LOG.warn("Cannot determine executor count from LLAP cluster information. Falling back to getting #executors from hiveconf..");
                    executors = conf.getIntVar(HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS);
                }
            } else {
                LOG.warn("LLAP cluster information not available. Falling back to getting #executors from hiveconf..");
                executors = conf.getIntVar(HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS);
            }

            final int usableSlots = Math.min(slotCapPerQuery, executors);
            final double base = (double)configuredThreshold;
            final double borrowed = base * oversubscription * (double)usableSlots;
            final long llapLimit = (long)(base + borrowed);
            final long adjustedLimit = Math.max(configuredThreshold, llapLimit);
            info = new MemoryMonitorInfo(true, executors, slotCapPerQuery, oversubscription, configuredThreshold, adjustedLimit, checkInterval, hashTableInflation);
        }
        LOG.info("Memory monitor info set to : {}", (Object)info);
        return info;
    }

    /**
     * Returns the maximum number of executor slots a single query may borrow memory from.
     *
     * <p>An explicitly configured value (anything other than -1) wins. Otherwise the value is 3 when no
     * LLAP info is available, or one third of the executors per node clamped to the range [1, 8].
     *
     * @param conf configuration holding the explicit limit
     * @param llapInfo LLAP cluster state, or {@code null}
     * @return the slot limit
     */
    private int getMaxSlotsPerQuery(HiveConf conf, LlapClusterStateForCompile llapInfo) {
        final int configured = conf.getIntVar(HiveConf.ConfVars.LLAP_MEMORY_OVERSUBSCRIPTION_MAX_EXECUTORS_PER_QUERY);
        if (configured != -1) {
            return configured;
        }
        if (llapInfo == null) {
            // Same value as DEFAULT_MAX_EXECUTORS_PER_QUERY_CONTAINER_MODE declared on this class.
            return 3;
        }
        final int thirdOfExecutors = llapInfo.getNumExecutorsPerNode() / 3;
        return Math.min(Math.max(1, thirdOfExecutors), 8);
    }

    /**
     * Attempts to convert the join into a sort-merge-bucket (SMB) join, falling back to a reduce-side
     * join when SMB is disabled or not applicable.
     *
     * <p>The big table is chosen by the selector class named in
     * {@code HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR}, which is loaded reflectively.
     *
     * @param context optimizer context providing configuration and parse context
     * @param joinOp the join to convert
     * @param tezBucketJoinProcCtx bucket join context; supplies the bucket count for the conversion
     * @return {@code null} when this method fully handled the join (converted or fell back);
     *         {@code Boolean.FALSE} when there are no big table candidates, in which case no fallback
     *         has been performed here and the caller is expected to do it
     * @throws SemanticException if the selector class cannot be loaded (the original
     *         {@link ClassNotFoundException} is attached as the cause) or a conversion step fails
     */
    private Object checkAndConvertSMBJoin(OptimizeTezProcContext context, JoinOperator joinOp, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
        // SMB is skipped when auto SMB is off, or when reduce-side SMB is off and the join already
        // sits behind two or more reduce sinks.
        if (!HiveConf.getBoolVar((Configuration)context.conf, (HiveConf.ConfVars)HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN)
                || (!HiveConf.getBoolVar((Configuration)context.conf, (HiveConf.ConfVars)HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_REDUCE)
                        && joinOp.getOpTraits().getNumReduceSinks() >= 2)) {
            this.fallbackToReduceSideJoin(joinOp, context);
            return null;
        }

        Class bigTableMatcherClass = null;
        try {
            String selector = HiveConf.getVar((Configuration)context.parseContext.getConf(), (HiveConf.ConfVars)HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
            bigTableMatcherClass = JavaUtils.loadClass((String)selector);
        }
        catch (ClassNotFoundException e) {
            // Keep the original exception as the cause so the failing class lookup stays diagnosable.
            throw new SemanticException(e.getMessage(), e);
        }
        BigTableSelectorForAutoSMJ bigTableMatcher = (BigTableSelectorForAutoSMJ)ReflectionUtils.newInstance((Class)bigTableMatcherClass, null);

        JoinDesc joinDesc = (JoinDesc)joinOp.getConf();
        JoinCondDesc[] joinCondns = joinDesc.getConds();
        Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
        if (joinCandidates.isEmpty()) {
            // Non-null marker: tells the caller that nothing was done and it must fall back itself.
            return false;
        }

        int mapJoinConversionPos = bigTableMatcher.getBigTablePosition(context.parseContext, joinOp, joinCandidates);
        if (mapJoinConversionPos < 0) {
            this.fallbackToReduceSideJoin(joinOp, context);
            return null;
        }
        if (this.checkConvertJoinSMBJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) {
            this.convertJoinSMBJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx.getNumBuckets(), true);
        } else {
            this.fallbackToReduceSideJoin(joinOp, context);
        }
        return null;
    }

    /**
     * Replaces {@code joinOp} in the operator graph with a newly created {@link CommonMergeJoinOperator}.
     *
     * <p>Steps, in order: build a {@link MapJoinDesc}; create the merge join operator from it; copy
     * traits, bucketing version and statistics from the original join; swap the merge join into the
     * parent/child lists of the join's neighbours; and, when {@code adjustParentsChildren} is set,
     * bypass the join's direct parents and insert a {@link TezDummyStoreOperator} in front of every
     * input except the one at {@code mapJoinConversionPos}.
     *
     * @param joinOp the join operator being replaced
     * @param context optimizer context (configuration and plan mapper)
     * @param mapJoinConversionPos index of the big table input
     * @param numBuckets bucket count recorded in the merge join descriptor and its traits
     * @param adjustParentsChildren whether to remove the join's direct parents and add dummy stores
     * @throws SemanticException if building the map join descriptor fails
     */
    private void convertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext context, int mapJoinConversionPos, int numBuckets, boolean adjustParentsChildren) throws SemanticException {
        int pos;
        MapJoinDesc mapJoinDesc = null;
        if (adjustParentsChildren) {
            // Descriptor derived through the shared MapJoinProcessor helper.
            mapJoinDesc = MapJoinProcessor.getMapJoinDesc(context.conf, joinOp, ((JoinDesc)joinOp.getConf()).isLeftInputJoin(), ((JoinDesc)joinOp.getConf()).getBaseSrc(), ((JoinDesc)joinOp.getConf()).getMapAliases(), mapJoinConversionPos, true);
        } else {
            // Descriptor assembled by hand from the existing JoinDesc; the remaining attributes are copied below.
            JoinDesc joinDesc = (JoinDesc)joinOp.getConf();
            mapJoinDesc = new MapJoinDesc((Map)MapJoinProcessor.getKeys(((JoinDesc)joinOp.getConf()).isLeftInputJoin(), ((JoinDesc)joinOp.getConf()).getBaseSrc(), joinOp).getRight(), null, joinDesc.getExprs(), null, null, joinDesc.getOutputColumnNames(), mapJoinConversionPos, joinDesc.getConds(), joinDesc.getFilters(), joinDesc.getNoOuterJoin(), null, joinDesc.getMemoryMonitorInfo(), joinDesc.getInMemoryDataSize());
            mapJoinDesc.setNullSafes(joinDesc.getNullSafes());
            mapJoinDesc.setFilterMap(joinDesc.getFilterMap());
            mapJoinDesc.setResidualFilterExprs(joinDesc.getResidualFilterExprs());
            mapJoinDesc.setColumnExprMap(joinDesc.getColumnExprMap());
            mapJoinDesc.setReversedExprs(joinDesc.getReversedExprs());
            mapJoinDesc.resetOrder();
        }
        // The merge join reuses the original join's compilation context and schema.
        CommonMergeJoinOperator mergeJoinOp = (CommonMergeJoinOperator)OperatorFactory.get(joinOp.getCompilationOpContext(), new CommonMergeJoinDesc(numBuckets, mapJoinConversionPos, mapJoinDesc), joinOp.getSchema());
        // NOTE(review): the same link is made again inside preserveOperatorInfos below.
        context.parseContext.getContext().getPlanMapper().link(joinOp, mergeJoinOp);
        int numReduceSinks = joinOp.getOpTraits().getNumReduceSinks();
        // Traits are copied from the join, except that the bucket count is the one passed in.
        OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), joinOp.getOpTraits().getCustomBucketFunctions(), numBuckets, joinOp.getOpTraits().getSortCols(), numReduceSinks);
        mergeJoinOp.setOpTraits(opTraits);
        ((CommonMergeJoinDesc)mergeJoinOp.getConf()).setBucketingVersion(((JoinDesc)joinOp.getConf()).getBucketingVersion());
        this.preserveOperatorInfos(mergeJoinOp, joinOp, context);
        // Swap the merge join in for the join in each parent's child list, keeping the same index.
        for (Operator<OperatorDesc> parentOp : joinOp.getParentOperators()) {
            pos = parentOp.getChildOperators().indexOf(joinOp);
            parentOp.getChildOperators().remove(pos);
            parentOp.getChildOperators().add(pos, mergeJoinOp);
        }
        // Same swap in each child's parent list.
        for (Operator<OperatorDesc> childOp : joinOp.getChildOperators()) {
            pos = childOp.getParentOperators().indexOf(joinOp);
            childOp.getParentOperators().remove(pos);
            childOp.getParentOperators().add(pos, mergeJoinOp);
        }
        // Give the merge join the same children and parents the join had.
        List<Operator<OperatorDesc>> childOperators = mergeJoinOp.getChildOperators();
        List<Operator<OperatorDesc>> parentOperators = mergeJoinOp.getParentOperators();
        childOperators.clear();
        parentOperators.clear();
        childOperators.addAll(joinOp.getChildOperators());
        parentOperators.addAll(joinOp.getParentOperators());
        ((CommonMergeJoinDesc)mergeJoinOp.getConf()).setGenJoinKeys(false);
        if (adjustParentsChildren) {
            ((CommonMergeJoinDesc)mergeJoinOp.getConf()).setGenJoinKeys(true);
            // Bypass the direct parents: every grandparent is connected straight to the merge join
            // and becomes one of its new parents.
            ArrayList<Operator<OperatorDesc>> newParentOpList = new ArrayList<Operator<OperatorDesc>>();
            for (Operator<OperatorDesc> parentOp : mergeJoinOp.getParentOperators()) {
                for (Operator<OperatorDesc> grandParentOp : parentOp.getParentOperators()) {
                    grandParentOp.getChildOperators().remove(parentOp);
                    grandParentOp.getChildOperators().add(mergeJoinOp);
                    newParentOpList.add(grandParentOp);
                }
            }
            mergeJoinOp.getParentOperators().clear();
            mergeJoinOp.getParentOperators().addAll(newParentOpList);
            // Iterate over a snapshot because the parent list is modified inside the loop.
            ArrayList<Operator<OperatorDesc>> parentOps = new ArrayList<Operator<OperatorDesc>>(mergeJoinOp.getParentOperators());
            for (Operator operator : parentOps) {
                GroupByOperator gpbyOp;
                int parentIndex = mergeJoinOp.getParentOperators().indexOf(operator);
                // The input at the big table position is left connected directly.
                if (parentIndex == mapJoinConversionPos) continue;
                // A group-by in HASH mode that feeds the merge join is switched to FINAL mode.
                if (operator instanceof GroupByOperator && ((GroupByDesc)(gpbyOp = (GroupByOperator)operator).getConf()).getMode() == GroupByDesc.Mode.HASH) {
                    ((GroupByDesc)gpbyOp.getConf()).setMode(GroupByDesc.Mode.FINAL);
                }
                // Insert a dummy store between this input and the merge join, at the same list positions.
                TezDummyStoreOperator dummyStoreOp = new TezDummyStoreOperator(mergeJoinOp.getCompilationOpContext());
                dummyStoreOp.setConf(new DummyStoreDesc());
                dummyStoreOp.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>());
                dummyStoreOp.setChildOperators(new ArrayList<Operator<? extends OperatorDesc>>());
                dummyStoreOp.getChildOperators().add(mergeJoinOp);
                int index = operator.getChildOperators().indexOf(mergeJoinOp);
                operator.getChildOperators().remove(index);
                operator.getChildOperators().add(index, dummyStoreOp);
                dummyStoreOp.getParentOperators().add(operator);
                mergeJoinOp.getParentOperators().remove(parentIndex);
                mergeJoinOp.getParentOperators().add(parentIndex, dummyStoreOp);
            }
        }
        // Hand the final parent list to the merge join so it can keep its own copy.
        mergeJoinOp.cloneOriginalParentsList(mergeJoinOp.getParentOperators());
    }

    /**
     * Recursively stamps a copy of {@code opTraits} on {@code currentOp} and its descendants.
     *
     * <p>A {@link ReduceSinkOperator} is never modified. While walking the children of an operator, the
     * walk over that operator's children stops at the first child that is a {@link ReduceSinkOperator}
     * or a {@link GroupByOperator}; later siblings are not visited.
     *
     * @param currentOp the operator to start from
     * @param opTraits the traits to copy onto each visited operator
     */
    private void setAllChildrenTraits(Operator<? extends OperatorDesc> currentOp, OpTraits opTraits) {
        if (!(currentOp instanceof ReduceSinkOperator)) {
            final OpTraits traitsCopy = new OpTraits(
                    opTraits.getBucketColNames(),
                    opTraits.getCustomBucketFunctions(),
                    opTraits.getNumBuckets(),
                    opTraits.getSortCols(),
                    opTraits.getNumReduceSinks());
            currentOp.setOpTraits(traitsCopy);
            for (Operator<? extends OperatorDesc> descendant : currentOp.getChildOperators()) {
                final boolean isBoundary = descendant instanceof ReduceSinkOperator || descendant instanceof GroupByOperator;
                if (isBoundary) {
                    break;
                }
                this.setAllChildrenTraits(descendant, opTraits);
            }
        }
    }

    /**
     * Attempts to convert the join into a bucket map join.
     *
     * <p>The conversion is refused when a full outer join is not enabled for map join, when
     * {@code checkConvertJoinBucketMapJoin} rejects it, when a bucket column of the big table has no
     * matching partition column on its reduce sink, when a custom bucket function cannot be narrowed to
     * the retained columns, or when the underlying map join conversion fails. On success the map join
     * descriptor is flagged as a bucket map join, traits and bucket counts are propagated, reduce sink
     * partition columns are rewritten if needed, and any custom bucket function is pushed to the big
     * table's scans and to the reduce sink parents.
     *
     * @param joinOp the join to convert
     * @param context optimizer context
     * @param mapJoinConversion the planned conversion (big table position, full outer flags)
     * @param tezBucketJoinProcCtx bucket join context providing the bucket count
     * @return {@code true} if the join was converted, {@code false} otherwise
     * @throws SemanticException if a conversion step fails
     */
    private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, MapJoinConversion mapJoinConversion, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
        if (mapJoinConversion.getIsFullOuterJoin() && !mapJoinConversion.getIsFullOuterEnabledForMapJoin()) {
            return false;
        }
        final int bigTablePosition = mapJoinConversion.getBigTablePos();
        if (!this.checkConvertJoinBucketMapJoin(joinOp, bigTablePosition, tezBucketJoinProcCtx)) {
            LOG.info("Check conversion to bucket map join failed.");
            return false;
        }

        final ReduceSinkOperator bigTableSink = (ReduceSinkOperator)joinOp.getParentOperators().get(bigTablePosition);
        final OpTraits bigTableTraits = bigTableSink.getOpTraits();
        final List<String> bucketColNames = bigTableTraits.getBucketColNames().get(0);
        final List<ExprNodeDesc> sinkPartitionCols = ((ReduceSinkDesc)bigTableSink.getConf()).getPartitionCols();
        boolean rewritePartitionCols = bucketColNames.size() != sinkPartitionCols.size();
        final List<Integer> matchedPositions = new ArrayList<Integer>();
        final Map<String, ExprNodeDesc> sinkColExprs = bigTableSink.getColumnExprMap();
        final boolean[] retained = new boolean[bucketColNames.size()];

        // For every bucket column, find the first partition column with the same expression.
        for (int b = 0; b < bucketColNames.size(); ++b) {
            for (int p = 0; p < sinkPartitionCols.size(); ++p) {
                if (sinkPartitionCols.get(p).isSame(sinkColExprs.get(bucketColNames.get(b)))) {
                    matchedPositions.add(p);
                    retained[b] = true;
                    // A match at a different index means the partition columns must be reordered.
                    rewritePartitionCols = rewritePartitionCols || (b != p);
                    break;
                }
            }
        }
        if (matchedPositions.size() < bucketColNames.size()) {
            return false;
        }

        CustomBucketFunction bucketFunction = bigTableTraits.getCustomBucketFunctions().get(0);
        if (rewritePartitionCols) {
            Preconditions.checkState(bigTableTraits.getCustomBucketFunctions().size() == 1);
            final CustomBucketFunction declaredFunction = bigTableTraits.getCustomBucketFunctions().get(0);
            if (declaredFunction != null) {
                final Optional<CustomBucketFunction> narrowed = declaredFunction.select(retained);
                if (!narrowed.isPresent()) {
                    LOG.info("{} can't keep itself only with {}", (Object)declaredFunction, (Object)retained);
                    return false;
                }
                bucketFunction = narrowed.get();
            }
        }

        final MapJoinOperator mapJoinOp = this.convertJoinMapJoin(joinOp, context, mapJoinConversion, true);
        if (mapJoinOp == null) {
            LOG.debug("Conversion to bucket map join failed.");
            return false;
        }
        final MapJoinDesc mapJoinDesc = (MapJoinDesc)mapJoinOp.getConf();
        mapJoinDesc.setBucketMapJoin(true);
        final OpTraits bucketJoinTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), joinOp.getOpTraits().getCustomBucketFunctions(), tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks());
        mapJoinOp.setOpTraits(bucketJoinTraits);
        this.preserveOperatorInfos(mapJoinOp, joinOp, context);
        this.setNumberOfBucketsOnChildren(mapJoinOp);

        final HashMap<String, Integer> bucketCountByAlias = new HashMap<String, Integer>();
        bucketCountByAlias.put(mapJoinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
        mapJoinDesc.setBigTableBucketNumMapping(bucketCountByAlias);

        if (rewritePartitionCols) {
            // Keep only the matched partition columns, in bucket-column order, on every reduce sink parent.
            for (Operator<? extends OperatorDesc> parent : mapJoinOp.getParentOperators()) {
                if (!(parent instanceof ReduceSinkOperator)) {
                    continue;
                }
                final ReduceSinkOperator sink = (ReduceSinkOperator)parent;
                final ArrayList<ExprNodeDesc> reordered = new ArrayList<ExprNodeDesc>();
                final List<ExprNodeDesc> current = ((ReduceSinkDesc)sink.getConf()).getPartitionCols();
                for (Integer position : matchedPositions) {
                    reordered.add(current.get(position));
                }
                ((ReduceSinkDesc)sink.getConf()).setPartitionCols(reordered);
            }
        }

        if (bucketFunction != null) {
            final Operator<? extends OperatorDesc> bigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition);
            for (TableScanOperator scan : OperatorUtils.findOperatorsUpstream(bigTableOp, TableScanOperator.class)) {
                ((TableScanDesc)scan.getConf()).setGroupingPartitionColumns(bucketFunction.getSourceColumnNames());
                ((TableScanDesc)scan.getConf()).setGroupingNumBuckets(bucketFunction.getNumBuckets());
            }
            for (Operator<? extends OperatorDesc> parent : mapJoinOp.getParentOperators()) {
                if (parent instanceof ReduceSinkOperator) {
                    ((ReduceSinkDesc)((ReduceSinkOperator)parent).getConf()).setCustomPartitionFunction(bucketFunction);
                }
            }
        }

        final MemoryMonitorInfo monitorInfo = mapJoinDesc.getMemoryMonitorInfo();
        if (monitorInfo.isLlap()) {
            // For a bucket map join in LLAP, inflation is neutralised and oversubscription is switched off.
            monitorInfo.setHashTableInflationFactor(1.0);
            monitorInfo.setMemoryOverSubscriptionFactor(0.0);
        }
        return true;
    }

    /**
     * Carries information from a replaced operator over to its replacement: the statistics are copied
     * and the two operators are linked in the plan mapper.
     *
     * @param newOp the replacement operator
     * @param oldOp the operator being replaced
     * @param context optimizer context giving access to the plan mapper
     */
    private void preserveOperatorInfos(Operator<?> newOp, Operator<?> oldOp, OptimizeTezProcContext context) {
        final Statistics carriedStatistics = oldOp.getStatistics();
        newOp.setStatistics(carriedStatistics);

        final ParseContext parseCtx = context.parseContext;
        parseCtx.getContext().getPlanMapper().link(oldOp, newOp);
    }

    /**
     * Decides whether {@code joinOp} may be converted to an SMB join using the input at
     * {@code bigTablePosition} as the big table.
     *
     * <p>The method runs a sequence of independent veto checks and returns {@code false} as soon
     * as one fails:
     * <ol>
     *   <li>optional external-table statistics check (only when
     *       {@code HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS} is set);</li>
     *   <li>no grandparent may report more than one {@link GroupByOperator};</li>
     *   <li>either every join branch has a {@link ReduceSinkOperator} upstream of its parents,
     *       or none has;</li>
     *   <li>every join parent must be a {@link ReduceSinkOperator} without a custom bucket
     *       function whose sort and bucket columns line up with its key columns and with the
     *       traits of its own parent;</li>
     *   <li>branches whose source is a {@link TableScanOperator} must agree on the bucketing
     *       version;</li>
     *   <li>branches whose source is a {@link ReduceSinkOperator} must agree on the presence of
     *       the {@code UNIFORM} reducer trait.</li>
     * </ol>
     * As a side effect the number of buckets is stored in {@code tezBucketJoinProcCtx} once the
     * per-parent checks (1-4) have passed, i.e. before checks 5 and 6 run.
     *
     * @param joinOp               the reduce-side join under consideration
     * @param context              optimizer context (used for configuration lookup)
     * @param bigTablePosition     index of the big-table input among the join parents
     * @param tezBucketJoinProcCtx receives the bucket count to use for the converted join
     * @return {@code true} if all checks pass, {@code false} otherwise
     * @throws SemanticException declared by the signature; not thrown directly in this body
     */
    private boolean checkConvertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
        // NOTE(review): this cast happens before the instanceof check further below, so a
        // non-ReduceSink big-table parent would fail here with a ClassCastException.
        ReduceSinkOperator bigTableRS = (ReduceSinkOperator)joinOp.getParentOperators().get(bigTablePosition);
        int numBuckets = bigTableRS.getParentOperators().get(0).getOpTraits().getNumBuckets();
        // -1 means "no branch inspected yet"; afterwards holds the upstream RS count of the first branch.
        int size = -1;
        boolean shouldCheckExternalTables = context.conf.getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS);
        StringBuilder sb = new StringBuilder();
        for (Operator<OperatorDesc> parentOp : joinOp.getParentOperators()) {
            // canTableUseStats appends the offending table name to sb when it returns false.
            if (shouldCheckExternalTables && !ConvertJoinMapJoin.canTableUseStats(parentOp, sb)) {
                LOG.debug("External table {} found in join and also could not provide statistics - disabling SMB join.", (Object)sb);
                return false;
            }
            if (parentOp.getParentOperators() != null) {
                for (Operator<OperatorDesc> grandParent : parentOp.getParentOperators()) {
                    if (!OperatorUtils.hasMoreOperatorsThan(grandParent, GroupByOperator.class, 1)) continue;
                    LOG.info("We cannot convert to SMB join because one of the join branches has more than one Group by operators in the same reducer.");
                    return false;
                }
            }
            Set<ReduceSinkOperator> set = OperatorUtils.findOperatorsUpstream(parentOp.getParentOperators(), ReduceSinkOperator.class);
            if (size < 0) {
                // First branch: remember its upstream RS count as the reference.
                size = set.size();
                continue;
            }
            // Branches must agree on "has upstream RS" vs "has none"; the exact counts may differ.
            if (size > 0 && set.size() > 0 || size == 0 && set.size() == 0) continue;
            return false;
        }
        for (Operator<OperatorDesc> parentOp : joinOp.getParentOperators()) {
            if (!(parentOp instanceof ReduceSinkOperator)) {
                LOG.debug("Found correlation optimizer operators. Cannot convert to SMB at this time.");
                return false;
            }
            ReduceSinkOperator reduceSinkOperator = (ReduceSinkOperator)parentOp;
            if (reduceSinkOperator.getOpTraits().hasCustomBucketFunction()) {
                LOG.info("We don't support SMB with custom bucket functions yet");
                return false;
            }
            List<ExprNodeDesc> keyCols = ((ReduceSinkDesc)reduceSinkOperator.getConf()).getKeyCols();
            // Only the first entry of the sort / bucket column lists is considered.
            List<String> sortCols = reduceSinkOperator.getOpTraits().getSortCols().get(0);
            List<String> bucketCols = reduceSinkOperator.getOpTraits().getBucketColNames().get(0);
            if (sortCols.size() != keyCols.size() || bucketCols.size() != keyCols.size()) {
                return false;
            }
            // Position by position, the sort and bucket columns must resolve to the RS key expression.
            for (int i = 0; i < sortCols.size(); ++i) {
                ExprNodeDesc sortCol = reduceSinkOperator.getColumnExprMap().get(sortCols.get(i));
                ExprNodeDesc bucketCol = reduceSinkOperator.getColumnExprMap().get(bucketCols.get(i));
                if (sortCol.isSame(keyCols.get(i)) && bucketCol.isSame(keyCols.get(i))) continue;
                return false;
            }
            OpTraits parentTraits = reduceSinkOperator.getParentOperators().get(0).getOpTraits();
            if (null == parentTraits) {
                return false;
            }
            // Sort columns are compared non-strictly, bucket columns strictly (see checkColEquality).
            if (!this.checkColEquality(parentTraits.getSortCols(), reduceSinkOperator.getOpTraits().getSortCols(), reduceSinkOperator.getColumnExprMap(), false)) {
                LOG.info("We cannot convert to SMB because the sort column names do not match.");
                return false;
            }
            if (this.checkColEquality(parentTraits.getBucketColNames(), reduceSinkOperator.getOpTraits().getBucketColNames(), reduceSinkOperator.getColumnExprMap(), true)) continue;
            LOG.info("We cannot convert to SMB because bucket column names do not match.");
            return false;
        }
        // A negative trait value falls back to the reducer count of the big-table RS.
        if (numBuckets < 0) {
            numBuckets = ((ReduceSinkDesc)bigTableRS.getConf()).getNumReducers();
        }
        tezBucketJoinProcCtx.setNumBuckets(numBuckets);
        // -1 means "no table-scan branch seen yet".
        int bucketingVersion = -1;
        for (Operator operator : joinOp.getParentOperators()) {
            assert (operator.getParentOperators() != null && operator.getParentOperators().size() == 1);
            Operator<?> op = OperatorUtils.findSourceOperatorInSameBranch(operator);
            // Only branches whose source is a table scan take part in the version comparison.
            if (!(op instanceof TableScanOperator)) continue;
            int localVersion = ((TableScanDesc)((TableScanOperator)op).getConf()).getTableMetadata().getBucketingVersion();
            if (bucketingVersion == -1) {
                bucketingVersion = localVersion;
                continue;
            }
            if (bucketingVersion == localVersion) continue;
            LOG.debug("SMB Join can't be performed due to bucketing version mismatch");
            return false;
        }
        // null means "no reduce-sink-sourced branch seen yet".
        Boolean prevRsHasUniformTrait = null;
        for (Operator<OperatorDesc> parentOp : joinOp.getParentOperators()) {
            // The search starts from the join parent's own first parent, not from the join parent.
            Operator<?> op = OperatorUtils.findSourceOperatorInSameBranch(parentOp.getParentOperators().get(0));
            if (!(op instanceof ReduceSinkOperator)) continue;
            boolean hasUniformTrait = ((ReduceSinkDesc)((ReduceSinkOperator)op).getConf()).getReducerTraits().contains((Object)ReduceSinkDesc.ReducerTraits.UNIFORM);
            if (prevRsHasUniformTrait == null) {
                prevRsHasUniformTrait = hasUniformTrait;
                continue;
            }
            if (prevRsHasUniformTrait == hasUniformTrait) continue;
            LOG.debug("SMB Join can't be performed due to partition hash generator mismatch across join branches");
            return false;
        }
        LOG.info("We can convert the join to an SMB join.");
        return true;
    }

    /**
     * Recursively copies the bucket count found in the traits of {@code currentOp} onto the
     * traits of its descendants. The walk does not touch, nor descend below, a child that is a
     * {@link ReduceSinkOperator} or a {@link GroupByOperator}.
     *
     * @param currentOp operator whose bucket count is pushed downwards
     */
    private void setNumberOfBucketsOnChildren(Operator<? extends OperatorDesc> currentOp) {
        int inheritedBuckets = currentOp.getOpTraits().getNumBuckets();
        for (Operator<OperatorDesc> child : currentOp.getChildOperators()) {
            boolean stopsPropagation = child instanceof ReduceSinkOperator || child instanceof GroupByOperator;
            if (!stopsPropagation) {
                child.getOpTraits().setNumBuckets(inheritedBuckets);
                setNumberOfBucketsOnChildren(child);
            }
        }
    }

    /**
     * Decides whether {@code joinOp} may be converted to a bucket map join with the input at
     * {@code bigTablePosition} as the big table, and on success stores the bucket count to use
     * in {@code tezBucketJoinProcCtx}.
     *
     * <p>The conversion is rejected when:
     * <ul>
     *   <li>the first join parent or the big-table parent is not a {@link ReduceSinkOperator};</li>
     *   <li>the bucket columns of the big-table reduce sink cannot be matched against those of
     *       its own parent (strict match unless a custom bucket function is present);</li>
     *   <li>{@code HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS} is set and some input table
     *       cannot provide statistics.</li>
     * </ul>
     *
     * <p>The bucket count is taken from the custom bucket function when there is one, otherwise
     * from the traits of the reduce sink's parent; a negative value falls back to the reduce
     * sink's reducer count.
     *
     * @param joinOp               the reduce-side join under consideration
     * @param bigTablePosition     index of the big-table input among the join parents
     * @param tezBucketJoinProcCtx supplies the configuration and receives the bucket count
     * @return {@code true} if the join can be converted, {@code false} otherwise
     * @throws IllegalStateException if the reduce sink reports a non-null list of custom bucket
     *                               functions whose size is not exactly one
     */
    private boolean checkConvertJoinBucketMapJoin(JoinOperator joinOp, int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) {
        Operator<OperatorDesc> firstParent = joinOp.getParentOperators().get(0);
        if (!(firstParent instanceof ReduceSinkOperator)) {
            LOG.info("Operator is " + firstParent.getName() + ". Cannot convert to bucket map join");
            return false;
        }
        // The cast below targets the big-table parent, so that parent must be verified as well;
        // checking only parent 0 would let a ClassCastException escape for other positions.
        Operator<OperatorDesc> bigTableParent = joinOp.getParentOperators().get(bigTablePosition);
        if (!(bigTableParent instanceof ReduceSinkOperator)) {
            LOG.info("Operator is " + bigTableParent.getName() + ". Cannot convert to bucket map join");
            return false;
        }
        ReduceSinkOperator rs = (ReduceSinkOperator)bigTableParent;
        List<List<String>> parentColNames = rs.getOpTraits().getBucketColNames();
        Operator<OperatorDesc> parentOfParent = rs.getParentOperators().get(0);
        List<List<String>> grandParentColNames = parentOfParent.getOpTraits().getBucketColNames();
        List<CustomBucketFunction> customBucketFunctions = rs.getOpTraits().getCustomBucketFunctions();
        Preconditions.checkState(customBucketFunctions == null || customBucketFunctions.size() == 1);
        boolean hasCustomBucketFunction = rs.getOpTraits().hasCustomBucketFunction();
        if (!this.checkColEquality(grandParentColNames, parentColNames, rs.getColumnExprMap(), !hasCustomBucketFunction)) {
            LOG.info("No info available to check for bucket map join. Cannot convert");
            return false;
        }
        boolean shouldCheckExternalTables = tezBucketJoinProcCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS);
        if (shouldCheckExternalTables) {
            // canTableUseStats appends the offending table name to sb when it returns false.
            StringBuilder sb = new StringBuilder();
            for (Operator<OperatorDesc> parentOp : joinOp.getParentOperators()) {
                if (ConvertJoinMapJoin.canTableUseStats(parentOp, sb)) continue;
                LOG.debug("External table {} found in join and also could not provide statistics - disabling bucket map join.", (Object)sb);
                return false;
            }
        }
        // The precondition above permits a null function list, so it must not be dereferenced
        // blindly; a missing list is treated like a missing custom bucket function.
        CustomBucketFunction parentBucketFunction = customBucketFunctions == null ? null : customBucketFunctions.get(0);
        int numBuckets = parentBucketFunction != null ? parentBucketFunction.getNumBuckets() : parentOfParent.getOpTraits().getNumBuckets();
        if (numBuckets < 0) {
            numBuckets = ((ReduceSinkDesc)rs.getConf()).getNumReducers();
        }
        tezBucketJoinProcCtx.setNumBuckets(numBuckets);
        return true;
    }

    /**
     * Checks whether the first column list of {@code parentColNames} can be mapped, in order,
     * onto one of the column lists in {@code grandParentColNames}.
     *
     * <p>Each parent column is resolved through {@code colExprMap}. When it resolves to an
     * {@link ExprNodeColumnDesc}, its column name must equal the grandparent column at the
     * current match position, otherwise that grandparent list is abandoned and the next one is
     * tried. Columns resolving to anything else are passed over without advancing the match
     * position. A grandparent list is accepted once the number of matches equals the number of
     * parent columns; in {@code strict} mode the grandparent list must additionally have exactly
     * that many columns.
     *
     * @param grandParentColNames candidate column lists of the upstream operator; may be null
     * @param parentColNames      column lists of the operator itself (only index 0 is used); may be null
     * @param colExprMap          maps output column names of the operator to their expressions
     * @param strict              whether the accepted grandparent list must be fully covered
     * @return {@code true} if a matching grandparent list is found; {@code false} otherwise,
     *         including when either list is null or {@code parentColNames} is empty
     */
    private boolean checkColEquality(List<List<String>> grandParentColNames, List<List<String>> parentColNames, Map<String, ExprNodeDesc> colExprMap, boolean strict) {
        if (grandParentColNames == null || parentColNames == null || parentColNames.isEmpty()) {
            return false;
        }
        candidates:
        for (List<String> candidate : grandParentColNames) {
            if (candidate.isEmpty()) {
                continue;
            }
            int matched = 0;
            for (String parentCol : parentColNames.get(0)) {
                // More parent columns left than this candidate can supply: give up entirely.
                if (matched >= candidate.size()) {
                    return false;
                }
                ExprNodeDesc expr = colExprMap.get(parentCol);
                if (expr instanceof ExprNodeColumnDesc) {
                    String sourceColumn = ((ExprNodeColumnDesc)expr).getColumn();
                    if (!sourceColumn.equals(candidate.get(matched))) {
                        continue candidates;
                    }
                    matched++;
                }
                if (matched == parentColNames.get(0).size()) {
                    return !strict || matched == candidate.size();
                }
            }
        }
        return false;
    }

    /**
     * Classifies the join conditions of {@code joinOp} by their numeric type code.
     * Codes 1, 2 and 3 are treated as outer joins; codes 0, 4, 5 and 6 as non-outer joins.
     *
     * <p>NOTE(review): the flag is overwritten for every condition, so the returned value
     * reflects the <em>last</em> condition only, not "any condition is outer". Confirm whether
     * that is intended before relying on the method name.
     *
     * @param joinOp join whose conditions are inspected
     * @return whether the last join condition has an outer-join type code; {@code false} when
     *         there are no conditions
     * @throws SemanticException if a condition carries a type code outside 0..6
     */
    private boolean hasOuterJoin(JoinOperator joinOp) throws SemanticException {
        boolean outer = false;
        for (JoinCondDesc cond : ((JoinDesc)joinOp.getConf()).getConds()) {
            int typeCode = cond.getType();
            if (typeCode == 0 || typeCode == 4 || typeCode == 5 || typeCode == 6) {
                outer = false;
            } else if (typeCode == 1 || typeCode == 2 || typeCode == 3) {
                outer = true;
            } else {
                throw new SemanticException("Unknown join type " + cond.getType());
            }
        }
        return outer;
    }

    /**
     * Tells whether {@code joinOp} has no join keys at all.
     *
     * @param joinOp join to inspect
     * @return {@code false} as soon as one input has a non-null, non-empty key array;
     *         {@code true} otherwise (including when the key table itself is null)
     */
    private boolean isCrossProduct(JoinOperator joinOp) {
        ExprNodeDesc[][] keysPerInput = ((JoinDesc)joinOp.getConf()).getJoinKeys();
        if (keysPerInput == null) {
            return true;
        }
        for (ExprNodeDesc[] inputKeys : keysPerInput) {
            if (inputKeys != null && inputKeys.length > 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Picks the big-table input for a map-join style conversion of {@code joinOp} and checks
     * that the remaining inputs are small enough.
     *
     * <p>All sizes are compared as {@code size / buckets} against {@code maxSize}. On success the
     * per-bucket total of the non-big inputs is stored on the join descriptor via
     * {@code setInMemoryDataSize}.
     *
     * @param joinOp                 the join to analyse
     * @param context                optimizer context (configuration and parse context)
     * @param buckets                divisor applied to input sizes before comparing with {@code maxSize}
     * @param skipJoinTypeChecks     when {@code true}, the "more than one condition and
     *                               {@code hasOuterJoin}" veto is not applied
     * @param maxSize                per-bucket size limit
     * @param checkMapJoinThresholds when {@code true}, the hash-table entry limit and shuffle
     *                               size limit are evaluated as well
     * @return the conversion description, or {@code null} when no conversion is possible
     *         (statistics missing, no big-table candidate, size limits exceeded, thresholds
     *         not met, or a cross product with a too-large small side)
     * @throws SemanticException propagated from {@code hasOuterJoin}
     */
    public MapJoinConversion getMapJoinConversion(JoinOperator joinOp, OptimizeTezProcContext context, int buckets, boolean skipJoinTypeChecks, long maxSize, boolean checkMapJoinThresholds) throws SemanticException {
        Set<Integer> bigTableCandidateSet;
        boolean isFullOuterEnabled;
        JoinDesc joinDesc = (JoinDesc)joinOp.getConf();
        JoinCondDesc[] conds = joinDesc.getConds();
        if (!skipJoinTypeChecks && conds.length > 1 && this.hasOuterJoin(joinOp)) {
            return null;
        }
        boolean isFullOuterEnabledForDynamicPartitionHashJoin = false;
        // Never assigned anything but false in this method; passed through to the result as is.
        boolean isFullOuterEnabledForMapJoin = false;
        boolean isFullOuterJoin = MapJoinProcessor.precheckFullOuter(context.conf, joinOp);
        if (isFullOuterJoin && (isFullOuterEnabled = MapJoinProcessor.isFullOuterMapEnabled(context.conf, joinOp))) {
            isFullOuterEnabledForDynamicPartitionHashJoin = MapJoinProcessor.isFullOuterEnabledForDynamicPartitionHashJoin(context.conf, joinOp);
        }
        if ((bigTableCandidateSet = MapJoinProcessor.getBigTableCandidates(conds, true)).isEmpty()) {
            return null;
        }
        // State of the currently selected big table; -1 / null mean "none selected yet".
        int bigTablePosition = -1;
        long bigInputCumulativeCardinality = -1L;
        Statistics bigInputStat = null;
        // Becomes true once some input was found whose per-bucket size exceeds maxSize.
        boolean foundInputNotFittingInMemory = false;
        // Running sum of the sizes of all inputs that are currently NOT the big table.
        long totalSize = 0L;
        // Set when an input considered for the hash table exceeds the entry limit.
        boolean convertDPHJ = false;
        for (int pos = 0; pos < joinOp.getParentOperators().size(); ++pos) {
            boolean selectedBigTable;
            long currentInputCumulativeCardinality;
            Operator<OperatorDesc> parentOp = joinOp.getParentOperators().get(pos);
            Statistics currInputStat = parentOp.getStatistics();
            if (currInputStat == null) {
                LOG.warn("Couldn't get statistics from: " + String.valueOf(parentOp));
                return null;
            }
            long inputSize = this.computeOnlineDataSize(currInputStat);
            LOG.info("Join input#{}; onlineDataSize: {}; Statistics: {}", new Object[]{pos, inputSize, currInputStat});
            boolean currentInputNotFittingInMemory = false;
            // Only inspected for the first input or for one larger than the current big table.
            if (bigInputStat == null || inputSize > this.computeOnlineDataSize(bigInputStat)) {
                if (foundInputNotFittingInMemory) {
                    // A second input that outgrows the already too-large one: give up.
                    return null;
                }
                if (inputSize / (long)buckets > maxSize) {
                    // Too large for memory, so it has to be the big table - if it is allowed to be.
                    if (!bigTableCandidateSet.contains(pos)) {
                        return null;
                    }
                    currentInputNotFittingInMemory = true;
                    foundInputNotFittingInMemory = true;
                }
            }
            if (foundInputNotFittingInMemory) {
                // Cardinality no longer matters once an oversized input fixed the choice.
                currentInputCumulativeCardinality = -1L;
            } else {
                Long cardinality = ConvertJoinMapJoin.computeCumulativeCardinality(parentOp);
                if (cardinality == null) {
                    return null;
                }
                currentInputCumulativeCardinality = cardinality;
            }
            // A candidate becomes the big table when it is the first one seen, when it does not fit
            // in memory, or (while everything fits) when it has the larger cumulative cardinality,
            // with input size as tie-breaker.
            boolean bl = selectedBigTable = bigTableCandidateSet.contains(pos) && (bigInputStat == null || currentInputNotFittingInMemory || !foundInputNotFittingInMemory && (currentInputCumulativeCardinality > bigInputCumulativeCardinality || currentInputCumulativeCardinality == bigInputCumulativeCardinality && inputSize > this.computeOnlineDataSize(bigInputStat)));
            if (bigInputStat != null && selectedBigTable) {
                // The previous big table is demoted to a small table: account for its size.
                totalSize += this.computeOnlineDataSize(bigInputStat);
                if (checkMapJoinThresholds && !this.checkNumberOfEntriesForHashTable(joinOp, bigTablePosition, context)) {
                    convertDPHJ = true;
                }
            } else if (!selectedBigTable) {
                // The current input stays on the small side.
                totalSize += inputSize;
                if (checkMapJoinThresholds && !this.checkNumberOfEntriesForHashTable(joinOp, pos, context)) {
                    convertDPHJ = true;
                }
            }
            if (totalSize / (long)buckets > maxSize) {
                return null;
            }
            if (!selectedBigTable) continue;
            bigTablePosition = pos;
            bigInputCumulativeCardinality = currentInputCumulativeCardinality;
            bigInputStat = currInputStat;
        }
        if (bigTablePosition == -1) {
            LOG.debug("No big table selected, no MapJoin");
            return null;
        }
        // When the entry limit was exceeded, the conversion is refused if the big table's
        // shuffle size is within the configured limit (checkShuffleSizeForLargeTable == true).
        if (checkMapJoinThresholds && convertDPHJ && this.checkShuffleSizeForLargeTable(joinOp, bigTablePosition, context)) {
            LOG.debug("Conditions to convert to MapJoin are not met");
            return null;
        }
        boolean cartesianProductEdgeEnabled = HiveConf.getBoolVar((Configuration)context.conf, (HiveConf.ConfVars)HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED);
        if (cartesianProductEdgeEnabled && !this.hasOuterJoin(joinOp) && this.isCrossProduct(joinOp)) {
            // For a cross product, every small side must stay at or below the row threshold.
            for (int i = 0; i < joinOp.getParentOperators().size(); ++i) {
                Statistics parentStats;
                if (i == bigTablePosition || (parentStats = joinOp.getParentOperators().get(i).getStatistics()).getNumRows() <= (long)HiveConf.getIntVar((Configuration)context.conf, (HiveConf.ConfVars)HiveConf.ConfVars.XPROD_SMALL_TABLE_ROWS_THRESHOLD)) continue;
                return null;
            }
        }
        ((JoinDesc)joinOp.getConf()).setInMemoryDataSize(totalSize / (long)buckets);
        return new MapJoinConversion(bigTablePosition, isFullOuterJoin, isFullOuterEnabledForDynamicPartitionHashJoin, isFullOuterEnabledForMapJoin);
    }

    /**
     * Computes a cumulative row-count figure for the operator tree rooted (upstream) at
     * {@code op}: the operator's own row count plus a contribution from its parents. For a
     * {@link CommonJoinOperator} the contribution is the largest parent value; for any other
     * operator it is the sum of the parent values.
     *
     * @param op operator whose upstream tree is measured
     * @return the cumulative cardinality, or {@code null} if any operator on the way has no
     *         statistics
     */
    private static Long computeCumulativeCardinality(Operator<? extends OperatorDesc> op) {
        boolean takeLargestInput = op instanceof CommonJoinOperator;
        long upstream = 0L;
        for (Operator<OperatorDesc> inputOp : op.getParentOperators()) {
            Long inputCardinality = computeCumulativeCardinality(inputOp);
            if (inputCardinality == null) {
                return null;
            }
            if (takeLargestInput) {
                upstream = Math.max(upstream, inputCardinality.longValue());
            } else {
                upstream += inputCardinality.longValue();
            }
        }
        Statistics ownStats = op.getStatistics();
        if (ownStats == null) {
            LOG.warn("Couldn't get statistics from: " + String.valueOf(op));
            return null;
        }
        return upstream + ownStats.getNumRows();
    }

    /**
     * Replaces {@code joinOp} with a {@link MapJoinOperator} built by
     * {@code MapJoinProcessor.convertJoinOpMapJoinOp} and repairs the surrounding operator graph.
     *
     * <p>When the big-table parent of the new operator is a {@link ReduceSinkOperator} and
     * {@code removeReduceSink} is set, that reduce sink is spliced out: its first parent takes its
     * place in the map join's parent list, and children of the reduce sink's parents that lead to
     * a dynamic partition pruning event are detached (and recorded in
     * {@code context.pruningOpsRemovedByPriorOpt}).
     *
     * @param joinOp            the join to convert
     * @param context           optimizer context
     * @param mapJoinConversion supplies the big-table position
     * @param removeReduceSink  whether the big-table reduce sink should be removed
     * @return the new map join operator, or {@code null} if a join parent is a
     *         {@link MuxOperator} or the underlying conversion returned {@code null}
     * @throws SemanticException propagated from the conversion helpers
     */
    public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, MapJoinConversion mapJoinConversion, boolean removeReduceSink) throws SemanticException {
        Operator<OperatorDesc> parentBigTableOp;
        // Joins fed by a mux operator are not converted.
        for (Operator<OperatorDesc> parentOp : joinOp.getParentOperators()) {
            if (!(parentOp instanceof MuxOperator)) continue;
            return null;
        }
        int bigTablePosition = mapJoinConversion.getBigTablePos();
        MapJoinOperator mapJoinOp = MapJoinProcessor.convertJoinOpMapJoinOp(context.conf, joinOp, ((JoinDesc)joinOp.getConf()).isLeftInputJoin(), ((JoinDesc)joinOp.getConf()).getBaseSrc(), ((JoinDesc)joinOp.getConf()).getMapAliases(), bigTablePosition, true, removeReduceSink);
        if (mapJoinOp == null) {
            return null;
        }
        MapJoinDesc mapJoinDesc = (MapJoinDesc)mapJoinOp.getConf();
        mapJoinDesc.setHybridHashJoin(HiveConf.getBoolVar((Configuration)context.conf, (HiveConf.ConfVars)HiveConf.ConfVars.HIVE_USE_HYBRIDGRACE_HASHJOIN));
        // Hybrid hash join is switched off again when the first key list is empty.
        List<ExprNodeDesc> joinExprs = mapJoinDesc.getKeys().values().iterator().next();
        if (joinExprs.size() == 0) {
            mapJoinDesc.setHybridHashJoin(false);
        }
        if ((parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition)) instanceof ReduceSinkOperator) {
            // Captured before the graph is rewired; used for the semijoin cycle check at the end.
            Operator<OperatorDesc> parentSelectOpOfBigTableOp = parentBigTableOp.getParentOperators().get(0);
            if (removeReduceSink) {
                for (Operator<OperatorDesc> p : parentBigTableOp.getParentOperators()) {
                    // Siblings of the reduce sink that lead to a dynamic pruning event operator.
                    HashSet<Operator<OperatorDesc>> dynamicPartitionOperators = new HashSet<Operator<OperatorDesc>>();
                    HashMap<Operator<OperatorDesc>, AppMasterEventOperator> opEventPairs = new HashMap<Operator<OperatorDesc>, AppMasterEventOperator>();
                    for (Operator<OperatorDesc> c : p.getChildOperators()) {
                        AppMasterEventOperator event = this.findDynamicPartitionBroadcast(c);
                        if (event == null) continue;
                        dynamicPartitionOperators.add(c);
                        opEventPairs.put(c, event);
                    }
                    for (Operator<OperatorDesc> c : dynamicPartitionOperators) {
                        // Branches whose event was already recorded as removed are left alone.
                        if (!context.pruningOpsRemovedByPriorOpt.isEmpty() && context.pruningOpsRemovedByPriorOpt.contains(opEventPairs.get(c))) continue;
                        p.removeChild(c);
                        LOG.info("Disabling dynamic pruning for: " + ((DynamicPruningEventDesc)((AppMasterEventOperator)opEventPairs.get(c)).getConf()).getTableScan().getName() + ". Need to be removed together with reduce sink");
                    }
                    for (Operator<OperatorDesc> op : dynamicPartitionOperators) {
                        context.pruningOpsRemovedByPriorOpt.add((AppMasterEventOperator)opEventPairs.get(op));
                    }
                }
                // Splice the reduce sink out: its first parent becomes the map join's big-table parent.
                mapJoinOp.getParentOperators().remove(bigTablePosition);
                if (!mapJoinOp.getParentOperators().contains(parentBigTableOp.getParentOperators().get(0))) {
                    mapJoinOp.getParentOperators().add(bigTablePosition, parentBigTableOp.getParentOperators().get(0));
                }
                parentBigTableOp.getParentOperators().get(0).removeChild(parentBigTableOp);
            }
            // Make the child links of all parents point at the map join instead of the old join.
            for (Operator<OperatorDesc> op : mapJoinOp.getParentOperators()) {
                if (!op.getChildOperators().contains(mapJoinOp)) {
                    op.getChildOperators().add(mapJoinOp);
                }
                op.getChildOperators().remove(joinOp);
            }
            if (context.parseContext.getRsToSemiJoinBranchInfo().size() > 0 && removeReduceSink) {
                this.removeCycleCreatingSemiJoinOps(mapJoinOp, parentSelectOpOfBigTableOp, context.parseContext);
            }
        }
        return mapJoinOp;
    }

    /**
     * Removes semijoin branches that start on the big-table side of {@code mapjoinOp} and target
     * a table scan sitting upstream of one of the map join's reduce-sink parents. Per the debug
     * message below, such a branch would cause a task cycle.
     *
     * @param mapjoinOp                the map join that replaced the original join
     * @param parentSelectOpOfBigTable operator above the (removed) big-table reduce sink whose
     *                                 children are searched for semijoin branches
     * @param parseContext             source of the reduce-sink to semijoin-branch mapping
     * @throws SemanticException propagated from the removal helpers
     */
    private void removeCycleCreatingSemiJoinOps(MapJoinOperator mapjoinOp, Operator<?> parentSelectOpOfBigTable, ParseContext parseContext) throws SemanticException {
        // Semijoin reduce sinks scheduled for removal, with the table scan they feed.
        HashMap<ReduceSinkOperator, TableScanOperator> semiJoinMap = new HashMap<ReduceSinkOperator, TableScanOperator>();
        for (Operator<OperatorDesc> op : parentSelectOpOfBigTable.getChildOperators()) {
            if (!(op instanceof SelectOperator)) continue;
            // Follow the first-child chain down to the leaf of this branch.
            while (op.getChildOperators().size() > 0) {
                op = op.getChildOperators().get(0);
            }
            if (!(op instanceof ReduceSinkOperator)) continue;
            ReduceSinkOperator rs = (ReduceSinkOperator)op;
            // NOTE(review): assumes rs has an entry in the semijoin branch map; a missing entry
            // would raise a NullPointerException here - confirm against callers.
            TableScanOperator ts = parseContext.getRsToSemiJoinBranchInfo().get(rs).getTsOp();
            if (ts == null) continue;
            // Presumably the group-by above the semijoin reduce sink (named after it) - TODO confirm.
            Operator<OperatorDesc> parentGB = op.getParentOperators().get(0);
            block2: for (Operator<OperatorDesc> childRS : parentGB.getChildOperators()) {
                rs = (ReduceSinkOperator)childRS;
                ts = parseContext.getRsToSemiJoinBranchInfo().get(rs).getTsOp();
                assert (ts != null);
                // Is the targeted table scan upstream of any reduce-sink parent of the map join?
                for (Operator<OperatorDesc> parent : mapjoinOp.getParentOperators()) {
                    if (!(parent instanceof ReduceSinkOperator)) continue;
                    Set<TableScanOperator> tsOps = OperatorUtils.findOperatorsUpstream(parent, TableScanOperator.class);
                    boolean found = false;
                    for (TableScanOperator parentTS : tsOps) {
                        // Identity comparison: it must be the very same operator instance.
                        if (ts != parentTS) continue;
                        semiJoinMap.put(rs, ts);
                        found = true;
                        break;
                    }
                    if (!found) continue;
                    // One hit is enough for this reduce sink; move on to the next sibling.
                    continue block2;
                }
            }
        }
        if (semiJoinMap.size() > 0) {
            for (ReduceSinkOperator rs : semiJoinMap.keySet()) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Found semijoin optimization from the big table side of a map join, which will cause a task cycle. Removing semijoin " + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty((Operator)semiJoinMap.get(rs)));
                }
                GenTezUtils.removeBranch(rs);
                GenTezUtils.removeSemiJoinOperator(parseContext, rs, (TableScanOperator)semiJoinMap.get(rs));
            }
        }
    }

    /**
     * Searches below {@code parent} for an {@link AppMasterEventOperator} configured with a
     * {@link DynamicPruningEventDesc}.
     *
     * <p>Each child of {@code parent} is followed downwards along single-child links. The walk
     * of a branch ends without a result when it reaches a {@link ReduceSinkOperator}, a
     * {@link FileSinkOperator}, or an operator whose number of children is not exactly one.
     *
     * @param parent operator whose child branches are searched
     * @return the first matching event operator, or {@code null} if no branch contains one
     */
    private AppMasterEventOperator findDynamicPartitionBroadcast(Operator<?> parent) {
        for (Operator<OperatorDesc> child : parent.getChildOperators()) {
            Operator<OperatorDesc> cursor = child;
            while (cursor != null) {
                if (cursor instanceof AppMasterEventOperator && cursor.getConf() instanceof DynamicPruningEventDesc) {
                    return (AppMasterEventOperator)cursor;
                }
                boolean isSink = cursor instanceof ReduceSinkOperator || cursor instanceof FileSinkOperator;
                if (isSink || cursor.getChildOperators().size() != 1) {
                    // Dead end for this branch; try the next child of the parent.
                    break;
                }
                cursor = cursor.getChildOperators().get(0);
            }
        }
        return null;
    }

    /**
     * Estimates the number of buckets for {@code joinOp} from its {@link ReduceSinkOperator}
     * parents.
     *
     * <p>With {@code useOpTraits} the result is the largest bucket count found in the parents'
     * traits (including the bucket count of a custom bucket function, if present). Without it,
     * the trait-based value is discarded and the largest reducer count among the parents is
     * used instead. Whenever the chosen value is not positive the method falls back to the
     * reducer-based value, and finally to 1.
     *
     * @param joinOp      join whose parents are inspected
     * @param useOpTraits whether the trait-based bucket count should be preferred
     * @return a bucket count that is always at least 1
     * @throws IllegalStateException if a parent reports a custom bucket function but the list
     *                               of custom bucket functions does not have exactly one entry
     */
    private static int estimateNumBuckets(JoinOperator joinOp, boolean useOpTraits) {
        int traitBuckets = -1;
        int reducerBuckets = -1;
        for (Operator<OperatorDesc> parentOp : joinOp.getParentOperators()) {
            if (parentOp instanceof ReduceSinkOperator) {
                OpTraits traits = parentOp.getOpTraits();
                traitBuckets = Math.max(traitBuckets, traits.getNumBuckets());
                if (traits.hasCustomBucketFunction()) {
                    Preconditions.checkState(traits.getCustomBucketFunctions().size() == 1);
                    traitBuckets = Math.max(traitBuckets, traits.getCustomBucketFunctions().get(0).getNumBuckets());
                }
                if (!useOpTraits) {
                    ReduceSinkOperator reduceSink = (ReduceSinkOperator)parentOp;
                    reducerBuckets = Math.max(reducerBuckets, ((ReduceSinkDesc)reduceSink.getConf()).getNumReducers());
                }
            }
        }
        int result = useOpTraits ? traitBuckets : -1;
        if (result <= 0) {
            result = reducerBuckets;
        }
        if (result <= 0) {
            result = 1;
        }
        return result;
    }

    /**
     * Tries to turn {@code joinOp} into a dynamically partitioned hash join.
     *
     * <p>The big table is chosen with {@code getMapJoinConversion} using an estimated reducer
     * count and {@code maxJoinMemory} as the size limit. If a conversion is possible (and, for a
     * full outer join, enabled for this join type) the join is converted without removing the
     * reduce sinks, flagged as a dynamic partition hash join, and given new traits that carry
     * the reducer count of the big-table reduce sink; those traits are then pushed to all
     * children.
     *
     * @param joinOp  join to convert
     * @param context optimizer context
     * @return {@code true} if the join was converted, {@code false} otherwise
     * @throws SemanticException propagated from the conversion helpers
     */
    private boolean convertJoinDynamicPartitionedHashJoin(JoinOperator joinOp, OptimizeTezProcContext context) throws SemanticException {
        int numReducers = estimateNumBuckets(joinOp, false);
        LOG.info("Try dynamic partitioned hash join with estimated " + numReducers + " reducers");
        MapJoinConversion conversion = getMapJoinConversion(joinOp, context, numReducers, false, this.maxJoinMemory, false);
        if (conversion == null) {
            return false;
        }
        if (conversion.getIsFullOuterJoin() && !conversion.getIsFullOuterEnabledForDynamicPartitionHashJoin()) {
            return false;
        }

        // Replace the estimate with the reducer count of the actual big-table reduce sink.
        ReduceSinkOperator bigTableParentRS = (ReduceSinkOperator)joinOp.getParentOperators().get(conversion.getBigTablePos());
        numReducers = ((ReduceSinkDesc)bigTableParentRS.getConf()).getNumReducers();
        LOG.debug("Real big table reducers = " + numReducers);

        MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, conversion, false);
        if (mapJoinOp == null) {
            return false;
        }
        LOG.info("Selected dynamic partitioned hash join");
        ((MapJoinDesc)mapJoinOp.getConf()).setDynamicPartitionHashJoin(true);

        OpTraits joinTraits = joinOp.getOpTraits();
        OpTraits mapJoinTraits = new OpTraits(joinTraits.getBucketColNames(), joinTraits.getCustomBucketFunctions(), numReducers, null, joinTraits.getNumReduceSinks());
        mapJoinOp.setOpTraits(mapJoinTraits);
        preserveOperatorInfos(mapJoinOp, joinOp, context);
        for (Operator<OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
            setAllChildrenTraits(childOp, mapJoinOp.getOpTraits());
        }
        return true;
    }

    /**
     * Chooses a reduce-side strategy for {@code joinOp}: a dynamically partitioned hash join
     * when both {@code HIVE_CONVERT_JOIN} and {@code HIVE_DYNAMIC_PARTITION_HASHJOIN} are set
     * and the conversion succeeds, otherwise a merge join.
     *
     * @param joinOp  join to convert
     * @param context optimizer context
     * @throws SemanticException propagated from the conversion helpers
     */
    private void fallbackToReduceSideJoin(JoinOperator joinOp, OptimizeTezProcContext context) throws SemanticException {
        boolean hashJoinAllowed = context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN)
            && context.conf.getBoolVar(HiveConf.ConfVars.HIVE_DYNAMIC_PARTITION_HASHJOIN);
        boolean converted = hashJoinAllowed && convertJoinDynamicPartitionedHashJoin(joinOp, context);
        if (!converted) {
            fallbackToMergeJoin(joinOp, context);
        }
    }

    /**
     * Converts {@code joinOp} to a common merge join. The big-table position is taken from
     * {@code getMapJoinConversion} (run with join-type checks skipped and no size limit);
     * position 0 is used when that yields no valid position.
     *
     * @param joinOp  join to convert
     * @param context optimizer context
     * @throws SemanticException propagated from the conversion helpers
     */
    private void fallbackToMergeJoin(JoinOperator joinOp, OptimizeTezProcContext context) throws SemanticException {
        int estimatedBuckets = estimateNumBuckets(joinOp, false);
        MapJoinConversion conversion = getMapJoinConversion(joinOp, context, estimatedBuckets, true, Long.MAX_VALUE, false);
        boolean hasValidPosition = conversion != null && conversion.getBigTablePos() != -1;
        if (!hasValidPosition) {
            LOG.info("Could not get a valid join position. Defaulting to position 0");
        }
        int pos = hasValidPosition ? conversion.getBigTablePos() : 0;
        LOG.info("Fallback to common merge join operator");
        convertJoinSMBJoin(joinOp, context, pos, 0, false);
    }

    /**
     * Checks whether the estimated number of distinct key values of the join input at
     * {@code position} stays within {@code HIVE_CONVERT_JOIN_MAX_ENTRIES_HASHTABLE}.
     *
     * @param joinOp   join whose input is inspected
     * @param position index of the input among the join parents
     * @param context  optimizer context used to read the configured limit
     * @return {@code false} only when the estimate exceeds the limit; {@code true} when the
     *         limit is below 1, when column statistics for a key are unavailable, or when the
     *         estimate is within the limit
     */
    private boolean checkNumberOfEntriesForHashTable(JoinOperator joinOp, int position, OptimizeTezProcContext context) {
        long maxEntries = HiveConf.getLongVar((Configuration)context.parseContext.getConf(), HiveConf.ConfVars.HIVE_CONVERT_JOIN_MAX_ENTRIES_HASHTABLE);
        if (maxEntries < 1L) {
            return true;
        }
        ReduceSinkOperator input = (ReduceSinkOperator)joinOp.getParentOperators().get(position);
        List<String> keyNames = StatsUtils.getQualifedReducerKeyNames(((ReduceSinkDesc)input.getConf()).getOutputKeyColumnNames());
        Statistics stats = input.getStatistics();
        ArrayList<ColStatistics> keyStats = new ArrayList<ColStatistics>();
        for (String keyName : keyNames) {
            ColStatistics keyStat = stats.getColumnStatisticsFromColName(keyName);
            if (keyStat == null) {
                // Without statistics for every key no estimate is possible; do not veto.
                return true;
            }
            keyStats.add(keyStat);
        }
        long estimatedEntries = estimateNDV(stats.getNumRows(), keyStats);
        LOG.debug("Estimated NDV for input {}: {}; Max NDV for MapJoin conversion: {}", new Object[]{position, estimatedEntries, maxEntries});
        boolean withinLimit = estimatedEntries <= maxEntries;
        if (!withinLimit) {
            LOG.debug("Number of different entries for HashTable is greater than the max; we do not convert to MapJoin");
        }
        return withinLimit;
    }

    /**
     * Checks whether the online data size of the join input at {@code position} stays within
     * {@code HIVE_CONVERT_JOIN_MAX_SHUFFLE_SIZE}.
     *
     * @param joinOp   join whose input is inspected
     * @param position index of the input among the join parents
     * @param context  optimizer context used to read the configured limit
     * @return {@code true} when the limit is at least 1 and the input size does not exceed it;
     *         {@code false} when the limit is below 1 or the input is larger than the limit
     */
    private boolean checkShuffleSizeForLargeTable(JoinOperator joinOp, int position, OptimizeTezProcContext context) {
        long maxShuffleSize = HiveConf.getLongVar((Configuration)context.parseContext.getConf(), HiveConf.ConfVars.HIVE_CONVERT_JOIN_MAX_SHUFFLE_SIZE);
        if (maxShuffleSize < 1L) {
            return false;
        }
        ReduceSinkOperator input = (ReduceSinkOperator)joinOp.getParentOperators().get(position);
        long inputSize = computeOnlineDataSize(input.getStatistics());
        LOG.debug("Estimated size for input {}: {}; Max size for DPHJ conversion: {}", new Object[]{position, inputSize, maxShuffleSize});
        boolean withinLimit = inputSize <= maxShuffleSize;
        if (!withinLimit) {
            LOG.debug("Size of input is greater than the max; we do not convert to DPHJ");
        }
        return withinLimit;
    }

    /**
     * Estimates the number of distinct key combinations in {@code numRows} rows.
     *
     * <p>With a single column the column's distinct count is returned unchanged. Otherwise the
     * distinct counts greater than 1 are multiplied (overflow-safe) into a domain size
     * {@code n}, and the result is {@code n * (1 - ((n - 1) / n) ^ numRows)}, rounded and capped
     * at {@code numRows}. If {@code (n - 1) / n} is indistinguishable from 1 in double
     * precision, {@code numRows} is returned directly.
     *
     * @param numRows     number of rows of the input
     * @param columnStats statistics of the key columns
     * @return the estimated number of distinct values
     */
    private static long estimateNDV(long numRows, List<ColStatistics> columnStats) {
        if (columnStats.size() == 1) {
            return columnStats.get(0).getCountDistint();
        }
        long domainSize = 1L;
        for (ColStatistics columnStat : columnStats) {
            long distinct = columnStat.getCountDistint();
            if (distinct > 1L) {
                domainSize = StatsUtils.safeMult(domainSize, distinct);
            }
        }
        double nn = domainSize;
        double missRatio = (nn - 1.0) / nn;
        if (missRatio == 1.0) {
            // The domain is so large that the formula degenerates; every row counts as distinct.
            return numRows;
        }
        double expectedDistinct = nn * (1.0 - Math.pow(missRatio, numRows));
        return Math.min(Math.round(expectedDistinct), numRows);
    }

    /**
     * Checks whether the table scanned upstream of {@code op} can provide statistics.
     *
     * @param op operator whose single upstream {@link TableScanOperator} is looked up
     * @param sb receives the fully qualified table name when the answer is {@code false}
     * @return {@code true} if no upstream table scan is found or
     *         {@code StatsUtils.checkCanProvideStats} accepts its table; {@code false} otherwise
     */
    private static boolean canTableUseStats(Operator op, StringBuilder sb) {
        Operator ancestor = OperatorUtils.findSingleOperatorUpstream(op, TableScanOperator.class);
        if (ancestor == null) {
            return true;
        }
        TableScanOperator ts = (TableScanOperator)ancestor;
        boolean canUseStats = StatsUtils.checkCanProvideStats(new Table(((TableScanDesc)ts.getConf()).getTableMetadata().getTTable()));
        if (canUseStats) {
            return true;
        }
        sb.append(((TableScanDesc)ts.getConf()).getTableMetadata().getFullyQualifiedName());
        return false;
    }

    /**
     * Kind of key a join is classified under, derived from the shape and type of the join keys
     * of the first input.
     */
    private static enum HashMapDataStructureType {
        COMPOSITE_KEYED,
        LONG_KEYED;

        /**
         * Classifies a join by the keys of its first input.
         *
         * @param conf join descriptor supplying the join keys
         * @return {@link #LONG_KEYED} when the first input has exactly one key and that key is
         *         a primitive of category BOOLEAN, BYTE, SHORT, INT or LONG;
         *         {@link #COMPOSITE_KEYED} in every other case, including a missing or empty
         *         key table or a missing key array for the first input
         */
        public static HashMapDataStructureType of(JoinDesc conf) {
            ExprNodeDesc[][] keys = conf.getJoinKeys();
            // Guard the first entry before dereferencing it: the table may be empty and its
            // entries may be null (isCrossProduct treats them the same way).
            if (keys == null || keys.length == 0 || keys[0] == null || keys[0].length != 1) {
                return COMPOSITE_KEYED;
            }
            TypeInfo typeInfo = keys[0][0].getTypeInfo();
            if (!(typeInfo instanceof PrimitiveTypeInfo)) {
                return COMPOSITE_KEYED;
            }
            switch (((PrimitiveTypeInfo)typeInfo).getPrimitiveCategory()) {
                case BOOLEAN:
                case BYTE:
                case SHORT:
                case INT:
                case LONG:
                    return LONG_KEYED;
                default:
                    return COMPOSITE_KEYED;
            }
        }
    }

    /**
     * Immutable result of a map-join conversion analysis: the selected big-table position and
     * the full-outer-join related flags that were determined alongside it.
     */
    public static class MapJoinConversion {
        private final int bigTablePos;
        private final boolean isFullOuterJoin;
        private final boolean isFullOuterEnabledForDynamicPartitionHashJoin;
        private final boolean isFullOuterEnabledForMapJoin;

        /**
         * @param bigTablePos                                   index of the big-table input
         * @param isFullOuterJoin                               whether the join is a full outer join
         * @param isFullOuterEnabledForDynamicPartitionHashJoin whether full outer is enabled for
         *                                                      dynamic partition hash join
         * @param isFullOuterEnabledForMapJoin                  whether full outer is enabled for
         *                                                      map join
         */
        public MapJoinConversion(int bigTablePos, boolean isFullOuterJoin, boolean isFullOuterEnabledForDynamicPartitionHashJoin, boolean isFullOuterEnabledForMapJoin) {
            this.isFullOuterEnabledForMapJoin = isFullOuterEnabledForMapJoin;
            this.isFullOuterEnabledForDynamicPartitionHashJoin = isFullOuterEnabledForDynamicPartitionHashJoin;
            this.isFullOuterJoin = isFullOuterJoin;
            this.bigTablePos = bigTablePos;
        }

        /** @return index of the big-table input */
        public int getBigTablePos() {
            return bigTablePos;
        }

        /** @return whether the join is a full outer join */
        public boolean getIsFullOuterJoin() {
            return isFullOuterJoin;
        }

        /** @return whether full outer is enabled for dynamic partition hash join */
        public boolean getIsFullOuterEnabledForDynamicPartitionHashJoin() {
            return isFullOuterEnabledForDynamicPartitionHashJoin;
        }

        /** @return whether full outer is enabled for map join */
        public boolean getIsFullOuterEnabledForMapJoin() {
            return isFullOuterEnabledForMapJoin;
        }
    }
}

