/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark.builder;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Output;
import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.engine.spark.builder.CubeBuilderHelper$;
import org.apache.kylin.engine.spark.job.NSparkCubingUtil;
import org.apache.kylin.engine.spark.metadata.ColumnDesc;
import org.apache.kylin.engine.spark.metadata.SegmentInfo;
import org.apache.spark.dict.NGlobalDictionary;
import org.apache.spark.internal.Logging;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.KylinFunctions$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.utils.SparkVersionUtils$;
import org.slf4j.Logger;
import scala.Function0;
import scala.Function1;
import scala.Predef$;
import scala.collection.GenTraversableOnce;
import scala.collection.IterableLike;
import scala.collection.JavaConverters$;
import scala.collection.Seq;
import scala.collection.TraversableLike;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.Seq$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.ObjectRef;

public final class CubeTableEncoder$
implements Logging {
    /** Singleton instance of this object; assigned by the private constructor. */
    public static CubeTableEncoder$ MODULE$;
    /** Backing field read and written by the {@code org$apache$spark$internal$Logging$$log_} accessor pair. */
    private transient Logger org$apache$spark$internal$Logging$$log_;

    // Instantiates the singleton once at class-initialization time; the constructor
    // itself stores the new instance in MODULE$, so the result is not captured here.
    static {
        new CubeTableEncoder$();
    }

    /** Returns the value produced by the {@code Logging.logName$} forwarder for this instance. */
    public String logName() {
        return Logging.logName$((Logging)this);
    }

    /** Returns the {@link Logger} supplied by the {@code Logging.log$} forwarder for this instance. */
    public Logger log() {
        return Logging.log$((Logging)this);
    }

    /**
     * Forwards {@code msg} to {@code Logging.logInfo$}.
     *
     * @param msg supplier of the message text
     */
    public void logInfo(Function0<String> msg) {
        Logging.logInfo$((Logging)this, msg);
    }

    /**
     * Forwards {@code msg} to {@code Logging.logDebug$}.
     *
     * @param msg supplier of the message text
     */
    public void logDebug(Function0<String> msg) {
        Logging.logDebug$((Logging)this, msg);
    }

    /**
     * Forwards {@code msg} to {@code Logging.logTrace$}.
     *
     * @param msg supplier of the message text
     */
    public void logTrace(Function0<String> msg) {
        Logging.logTrace$((Logging)this, msg);
    }

    /**
     * Forwards {@code msg} to {@code Logging.logWarning$}.
     *
     * @param msg supplier of the message text
     */
    public void logWarning(Function0<String> msg) {
        Logging.logWarning$((Logging)this, msg);
    }

    /**
     * Forwards {@code msg} to {@code Logging.logError$}.
     *
     * @param msg supplier of the message text
     */
    public void logError(Function0<String> msg) {
        Logging.logError$((Logging)this, msg);
    }

    /**
     * Forwards {@code msg} and {@code throwable} to {@code Logging.logInfo$}.
     *
     * @param msg supplier of the message text
     * @param throwable the throwable passed along with the message
     */
    public void logInfo(Function0<String> msg, Throwable throwable) {
        Logging.logInfo$((Logging)this, msg, (Throwable)throwable);
    }

    /**
     * Forwards {@code msg} and {@code throwable} to {@code Logging.logDebug$}.
     *
     * @param msg supplier of the message text
     * @param throwable the throwable passed along with the message
     */
    public void logDebug(Function0<String> msg, Throwable throwable) {
        Logging.logDebug$((Logging)this, msg, (Throwable)throwable);
    }

    /**
     * Forwards {@code msg} and {@code throwable} to {@code Logging.logTrace$}.
     *
     * @param msg supplier of the message text
     * @param throwable the throwable passed along with the message
     */
    public void logTrace(Function0<String> msg, Throwable throwable) {
        Logging.logTrace$((Logging)this, msg, (Throwable)throwable);
    }

    /**
     * Forwards {@code msg} and {@code throwable} to {@code Logging.logWarning$}.
     *
     * @param msg supplier of the message text
     * @param throwable the throwable passed along with the message
     */
    public void logWarning(Function0<String> msg, Throwable throwable) {
        Logging.logWarning$((Logging)this, msg, (Throwable)throwable);
    }

    /**
     * Forwards {@code msg} and {@code throwable} to {@code Logging.logError$}.
     *
     * @param msg supplier of the message text
     * @param throwable the throwable passed along with the message
     */
    public void logError(Function0<String> msg, Throwable throwable) {
        Logging.logError$((Logging)this, msg, (Throwable)throwable);
    }

    /** Returns the result of the {@code Logging.isTraceEnabled$} forwarder for this instance. */
    public boolean isTraceEnabled() {
        return Logging.isTraceEnabled$((Logging)this);
    }

    /**
     * Forwards to the single-argument {@code Logging.initializeLogIfNecessary$}.
     *
     * @param isInterpreter flag passed through unchanged to the forwarder
     */
    public void initializeLogIfNecessary(boolean isInterpreter) {
        Logging.initializeLogIfNecessary$((Logging)this, (boolean)isInterpreter);
    }

    /**
     * Forwards to the two-argument {@code Logging.initializeLogIfNecessary$}.
     *
     * @param isInterpreter flag passed through unchanged to the forwarder
     * @param silent flag passed through unchanged to the forwarder
     * @return whatever the forwarder returns
     */
    public boolean initializeLogIfNecessary(boolean isInterpreter, boolean silent) {
        return Logging.initializeLogIfNecessary$((Logging)this, (boolean)isInterpreter, (boolean)silent);
    }

    /**
     * Returns the value of {@code Logging.initializeLogIfNecessary$default$2$}; by its name this is
     * the compiler-generated default for the second parameter of {@code initializeLogIfNecessary}.
     */
    public boolean initializeLogIfNecessary$default$2() {
        return Logging.initializeLogIfNecessary$default$2$((Logging)this);
    }

    /**
     * Forwards to {@code Logging.initializeForcefully$}.
     *
     * @param isInterpreter flag passed through unchanged to the forwarder
     * @param silent flag passed through unchanged to the forwarder
     */
    public void initializeForcefully(boolean isInterpreter, boolean silent) {
        Logging.initializeForcefully$((Logging)this, (boolean)isInterpreter, (boolean)silent);
    }

    /** Getter for the transient logger backing field; may return {@code null} if it was never set. */
    public Logger org$apache$spark$internal$Logging$$log_() {
        return this.org$apache$spark$internal$Logging$$log_;
    }

    /**
     * Setter for the transient logger backing field.
     *
     * @param x$1 the logger to store
     */
    public void org$apache$spark$internal$Logging$$log__$eq(Logger x$1) {
        this.org$apache$spark$internal$Logging$$log_ = x$1;
    }

    /**
     * Appends a dictionary-encoded column to {@code ds} for every column in {@code cols} and,
     * when {@code rePartitionEncodedDatasetWithRowKey} is enabled in the segment's config,
     * repartitions the encoded dataset by the segment's row-key columns that are present in it.
     *
     * <p>The source row count is computed with {@code ds.count()} (a Spark action) and divided by the
     * configured bucket-size threshold to derive the minimum bucket count handed to the per-column
     * encoding step. The Spark job description is set while counting/encoding and is always cleared
     * afterwards, even if encoding fails.
     *
     * @param ds    the source dataset; its schema is used to look up each column to encode
     * @param seg   segment metadata providing the Kylin config, project and row-key columns
     * @param cols  the columns to encode; when empty no encoding and no row-key repartition happens
     * @param jobId job identifier forwarded to the per-column encoding step (used there to build
     *              the skewed-data storage path)
     * @return the dataset with the encoded columns appended ({@code ds} itself when nothing was encoded)
     */
    public Dataset<Row> encodeTable(Dataset<Row> ds, SegmentInfo seg, Set<ColumnDesc> cols, String jobId) {
        if (SparkVersionUtils$.MODULE$.isLessThanSparkVersion("2.4", true)) {
            Predef$.MODULE$.assert(!new StringOps(Predef$.MODULE$.augmentString(ds.sparkSession().conf().get("spark.sql.adaptive.enabled", "false"))).toBoolean(), (Function0 & Serializable & scala.Serializable)() -> "Parameter 'spark.sql.adaptive.enabled' must be false when encode tables.");
        }
        StructType structType = ds.schema();
        // Mutable holders shared with the per-column step: the dataset being built up and the
        // largest partition count any column asked for.
        ObjectRef partitionedDs = ObjectRef.create(ds);
        IntRef repartitionSizeAfterEncode = IntRef.create((int)0);

        ds.sparkSession().sparkContext().setJobDescription("Encode count source data.");
        try {
            long sourceCnt = ds.count();
            int bucketThreshold = seg.kylinconf().getGlobalDictV2ThresholdBucketSize();
            long minBucketSize = sourceCnt / (long)bucketThreshold;
            for (ColumnDesc ref : cols) {
                CubeTableEncoder$.$anonfun$encodeTable$2(seg, minBucketSize, repartitionSizeAfterEncode, structType, partitionedDs, ds, jobId, ref);
            }
        } finally {
            // Always clear the description so a failure here does not mislabel later jobs.
            ds.sparkSession().sparkContext().setJobDescription(null);
        }

        if (cols.isEmpty() || !seg.kylinconf().rePartitionEncodedDatasetWithRowKey()) {
            return (Dataset)partitionedDs.elem;
        }

        // Repartition by those row-key columns that actually exist in the encoded dataset.
        Seq colsInDS = (Seq)((Dataset)partitionedDs.elem).schema().map((Function1 & Serializable & scala.Serializable)x$1 -> x$1.name(), scala.collection.Seq$.MODULE$.canBuildFrom());
        List rowKeyColRefs = (List)((List)((TraversableLike)seg.allRowKeyCols().map((Function1 & Serializable & scala.Serializable)colDesc -> NSparkCubingUtil.convertFromDot(colDesc.identity()), List$.MODULE$.canBuildFrom())).filter((Function1 & Serializable & scala.Serializable)elem -> BoxesRunTime.boxToBoolean((boolean)colsInDS.contains(elem)))).map((Function1 & Serializable & scala.Serializable)colName -> functions$.MODULE$.col(colName), List$.MODULE$.canBuildFrom());
        // An explicitly configured positive partition count overrides the one derived from bucket sizes.
        if (seg.kylinconf().getRepartitionNumAfterEncode() > 0) {
            repartitionSizeAfterEncode.elem = seg.kylinconf().getRepartitionNumAfterEncode();
        }
        this.logInfo((Function0<String>)(Function0 & Serializable & scala.Serializable)() -> "repartition encoded dataset to " + repartitionSizeAfterEncode.elem + " partitions to avoid data skew");
        partitionedDs.elem = ((Dataset)partitionedDs.elem).repartition(repartitionSizeAfterEncode.elem, (Seq)Predef$.MODULE$.wrapRefArray((Object[])rowKeyColRefs.toArray(ClassTag$.MODULE$.apply(Column.class))));
        return (Dataset)partitionedDs.elem;
    }

    /**
     * Per-column step of {@code encodeTable}: appends the dictionary-encoded form of column
     * {@code ref} (aliased as the column name plus {@code ENCODE_SUFFIX}) to the dataset held in
     * {@code partitionedDs$1}, repartitioning it first.
     *
     * <p>When skew detection is enabled in the segment config, a cached sample of the column is
     * aggregated to find values whose sampled count exceeds {@code totalCount * skewPercentageThreshHold};
     * those values and their encoded ids are serialized with Kryo to
     * {@code <jobTmpDir>/<jobId>/skewed_data/<column identity>}, and the dataset is repartitioned on a
     * {@code scatter_skew_data} column instead of the raw column value. The exact semantics of
     * {@code dict_encode} and {@code scatter_skew_data} are defined outside this file.
     *
     * @param seg$1                        segment metadata and Kylin config
     * @param minBucketSize$1              lower bound used to derive the repartition count
     * @param repartitionSizeAfterEncode$1 holder raised to the largest repartition count seen so far
     * @param structType$1                 schema of the source dataset, used to resolve the column name
     * @param partitionedDs$1              holder of the dataset being built; replaced with the encoded result
     * @param ds$1                         the original source dataset, sampled for skew detection
     * @param jobId$1                      job identifier used in the skewed-data storage path
     * @param ref                          the column to encode
     */
    public static final /* synthetic */ void $anonfun$encodeTable$2(SegmentInfo seg$1, long minBucketSize$1, IntRef repartitionSizeAfterEncode$1, StructType structType$1, ObjectRef partitionedDs$1, Dataset ds$1, String jobId$1, ColumnDesc ref) {
        NGlobalDictionary globalDict = new NGlobalDictionary(seg$1.project(), ref.tableAliasName(), ref.columnName(), seg$1.kylinconf().getHdfsWorkingDirectory());
        int bucketSize = globalDict.getBucketSizeOrDefault(seg$1.kylinconf().getGlobalDictV2MinHashPartitions());
        // Round minBucketSize up to the next multiple of bucketSize (always adds at least one bucketSize).
        int enlargedBucketSize = (int)((minBucketSize$1 / (long)bucketSize + 1L) * (long)bucketSize);
        repartitionSizeAfterEncode$1.elem = Math.max(repartitionSizeAfterEncode$1.elem, enlargedBucketSize);

        String encodeColRef = NSparkCubingUtil.convertFromDot(ref.identity());
        int columnIndex = structType$1.fieldIndex(encodeColRef);
        String dictParams = String.join("_0_DOT_0_", seg$1.project(), ref.tableAliasName(), ref.columnName(), seg$1.kylinconf().getHdfsWorkingDirectory());
        String aliasName = structType$1.apply(columnIndex).name().concat(CubeBuilderHelper$.MODULE$.ENCODE_SUFFIX());
        Column encodeCol = KylinFunctions$.MODULE$.dict_encode(functions$.MODULE$.col(encodeColRef).cast((DataType)StringType$.MODULE$), functions$.MODULE$.lit((Object)dictParams), functions$.MODULE$.lit((Object)BoxesRunTime.boxToInteger((int)bucketSize)).cast((DataType)StringType$.MODULE$)).as(aliasName);
        // Snapshot of the columns currently in the dataset; the encoded column is appended to these.
        Seq columns = (Seq)((Dataset)partitionedDs$1.elem).schema().map((Function1 & Serializable & scala.Serializable)ty -> functions$.MODULE$.col(ty.name()), scala.collection.Seq$.MODULE$.canBuildFrom());

        boolean scattered = false;
        if (seg$1.kylinconf().detectDataSkewInDictEncodingEnabled()) {
            Column castEncodeColRef = functions$.MODULE$.col(encodeColRef).cast((DataType)StringType$.MODULE$);
            Dataset sampleData = ds$1.select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{castEncodeColRef})).sample(seg$1.kylinconf().sampleRateInEncodingSkewDetection()).cache();
            Object2LongOpenHashMap<String> skewedDict = new Object2LongOpenHashMap<>();
            try {
                long totalCount = sampleData.count();
                // Values whose sampled frequency exceeds the threshold, paired with their encoded id.
                Dataset skewedRows = sampleData
                        .groupBy(encodeColRef, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0]))
                        .agg(functions$.MODULE$.count(functions$.MODULE$.lit((Object)BoxesRunTime.boxToInteger((int)1))).alias("count_value"), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[0]))
                        .filter(functions$.MODULE$.col("count_value").$greater((Object)BoxesRunTime.boxToDouble((double)((double)totalCount * seg$1.kylinconf().skewPercentageThreshHold()))))
                        .repartition(enlargedBucketSize, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{castEncodeColRef}))
                        .select((Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{castEncodeColRef, encodeCol})));
                for (Object collected : (Object[])skewedRows.collect()) {
                    Row row = (Row)collected;
                    skewedDict.put(row.getString(0), row.getLong(1));
                }
            } finally {
                // Release the cached sample even when the skew aggregation fails.
                sampleData.unpersist();
            }

            if (skewedDict.size() > 0) {
                scattered = true;
                Path skewDictStorage = new Path(seg$1.kylinconf().getJobTmpDir(seg$1.project()) + "/" + jobId$1 + "/skewed_data/" + ref.identity());
                FileSystem fs = skewDictStorage.getFileSystem(new Configuration());
                // Replace any previous dictionary left at this path.
                if (fs.exists(skewDictStorage)) {
                    fs.delete(skewDictStorage, true);
                }
                Kryo kryo = new Kryo();
                // try-with-resources closes (and flushes) the stream even if serialization throws.
                try (Output output = new Output((OutputStream)fs.create(skewDictStorage))) {
                    kryo.writeClassAndObject(output, (Object)skewedDict);
                }

                String scatterAlias = "scatter_skew_data_" + ref.columnName();
                Column scatterColumn = KylinFunctions$.MODULE$.scatter_skew_data(castEncodeColRef, functions$.MODULE$.lit((Object)skewDictStorage.toString())).alias(scatterAlias);
                // Same dictionary parameters as above plus the skew-dictionary location as a fifth element.
                String skewDictParams = String.join("_0_DOT_0_", seg$1.project(), ref.tableAliasName(), ref.columnName(), seg$1.kylinconf().getHdfsWorkingDirectory(), skewDictStorage.toString());
                Column skewAwareEncodeCol = KylinFunctions$.MODULE$.dict_encode(functions$.MODULE$.col(encodeColRef).cast((DataType)StringType$.MODULE$), functions$.MODULE$.lit((Object)skewDictParams), functions$.MODULE$.lit((Object)BoxesRunTime.boxToInteger((int)bucketSize)).cast((DataType)StringType$.MODULE$)).alias(aliasName);
                // Add the scatter column, repartition on it, then drop it again while appending the encoded column.
                partitionedDs$1.elem = ((Dataset)partitionedDs$1.elem)
                        .select((Seq)columns.$plus$plus((GenTraversableOnce)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{scatterColumn})), scala.collection.Seq$.MODULE$.canBuildFrom()))
                        .repartition(enlargedBucketSize, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col(scatterAlias)}))
                        .select((Seq)columns.$plus$plus((GenTraversableOnce)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{skewAwareEncodeCol})), scala.collection.Seq$.MODULE$.canBuildFrom()));
            }
        }

        if (!scattered) {
            // No skew handling: repartition directly on the string-cast column value, then encode.
            partitionedDs$1.elem = ((Dataset)partitionedDs$1.elem)
                    .repartition(enlargedBucketSize, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col(encodeColRef).cast((DataType)StringType$.MODULE$)}))
                    .select((Seq)columns.$plus$plus((GenTraversableOnce)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{encodeCol})), scala.collection.Seq$.MODULE$.canBuildFrom()));
        }
    }

    /**
     * Private constructor, invoked only from the static initializer. Publishes the new instance
     * as {@code MODULE$} before running the {@code Logging} trait initializer.
     */
    private CubeTableEncoder$() {
        MODULE$ = this;
        Logging.$init$((Logging)this);
    }
}

