/*
 * Decompiled with CFR 0.152.
 */
package org.apache.druid.indexer.hadoop;

import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Supplier;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.indexer.HadoopDruidIndexerConfig;
import org.apache.druid.indexer.JobHelper;
import org.apache.druid.indexer.hadoop.DatasourceIngestionSpec;
import org.apache.druid.indexer.hadoop.DatasourceInputSplit;
import org.apache.druid.indexer.hadoop.DatasourceRecordReader;
import org.apache.druid.indexer.hadoop.WindowedDataSegment;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * Hadoop {@link InputFormat} that produces {@link InputRow}s from Druid segments.
 *
 * <p>The data sources to read, and for each one its ingestion spec, segment list and maximum
 * split size, are stored as JSON/string properties in the Hadoop {@link Configuration} by
 * {@link #addDataSource}. {@link #getSplits} reads them back and groups the segments of each
 * data source into {@link DatasourceInputSplit}s.
 */
public class DatasourceInputFormat extends InputFormat<NullWritable, InputRow> {
    private static final Logger logger = new Logger(DatasourceInputFormat.class);
    private static final String CONF_DATASOURCES = "druid.datasource.input.datasources";
    private static final String CONF_SCHEMA = "druid.datasource.input.schema";
    private static final String CONF_SEGMENTS = "druid.datasource.input.segments";
    private static final String CONF_MAX_SPLIT_SIZE = "druid.datasource.input.split.max.size";

    /** Maximum number of preferred locations attached to a single split. */
    private static final int MAX_SPLIT_LOCATIONS = 3;

    /**
     * Supplies the {@code mapred} input format that is used only to look up the locations of
     * segment files (see {@link #getLocations}). The default is a {@link TextInputFormat} that
     * reports every file as non-splittable and returns the {@link FileStatus} of each configured
     * input path as-is.
     */
    private Supplier<org.apache.hadoop.mapred.InputFormat> supplier = new Supplier<org.apache.hadoop.mapred.InputFormat>() {
        @Override
        public org.apache.hadoop.mapred.InputFormat get() {
            return new TextInputFormat() {
                @Override
                protected boolean isSplitable(FileSystem fs, Path file) {
                    return false;
                }

                @Override
                protected FileStatus[] listStatus(JobConf job) throws IOException {
                    List<FileStatus> statusList = new ArrayList<>();
                    for (Path path : FileInputFormat.getInputPaths(job)) {
                        statusList.add(path.getFileSystem(job).getFileStatus(path));
                    }
                    return statusList.toArray(new FileStatus[0]);
                }
            };
        }
    };

    /**
     * Builds the input splits for every data source registered in the job configuration.
     *
     * <p>For each data source the configured maximum split size decides the grouping:
     * a positive value makes the segments get sorted by size (ascending) and packed greedily
     * into splits; a negative value is first replaced by {@code totalSize / numMapTasks} when
     * the job has a positive number of map tasks.
     *
     * @param context job context whose configuration holds the data source properties
     * @return the splits of all data sources, in data source order
     * @throws IOException if a configuration property cannot be parsed
     * @throws ISE if a data source has no segments to read
     */
    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException {
        // Work on a private copy: getLocations() overwrites the input paths of this JobConf.
        JobConf conf = new JobConf(context.getConfiguration());
        List<InputSplit> splits = new ArrayList<>();
        for (String dataSource : getDataSources(conf)) {
            List<WindowedDataSegment> segments = getSegments(conf, dataSource);
            if (segments == null || segments.isEmpty()) {
                throw new ISE("No segments found to read for dataSource[%s]", dataSource);
            }
            for (int i = 0; i < segments.size(); ++i) {
                WindowedDataSegment segment = segments.get(i);
                // Report a 1-based position so the last line reads "N/N" rather than "N-1/N".
                logger.info(
                    "Segment %,d/%,d for dataSource[%s] has identifier[%s], interval[%s]",
                    i + 1,
                    segments.size(),
                    dataSource,
                    segment.getSegment().getId(),
                    segment.getInterval()
                );
            }
            long maxSize = resolveMaxSplitSize(conf, dataSource, segments);
            if (maxSize > 0L) {
                // Segments are about to be combined; order them by size before packing.
                segments.sort(Comparator.comparingLong(s -> s.getSegment().getSize()));
            }
            packIntoSplits(segments, maxSize, conf, splits);
        }
        logger.info("Number of splits [%d]", splits.size());
        return splits;
    }

    /**
     * Returns the effective maximum split size for a data source. A negative configured value
     * is replaced by the total segment size divided by the number of map tasks, provided that
     * number is positive; otherwise the configured value is returned unchanged.
     */
    private static long resolveMaxSplitSize(JobConf conf, String dataSource, List<WindowedDataSegment> segments) {
        long maxSize = getMaxSplitSize(conf, dataSource);
        if (maxSize >= 0L) {
            return maxSize;
        }
        long totalSize = 0L;
        for (WindowedDataSegment segment : segments) {
            totalSize += segment.getSegment().getSize();
        }
        int mapTask = conf.getNumMapTasks();
        return mapTask > 0 ? totalSize / (long) mapTask : maxSize;
    }

    /**
     * Packs {@code segments}, in the given order, into splits and appends them to {@code splits}.
     * The current split is closed before adding a segment that would push its accumulated size
     * over {@code maxSize}; a split whose accumulated size is still zero is never closed.
     */
    private void packIntoSplits(List<WindowedDataSegment> segments, long maxSize, JobConf conf, List<InputSplit> splits) {
        org.apache.hadoop.mapred.InputFormat fio = supplier.get();
        List<WindowedDataSegment> current = new ArrayList<>();
        long currentSize = 0L;
        for (WindowedDataSegment segment : segments) {
            long segmentSize = segment.getSegment().getSize();
            if (currentSize > 0L && currentSize + segmentSize > maxSize) {
                splits.add(toDataSourceSplit(current, fio, conf));
                current = new ArrayList<>();
                currentSize = 0L;
            }
            current.add(segment);
            currentSize += segmentSize;
        }
        if (!current.isEmpty()) {
            splits.add(toDataSourceSplit(current, fio, conf));
        }
    }

    /**
     * Creates a new {@link DatasourceRecordReader}; the arguments are not used here.
     */
    @Override
    public RecordReader<NullWritable, InputRow> createRecordReader(InputSplit split, TaskAttemptContext context) {
        return new DatasourceRecordReader();
    }

    /**
     * Replaces the supplier of the {@code mapred} input format used for location lookup.
     *
     * @return this instance, for chaining
     */
    @VisibleForTesting
    DatasourceInputFormat setSupplier(Supplier<org.apache.hadoop.mapred.InputFormat> supplier) {
        this.supplier = supplier;
        return this;
    }

    /**
     * Wraps the given segments in a {@link DatasourceInputSplit} whose locations are the most
     * frequent locations reported for the segments' files.
     */
    private DatasourceInputSplit toDataSourceSplit(List<WindowedDataSegment> segments, org.apache.hadoop.mapred.InputFormat fio, JobConf conf) {
        String[] locations = getFrequentLocations(getLocations(segments, fio, conf));
        return new DatasourceInputSplit(segments, locations);
    }

    /**
     * Lazily streams the locations that {@code fio} reports for the file of each segment.
     *
     * <p>For every segment the input path of {@code conf} is overwritten with the segment's URI
     * before {@code fio} is asked for its splits, so {@code conf} is modified as the stream is
     * consumed. Lookup failures are logged and contribute no locations; they never propagate.
     *
     * @param segments segments whose file locations are wanted
     * @param fio      {@code mapred} input format used to compute splits for a segment file
     * @param conf     job configuration; its input paths are overwritten
     * @return a sequential stream of location names, possibly containing duplicates
     */
    @VisibleForTesting
    static Stream<String> getLocations(List<WindowedDataSegment> segments, org.apache.hadoop.mapred.InputFormat fio, JobConf conf) {
        return segments.stream().sequential().flatMap((WindowedDataSegment segment) -> {
            FileInputFormat.setInputPaths(conf, new Path(JobHelper.getURIFromSegment(segment.getSegment())));
            try {
                return Arrays.stream(fio.getSplits(conf, 1)).flatMap((org.apache.hadoop.mapred.InputSplit split) -> {
                    try {
                        return Arrays.stream(split.getLocations());
                    }
                    catch (Exception e) {
                        // Best effort: a failed lookup only means fewer location hints.
                        logger.error(e, "Exception getting locations");
                        return Stream.empty();
                    }
                });
            }
            catch (Exception e) {
                // Best effort: a failed lookup only means fewer location hints.
                logger.error(e, "Exception getting splits");
                return Stream.empty();
            }
        });
    }

    /**
     * Returns up to three of the most frequently occurring locations, ordered by descending
     * occurrence count and, for equal counts, by ascending name.
     *
     * @param locations location names, duplicates allowed; the stream is consumed
     * @return at most three distinct locations; empty if the stream is empty
     */
    @VisibleForTesting
    static String[] getFrequentLocations(Stream<String> locations) {
        Map<String, Long> locationCountMap = locations.collect(
            Collectors.groupingBy(location -> location, Collectors.counting())
        );
        Comparator<Map.Entry<String, Long>> valueComparator = Map.Entry.comparingByValue(Comparator.reverseOrder());
        Comparator<Map.Entry<String, Long>> keyComparator = Map.Entry.comparingByKey();
        return locationCountMap.entrySet().stream()
            .sorted(valueComparator.thenComparing(keyComparator))
            .limit(MAX_SPLIT_LOCATIONS)
            .map(Map.Entry::getKey)
            .toArray(String[]::new);
    }

    /**
     * Reads the names of the data sources registered in {@code conf}.
     *
     * @return the registered data source names, or an empty list if none are registered
     * @throws IOException if the stored JSON cannot be parsed
     */
    public static List<String> getDataSources(Configuration conf) throws IOException {
        String currentDatasources = conf.get(CONF_DATASOURCES);
        if (currentDatasources == null) {
            return Collections.emptyList();
        }
        return HadoopDruidIndexerConfig.JSON_MAPPER.readValue(currentDatasources, new TypeReference<List<String>>(){});
    }

    /**
     * Reads and validates the ingestion spec stored in {@code conf} for {@code dataSource}.
     *
     * @throws IOException          if the stored JSON cannot be parsed
     * @throws NullPointerException if no spec is stored for the data source
     * @throws ISE                  if the spec has no dimensions or no metrics
     */
    public static DatasourceIngestionSpec getIngestionSpec(Configuration conf, String dataSource) throws IOException {
        String specString = conf.get(StringUtils.format("%s.%s", CONF_SCHEMA, dataSource));
        if (specString == null) {
            throw new NullPointerException(StringUtils.format("null spec for dataSource[%s]", dataSource));
        }
        DatasourceIngestionSpec spec = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(specString, DatasourceIngestionSpec.class);
        if (spec.getDimensions() == null || spec.getDimensions().size() == 0) {
            throw new ISE("load schema does not have dimensions");
        }
        if (spec.getMetrics() == null || spec.getMetrics().size() == 0) {
            throw new ISE("load schema does not have metrics");
        }
        return spec;
    }

    /**
     * Reads the segment list stored in {@code conf} for {@code dataSource}.
     *
     * @return the deserialized segments; {@link #getSplits} treats a null or empty result as an error
     * @throws IOException          if the stored JSON cannot be parsed
     * @throws NullPointerException if no segment list is stored for the data source
     */
    public static List<WindowedDataSegment> getSegments(Configuration conf, String dataSource) throws IOException {
        String segmentsString = conf.get(StringUtils.format("%s.%s", CONF_SEGMENTS, dataSource));
        if (segmentsString == null) {
            // Fail with a message naming the data source instead of an opaque error from the JSON parser.
            throw new NullPointerException(StringUtils.format("null segments for dataSource[%s]", dataSource));
        }
        return HadoopDruidIndexerConfig.JSON_MAPPER.readValue(segmentsString, new TypeReference<List<WindowedDataSegment>>(){});
    }

    /**
     * Reads the maximum split size stored in {@code conf} for {@code dataSource}.
     *
     * @return the stored value, or {@code 0} if none is stored
     */
    public static long getMaxSplitSize(Configuration conf, String dataSource) {
        return conf.getLong(StringUtils.format("%s.%s", CONF_MAX_SPLIT_SIZE, dataSource), 0L);
    }

    /**
     * Registers a data source in {@code conf}: appends its name to the list of data sources and
     * stores its ingestion spec, segment list and maximum split size under per-data-source keys.
     *
     * @param conf         configuration to write to
     * @param spec         ingestion spec; its data source name is used as the key suffix
     * @param segments     segments to read for this data source
     * @param maxSplitSize maximum split size, interpreted as described on {@link #getSplits}
     * @throws IOException if a value cannot be serialized or the existing list cannot be parsed
     * @throws ISE         if the data source is already registered
     */
    public static void addDataSource(Configuration conf, DatasourceIngestionSpec spec, List<WindowedDataSegment> segments, long maxSplitSize) throws IOException {
        String dataSource = spec.getDataSource();
        List<String> dataSources = getDataSources(conf);
        if (dataSources.contains(dataSource)) {
            throw new ISE("Oops, cannot load the same dataSource twice!");
        }
        List<String> newDataSources = new ArrayList<>(dataSources);
        newDataSources.add(dataSource);
        conf.set(CONF_DATASOURCES, HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(newDataSources));
        conf.set(StringUtils.format("%s.%s", CONF_SCHEMA, dataSource), HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(spec));
        conf.set(StringUtils.format("%s.%s", CONF_SEGMENTS, dataSource), HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(segments));
        conf.set(StringUtils.format("%s.%s", CONF_MAX_SPLIT_SIZE, dataSource), String.valueOf(maxSplitSize));
    }
}

