/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.eval.app.tools;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.tika.eval.app.tools.LeipzigHelper;

public class LeipzigSampler {
    public static void main(String[] args) throws Exception {
        int sentsPerLanguage = 10;
        Path leipzigDir = Paths.get(args[0], new String[0]);
        Path sampleFile = Paths.get(args[1], new String[0]);
        LeipzigSampler leipzigSampler = new LeipzigSampler();
        try (BufferedWriter writer = Files.newBufferedWriter(sampleFile, StandardCharsets.UTF_8, new OpenOption[0]);){
            leipzigSampler.execute(leipzigDir, sentsPerLanguage, writer);
        }
    }

    private void execute(Path leipzigDir, int sentsPerLang, BufferedWriter writer) throws IOException {
        Map<String, List<Path>> fileMap = LeipzigHelper.getFiles(leipzigDir);
        for (Map.Entry<String, List<Path>> e : fileMap.entrySet()) {
            ArrayList<String> sentences = new ArrayList<String>();
            for (Path p : e.getValue()) {
                this.addSentences(p, sentences);
            }
            Collections.shuffle(sentences);
            String lang = e.getKey();
            for (int i = 0; i < sentsPerLang; ++i) {
                writer.write(this.row(lang, (String)sentences.get(i)));
            }
        }
    }

    private void addSentences(Path p, List<String> sentences) throws IOException {
        try (BufferedReader reader = Files.newBufferedReader(p, StandardCharsets.UTF_8);){
            String line = reader.readLine();
            while (line != null) {
                int tab = line.indexOf("\t");
                if (tab > -1) {
                    line = line.substring(tab + 1);
                }
                sentences.add(line);
                line = reader.readLine();
            }
        }
    }

    private String row(String lang, String s) {
        s = s.replaceAll("\\s+", " ");
        return lang + "\t" + s + "\n";
    }
}

