/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.io.NumberRangeFileFilter;
import edu.stanford.nlp.io.NumberRangesFileFilter;
import edu.stanford.nlp.ling.CategoryWordTag;
import edu.stanford.nlp.ling.CategoryWordTagFactory;
import edu.stanford.nlp.ling.StringLabel;
import edu.stanford.nlp.ling.StringLabelFactory;
import edu.stanford.nlp.parser.lexparser.CNFTransformers;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.PostSplitter;
import edu.stanford.nlp.parser.lexparser.Train;
import edu.stanford.nlp.parser.lexparser.TreeAnnotator;
import edu.stanford.nlp.parser.lexparser.TreeBinarizer;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.Pair;
import java.io.FileFilter;
import java.util.AbstractCollection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

/**
 * A {@link TreeTransformer} that runs a tree through a fixed pipeline:
 * annotation, optional selective post-splitting, addition of a boundary
 * child under the root, binarization, and optional conversion to CNF.
 *
 * <p>Which optional stages run is decided by the static flags on
 * {@link Train}. The post-splitter and the two counters are only created if
 * the corresponding flag is set when this object is constructed, so every
 * later use is guarded against the flag having been switched on afterwards.
 */
public class TreeAnnotatorAndBinarizer
implements TreeTransformer {
    /** Word used for the boundary leaf and as the "word" of the root label. */
    private static final String BOUNDARY_WORD = ".$.";
    /** Category/tag used for the boundary pre-terminal and as the "tag" of the root label. */
    private static final String BOUNDARY_TAG = ".$$.";

    private final TreeFactory tf;
    private final TreebankLanguagePack tlp;
    private final TreeTransformer annotator;
    private final TreeTransformer binarizer;
    /** Null unless {@code Train.selectivePostSplit} was set at construction time. */
    private final TreeTransformer postSplitter;
    private final boolean forceCNF;
    /** Null unless {@code Train.printAnnotatedRuleCounts} was set at construction time. */
    private final ClassicCounter<Tree> annotatedRuleCounts;
    /** Null unless {@code Train.printAnnotatedStateCounts} was set at construction time. */
    private final ClassicCounter<String> annotatedStateCounts;

    /**
     * Convenience constructor that uses {@code tlpParams.headFinder()} for both
     * annotation and binarization.
     *
     * @param tlpParams language-specific parameters
     * @param forceCNF whether {@link #transformTree} applies the CNF transformer to its result
     * @param insideFactor passed through to the {@link TreeBinarizer}
     * @param doSubcategorization if true a {@link TreeAnnotator} is used, otherwise a {@link TreeNullAnnotator}
     */
    public TreeAnnotatorAndBinarizer(TreebankLangParserParams tlpParams, boolean forceCNF, boolean insideFactor, boolean doSubcategorization) {
        this(tlpParams.headFinder(), tlpParams.headFinder(), tlpParams, forceCNF, insideFactor, doSubcategorization);
    }

    /**
     * Builds the annotator, binarizer and (depending on the {@link Train} flags)
     * the post-splitter and statistics counters.
     *
     * @param annotationHF head finder handed to the annotator
     * @param binarizationHF head finder handed to the binarizer
     * @param tlpParams language-specific parameters
     * @param forceCNF whether {@link #transformTree} applies the CNF transformer to its result
     * @param insideFactor passed through to the {@link TreeBinarizer}
     * @param doSubcategorization if true a {@link TreeAnnotator} is used, otherwise a {@link TreeNullAnnotator}
     */
    public TreeAnnotatorAndBinarizer(HeadFinder annotationHF, HeadFinder binarizationHF, TreebankLangParserParams tlpParams, boolean forceCNF, boolean insideFactor, boolean doSubcategorization) {
        if (doSubcategorization) {
            this.annotator = new TreeAnnotator(annotationHF, tlpParams);
        } else {
            this.annotator = new TreeNullAnnotator(annotationHF);
        }
        this.binarizer = new TreeBinarizer(binarizationHF, tlpParams.treebankLanguagePack(), insideFactor, Train.markovFactor, Train.markovOrder, Train.compactGrammar() > 0, Train.compactGrammar() > 1, Train.HSEL_CUT, Train.markFinalStates);
        this.postSplitter = Train.selectivePostSplit ? new PostSplitter(tlpParams) : null;
        this.tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());
        this.tlp = tlpParams.treebankLanguagePack();
        this.forceCNF = forceCNF;
        this.annotatedRuleCounts = Train.printAnnotatedRuleCounts ? new ClassicCounter<Tree>() : null;
        this.annotatedStateCounts = Train.printAnnotatedStateCounts ? new ClassicCounter<String>() : null;
    }

    /**
     * Asks the post-splitter to dump its statistics. Does nothing when selective
     * post-splitting is off or no post-splitter was created.
     */
    public void dumpStats() {
        // The flag is static and mutable, so also check that the splitter exists.
        if (Train.selectivePostSplit && this.postSplitter != null) {
            ((PostSplitter)this.postSplitter).dumpStats();
        }
    }

    /**
     * Forwards the selective-split setting to the underlying {@link TreeBinarizer}.
     *
     * @param doSelectiveSplit value passed to {@code TreeBinarizer.setDoSelectiveSplit}
     */
    public void setDoSelectiveSplit(boolean doSelectiveSplit) {
        ((TreeBinarizer)this.binarizer).setDoSelectiveSplit(doSelectiveSplit);
    }

    /**
     * Relabels the root of {@code t} with the language pack's start symbol and
     * appends a boundary pre-terminal (with a single boundary leaf) as its last child.
     *
     * <p>NOTE(review): when {@code t} is a leaf, a warning is printed and the leaf
     * is wrapped in a newly created node; that new node is the one that gets
     * relabelled and extended, but it is not returned, so the caller's tree is
     * left unchanged in that case. This mirrors the original behaviour; confirm
     * whether callers rely on it before changing the signature.
     *
     * @param t the tree to modify in place
     */
    public void addRoot(Tree t) {
        Tree root = t;
        if (root.isLeaf()) {
            System.err.println("Warning: tree is leaf: " + t);
            root = this.tf.newTreeNode(this.tlp.startSymbol(), Collections.singletonList(t));
        }
        root.setLabel(new CategoryWordTag(this.tlp.startSymbol(), BOUNDARY_WORD, BOUNDARY_TAG));

        List<Tree> boundaryKids = new ArrayList<Tree>();
        boundaryKids.add(this.tf.newLeaf(new StringLabel(BOUNDARY_WORD)));
        Tree boundaryPreTerm = this.tf.newTreeNode(new CategoryWordTag(BOUNDARY_TAG, BOUNDARY_WORD, BOUNDARY_TAG), boundaryKids);

        List<Tree> children = root.getChildrenAsList();
        children.add(boundaryPreTerm);
        root.setChildren(children);
    }

    /**
     * Annotates, optionally post-splits, roots, binarizes and optionally
     * CNF-converts the given tree, printing and counting along the way as
     * requested by the {@link Train} flags.
     *
     * @param t the input tree
     * @return the binarized (and, if {@code forceCNF} was set, CNF-transformed) tree
     */
    public Tree transformTree(Tree t) {
        if (Train.printTreeTransformations > 0) {
            Train.printTrainTree(null, "ORIGINAL TREE:", t);
        }

        Tree annotated = this.annotator.transformTree(t);
        if (Train.selectivePostSplit && this.postSplitter != null) {
            annotated = this.postSplitter.transformTree(annotated);
        }
        if (Train.printTreeTransformations > 0) {
            Train.printTrainTree(Train.printAnnotatedPW, "ANNOTATED TREE:", annotated);
        }

        if (Train.printAnnotatedRuleCounts && this.annotatedRuleCounts != null) {
            // Count local trees on a copy that uses plain string labels.
            Tree stringLabelled = annotated.deeperCopy(new LabeledScoredTreeFactory(), new StringLabelFactory());
            Set<Tree> localTrees = stringLabelled.localTrees();
            for (Tree local : localTrees) {
                this.annotatedRuleCounts.incrementCount(local);
            }
        }
        if (Train.printAnnotatedStateCounts && this.annotatedStateCounts != null) {
            for (Tree node : annotated) {
                if (!node.isLeaf()) {
                    this.annotatedStateCounts.incrementCount(node.label().value());
                }
            }
        }

        this.addRoot(annotated);
        Tree binarizedTree = this.binarizer.transformTree(annotated);
        if (Train.printTreeTransformations > 0) {
            Train.printTrainTree(Train.printBinarizedPW, "BINARIZED TREE:", binarizedTree);
            // Each fully printed tree uses up one unit of the print budget.
            --Train.printTreeTransformations;
        }
        if (this.forceCNF) {
            binarizedTree = new CNFTransformers.ToCNFTransformer().transformTree(binarizedTree);
        }
        return binarizedTree;
    }

    /**
     * Prints every counted local tree to stderr as
     * {@code count<TAB>parent --> child child ...}. Prints a warning instead if
     * rule counts were never collected.
     */
    public void printRuleCounts() {
        if (this.annotatedRuleCounts == null) {
            System.err.println("Warning: annotated rule counts were not collected.");
            return;
        }
        System.err.println();
        for (Tree rule : this.annotatedRuleCounts.keySet()) {
            System.err.print(this.annotatedRuleCounts.getCount(rule) + "\t" + rule.label().value() + " -->");
            for (Tree dtr : rule.getChildrenAsList()) {
                System.err.print(" ");
                System.err.print(dtr.label().value());
            }
            System.err.println();
        }
    }

    /**
     * Prints the counted annotated states to stderr, one per line, sorted by
     * state name. Prints a warning instead if state counts were never collected.
     */
    public void printStateCounts() {
        if (this.annotatedStateCounts == null) {
            System.err.println("Warning: annotated state counts were not collected.");
            return;
        }
        System.err.println();
        System.err.println("Annotated state counts");
        List<String> states = new ArrayList<String>(this.annotatedStateCounts.keySet());
        Collections.sort(states);
        for (String state : states) {
            System.err.println(state + "\t" + this.annotatedStateCounts.getCount(state));
        }
    }

    /**
     * Counts the arguments that follow {@code args[index]} up to (not including)
     * the next argument starting with {@code '-'} or the end of the array.
     * An empty-string argument counts as a sub-argument.
     */
    private static int numSubArgs(String[] args, int index) {
        int end = index + 1;
        // startsWith (rather than charAt(0)) so an empty argument cannot throw.
        while (end < args.length && !args[end].startsWith("-")) {
            ++end;
        }
        return end - index - 1;
    }

    /** Prints the command-line usage text to stderr. */
    private static void printUsage() {
        System.err.println("usage: java TreeAnnotatorAndBinarizer options*");
        System.err.println("  Options are like for lexicalized parser including -train treebankPath [fileRange]");
    }

    /**
     * Loads a treebank, runs it through annotation and binarization, and prints
     * each original tree followed by its binarized form to stdout.
     *
     * <p>Recognised here: {@code -train treebankPath [ranges | low high]}. Every
     * other option is handed to {@link Options#setOption}.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        Options op = new Options();
        String treebankPath = null;
        FileFilter trainFilter = null;
        int i = 0;
        while (i < args.length && args[i].startsWith("-")) {
            if (!args[i].equalsIgnoreCase("-train")) {
                i = op.setOption(args, i);
                continue;
            }
            int subArgCount = TreeAnnotatorAndBinarizer.numSubArgs(args, i);
            if (subArgCount < 1) {
                throw new RuntimeException("Error: -train option must have treebankPath as first argument.");
            }
            treebankPath = args[i + 1];
            i += 2;
            if (subArgCount == 2) {
                // A single extra argument is a ranges specification.
                trainFilter = new NumberRangesFileFilter(args[i++], true);
            } else if (subArgCount >= 3) {
                // Two extra arguments are the low and high ends of one range.
                int low = Integer.parseInt(args[i]);
                int high = Integer.parseInt(args[i + 1]);
                trainFilter = new NumberRangeFileFilter(low, high, true);
                i += 2;
            }
        }
        if (i < args.length) {
            // Leftover arguments: exit status 0 is kept from the original behaviour.
            printUsage();
            System.exit(0);
        }
        if (treebankPath == null) {
            // Without -train there is nothing to load; fail clearly instead of passing null on.
            System.err.println("Error: no treebank given; use -train treebankPath");
            printUsage();
            System.exit(1);
        }
        System.err.println("Annotating from treebank dir: " + treebankPath);
        DiskTreebank trainTreebank = op.tlpParams.diskTreebank();
        if (trainFilter == null) {
            trainTreebank.loadPath(treebankPath);
        } else {
            trainTreebank.loadPath(treebankPath, trainFilter);
        }
        Pair<List<Tree>, List<Tree>> pair = LexicalizedParser.getAnnotatedBinaryTreebankFromTreebank(trainTreebank, null, op);
        List<Tree> binaryTrainTreebank = pair.first();
        Iterator<Tree> originals = trainTreebank.iterator();
        for (Tree binarized : binaryTrainTreebank) {
            System.out.println("Original tree:");
            originals.next().pennPrint();
            System.out.println("Binarized tree:");
            binarized.pennPrint();
            System.out.println();
        }
    }

    /**
     * Annotator that adds no subcategory information: it copies the tree and
     * relabels each internal node with a {@link CategoryWordTag} built from the
     * node's category and its head child's word and tag, and each leaf with a
     * {@link StringLabel}.
     */
    static class TreeNullAnnotator
    implements TreeTransformer {
        private final TreeFactory tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());
        private final HeadFinder hf;

        /**
         * @param hf head finder used to pick the head child of each internal node
         */
        public TreeNullAnnotator(HeadFinder hf) {
            this.hf = hf;
        }

        /**
         * Deep-copies {@code t} and relabels the copy; the input tree is not modified.
         *
         * @param t the tree to annotate
         * @return the relabelled copy
         */
        public Tree transformTree(Tree t) {
            Tree copy = t.deepCopy(this.tf);
            return this.transformTreeHelper(copy);
        }

        /**
         * Relabels {@code t} and all of its descendants in place, children first,
         * so that a non-leaf head child already carries a {@link CategoryWordTag}
         * when its parent is processed.
         *
         * @param t the subtree to relabel, may be null
         * @return {@code t} itself
         */
        private Tree transformTreeHelper(Tree t) {
            if (t == null) {
                return null;
            }
            String cat = t.label().value();
            if (t.isLeaf()) {
                t.setLabel(new StringLabel(cat));
                return t;
            }
            for (Tree child : t.children()) {
                this.transformTreeHelper(child);
            }

            String word = null;
            String tag = null;
            Tree headChild = this.hf.determineHead(t);
            if (headChild == null) {
                System.err.println("ERROR: null head for tree\n" + t.toString());
            } else if (headChild.isLeaf()) {
                // Pre-terminal: its own category is the tag, the leaf is the word.
                tag = cat;
                word = headChild.label().value();
            } else {
                CategoryWordTag headLabel = (CategoryWordTag)headChild.label();
                word = headLabel.word();
                tag = headLabel.tag();
            }
            t.setLabel(new CategoryWordTag(cat, word, tag));
            return t;
        }
    }
}

