/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.clustering.lingo;

import com.carrotsearch.hppc.BitSet;
import java.util.Collections;
import java.util.List;
import org.carrot2.clustering.lingo.ClusterBuilder;
import org.carrot2.clustering.lingo.LingoProcessingContext;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.ProcessingComponentBase;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.CommonAttributes;
import org.carrot2.core.attribute.Init;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.shaded.guava.common.collect.Lists;
import org.carrot2.text.clustering.IMonolingualClusteringAlgorithm;
import org.carrot2.text.clustering.MultilingualClustering;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.pipeline.CompletePreprocessingPipeline;
import org.carrot2.text.preprocessing.pipeline.IPreprocessingPipeline;
import org.carrot2.text.vsm.ReducedVectorSpaceModelContext;
import org.carrot2.text.vsm.TermDocumentMatrixBuilder;
import org.carrot2.text.vsm.TermDocumentMatrixReducer;
import org.carrot2.text.vsm.VectorSpaceModelContext;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.Output;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.DoubleRange;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * Clustering component that runs the Lingo pipeline: preprocessing, term-document
 * matrix construction, matrix reduction, label building, document assignment and
 * cluster merging. Clustering is delegated to {@link MultilingualClustering}, which
 * calls back into this component once per language-specific document subset.
 *
 * <p>The per-language callback temporarily overwrites the {@link #documents} and
 * {@link #clusters} fields of this instance, so a single instance must not execute
 * {@link #process()} concurrently.
 */
@Bindable(prefix="LingoClusteringAlgorithm", inherit={CommonAttributes.class})
@Label(value="Lingo Clustering")
public class LingoClusteringAlgorithm
extends ProcessingComponentBase
implements IClusteringAlgorithm {
    /** Query handed to the preprocessing pipeline along with the documents; defaults to {@code null}. */
    @Processing
    @Input
    @Internal
    @Attribute(key="query", inherit=true)
    public String query = null;
    /** Documents to cluster (required input). */
    @Processing
    @Input
    @Required
    @Internal
    @Attribute(key="documents", inherit=true)
    public List<Document> documents;
    /** Clusters produced by the most recent {@link #process()} call (output). */
    @Processing
    @Output
    @Internal
    @Attribute(key="clusters", inherit=true)
    public List<Cluster> clusters = null;
    /**
     * Weight in the range [0, 1] passed to
     * {@code Cluster.byReversedWeightedScoreAndSizeComparator} when ordering the final clusters.
     * NOTE(review): presumably 0 orders purely by size and 1 purely by score; confirm against Cluster.
     */
    @Input
    @Processing
    @Attribute
    @DoubleRange(min=0.0, max=1.0)
    @Label(value="Size-Score sorting ratio")
    @Level(value=AttributeLevel.MEDIUM)
    @Group(value="Clusters")
    public double scoreWeight = 0.0;
    /**
     * Base value (2..100) from which the number of dimensions requested from the matrix
     * reducer is derived; see {@link #computeClusterCount(int, int)}.
     */
    @Input
    @Processing
    @Attribute
    @IntRange(min=2, max=100)
    @Label(value="Cluster count base")
    @Level(value=AttributeLevel.BASIC)
    @Group(value="Clusters")
    public int desiredClusterCountBase = 30;
    /** Preprocessing pipeline applied to each language-specific document subset before clustering. */
    @Init
    @Input
    @Attribute
    @Internal
    @ImplementingClasses(classes={}, strict=false)
    @Level(value=AttributeLevel.ADVANCED)
    public IPreprocessingPipeline preprocessingPipeline = new CompletePreprocessingPipeline();
    /** Builds the term-document and term-phrase matrices. */
    public final TermDocumentMatrixBuilder matrixBuilder = new TermDocumentMatrixBuilder();
    /** Reduces the term-document matrix to the requested number of dimensions. */
    public final TermDocumentMatrixReducer matrixReducer = new TermDocumentMatrixReducer();
    /** Builds cluster labels, assigns documents to them and merges clusters. */
    public final ClusterBuilder clusterBuilder = new ClusterBuilder();
    /** Formats label features into phrases attached to the output clusters. */
    public final LabelFormatter labelFormatter = new LabelFormatter();
    /** Splits the input by language and drives the per-language clustering callback. */
    public final MultilingualClustering multilingualClustering = new MultilingualClustering();

    /**
     * Clusters {@link #documents} and stores the result in {@link #clusters}.
     *
     * <p>The callback below swaps {@link #documents} for each language-specific subset so
     * that {@link #cluster(LanguageCode)} can operate on instance fields. The original list
     * is restored in a {@code finally} block, so the input attribute is not left pointing
     * at a partial subset when clustering fails with an exception.
     *
     * @throws ProcessingException declared by the component contract
     */
    @Override
    public void process() throws ProcessingException {
        final List<Document> originalDocuments = this.documents;
        try {
            this.clusters = this.multilingualClustering.process(originalDocuments, new IMonolingualClusteringAlgorithm(){

                @Override
                public List<Cluster> process(List<Document> documents, LanguageCode language) {
                    LingoClusteringAlgorithm.this.documents = documents;
                    LingoClusteringAlgorithm.this.cluster(language);
                    return LingoClusteringAlgorithm.this.clusters;
                }
            });
        } finally {
            // Always undo the per-language swap, even if clustering threw.
            this.documents = originalDocuments;
        }
    }

    /**
     * Clusters the current contents of {@link #documents}, treating them as written in
     * {@code language}, and replaces {@link #clusters} with the result.
     *
     * @param language language passed to the preprocessing pipeline
     */
    private void cluster(LanguageCode language) {
        PreprocessingContext context = this.preprocessingPipeline.preprocess(this.documents, this.query, language);
        this.clusters = Lists.newArrayList();
        if (context.hasLabels()) {
            VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(context);
            ReducedVectorSpaceModelContext reducedVsmContext = new ReducedVectorSpaceModelContext(vsmContext);
            LingoProcessingContext lingoContext = new LingoProcessingContext(reducedVsmContext);

            this.matrixBuilder.buildTermDocumentMatrix(vsmContext);
            this.matrixBuilder.buildTermPhraseMatrix(vsmContext);
            this.matrixReducer.reduce(reducedVsmContext, LingoClusteringAlgorithm.computeClusterCount(this.desiredClusterCountBase, this.documents.size()));

            this.clusterBuilder.buildLabels(lingoContext, this.matrixBuilder.termWeighting);
            this.clusterBuilder.assignDocuments(lingoContext);
            this.clusterBuilder.merge(lingoContext);

            int[] clusterLabelIndex = lingoContext.clusterLabelFeatureIndex;
            BitSet[] clusterDocuments = lingoContext.clusterDocuments;
            double[] clusterLabelScore = lingoContext.clusterLabelScore;
            for (int i = 0; i < clusterLabelIndex.length; ++i) {
                int labelFeature = clusterLabelIndex[i];
                // Entries with a negative label feature index produce no output cluster.
                if (labelFeature < 0) {
                    continue;
                }

                // Allocate only once we know the entry is kept.
                Cluster cluster = new Cluster();
                cluster.addPhrases(this.labelFormatter.format(context, labelFeature));
                cluster.setAttribute("score", clusterLabelScore[i]);

                // Each set bit is an index into this.documents.
                BitSet bs = clusterDocuments[i];
                for (int bit = bs.nextSetBit(0); bit >= 0; bit = bs.nextSetBit(bit + 1)) {
                    cluster.addDocuments(this.documents.get(bit));
                }
                this.clusters.add(cluster);
            }
            Collections.sort(this.clusters, Cluster.byReversedWeightedScoreAndSizeComparator(this.scoreWeight));
        }
        // Runs whether or not any labels were found.
        Cluster.appendOtherTopics(this.documents, this.clusters);
    }

    /**
     * Computes the number of dimensions requested from the matrix reducer:
     * {@code (int) (desiredClusterCountBase / 10.0 * sqrt(documentCount))}, capped at
     * {@code documentCount}. The cast truncates, so small inputs can yield 0.
     *
     * @param desiredClusterCountBase the cluster count base attribute value
     * @param documentCount number of documents being clustered
     * @return the truncated, capped cluster count
     */
    static int computeClusterCount(int desiredClusterCountBase, int documentCount) {
        return Math.min((int)((double)desiredClusterCountBase / 10.0 * Math.sqrt(documentCount)), documentCount);
    }
}

