/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.text.linguistic;

import java.io.IOException;
import java.io.StringReader;
import java.util.EnumMap;
import org.carrot2.core.LanguageCode;
import org.carrot2.shaded.guava.common.base.Predicate;
import org.carrot2.shaded.guava.common.collect.Maps;
import org.carrot2.text.analysis.ExtendedWhitespaceTokenizer;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.linguistic.ITokenizerFactory;
import org.carrot2.text.linguistic.JapaneseUnsupportedStub;
import org.carrot2.text.linguistic.lucene.ChineseTokenizerAdapter;
import org.carrot2.text.linguistic.lucene.ThaiTokenizerAdapter;
import org.carrot2.util.annotations.ThreadSafe;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.factory.FallbackFactory;
import org.carrot2.util.factory.IFactory;
import org.carrot2.util.factory.NewClassInstanceFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Supplies {@link ITokenizer} instances for a given {@link LanguageCode}.
 *
 * <p>The language-to-factory table is built once, in the static initializer, and is only
 * read afterwards. Every language is served by {@link ExtendedWhitespaceTokenizer} except
 * {@link LanguageCode#CHINESE_SIMPLIFIED}, {@link LanguageCode#THAI} and
 * {@link LanguageCode#JAPANESE}, which get dedicated factories wrapped in a
 * {@link FallbackFactory} that is handed the whitespace tokenizer factory as its fallback.
 */
@Bindable
@ThreadSafe
public class DefaultTokenizerFactory
implements ITokenizerFactory {
    private static final Logger logger = LoggerFactory.getLogger(DefaultTokenizerFactory.class);

    /** Tokenizer factory for every {@link LanguageCode}; populated once and never modified. */
    private static final EnumMap<LanguageCode, IFactory<ITokenizer>> tokenizerFactories;

    /** Smoke test handed to {@link FallbackFactory}: feeds a tokenizer a short sample input. */
    private static final Predicate<ITokenizer> tokenizerVerifier;

    /**
     * Creates a tokenizer for the given language.
     *
     * @param languageCode the language to tokenize; must not be {@code null}
     * @return the instance produced by the factory registered for {@code languageCode}
     * @throws NullPointerException if {@code languageCode} is {@code null}
     */
    @Override
    public ITokenizer getTokenizer(LanguageCode languageCode) {
        // EnumMap.get(null) returns null, so a null argument used to surface as a
        // message-less NullPointerException on the dereference below. Same exception
        // type, but now it says what went wrong.
        if (languageCode == null) {
            throw new NullPointerException("languageCode must not be null");
        }
        return tokenizerFactories.get(languageCode).createInstance();
    }

    /**
     * Builds the language-to-factory table.
     *
     * <p>Must run after {@link #tokenizerVerifier} has been assigned, because the verifier
     * is passed to each {@link FallbackFactory} created here.
     *
     * @return a map with an entry for every {@link LanguageCode} constant
     */
    private static EnumMap<LanguageCode, IFactory<ITokenizer>> createDefaultTokenizers() {
        EnumMap<LanguageCode, IFactory<ITokenizer>> map = Maps.newEnumMap(LanguageCode.class);

        // Baseline: one shared whitespace tokenizer factory for every language.
        IFactory<ITokenizer> whitespaceTokenizerFactory =
            new NewClassInstanceFactory<ITokenizer>(ExtendedWhitespaceTokenizer.class);
        for (LanguageCode lc : LanguageCode.values()) {
            map.put(lc, whitespaceTokenizerFactory);
        }

        // Languages with a dedicated tokenizer factory.
        map.put(LanguageCode.CHINESE_SIMPLIFIED, new NewClassInstanceFactory<ITokenizer>(ChineseTokenizerAdapter.class));
        map.put(LanguageCode.THAI, new NewClassInstanceFactory<ITokenizer>(ThaiTokenizerAdapter.class));
        map.put(LanguageCode.JAPANESE, new JapaneseUnsupportedStub());

        // Wrap every dedicated factory in a FallbackFactory. Entries still pointing at the
        // shared whitespace factory are left alone (identity comparison is intentional).
        // NOTE(review): FallbackFactory presumably switches to the whitespace factory and logs
        // the message when the dedicated tokenizer cannot be created or verified -- confirm
        // against FallbackFactory.
        for (LanguageCode lc : LanguageCode.values()) {
            IFactory<ITokenizer> factory = map.get(lc);
            if (factory == whitespaceTokenizerFactory) {
                continue;
            }
            map.put(lc, new FallbackFactory<ITokenizer>(factory, whitespaceTokenizerFactory, tokenizerVerifier, logger, "Tokenizer for " + lc.toString() + " (" + lc.getIsoCode() + ") is not available. This may degrade clustering quality of " + lc.toString() + " content. Cause: {}"));
        }
        return map;
    }

    static {
        // Order matters: the verifier must exist before createDefaultTokenizers() reads it.
        tokenizerVerifier = new Predicate<ITokenizer>(){

            /**
             * Resets the tokenizer on a short sample string and requests one token.
             *
             * @return {@code true} when both calls complete without an {@link IOException}
             * @throws RuntimeException wrapping any {@link IOException} raised by the tokenizer
             */
            @Override
            public boolean apply(ITokenizer tokenizer) {
                try {
                    tokenizer.reset(new StringReader("verify"));
                    tokenizer.nextToken();
                }
                catch (IOException e) {
                    throw new RuntimeException(e);
                }
                return true;
            }
        };
        tokenizerFactories = DefaultTokenizerFactory.createDefaultTokenizers();
    }
}

