/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tokenizers;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.JvmStatic;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.StringsKt;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.languagetool.tokenizers.Tokenizer;
import org.languagetool.tools.StringTools;

@Metadata(mv={1, 1, 16}, bv={1, 0, 3}, k=1, d1={"\u0000,\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0010\u000b\n\u0002\b\u0002\n\u0002\u0010 \n\u0002\b\b\n\u0002\u0010\b\n\u0002\b\u0004\b\u0016\u0018\u0000 \u00172\u00020\u0001:\u0001\u0017B\u0005\u00a2\u0006\u0002\u0010\u0002J\u0010\u0010\u0007\u001a\u00020\b2\u0006\u0010\t\u001a\u00020\u0004H\u0002J\u001c\u0010\n\u001a\b\u0012\u0004\u0012\u00020\u00040\u000b2\f\u0010\f\u001a\b\u0012\u0004\u0012\u00020\u00040\u000bH\u0004J\u001c\u0010\r\u001a\b\u0012\u0004\u0012\u00020\u00040\u000b2\f\u0010\f\u001a\b\u0012\u0004\u0012\u00020\u00040\u000bH\u0004J\u001c\u0010\u000e\u001a\b\u0012\u0004\u0012\u00020\u00040\u000b2\f\u0010\u000f\u001a\b\u0012\u0004\u0012\u00020\u00040\u000bH\u0004J\u0016\u0010\u0010\u001a\b\u0012\u0004\u0012\u00020\u00040\u000b2\u0006\u0010\u0011\u001a\u00020\u0004H\u0016J(\u0010\u0012\u001a\u00020\b2\u0006\u0010\u0013\u001a\u00020\u00142\f\u0010\u000f\u001a\b\u0012\u0004\u0012\u00020\u00040\u000b2\b\u0010\u0015\u001a\u0004\u0018\u00010\u0004H\u0002J\u001e\u0010\u0016\u001a\u00020\b2\u0006\u0010\u0013\u001a\u00020\u00142\f\u0010\u000f\u001a\b\u0012\u0004\u0012\u00020\u00040\u000bH\u0002R\u0014\u0010\u0003\u001a\u00020\u00048VX\u0096\u0004\u00a2\u0006\u0006\u001a\u0004\b\u0005\u0010\u0006\u00a8\u0006\u0018"}, d2={"Lorg/languagetool/tokenizers/WordTokenizer;", "Lorg/languagetool/tokenizers/Tokenizer;", "()V", "tokenizingCharacters", "", "getTokenizingCharacters", "()Ljava/lang/String;", "isProtocol", "", "token", "joinEMails", "", "list", "joinEMailsAndUrls", "joinUrls", "l", "tokenize", "text", "urlEndsAt", "i", "", "urlQuote", "urlStartsAt", "Companion", "languagetool-core"})
public class WordTokenizer
implements Tokenizer {
    @NotNull
    private static final List<String> protocols;
    private static final Pattern URL_CHARS;
    private static final Pattern DOMAIN_CHARS;
    private static final Pattern NO_PROTOCOL_URL;
    private static final Pattern E_MAIL;
    private static final String TOKENIZING_CHARACTERS = " \u00a0\u115f\u1160\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u200c\u200d\u200e\u200f\u2028\u2029\u202a\u202b\u202c\u202d\u202e\u202f\u205f\u2060\u2061\u2062\u2063\u206a\u206b\u206c\u206d\u206e\u206f\u3000\u3164\ufeff\uffa0\ufff9\ufffa\ufffb,.;()[]{}=*#\u2217\u00d7\u00b7+\u00f7<>!?:/|\\\"'\u00ab\u00bb\u201e\u201d\u201c`\u00b4\u2018\u2019\u201b\u2032\u203a\u2039\u2026\u00bf\u00a1\u2192\u203c\u2047\u2048\u2049_\u2014\t\n\r";
    public static final Companion Companion;

    @Override
    @NotNull
    public List<String> tokenize(@NotNull String text) {
        Intrinsics.checkParameterIsNotNull((Object)text, (String)"text");
        List l = new ArrayList();
        StringTokenizer st = new StringTokenizer(text, this.getTokenizingCharacters(), true);
        while (st.hasMoreElements()) {
            String string = st.nextToken();
            Intrinsics.checkExpressionValueIsNotNull((Object)string, (String)"st.nextToken()");
            l.add(string);
        }
        return this.joinEMailsAndUrls(l);
    }

    @NotNull
    public String getTokenizingCharacters() {
        return TOKENIZING_CHARACTERS;
    }

    @NotNull
    protected final List<String> joinEMailsAndUrls(@NotNull List<String> list) {
        Intrinsics.checkParameterIsNotNull(list, (String)"list");
        return this.joinUrls(this.joinEMails(list));
    }

    @NotNull
    protected final List<String> joinEMails(@NotNull List<String> list) {
        Intrinsics.checkParameterIsNotNull(list, (String)"list");
        String text = CollectionsKt.joinToString$default((Iterable)list, (CharSequence)"", null, null, (int)0, null, null, (int)62, null);
        if (StringsKt.contains$default((CharSequence)text, (CharSequence)"@", (boolean)false, (int)2, null) && E_MAIL.matcher(text).find()) {
            Matcher matcher = E_MAIL.matcher(text);
            List l = new ArrayList();
            int currentPosition = 0;
            int start = 0;
            int end = 0;
            int idx = 0;
            while (matcher.find()) {
                start = matcher.start();
                end = matcher.end();
                while (currentPosition < end) {
                    if (currentPosition < start) {
                        l.add(list.get(idx));
                    } else if (currentPosition == start) {
                        String string = matcher.group();
                        Intrinsics.checkExpressionValueIsNotNull((Object)string, (String)"matcher.group()");
                        l.add(string);
                    }
                    currentPosition += list.get(idx).length();
                    ++idx;
                }
            }
            if (currentPosition < text.length()) {
                l.addAll((Collection)list.subList(idx, list.size()));
            }
            return l;
        }
        return list;
    }

    /*
     * WARNING - void declaration
     */
    @NotNull
    protected final List<String> joinUrls(@NotNull List<String> l) {
        Intrinsics.checkParameterIsNotNull(l, (String)"l");
        List newList = new ArrayList();
        boolean inUrl = false;
        StringBuilder url = new StringBuilder();
        String urlQuote = null;
        int n = 0;
        int n2 = ((Collection)l).size();
        while (n < n2) {
            void i;
            if (this.urlStartsAt((int)i, l)) {
                inUrl = true;
                if (i - true >= 0) {
                    urlQuote = l.get((int)(i - true));
                }
                url.append(l.get((int)i));
            } else if (inUrl && this.urlEndsAt((int)i, l, urlQuote)) {
                inUrl = false;
                urlQuote = null;
                String string = url.toString();
                Intrinsics.checkExpressionValueIsNotNull((Object)string, (String)"url.toString()");
                newList.add(string);
                url.setLength(0);
                newList.add(l.get((int)i));
            } else if (inUrl) {
                url.append(l.get((int)i));
            } else {
                newList.add(l.get((int)i));
            }
            ++i;
        }
        CharSequence charSequence = url;
        n2 = 0;
        if (charSequence.length() > 0) {
            String string = url.toString();
            Intrinsics.checkExpressionValueIsNotNull((Object)string, (String)"url.toString()");
            newList.add(string);
        }
        return newList;
    }

    private final boolean urlStartsAt(int i, List<String> l) {
        String nnToken;
        String nToken;
        String token = l.get(i);
        if (this.isProtocol(token) && l.size() > i + 3) {
            nToken = l.get(i + 1);
            nnToken = l.get(i + 2);
            String nnnToken = l.get(i + 3);
            if (Intrinsics.areEqual((Object)nToken, (Object)":") && Intrinsics.areEqual((Object)nnToken, (Object)"/") && Intrinsics.areEqual((Object)nnnToken, (Object)"/")) {
                return true;
            }
        }
        if (l.size() > i + 1) {
            nToken = l.get(i);
            nnToken = l.get(i + 1);
            if (Intrinsics.areEqual((Object)nToken, (Object)"www") && Intrinsics.areEqual((Object)nnToken, (Object)".")) {
                return true;
            }
        }
        return l.size() > i + 3 && Intrinsics.areEqual((Object)l.get(i + 1), (Object)".") && Intrinsics.areEqual((Object)l.get(i + 3), (Object)"/") && DOMAIN_CHARS.matcher(token).matches() && DOMAIN_CHARS.matcher(l.get(i + 2)).matches() ? true : l.size() > i + 5 && Intrinsics.areEqual((Object)l.get(i + 1), (Object)".") && Intrinsics.areEqual((Object)l.get(i + 3), (Object)".") && Intrinsics.areEqual((Object)l.get(i + 5), (Object)"/") && DOMAIN_CHARS.matcher(token).matches() && DOMAIN_CHARS.matcher(l.get(i + 2)).matches() && DOMAIN_CHARS.matcher(l.get(i + 4)).matches();
    }

    private final boolean isProtocol(String token) {
        return protocols.contains(token);
    }

    private final boolean urlEndsAt(int i, List<String> l, String urlQuote) {
        String token = l.get(i);
        if (StringTools.isWhitespace(token) || Intrinsics.areEqual((Object)token, (Object)")") || Intrinsics.areEqual((Object)token, (Object)"]")) {
            return true;
        }
        if (l.size() > i + 1) {
            String nextToken = l.get(i + 1);
            if ((StringTools.isWhitespace(nextToken) || StringUtils.equalsAny((CharSequence)nextToken, (CharSequence[])new CharSequence[]{"\"", "\u00bb", "\u00ab", "\u2018", "\u2019", "\u201c", "\u201d", "'", "."})) && (StringUtils.equalsAny((CharSequence)token, (CharSequence[])new CharSequence[]{".", ",", ";", ":", "!", "?"}) || Intrinsics.areEqual((Object)token, (Object)urlQuote))) {
                return true;
            }
            if (!URL_CHARS.matcher(token).matches()) {
                return true;
            }
        } else if (!URL_CHARS.matcher(token).matches() || Intrinsics.areEqual((Object)token, (Object)".")) {
            return true;
        }
        return false;
    }

    static {
        Companion = new Companion(null);
        protocols = CollectionsKt.listOf((Object[])new String[]{"http", "https", "ftp"});
        URL_CHARS = Pattern.compile("[a-zA-Z0-9/%$-_.+!*'(),\\?#]+");
        DOMAIN_CHARS = Pattern.compile("[a-zA-Z0-9][a-zA-Z0-9-]+");
        NO_PROTOCOL_URL = Pattern.compile("([a-zA-Z0-9][a-zA-Z0-9-]+\\.)?([a-zA-Z0-9][a-zA-Z0-9-]+)\\.([a-zA-Z0-9][a-zA-Z0-9-]+)/.*");
        E_MAIL = Pattern.compile("(?<!:)\\b[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\])|(([a-zA-Z\\-0-9]+\\.)+[a-zA-Z]{2,}))\\b");
    }

    @JvmStatic
    public static final boolean isUrl(@NotNull String token) {
        return Companion.isUrl(token);
    }

    @JvmStatic
    public static final boolean isEMail(@NotNull String token) {
        return Companion.isEMail(token);
    }

    @Metadata(mv={1, 1, 16}, bv={1, 0, 3}, k=1, d1={"\u0000,\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0010 \n\u0002\b\u0003\n\u0002\u0010\u000b\n\u0002\b\u0003\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002J\u0010\u0010\u000f\u001a\u00020\u00102\u0006\u0010\u0011\u001a\u00020\tH\u0007J\u0010\u0010\u0012\u001a\u00020\u00102\u0006\u0010\u0011\u001a\u00020\tH\u0007R\u0016\u0010\u0003\u001a\n \u0005*\u0004\u0018\u00010\u00040\u0004X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0016\u0010\u0006\u001a\n \u0005*\u0004\u0018\u00010\u00040\u0004X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0016\u0010\u0007\u001a\n \u0005*\u0004\u0018\u00010\u00040\u0004X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\b\u001a\u00020\tX\u0082T\u00a2\u0006\u0002\n\u0000R\u0016\u0010\n\u001a\n \u0005*\u0004\u0018\u00010\u00040\u0004X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0017\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\t0\f\u00a2\u0006\b\n\u0000\u001a\u0004\b\r\u0010\u000e\u00a8\u0006\u0013"}, d2={"Lorg/languagetool/tokenizers/WordTokenizer$Companion;", "", "()V", "DOMAIN_CHARS", "Ljava/util/regex/Pattern;", "kotlin.jvm.PlatformType", "E_MAIL", "NO_PROTOCOL_URL", "TOKENIZING_CHARACTERS", "", "URL_CHARS", "protocols", "", "getProtocols", "()Ljava/util/List;", "isEMail", "", "token", "isUrl", "languagetool-core"})
    public static final class Companion {
        @NotNull
        public final List<String> getProtocols() {
            return protocols;
        }

        @JvmStatic
        public final boolean isUrl(@NotNull String token) {
            Intrinsics.checkParameterIsNotNull((Object)token, (String)"token");
            for (String protocol : this.getProtocols()) {
                if (!StringsKt.startsWith$default((String)token, (String)(protocol + "://"), (boolean)false, (int)2, null) && !StringsKt.startsWith$default((String)token, (String)"www.", (boolean)false, (int)2, null)) continue;
                return true;
            }
            return NO_PROTOCOL_URL.matcher(token).matches();
        }

        @JvmStatic
        public final boolean isEMail(@NotNull String token) {
            Intrinsics.checkParameterIsNotNull((Object)token, (String)"token");
            return E_MAIL.matcher(token).matches();
        }

        private Companion() {
        }

        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

