use smallvec::SmallVec;
use std::str;

/// String slices pointing to the fields of a borrowed `Entry`'s JSON data.
///
/// Every field is a sub-slice of the text handed to [`parse_metrics`], so
/// the struct cannot outlive that text (lifetime `'a`).
#[derive(PartialEq, Debug)]
pub struct MetricText<'a> {
    /// First element of the JSON array; `parse_metrics` strips its quotes.
    pub family_name: &'a str,
    /// Second element of the JSON array; `parse_metrics` strips its quotes.
    pub metric_name: &'a str,
    /// Label names in the order they appear in the first nested array.
    pub labels: SmallVec<[&'a str; 4]>,
    /// Label values in the order they appear in the second nested array.
    /// Unquoted items such as numbers or `null` are kept as their literal
    /// text. When produced by `parse_metrics` this has the same length as
    /// `labels`.
    pub values: SmallVec<[&'a str; 4]>,
}

/// Intermediate result of `parse_names`: the two leading names plus the
/// part of the input that has not been parsed yet.
#[derive(PartialEq, Debug)]
struct MetricNames<'a> {
    /// Remaining input, starting at the `[` that opens the label array.
    label_json: &'a str,
    /// First name in the input, with its double quotes trimmed.
    family_name: &'a str,
    /// Second name in the input, with its double quotes trimmed.
    metric_name: &'a str,
}

/// Intermediate result of `parse_label_values`: the label names and their
/// values as slices of the input, with surrounding double quotes trimmed.
#[derive(PartialEq, Debug)]
struct MetricLabelVals<'a> {
    /// Items of the first array, in input order.
    labels: SmallVec<[&'a str; 4]>,
    /// Items of the second array, in input order. `parse_label_values` only
    /// returns a value when this has the same length as `labels`.
    values: SmallVec<[&'a str; 4]>,
}

/// Parse Prometheus metric data stored in the following format:
///
/// ["metric","name",["label_a","label_b"],["value_a","value_b"]]
///
/// There will be 1-8 trailing spaces to ensure at least a one-byte
/// gap between the json and value, and to pad to an 8-byte alignment.
/// We strip the surrounding double quotes from all items. Values may
/// or may not have surrounding double quotes, depending on their type.
pub fn parse_metrics(json: &str) -> Option<MetricText> {
    // It would be preferable to use `serde_json` here, but the values
    // may be strings, numbers, or null, and so don't parse easily to a
    // defined struct. Using `serde_json::Value` is an option, but since
    // we're just copying the literal values into a buffer this will be
    // inefficient and verbose.

    // Trim trailing spaces from string before processing. We use
    // `trim_end_matches()` instead of `trim_end()` because we know the
    // trailing bytes are always ASCII 0x20 bytes. `trim_end()` will also
    // check for unicode spaces and consume a few more CPU cycles.
    let trimmed = json.trim_end_matches(' ');

    let names = parse_names(trimmed)?;
    let label_json = names.label_json;

    let label_vals = parse_label_values(label_json)?;

    Some(MetricText {
        family_name: names.family_name,
        metric_name: names.metric_name,
        labels: label_vals.labels,
        values: label_vals.values,
    })
}

/// Split the two leading names off the front of a metric key.
///
/// `json` is expected to look like `["family_name","metric_name",[...`.
/// On success the returned `MetricNames` holds both names with their double
/// quotes trimmed, plus `label_json`: the rest of the input starting at the
/// `[` that opens the label array.
///
/// Returns `None` when the input does not start with `["`, contains no
/// second `[`, or when the text before that `[` is not exactly two
/// comma-separated names followed by a single trailing comma.
fn parse_names(json: &str) -> Option<MetricNames> {
    // Starting with: ["family_name","metric_name",[...
    // Now: family_name","metric_name",[...
    let remainder = json.strip_prefix("[\"")?;

    // The next '[' opens the label array; everything before it is names.
    let names_end = remainder.find('[')?;

    // Names: family_name","metric_name",
    // Rest:  [...   (saved to parse for labels later)
    // `names_end` comes from `find`, so it is always a valid char boundary.
    let (names, label_json) = remainder.split_at(names_end);

    // Split on commas into:
    // family_name","metric_name",
    // ^^^^one^^^^^ ^^^^^two^^^^^
    let mut token_iter = names.split(',');

    // Captured: family_name","metric_name",
    //           ^^^^^^^^^^^
    let family_name = token_iter.next()?.trim_end_matches('"');

    // Captured: family_name","metric_name",
    //                         ^^^^^^^^^^^
    let metric_name = token_iter.next()?.trim_matches('"');

    // The trailing ',' must yield exactly one empty token and nothing after
    // it. Checking only for the empty token would let inputs with surplus
    // commas through, e.g. `["metric","name",,[...`.
    if !token_iter.next()?.is_empty() || token_iter.next().is_some() {
        return None;
    }

    Some(MetricNames {
        label_json,
        family_name,
        metric_name,
    })
}

fn parse_label_values(json: &str) -> Option<MetricLabelVals> {
    // Starting with: ["label_a","label_b"],["value_a", "value_b"]]
    if !(json.starts_with('[') && json.ends_with("]]")) {
        return None;
    }

    // Validate we either have the start of a label string or an
    // empty array, e.g. `["` or `[]`.
    if !matches!(json.as_bytes().get(1)?, b'"' | b']') {
        return None;
    }

    // Now: "label_a","label_b"
    let labels_end = json.find(']')?;
    let label_range = json.get(1..labels_end)?;

    let mut labels = SmallVec::new();

    // Split on commas into:
    // "label_a","label_b"
    // ^^^one^^^ ^^^two^^^
    for label in label_range.split(',') {
        // Captured: "label_a","label_b"
        //            ^^^^^^^
        // If there are no labels, e.g. `[][]`, then don't capture anything.
        if !label.is_empty() {
            labels.push(label.trim_matches('"'));
        }
    }

    // Now: ],["value_a", "value_b"]]
    let mut values_range = json.get(labels_end..)?;

    // Validate we have a separating comma with one and only one leading bracket.
    if !(values_range.starts_with("],[") && values_range.as_bytes().get(3)? != &b'[') {
        return None;
    }

    // Now: "value_a", "value_b"]]
    values_range = values_range.get(3..)?;

    let values_end = values_range.find(']')?;

    // Validate we have only two trailing brackets.
    if values_range.get(values_end..)?.len() > 2 {
        return None;
    }

    // Now: "value_a", "value_b"
    values_range = values_range.get(..values_end)?;

    let mut values = SmallVec::new();

    // Split on commas into:
    // "value_a","value_b"
    // ^^^one^^^ ^^^two^^^
    for value in values_range.split(',') {
        // Captured: "value_a","value_b"
        //           ^^^^^^^^^
        // If there are no values, e.g. `[][]`, then don't capture anything.
        if !value.is_empty() {
            values.push(value.trim_matches('"'));
        }
    }

    if values.len() != labels.len() {
        return None;
    }

    Some(MetricLabelVals { labels, values })
}

#[cfg(test)]
mod test {
    use smallvec::smallvec;

    use super::*;

    /// One table-driven case: `input` is passed to `parse_metrics` and the
    /// result must equal `expected`. `name` identifies the case on failure.
    struct TestCase {
        name: &'static str,
        input: &'static str,
        expected: Option<MetricText<'static>>,
    }

    /// Run every case through `parse_metrics` and compare with the
    /// expectation, reporting the case name on mismatch.
    fn run_cases(cases: Vec<TestCase>) {
        for case in cases {
            assert_eq!(
                parse_metrics(case.input),
                case.expected,
                "test case: {}",
                case.name,
            );
        }
    }

    /// Well-formed inputs must parse into the expected slices.
    #[test]
    fn valid_json() {
        run_cases(vec![
            TestCase {
                name: "basic",
                input: r#"["metric","name",["label_a","label_b"],["value_a","value_b"]]"#,
                expected: Some(MetricText {
                    family_name: "metric",
                    metric_name: "name",
                    labels: smallvec!["label_a", "label_b"],
                    values: smallvec!["value_a", "value_b"],
                }),
            },
            TestCase {
                // Stored keys are padded with 1-8 trailing spaces.
                name: "trailing spaces",
                input: r#"["metric","name",["label_a","label_b"],["value_a","value_b"]]        "#,
                expected: Some(MetricText {
                    family_name: "metric",
                    metric_name: "name",
                    labels: smallvec!["label_a", "label_b"],
                    values: smallvec!["value_a", "value_b"],
                }),
            },
            TestCase {
                // More items than the inline capacity of the `SmallVec`s.
                name: "many labels",
                input: r#"["metric","name",["label_a","label_b","label_c","label_d","label_e"],["value_a","value_b","value_c","value_d","value_e"]]"#,
                expected: Some(MetricText {
                    family_name: "metric",
                    metric_name: "name",
                    labels: smallvec!["label_a", "label_b", "label_c", "label_d", "label_e"],
                    values: smallvec!["value_a", "value_b", "value_c", "value_d", "value_e"],
                }),
            },
            TestCase {
                name: "numeric value",
                input: r#"["metric","name",["label_a","label_b"],["value_a",403]]"#,
                expected: Some(MetricText {
                    family_name: "metric",
                    metric_name: "name",
                    labels: smallvec!["label_a", "label_b"],
                    values: smallvec!["value_a", "403"],
                }),
            },
            TestCase {
                name: "null value",
                input: r#"["metric","name",["label_a","label_b"],[null,"value_b"]]"#,
                expected: Some(MetricText {
                    family_name: "metric",
                    metric_name: "name",
                    labels: smallvec!["label_a", "label_b"],
                    values: smallvec!["null", "value_b"],
                }),
            },
            TestCase {
                name: "no labels",
                input: r#"["metric","name",[],[]]"#,
                expected: Some(MetricText {
                    family_name: "metric",
                    metric_name: "name",
                    labels: smallvec![],
                    values: smallvec![],
                }),
            },
        ]);
    }

    /// Malformed inputs must be rejected with `None`. Each input breaks only
    /// the one rule its name describes, so the case exercises that rule.
    #[test]
    fn invalid_json() {
        run_cases(vec![
            TestCase {
                name: "not json",
                input: "hello, world",
                expected: None,
            },
            TestCase {
                name: "no names",
                input: r#"[["label_a","label_b"],["value_a","value_b"]]"#,
                expected: None,
            },
            TestCase {
                name: "too many names",
                input: r#"["metric","name","unexpected_name",["label_a","label_b"],["value_a","value_b"]]"#,
                expected: None,
            },
            TestCase {
                name: "too many labels",
                input: r#"["metric","name",["label_a","label_b","label_c"],["value_a","value_b"]]"#,
                expected: None,
            },
            TestCase {
                name: "too many values",
                input: r#"["metric","name",["label_a","label_b"],["value_a","value_b",null]]"#,
                expected: None,
            },
            TestCase {
                name: "no values",
                input: r#"["metric","name",["label_a","label_b"]"#,
                expected: None,
            },
            TestCase {
                name: "no arrays",
                input: r#"["metric","name","label_a","value_a"]"#,
                expected: None,
            },
            TestCase {
                name: "too many leading brackets",
                input: r#"[["metric","name",["label_a","label_b"],["value_a","value_b"]]"#,
                expected: None,
            },
            TestCase {
                name: "too many trailing brackets",
                input: r#"["metric","name",["label_a","label_b"],["value_a","value_b"]]]"#,
                expected: None,
            },
            TestCase {
                name: "too many leading label brackets",
                input: r#"["metric","name",[["label_a","label_b"],["value_a","value_b"]]"#,
                expected: None,
            },
            TestCase {
                name: "too many trailing label brackets",
                input: r#"["metric","name",["label_a","label_b"]],["value_a","value_b"]]"#,
                expected: None,
            },
            TestCase {
                name: "too many leading value brackets",
                input: r#"["metric","name",["label_a","label_b"],[["value_a","value_b"]]"#,
                expected: None,
            },
            TestCase {
                name: "comma in family name",
                input: r#"["met,ric","name",["label_a","label_b"],["value_a","value_b"]]"#,
                expected: None,
            },
            TestCase {
                name: "comma in metric name",
                input: r#"["metric","na,me",["label_a","label_b"],["value_a","value_b"]]"#,
                expected: None,
            },
            TestCase {
                name: "comma in value",
                input: r#"["metric","name",["label_a","label_b"],["val,ue_a","value_b"]]"#,
                expected: None,
            },
            TestCase {
                name: "comma in numeric value",
                input: r#"["metric","name",["label_a","label_b"],[400,0,"value_b"]]"#,
                expected: None,
            },
        ]);
    }
}
