use crate::cursor;
use crate::extractor::pre_processors::pre_processor::PreProcessor;
use bstr::ByteSlice;

/// Stateless pre-processor for Clojure-style source code.
///
/// Its `PreProcessor::process` implementation overwrites with spaces every byte that is not
/// part of a string body, a keyword, or a quoted symbol/list, so that class names written in
/// those positions are left standing on their own.
#[derive(Debug, Default)]
pub struct Clojure;

/// Returns `true` when `byte` may appear inside a (roughly approximated) ClojureScript keyword.
///
/// A full keyword is approximately described by this regex:
/// /::?[a-zA-Z0-9!#$%&*+./:<=>?_|-]+/
///
/// Keywords are only scanned here in order to find utilities, and utilities (outside of
/// arbitrary values) use a smaller alphabet. Combined with the fact that `[]` and `()` can
/// never be part of a keyword, the accepted set is reduced to ASCII letters, ASCII digits and
/// the punctuation listed in the first match arm.
#[inline]
fn is_keyword_character(byte: u8) -> bool {
    match byte {
        // Punctuation that can occur in both a keyword and a utility.
        b'!' | b'#' | b'%' | b'*' | b'+' | b'-' | b'.' | b'/' | b':' | b'_' => true,

        // Otherwise only ASCII letters and digits qualify.
        other => other.is_ascii_alphanumeric(),
    }
}

impl PreProcessor for Clojure {
    /// Rewrites Clojure source so that only potential class names remain visible.
    ///
    /// Every byte of the input is either kept as-is or overwritten with a space (the initial
    /// `:class` / `:className` replacements use equally long runs of spaces), so the returned
    /// buffer has the same length as `content` and kept bytes stay at their original offsets.
    ///
    /// What is kept:
    /// - the contents of string literals (the surrounding `"` are blanked),
    /// - keyword text after the leading `:`; a `.` is blanked unless it sits between two
    ///   digits, which splits `:div.flex.gap-1.5` into `div flex gap-1.5`,
    /// - everything inside a quoted list or vector (`'(…)` / `'[…]`) except the delimiters and
    ///   the `"` of nested strings,
    /// - the symbol following a quote, e.g. `'bg-white`.
    ///
    /// Everything else, including line comments, is replaced with spaces.
    fn process(&self, content: &[u8]) -> Vec<u8> {
        // Blank out the attribute names themselves so they are not reported as class names.
        //
        // `:className` has to be handled first: `:class` is a prefix of it, so replacing
        // `:class` first would leave a stray `Name` behind and the `:className` replacement
        // could never match.
        let content = content
            .replace(":className", "          ")
            .replace(":class", "      ");
        let len = content.len();
        let mut result = content.to_vec();
        let mut cursor = cursor::Cursor::new(&content);

        while cursor.pos < len {
            match cursor.curr {
                // Consume strings as-is
                b'"' => {
                    result[cursor.pos] = b' ';
                    cursor.advance();

                    while cursor.pos < len {
                        match cursor.curr {
                            // Escaped character, skip ahead to the next character
                            b'\\' => cursor.advance_twice(),

                            // End of the string
                            b'"' => {
                                result[cursor.pos] = b' ';
                                break;
                            }

                            // Everything else is valid
                            _ => cursor.advance(),
                        };
                    }
                }

                // Discard line comments until the end of the line.
                //
                // In Clojure a single `;` already starts a comment (`;;` is merely a
                // convention), so any `;` reached here begins one. Blanking the whole comment
                // prevents quotes, colons or apostrophes inside it from being interpreted as
                // the start of a string, keyword or quoted symbol.
                b';' => {
                    while cursor.pos < len && cursor.curr != b'\n' {
                        result[cursor.pos] = b' ';
                        cursor.advance();
                    }
                }

                // Consume keyword until a terminating character is reached.
                b':' => {
                    result[cursor.pos] = b' ';
                    cursor.advance();

                    while cursor.pos < len {
                        match cursor.curr {
                            // A `.` surrounded by digits is a decimal number, so we don't want to replace it.
                            //
                            // E.g.:
                            // ```
                            // gap-1.5
                            //      ^
                            // ```
                            b'.' if cursor.prev.is_ascii_digit()
                                && cursor.next.is_ascii_digit() =>
                            {
                                // Keep the `.` as-is
                            }
                            // A `.` not surrounded by digits denotes the start of a new class name in a
                            // dot-delimited keyword.
                            //
                            // E.g.:
                            // ```
                            // flex.gap-1.5
                            //     ^
                            // ```
                            b'.' => {
                                result[cursor.pos] = b' ';
                            }
                            // End of keyword.
                            _ if !is_keyword_character(cursor.curr) => {
                                result[cursor.pos] = b' ';
                                break;
                            }

                            // Consume everything else.
                            _ => {}
                        };

                        cursor.advance();
                    }
                }

                // Handle quote with a list, e.g.: `'(…)`
                // and with a vector, e.g.: `'[…]`
                b'\'' if matches!(cursor.next, b'[' | b'(') => {
                    // Blank both the quote and the opening delimiter.
                    result[cursor.pos] = b' ';
                    cursor.advance();
                    result[cursor.pos] = b' ';
                    let end = match cursor.curr {
                        b'[' => b']',
                        b'(' => b')',
                        // The match guard above only lets `[` and `(` through.
                        _ => unreachable!(),
                    };

                    // Consume until the matching closing `]` or `)`
                    while cursor.pos < len {
                        match cursor.curr {
                            x if x == end => {
                                result[cursor.pos] = b' ';
                                break;
                            }

                            // Consume strings as-is
                            b'"' => {
                                result[cursor.pos] = b' ';
                                cursor.advance();

                                while cursor.pos < len {
                                    match cursor.curr {
                                        // Escaped character, skip ahead to the next character
                                        b'\\' => cursor.advance_twice(),

                                        // End of the string
                                        b'"' => {
                                            result[cursor.pos] = b' ';
                                            break;
                                        }

                                        // Everything else is valid
                                        _ => cursor.advance(),
                                    };
                                }
                            }
                            _ => {}
                        };

                        cursor.advance();
                    }
                }

                // Handle quote with a keyword, e.g.: `'bg-white`
                b'\'' if !cursor.next.is_ascii_whitespace() => {
                    result[cursor.pos] = b' ';
                    cursor.advance();

                    while cursor.pos < len {
                        match cursor.curr {
                            // End of keyword.
                            _ if !is_keyword_character(cursor.curr) => {
                                result[cursor.pos] = b' ';
                                break;
                            }

                            // Consume everything else.
                            _ => {}
                        };

                        cursor.advance();
                    }
                }

                // Aggressively discard everything else, reducing false positives and preventing
                // characters surrounding keywords from producing false negatives.
                // E.g.:
                // ```
                // (when condition :bg-white)
                //                          ^
                // ```
                // A ')' is never a valid part of a keyword, but will nonetheless prevent 'bg-white'
                // from being extracted if not discarded.
                _ => {
                    result[cursor.pos] = b' ';
                }
            };

            // Step past the byte that ended the arm above (closing quote, keyword terminator,
            // newline after a comment, or the single discarded byte).
            cursor.advance();
        }

        result
    }
}

// Unit tests for the Clojure pre-processor.
//
// NOTE(review): `Clojure::test` and `Clojure::test_extract_contains` are provided by the
// `PreProcessor` trait, which is defined elsewhere. Presumably the former compares the
// pre-processed output against the expected string byte-for-byte and the latter checks that
// every listed candidate is extracted from the pre-processed input — confirm against the trait.
#[cfg(test)]
mod tests {
    use super::Clojure;
    use crate::extractor::pre_processors::pre_processor::PreProcessor;

    // Exact input/output pairs. Each expected string has the same length as its input: removed
    // bytes are replaced by spaces rather than dropped.
    #[test]
    fn test_clojure_pre_processor() {
        for (input, expected) in [
            (":div.flex-1.flex-2", " div flex-1 flex-2"),
            (
                ":.flex-3.flex-4 ;defaults to div",
                "  flex-3 flex-4                 ",
            ),
            ("{:class :flex-5.flex-6", "         flex-5 flex-6"),
            (r#"{:class "flex-7 flex-8"}"#, r#"         flex-7 flex-8  "#),
            (
                r#"{:class  ["flex-9" :flex-10]}"#,
                r#"           flex-9   flex-10  "#,
            ),
            (
                r#"(dom/div {:class "flex-11 flex-12"})"#,
                r#"                  flex-11 flex-12   "#,
            ),
            ("(dom/div :.flex-13.flex-14", "           flex-13 flex-14"),
            (
                r#"[:div#hello.bg-white.pr-1.5 {:class ["grid grid-cols-[auto,1fr] grid-rows-2"]}]"#,
                r#"  div#hello bg-white pr-1.5           grid grid-cols-[auto,1fr] grid-rows-2    "#,
            ),
        ] {
            Clojure::test(input, expected);
        }
    }

    // Covers dot-delimited keywords, `:class` / `:className` attributes with keyword, string and
    // vector values, first with placeholder names and then with real utility names.
    #[test]
    fn test_extract_candidates() {
        // https://github.com/luckasRanarison/tailwind-tools.nvim/issues/68#issuecomment-2660951258
        let input = r#"
            :div.c1.c2
            :.c3.c4 ;defaults to div
            {:class :c5.c6
            {:class "c7 c8"}
            {:class  ["c9" :c10]}
            (dom/div {:class "c11 c12"})
            (dom/div :.c13.c14
            {:className :c15.c16
            {:className "c17 c18"}
            {:className  ["c19" :c20]}
            (dom/div {:className "c21 c22"})
        "#;

        Clojure::test_extract_contains(
            input,
            vec![
                "c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8", "c9", "c10", "c11", "c12", "c13",
                "c14", "c15", "c16", "c17", "c18", "c19", "c20", "c21", "c22",
            ],
        );

        // Similar structure but using real classes
        let input = r#"
            :div.flex-1.flex-2
            :.flex-3.flex-4 ;defaults to div
            {:class :flex-5.flex-6
            {:class "flex-7 flex-8"}
            {:class  ["flex-9" :flex-10]}
            (dom/div {:class "flex-11 flex-12"})
            (dom/div :.flex-13.flex-14
            {:className :flex-15.flex-16
            {:className "flex-17 flex-18"}
            {:className  ["flex-19" :flex-20]}
            (dom/div {:className "flex-21 flex-22"})
        "#;

        Clojure::test_extract_contains(
            input,
            vec![
                "flex-1", "flex-2", "flex-3", "flex-4", "flex-5", "flex-6", "flex-7", "flex-8",
                "flex-9", "flex-10", "flex-11", "flex-12", "flex-13", "flex-14", "flex-15",
                "flex-16", "flex-17", "flex-18", "flex-19", "flex-20", "flex-21", "flex-22",
            ],
        );
    }

    // String contents must pass through untouched, including `:` and `.`.
    #[test]
    fn test_special_characters_are_valid_in_strings() {
        // In this case the `:` and `.` should not be replaced by ` ` because they are inside a
        // string.
        let input = r#"
            (dom/div {:class "hover:flex px-1.5"})
        "#;

        Clojure::test_extract_contains(input, vec!["hover:flex", "px-1.5"]);
    }

    // An unbalanced `"` inside a `;;` comment must not open a string that swallows the
    // following line.
    #[test]
    fn test_ignore_comments_with_invalid_strings() {
        let input = r#"
            ;; This is an unclosed string: "
            (dom/div {:class "hover:flex px-1.5"})
        "#;

        Clojure::test_extract_contains(input, vec!["hover:flex", "px-1.5"]);
    }

    // A `.` between two digits inside a keyword is part of the class name, not a separator.
    // https://github.com/tailwindlabs/tailwindcss/issues/17760
    #[test]
    fn test_extraction_of_classes_with_dots() {
        let input = r#"
            ($ :div {:class [:flex :gap-1.5 :p-1]} …)
        "#;

        Clojure::test_extract_contains(input, vec!["flex", "gap-1.5", "p-1"]);
    }

    // A `:` inside a keyword (variant separators) must be kept; only the leading `:` is removed.
    // https://github.com/tailwindlabs/tailwindcss/issues/18336
    #[test]
    fn test_extraction_of_pseudoclasses_from_keywords() {
        let input = r#"
            ($ :div {:class [:flex :first:lg:pr-6 :first:2xl:pl-6 :group-hover/2:2xs:pt-6]} …)

            :.hover:bg-white

            [:div#hello.bg-white.pr-1.5]
        "#;

        Clojure::test_extract_contains(
            input,
            vec![
                "flex",
                "first:lg:pr-6",
                "first:2xl:pl-6",
                "group-hover/2:2xs:pt-6",
                "hover:bg-white",
                "bg-white",
                "pr-1.5",
            ],
        );
    }

    // Parentheses and brackets directly next to a keyword must not prevent its extraction.
    // https://github.com/tailwindlabs/tailwindcss/issues/18344
    #[test]
    fn test_noninterference_of_parens_on_keywords() {
        let input = r#"
            (get props :y-padding :py-5)
            ($ :div {:class [:flex.pr-1.5 (if condition :bg-white :bg-black)]})
        "#;

        Clojure::test_extract_contains(
            input,
            vec!["py-5", "flex", "pr-1.5", "bg-white", "bg-black"],
        );
    }

    // Quoted vectors / lists of symbols (`'[…]`, `'(…)`) and single quoted symbols (`'foo`),
    // including multi-line lists and strings nested inside a quoted list.
    // https://github.com/tailwindlabs/tailwindcss/issues/18882
    #[test]
    fn test_extract_from_symbol_list() {
        let input = r#"
            [:div {:class '[z-1 z-2
                            z-3 z-4]}]
        "#;
        Clojure::test_extract_contains(input, vec!["z-1", "z-2", "z-3", "z-4"]);

        // https://github.com/tailwindlabs/tailwindcss/pull/18345#issuecomment-3253403847
        let input = r#"
            (def hl-class-names '[ring ring-blue-500])

            [:div
             {:class (cond-> '[input w-full]
                       textarea? (conj 'textarea)
                       (seq errors) (concat '[border-red-500 bg-red-100])
                       highlight? (concat hl-class-names))}]
        "#;
        Clojure::test_extract_contains(
            input,
            vec![
                "ring",
                "ring-blue-500",
                "input",
                "w-full",
                "textarea",
                "border-red-500",
                "bg-red-100",
            ],
        );

        let input = r#"
          [:div
            {:class '[h-100 lg:h-200 max-w-32 mx-auto py-60
                      flex flex-col justify-end items-center
                      lg:flex-row lg:justify-between
                      bg-cover bg-center bg-no-repeat rounded-3xl overflow-hidden
                      font-semibold text-gray-900]}]
        "#;
        Clojure::test_extract_contains(
            input,
            vec![
                "h-100",
                "lg:h-200",
                "max-w-32",
                "mx-auto",
                "py-60",
                "flex",
                "flex-col",
                "justify-end",
                "items-center",
                "lg:flex-row",
                "lg:justify-between",
                "bg-cover",
                "bg-center",
                "bg-no-repeat",
                "rounded-3xl",
                "overflow-hidden",
                "font-semibold",
                "text-gray-900",
            ],
        );

        // `/` is invalid and requires explicit quoting
        let input = r#"
            '[p-32 "text-black/50"]
        "#;
        Clojure::test_extract_contains(input, vec!["p-32", "text-black/50"]);

        // `[…]` is invalid and requires explicit quoting
        let input = r#"
            (print '[ring ring-blue-500 "bg-[#0088cc]"])
        "#;
        Clojure::test_extract_contains(input, vec!["ring", "ring-blue-500", "bg-[#0088cc]"]);

        // `'(…)` looks similar to `[…]` but uses parentheses instead of brackets
        let input = r#"
            (print '(ring ring-blue-500 "bg-[#0088cc]"))
        "#;
        Clojure::test_extract_contains(input, vec!["ring", "ring-blue-500", "bg-[#0088cc]"]);
    }
}