// Collected.Press

pub mod auto_source_detection;
pub mod detect_sources;
pub mod init_tracing;
pub mod sources;

use crate::extractor::{Extracted, Extractor};
use crate::glob::optimize_patterns;
use crate::scanner::detect_sources::resolve_globs;
use crate::scanner::sources::{
    public_source_entries_to_private_source_entries, PublicSourceEntry, SourceEntry, Sources,
};
use crate::GlobEntry;
use auto_source_detection::BINARY_EXTENSIONS_GLOB;
use bstr::ByteSlice;
use fast_glob::glob_match;
use fxhash::{FxHashMap, FxHashSet};
use ignore::{gitignore::GitignoreBuilder, WalkBuilder};
use init_tracing::{init_tracing, SHOULD_TRACE};
use rayon::prelude::*;
use std::collections::{BTreeMap, BTreeSet};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::time::SystemTime;
use tracing::event;

// @source "some/folder";               // This is auto source detection
// @source "some/folder/**/*";          // This is auto source detection
// @source "some/folder/*.html";        // This is just a glob, but new files matching this should be included
// @source "node_modules/my-ui-lib";    // Auto source detection but since node_modules is explicit we allow it
//                                      // Maybe could be considered `external(…)` automatically if:
//                                      // 1. It's git ignored but listed explicitly
//                                      // 2. It exists outside of the current working directory (do we know that?)
//
// @source "do-include-me.bin";         // `.bin` is typically ignored, but now it's explicit so should be included
// @source "git-ignored.html";          // A git ignored file that is listed explicitly, should be scanned

/// A unit of content handed to the scanner for (re-)scanning.
///
/// In both variants the second field is used as the file extension when the content is read
/// (see `read_changed_content`), where it selects the pre-processor to run.
#[derive(Debug, Clone)]
pub enum ChangedContent {
    /// A file on disk: its path and its extension.
    File(PathBuf, String),
    /// Raw in-memory content and the extension it should be treated as.
    Content(String, String),
}

/// Options describing what to scan.
///
/// NOTE(review): this type is not referenced anywhere else in this file; presumably it is
/// consumed by callers outside of it — confirm.
#[derive(Debug, Clone)]
pub struct ScanOptions {
    /// Base path to start scanning from
    pub base: Option<String>,

    /// Glob sources
    pub sources: Vec<GlobEntry>,
}

/// The combined outcome of a scan: candidates, files and globs.
///
/// NOTE(review): this type is not constructed anywhere in this file; the field meanings below
/// are inferred from the identically named `Scanner` state — confirm against the callers.
#[derive(Debug, Clone)]
pub struct ScanResult {
    /// Candidates that were found
    pub candidates: Vec<String>,
    /// Files that were scanned
    pub files: Vec<String>,
    /// Globs that describe the scanned sources
    pub globs: Vec<GlobEntry>,
}

/// Stateful scanner that walks the configured sources and extracts candidates from them.
///
/// The scanner remembers everything it has seen (files, directories, extensions and
/// candidates) across calls, so that later calls only have to report what is new.
#[derive(Debug, Clone, Default)]
pub struct Scanner {
    /// Content sources
    sources: Sources,

    /// The walker to detect all files that we have to scan.
    /// `None` when no source provides a root to walk.
    walker: Option<WalkBuilder>,

    /// All found extensions
    extensions: FxHashSet<String>,

    /// All files that we have to scan
    files: FxHashSet<PathBuf>,

    /// All directories, sub-directories, etc… we saw during source detection
    dirs: FxHashSet<PathBuf>,

    /// All generated globs, used for setting up watchers.
    /// Computed lazily by `get_globs` and cached afterwards.
    globs: Option<Vec<GlobEntry>>,

    /// Track unique set of candidates
    candidates: FxHashSet<String>,

    /// Track mtimes for files so re-scans can skip unchanged files.
    /// Only populated after the first scan completes (to avoid unnecessary
    /// metadata calls on initial build).
    mtimes: FxHashMap<PathBuf, SystemTime>,

    /// Whether we've completed at least one full scan. When false, we skip
    /// mtime tracking entirely so the initial build stays fast.
    has_scanned_once: bool,

    /// Whether sources have been scanned since the last `scan()` call.
    /// While true, `discover_sources` returns early without walking the file system.
    sources_scanned: bool,
}

impl Scanner {
    /// Builds a scanner for the given public source entries.
    ///
    /// Initializes tracing, converts the entries into their internal representation and
    /// prepares the file system walker. All remaining state starts out at its default.
    pub fn new(sources: Vec<PublicSourceEntry>) -> Self {
        init_tracing();

        let tracing_enabled = *SHOULD_TRACE;

        // Log the sources exactly as they were handed to us
        if tracing_enabled {
            event!(tracing::Level::INFO, "Provided sources:");
            sources.iter().for_each(|source| {
                event!(tracing::Level::INFO, "Source: {:?}", source);
            });
        }

        let private_entries = public_source_entries_to_private_source_entries(sources);
        let sources = Sources::new(private_entries);

        // Log the sources after conversion
        if tracing_enabled {
            event!(tracing::Level::INFO, "Optimized sources:");
            sources.iter().for_each(|source| {
                event!(tracing::Level::INFO, "Source: {:?}", source);
            });
        }

        Self {
            walker: create_walker(&sources),
            sources,
            ..Default::default()
        }
    }

    pub fn scan(&mut self) -> Vec<String> {
        self.sources_scanned = false;

        let (scanned_blobs, css_files) = self.discover_sources();

        self.extract_candidates(scanned_blobs, css_files);

        // Return all candidates sorted
        let mut result = self.candidates.iter().cloned().collect::<Vec<_>>();
        result.par_sort_unstable();
        result
    }

    /// Scans the given changed content and returns the newly discovered candidates, sorted.
    ///
    /// - `ChangedContent::Content` entries are always scanned.
    /// - `ChangedContent::File` entries are canonicalized first; files that cannot be
    ///   canonicalized are dropped. Files that are already tracked are scanned directly.
    ///   Unknown files are only scanned (and tracked from then on) when the walker yields
    ///   them, which means they pass all source and ignore rules.
    #[tracing::instrument(skip_all)]
    pub fn scan_content(&mut self, changed_content: Vec<ChangedContent>) -> Vec<String> {
        // Raw content can be parsed directly, no need to verify if the file exists and is allowed
        // to be scanned.
        let mut content_to_scan: Vec<ChangedContent> = Vec::with_capacity(changed_content.len());
        let mut known_files: Vec<ChangedContent> = Vec::new();
        let mut new_unknown_files: Vec<ChangedContent> = Vec::new();

        // Classify every entry in a single pass
        for item in changed_content {
            match item {
                ChangedContent::Content(..) => content_to_scan.push(item),
                ChangedContent::File(file, extension) => {
                    // Fully resolve the file, skip it when that is not possible
                    let Ok(file) = dunce::canonicalize(file) else {
                        continue;
                    };

                    let is_known = self.files.contains(&file);
                    let resolved = ChangedContent::File(file, extension);
                    if is_known {
                        known_files.push(resolved);
                    } else {
                        new_unknown_files.push(resolved);
                    }
                }
            }
        }

        // All known files are allowed to be scanned
        content_to_scan.extend(known_files);

        // Figure out if the new unknown files are allowed to be scanned
        if !new_unknown_files.is_empty() {
            if let Some(walk_builder) = &self.walker {
                let tracked_files = &mut self.files;

                for entry in walk_builder.build().filter_map(Result::ok) {
                    let path = entry.path();
                    if !path.is_file() {
                        continue;
                    }

                    // When the file is found on disk it means that all the rules pass. We can
                    // extract the current file and remove it from the list of passed in files.
                    // `retain` drops all matches in one linear pass.
                    new_unknown_files.retain(|changed_file| {
                        let ChangedContent::File(file, _) = changed_file else {
                            return true;
                        };

                        if file != path {
                            return true;
                        }

                        tracked_files.insert(path.to_path_buf()); // Track for future use
                        content_to_scan.push(changed_file.clone()); // Track for parsing
                        false
                    });

                    // We can stop walking the file system if all files we are interested in have
                    // been found.
                    if new_unknown_files.is_empty() {
                        break;
                    }
                }
            }
        }

        // Read all content into blobs for extraction
        let blobs = read_all_files(content_to_scan);
        self.extract_candidates(blobs, vec![])
    }

    /// Extracts candidates from the given blobs and CSS variables from the given CSS files.
    ///
    /// Returns only the candidates that were not known before this call, sorted. Those
    /// candidates are added to the scanner's set of known candidates.
    #[tracing::instrument(skip_all)]
    fn extract_candidates(&mut self, blobs: Vec<Vec<u8>>, css_files: Vec<PathBuf>) -> Vec<String> {
        // Extract all candidates from the pre-read blobs
        let mut new_candidates = parse_all_blobs(blobs);

        // Extract all CSS variables from the CSS files
        if !css_files.is_empty() {
            let css_blobs = read_all_files(
                css_files
                    .into_iter()
                    .map(|file| ChangedContent::File(file, "css".into()))
                    .collect(),
            );

            new_candidates.extend(extract_css_variables(css_blobs));
        }

        // Only keep candidates we haven't seen before. Probing the known set once per new
        // candidate keeps this proportional to the new candidates instead of walking every
        // known candidate on each call.
        new_candidates.retain(|candidate| !self.candidates.contains(candidate));

        let mut result: Vec<String> = new_candidates.into_iter().collect();

        // Track new candidates for subsequent calls
        self.candidates.extend(result.iter().cloned());

        result.par_sort_unstable();
        result
    }

    /// Returns the paths of all tracked files as strings.
    ///
    /// Source discovery is triggered first (it returns early when the sources were already
    /// scanned). Paths that are not valid Unicode are left out of the result.
    #[tracing::instrument(skip_all)]
    pub fn get_files(&mut self) -> Vec<String> {
        // Only the side effect of populating `self.files` matters here
        self.discover_sources();

        self.files
            .par_iter()
            .filter_map(|path| path.to_str().map(str::to_owned))
            .collect()
    }

    /// Returns the globs that describe the scanned sources, e.g. for setting up watchers.
    ///
    /// The globs are computed on the first call and served from the cache afterwards.
    #[tracing::instrument(skip_all)]
    pub fn get_globs(&mut self) -> Vec<GlobEntry> {
        // Serve from the cache when possible
        if let Some(cached) = self.globs.as_ref() {
            return cached.clone();
        }

        // Populates `self.dirs` and `self.extensions`, which the auto sources need below
        self.discover_sources();

        let mut collected: Vec<GlobEntry> = Vec::new();
        for source in self.sources.iter() {
            match source {
                SourceEntry::Pattern { base, pattern } => collected.push(GlobEntry {
                    base: base.to_string_lossy().to_string(),
                    pattern: pattern.to_string(),
                }),
                SourceEntry::Auto { base } | SourceEntry::External { base } => {
                    let resolved = resolve_globs(base.to_path_buf(), &self.dirs, &self.extensions);
                    collected.extend(resolved);
                }
                _ => {}
            }
        }

        // Re-optimize the globs to reduce the number of patterns we have to scan.
        let optimized = optimize_patterns(&collected);

        // Track the globs for subsequent calls
        self.globs = Some(optimized.clone());

        optimized
    }

    /// Returns the configured sources as glob entries.
    ///
    /// Auto and external sources are reported with the pattern `**/*`, pattern sources with
    /// their own pattern. All other sources are left out.
    #[tracing::instrument(skip_all)]
    pub fn get_normalized_sources(&self) -> Vec<GlobEntry> {
        self.sources
            .iter()
            .filter_map(|source| {
                let (base, pattern) = match source {
                    SourceEntry::Auto { base } | SourceEntry::External { base } => {
                        (base, "**/*".to_string())
                    }
                    SourceEntry::Pattern { base, pattern } => (base, pattern.to_string()),
                    _ => return None,
                };

                Some(GlobEntry {
                    base: base.to_string_lossy().to_string(),
                    pattern,
                })
            })
            .collect()
    }

    /// Extracts all candidates from the given content together with their positions.
    ///
    /// Each position is the byte offset of the candidate within the (pre-processed) content.
    /// Content that cannot be read is treated as empty.
    #[tracing::instrument(skip_all)]
    pub fn get_candidates_with_positions(
        &mut self,
        changed_content: ChangedContent,
    ) -> Vec<(String, usize)> {
        let original_content = read_changed_content(changed_content).unwrap_or_default();

        // Workaround for legacy upgrades:
        //
        // `-[]` won't parse in the new parser (`[…]` must contain _something_), but we do need it
        // for people using `group-[]` (which we will later replace with `in-[.group]` instead).
        //
        // The replacement has the same length as the needle, so offsets into the patched
        // content are valid offsets into the original content as well.
        let patched_content = original_content.replace("-[]", "XYZ");
        let base_addr = patched_content.as_ptr() as usize;

        Extractor::new(&patched_content[..])
            .extract()
            .into_par_iter()
            .filter_map(|extracted| {
                let Extracted::Candidate(candidate) = extracted else {
                    return None;
                };

                // The candidate is a sub-slice of the patched content, so the distance between
                // the two pointers is its byte offset.
                let start = candidate.as_ptr() as usize - base_addr;
                let original = &original_content[start..start + candidate.len()];

                // Report the original spelling when the workaround touched this candidate
                let bytes = if original.contains_str("-[]") {
                    original
                } else {
                    candidate
                };

                // SAFETY: When we parsed the candidates, we already guaranteed that the byte
                // slices are valid, therefore we don't have to re-check here when we want to
                // convert it back to a string. The original spelling covers the same byte range
                // and only differs from the parsed one by the ASCII `-[]` / `XYZ` substitution.
                Some((unsafe { String::from_utf8_unchecked(bytes.to_vec()) }, start))
            })
            .collect()
    }

    /// Walks the configured sources and reads every file that needs (re-)scanning.
    ///
    /// Returns the pre-processed contents of the changed non-CSS files, together with the
    /// paths of the changed CSS files. Both lists are empty when the sources were already
    /// scanned since the last `scan()` call, or when there is no walker.
    ///
    /// As a side effect, every directory, file and extension that was seen is recorded.
    #[tracing::instrument(skip_all)]
    fn discover_sources(&mut self) -> (Vec<Vec<u8>>, Vec<PathBuf>) {
        if self.sources_scanned {
            return (vec![], vec![]);
        }
        self.sources_scanned = true;

        let Some(walker) = &mut self.walker else {
            return (vec![], vec![]);
        };

        // Use synchronous walk for the initial build (lower overhead) and parallel
        // walk for subsequent calls (watch mode) where the overhead is amortised.
        let is_rescan = self.has_scanned_once;
        let all_entries = if is_rescan {
            walk_parallel(walker)
        } else {
            walk_synchronous(walker)
        };

        let mut css_files: Vec<PathBuf> = vec![];
        let mut content_paths: Vec<(PathBuf, String)> = Vec::new();
        let mut seen_files: FxHashSet<PathBuf> = FxHashSet::default();

        for (path, is_dir, extension) in all_entries {
            if is_dir {
                self.dirs.insert(path);
                continue;
            }

            // Deduplicate: parallel walk can visit the same file from multiple threads
            if !seen_files.insert(path.clone()) {
                continue;
            }

            // On re-scans, check mtime to skip unchanged files. On the first scan we skip this
            // entirely to avoid extra metadata syscalls. A file whose mtime cannot be
            // determined is always treated as changed.
            let changed = !is_rescan
                || match std::fs::metadata(&path).and_then(|meta| meta.modified()) {
                    Ok(mtime) => self.mtimes.insert(path.clone(), mtime) != Some(mtime),
                    Err(_) => true,
                };

            if changed {
                // Special handing for CSS files, we don't want to extract candidates from
                // these files, but we do want to extract used CSS variables.
                if extension == "css" {
                    css_files.push(path.clone());
                } else {
                    content_paths.push((path.clone(), extension.clone()));
                }
            }

            self.extensions.insert(extension);
            self.files.insert(path);
        }

        // Read + preprocess all discovered files in parallel
        let scanned_blobs: Vec<Vec<u8>> = content_paths
            .into_par_iter()
            .filter_map(|(path, ext)| {
                let raw = std::fs::read(&path).ok()?;
                event!(tracing::Level::INFO, "Reading {:?}", path);

                // Files that end up empty have nothing to extract from
                let processed = pre_process_input(raw, &ext);
                (!processed.is_empty()).then_some(processed)
            })
            .collect();

        self.has_scanned_once = true;

        (scanned_blobs, css_files)
    }
}

/// Produces the pre-processed bytes for a piece of changed content.
///
/// Files are read from disk, raw content is used as-is. Returns `None` (after logging the
/// error) when a file cannot be read.
fn read_changed_content(c: ChangedContent) -> Option<Vec<u8>> {
    match c {
        ChangedContent::Content(contents, extension) => {
            Some(pre_process_input(contents.into_bytes(), &extension))
        }

        ChangedContent::File(file, extension) => {
            let bytes = match std::fs::read(&file) {
                Ok(bytes) => bytes,
                Err(e) => {
                    event!(tracing::Level::ERROR, "Failed to read file: {:?}", e);
                    return None;
                }
            };

            event!(tracing::Level::INFO, "Reading {:?}", file);
            Some(pre_process_input(bytes, &extension))
        }
    }
}

/// Runs the pre-processor that belongs to the given file extension over the content.
///
/// `extension` is expected without a leading dot (e.g. `"vue"`). Content with an extension
/// that has no dedicated pre-processor is returned unchanged.
pub fn pre_process_input(content: Vec<u8>, extension: &str) -> Vec<u8> {
    use crate::extractor::pre_processors::*;

    match extension {
        "clj" | "cljs" | "cljc" => Clojure.process(&content),
        "heex" | "eex" | "ex" | "exs" => Elixir.process(&content),
        "cshtml" | "razor" => Razor.process(&content),
        "haml" => Haml.process(&content),
        "json" | "jsonl" | "ndjson" => Json.process(&content),
        "md" | "mdx" => Markdown.process(&content),
        "pug" => Pug.process(&content),
        "rb" | "erb" => Ruby.process(&content),
        "slim" | "slang" => Slim.process(&content),
        "svelte" => Svelte.process(&content),
        "rs" => Rust.process(&content),
        "vue" => Vue.process(&content),
        // No dedicated pre-processor: hand the content back as-is
        _ => content,
    }
}

/// Reads and pre-processes all given content in parallel.
///
/// Entries that cannot be read are left out of the result.
#[tracing::instrument(skip_all)]
fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
    let file_count = changed_content.len();
    event!(tracing::Level::INFO, "Reading {:?} file(s)", file_count);

    let blobs: Vec<Vec<u8>> = changed_content
        .into_par_iter()
        .filter_map(read_changed_content)
        .collect();

    blobs
}

/// Extracts the unique set of CSS variables from the given blobs, by running
/// `Extractor::extract_variables_from_css` over every line (see `extract`).
#[tracing::instrument(skip_all)]
fn extract_css_variables(blobs: Vec<Vec<u8>>) -> FxHashSet<String> {
    extract(blobs, |mut extractor| {
        extractor.extract_variables_from_css()
    })
}

/// Extracts the unique set of candidates from the given blobs, by running
/// `Extractor::extract` over every line (see `extract`).
#[tracing::instrument(skip_all)]
fn parse_all_blobs(blobs: Vec<Vec<u8>>) -> FxHashSet<String> {
    extract(blobs, |mut extractor| extractor.extract())
}

#[tracing::instrument(skip_all)]
fn extract<H>(blobs: Vec<Vec<u8>>, handle: H) -> FxHashSet<String>
where
    H: Fn(Extractor) -> Vec<Extracted> + std::marker::Sync,
{
    blobs
        .par_iter()
        .flat_map(|blob| blob.par_split(|x| *x == b'\n'))
        .filter_map(|blob| {
            if blob.is_empty() {
                return None;
            }

            let extracted = handle(crate::extractor::Extractor::new(blob));
            if extracted.is_empty() {
                return None;
            }

            Some(FxHashSet::from_iter(extracted.into_iter().map(
                |x| match x {
                    Extracted::Candidate(bytes) => bytes,
                    Extracted::CssVariable(bytes) => bytes,
                },
            )))
        })
        .reduce(Default::default, |mut a, b| {
            a.extend(b);
            a
        })
        .into_iter()
        .map(|s| unsafe { String::from_utf8_unchecked(s.to_vec()) })
        .collect()
}

/// An entry produced by the file system walk: `(path, is_dir, extension)`.
///
/// The extension is an empty string for directories, and for files without an extension
/// (or with an extension that is not valid Unicode).
type WalkEntry = (PathBuf, bool, String);

/// Walks the file system on the current thread and collects every entry that was found.
///
/// Used for the initial build, where spawning parallel walker threads costs more than it
/// saves. Entries that result in an error are skipped.
#[tracing::instrument(skip_all)]
fn walk_synchronous(walker: &mut WalkBuilder) -> Vec<WalkEntry> {
    walker
        .build()
        .filter_map(Result::ok)
        .map(|entry| {
            let is_dir = entry.file_type().is_some_and(|ft| ft.is_dir());
            let path = entry.into_path();

            // Directories never carry an extension
            let ext = if is_dir {
                String::new()
            } else {
                path.extension()
                    .and_then(|x| x.to_str())
                    .unwrap_or_default()
                    .to_owned()
            };

            (path, is_dir, ext)
        })
        .collect()
}

/// Walks the file system with the parallel walker and collects every entry that was found.
///
/// Used in watch mode, where the overhead of the parallel walker is amortised across many
/// rebuilds. Entries that result in an error are skipped. The order of the returned entries
/// depends on how the walker threads interleave.
#[tracing::instrument(skip_all)]
fn walk_parallel(walker: &mut WalkBuilder) -> Vec<WalkEntry> {
    /// Number of entries a walker thread buffers before taking the shared lock.
    const BATCH_SIZE: usize = 256;

    /// Per-thread buffer that moves its entries into the shared list in batches, and flushes
    /// whatever is left when it is dropped.
    struct Batch {
        pending: Vec<WalkEntry>,
        sink: Arc<Mutex<Vec<WalkEntry>>>,
    }

    impl Batch {
        fn flush(&mut self) {
            if !self.pending.is_empty() {
                self.sink.lock().unwrap().append(&mut self.pending);
            }
        }
    }

    impl Drop for Batch {
        fn drop(&mut self) {
            self.flush();
        }
    }

    let sink: Arc<Mutex<Vec<WalkEntry>>> = Arc::new(Mutex::new(Vec::new()));

    walker.build_parallel().run(|| {
        // One batch per visitor closure
        let mut batch = Batch {
            pending: Vec::with_capacity(BATCH_SIZE),
            sink: sink.clone(),
        };

        Box::new(move |entry| {
            let Ok(entry) = entry else {
                return ignore::WalkState::Continue;
            };

            let is_dir = entry.file_type().is_some_and(|ft| ft.is_dir());
            let path = entry.into_path();

            // Directories never carry an extension
            let ext = if is_dir {
                String::new()
            } else {
                path.extension()
                    .and_then(|x| x.to_str())
                    .unwrap_or_default()
                    .to_owned()
            };
            batch.pending.push((path, is_dir, ext));

            if batch.pending.len() >= BATCH_SIZE {
                batch.flush();
            }

            ignore::WalkState::Continue
        })
    });

    // All threads have finished and flushed their buffers when their batch was dropped, which
    // also released every other handle to the shared list.
    Arc::try_unwrap(sink).unwrap().into_inner().unwrap()
}

/// Sets up a WalkBuilder with all source roots, gitignore rules, and source pattern matching.
///
/// The resulting builder is shared by every walk in this file: `walk_synchronous` (used for
/// the first scan), `walk_parallel` (used for re-scans) and the walk in
/// `Scanner::scan_content`.
///
/// Returns `None` when no source provides a root to walk, i.e. when there are no `Auto`,
/// `Pattern` or `External` sources.
///
/// # Panics
///
/// Panics when a pattern cannot be added to, or built into, a gitignore matcher (see the
/// `unwrap()` calls on the `GitignoreBuilder` below).
fn create_walker(sources: &Sources) -> Option<WalkBuilder> {
    // The first root is used to create the builder, all other roots are added afterwards.
    let mut other_roots: FxHashSet<&PathBuf> = FxHashSet::default();
    let mut first_root: Option<&PathBuf> = None;
    // Gitignore-style lines per base path; lines starting with `!` re-include paths.
    let mut ignores: BTreeMap<&PathBuf, BTreeSet<String>> = Default::default();

    for source in sources.iter() {
        match source {
            SourceEntry::Auto { base } => {
                if first_root.is_none() {
                    first_root = Some(base);
                } else {
                    other_roots.insert(base);
                }
            }
            SourceEntry::Pattern { base, pattern } => {
                let mut pattern = pattern.to_string();

                if first_root.is_none() {
                    first_root = Some(base);
                } else {
                    other_roots.insert(base);
                }

                if !pattern.contains("**") {
                    // Ensure that the pattern is pinned to the base path.
                    if !pattern.starts_with("/") {
                        pattern = format!("/{pattern}");
                    }

                    // Specific patterns should take precedence even over git-ignored files:
                    ignores
                        .entry(base)
                        .or_default()
                        .insert(format!("!{}", pattern));
                } else {
                    // Assumption: the pattern we receive will already be brace expanded. So
                    // `*.{html,jsx}` will result in two separate patterns: `*.html` and `*.jsx`.
                    if let Some(extension) = Path::new(&pattern).extension() {
                        // Extend auto source detection to include the extension
                        ignores
                            .entry(base)
                            .or_default()
                            .insert(format!("!*.{}", extension.to_string_lossy()));
                    }
                }
            }
            SourceEntry::Ignored { base, pattern } => {
                let mut pattern = pattern.to_string();
                // Ensure that the pattern is pinned to the base path.
                if !pattern.starts_with("/") {
                    pattern = format!("/{pattern}");
                }
                ignores.entry(base).or_default().insert(pattern);
            }
            SourceEntry::External { base } => {
                if first_root.is_none() {
                    first_root = Some(base);
                } else {
                    other_roots.insert(base);
                }

                // External sources should take precedence even over git-ignored files:
                ignores
                    .entry(base)
                    .or_default()
                    .insert(format!("!{}", "/**/*"));

                // External sources should still disallow binary extensions:
                ignores
                    .entry(base)
                    .or_default()
                    .insert(BINARY_EXTENSIONS_GLOB.clone());
            }
        }
    }

    // Without any root there is nothing to walk, so no walker is created.
    let mut builder = WalkBuilder::new(first_root?);

    // We have to follow symlinks
    builder.follow_links(true);

    // Scan hidden files / directories
    builder.hidden(false);

    // Don't respect global gitignore files
    builder.git_global(false);

    // By default, allow .gitignore files to be used regardless of whether or not
    // a .git directory is present. This is an optimization for when projects
    // are first created and may not be in a git repo yet.
    builder.require_git(false);

    // If we are in a git repo then require it to ensure that only rules within
    // the repo are used. For example, we don't want to consider a .gitignore file
    // in the user's home folder if we're in a git repo.
    //
    // The alternative is using a call like `.parents(false)` but that will
    // prevent looking at parent directories for .gitignore files from within
    // the repo and that's not what we want.
    //
    // For example, in a project with this structure:
    //
    // home
    // .gitignore
    //  my-project
    //   .gitignore
    //   apps
    //     .gitignore
    //     web
    //       {root}
    //
    // We do want to consider all .gitignore files listed:
    // - home/.gitignore
    // - my-project/.gitignore
    // - my-project/apps/.gitignore
    //
    // However, if a repo is initialized inside my-project then only the following
    // make sense for consideration:
    // - my-project/.gitignore
    // - my-project/apps/.gitignore
    //
    // Setting the require_git(true) flag conditionally allows us to do this.
    //
    // Note: only the first root is checked for an enclosing `.git` directory.
    for parent in first_root?.ancestors() {
        if parent.join(".git").exists() {
            builder.require_git(true);
            break;
        }
    }

    for root in other_roots {
        builder.add(root);
    }

    // Setup auto source detection rules
    for ignore in auto_source_detection::RULES.iter() {
        builder.add_gitignore(ignore.clone());
    }

    // Setup ignores based on `@source` definitions
    for (base, patterns) in ignores {
        let mut ignore_builder = GitignoreBuilder::new(base);
        for pattern in patterns {
            // Panics when the pattern is not a valid gitignore line
            ignore_builder.add_line(None, &pattern).unwrap();
        }
        let ignore = ignore_builder.build().unwrap();
        builder.add_gitignore(ignore);
    }

    // Pre-compute source matching data to avoid allocations in the hot filter_entry path
    let auto_bases: Vec<PathBuf> = sources
        .iter()
        .filter_map(|source| match source {
            SourceEntry::Auto { base } | SourceEntry::External { base } => Some(base.clone()),
            _ => None,
        })
        .collect();

    // Pattern sources, with each pattern normalized to start with a `/`
    let pattern_sources: Vec<(PathBuf, String)> = sources
        .iter()
        .filter_map(|source| match source {
            SourceEntry::Pattern { base, pattern } => {
                let normalized = if pattern.starts_with("/") {
                    pattern.to_string()
                } else {
                    format!("/{pattern}")
                };
                Some((base.clone(), normalized))
            }
            _ => None,
        })
        .collect();

    // Source pattern matching filter (lock-free, safe for parallel walking)
    builder.filter_entry(move |entry| {
        let path = entry.path();

        // Ensure the entries are matching any of the provided source patterns (this is
        // necessary for manual-patterns that can filter the file extension)
        //
        // Only files are filtered here, everything else is always let through.
        if path.is_file() {
            let mut matches = false;

            // Any file below an auto or external base matches
            for base in &auto_bases {
                if path.starts_with(base) {
                    matches = true;
                    break;
                }
            }

            // Otherwise the path relative to the base (with a leading `/`) has to match the
            // pattern of one of the pattern sources
            if !matches {
                for (base, pattern) in &pattern_sources {
                    let remainder = path.strip_prefix(base);
                    if remainder.is_ok_and(|remainder| {
                        let mut path_str = remainder.to_string_lossy().to_string();
                        if !path_str.starts_with("/") {
                            path_str = format!("/{path_str}");
                        }
                        glob_match(pattern, path_str.as_bytes())
                    }) {
                        matches = true;
                        break;
                    }
                }
            }

            if !matches {
                return false;
            }
        }

        true
    });

    Some(builder)
}

#[cfg(test)]
mod tests {
    use super::{ChangedContent, Scanner};
    use pretty_assertions::assert_eq;

    /// Verifies that `get_candidates_with_positions` reports every candidate together with
    /// its byte offset in the input, for markup both before and after migrations.
    #[test]
    fn test_positions() {
        // No sources are needed, the content is passed in directly
        let mut scanner = Scanner::new(vec![]);

        for (input, expected) in [
            // Before migrations
            (
                r#"<div class="!tw__flex sm:!tw__block tw__bg-gradient-to-t flex tw:[color:red] group-[]:tw__flex"#,
                vec![
                    ("class".to_string(), 5),
                    ("!tw__flex".to_string(), 12),
                    ("sm:!tw__block".to_string(), 22),
                    ("tw__bg-gradient-to-t".to_string(), 36),
                    ("flex".to_string(), 57),
                    ("tw:[color:red]".to_string(), 62),
                    // The legacy `-[]` has to be reported in its original spelling
                    ("group-[]:tw__flex".to_string(), 77),
                ],
            ),
            // After migrations
            (
                r#"<div class="tw:flex! tw:sm:block! tw:bg-linear-to-t flex tw:[color:red] tw:in-[.tw\:group]:flex"></div>"#,
                vec![
                    ("class".to_string(), 5),
                    ("tw:flex!".to_string(), 12),
                    ("tw:sm:block!".to_string(), 21),
                    ("tw:bg-linear-to-t".to_string(), 34),
                    ("flex".to_string(), 52),
                    ("tw:[color:red]".to_string(), 57),
                    ("tw:in-[.tw\\:group]:flex".to_string(), 72),
                ],
            ),
        ] {
            let candidates = scanner.get_candidates_with_positions(ChangedContent::Content(
                input.to_string(),
                "html".into(),
            ));
            assert_eq!(candidates, expected);
        }
    }
}