use crate::glob::hoist_static_glob_parts;
use crate::parser::Extractor;
use crate::scanner::allowed_paths::resolve_paths;
use crate::scanner::detect_sources::DetectSources;
use bexpand::Expression;
use bstr::ByteSlice;
use fxhash::{FxHashMap, FxHashSet};
use glob::optimize_patterns;
use glob_match::glob_match;
use paths::Path;
use rayon::prelude::*;
use scanner::allowed_paths::read_dir;
use std::fs;
use std::path::PathBuf;
use std::sync;
use std::time::SystemTime;
use tracing::event;
pub mod cursor;
pub mod fast_skip;
pub mod glob;
pub mod parser;
pub mod paths;
pub mod scanner;
/// Whether tracing output is enabled, decided once (lazily) from the `DEBUG` environment
/// variable.
///
/// Tracing is on when `DEBUG` is exactly `*`, `1` or `true`, or when its value contains
/// `tailwind` anywhere. A variable that is unset (or cannot be read as Unicode) leaves it off.
static SHOULD_TRACE: sync::LazyLock<bool> = sync::LazyLock::new(|| {
    std::env::var("DEBUG")
        .map(|value| value == "*" || value == "1" || value == "true" || value.contains("tailwind"))
        .unwrap_or(false)
});
/// Installs the global `tracing` formatter subscriber when [`SHOULD_TRACE`] is set.
///
/// The subscriber uses the compact format, is capped at `INFO` and is configured with
/// `FmtSpan::ACTIVE` span events. When tracing is disabled this is a no-op.
fn init_tracing() {
    if *SHOULD_TRACE {
        let subscriber = tracing_subscriber::fmt()
            .with_max_level(tracing::Level::INFO)
            .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE)
            .compact();

        // The result is deliberately discarded: this function is called on every scan, and a
        // failed initialisation must not abort the scan.
        let _ = subscriber.try_init();
    }
}
/// One piece of content to scan: either inline text or a file on disk.
///
/// `read_changed_content` uses `content` when it is set and only falls back to reading
/// `file` otherwise; when both are `None` the entry produces nothing.
#[derive(Debug, Clone)]
pub struct ChangedContent {
/// Path of the file to read. Only consulted when `content` is `None`.
pub file: Option<PathBuf>,
/// Inline content. When present it is used as-is and `file` is never read.
pub content: Option<String>,
}
/// Options for a scan: an optional base and a list of source globs.
///
/// NOTE(review): this type is not referenced anywhere else in this file; presumably it is
/// consumed by callers outside this module — confirm before relying on field semantics.
#[derive(Debug, Clone)]
pub struct ScanOptions {
/// Optional base (presumably a directory path that sources are resolved against — confirm).
pub base: Option<String>,
/// Source globs to scan.
pub sources: Vec<GlobEntry>,
}
/// Aggregated output of a scan: candidates, files and globs.
///
/// NOTE(review): this type is not constructed anywhere in this file; the field meanings
/// below mirror the identically named `Scanner` accessors and should be confirmed against
/// the code that builds it.
#[derive(Debug, Clone)]
pub struct ScanResult {
/// Extracted candidates.
pub candidates: Vec<String>,
/// Files that were scanned (presumably as path strings — confirm).
pub files: Vec<String>,
/// Globs describing what was scanned.
pub globs: Vec<GlobEntry>,
}
/// A glob pattern together with the base path it is joined onto.
#[derive(Debug, Clone, PartialEq)]
pub struct GlobEntry {
/// Base path; `Scanner::scan_sources` joins `pattern` onto it.
pub base: String,
/// Pattern relative to `base`. `Scanner::scan_sources` first tries to parse it as a
/// `bexpand` expression and, when that succeeds, scans each expansion as its own entry.
pub pattern: String,
}
/// Stateful scanner that resolves sources to files once and then extracts candidates
/// incrementally, using modification times to skip unchanged files.
#[derive(Debug, Clone, Default)]
pub struct Scanner {
/// Sources supplied at construction. `None` or an empty list makes `scan_sources` a no-op.
sources: Option<Vec<GlobEntry>>,
/// Set by `prepare` after `scan_sources` has run, so sources are resolved only once.
ready: bool,
/// Files whose contents are scanned for candidates.
files: Vec<PathBuf>,
/// Directories whose modification time is checked on each `scan` to discover new entries.
dirs: Vec<PathBuf>,
/// Globs collected while resolving the sources; returned by `get_globs`.
globs: Vec<GlobEntry>,
/// Last modification time recorded per path (both files and directories are keyed here).
mtimes: FxHashMap<PathBuf, SystemTime>,
/// Every candidate extracted so far.
candidates: FxHashSet<String>,
}
impl Scanner {
pub fn new(sources: Option<Vec<GlobEntry>>) -> Self {
Self {
sources,
..Default::default()
}
}
/// Runs a scan and returns every candidate known so far, sorted.
///
/// In order: initialises tracing (when enabled), resolves the sources on the first call,
/// picks up entries that appeared in modified directories, and extracts candidates from
/// files that are new or whose modification time changed.
pub fn scan(&mut self) -> Vec<String> {
    init_tracing();
    self.prepare();
    self.check_for_new_files();
    self.compute_candidates();

    // Clone the strings straight into the output vector; cloning the whole hash set first
    // would allocate a second table only to throw it away.
    let mut candidates: Vec<String> = self.candidates.iter().cloned().collect();

    // The values come from a set, so they are unique and an unstable sort yields the same
    // order as a stable one.
    candidates.sort_unstable();
    candidates
}
/// Extracts candidates from `changed_content` and returns only those that were not known
/// before this call, in the order produced by `parse_all_blobs`.
///
/// Every returned candidate is also recorded in `self.candidates`, so a later call will not
/// report it again.
#[tracing::instrument(skip_all)]
pub fn scan_content(&mut self, changed_content: Vec<ChangedContent>) -> Vec<String> {
    self.prepare();

    let extracted = parse_all_blobs(read_all_files(changed_content));

    extracted
        .into_iter()
        .filter(|candidate| {
            let is_new = !self.candidates.contains(candidate);
            if is_new {
                // Remember it immediately so a repeat later in the same batch is skipped too.
                self.candidates.insert(candidate.clone());
            }
            is_new
        })
        .collect()
}
/// Extracts the candidates found in a single piece of content, each paired with the
/// `usize` the extractor reports for it (presumably its byte offset in the content —
/// confirm against `Extractor::with_positions`).
///
/// Content that cannot be read is treated as empty, so the result is an empty vector.
/// Unlike `scan_content`, nothing is recorded in `self.candidates`.
#[tracing::instrument(skip_all)]
pub fn get_candidates_with_positions(
    &mut self,
    changed_content: ChangedContent,
) -> Vec<(String, usize)> {
    self.prepare();

    let content = read_changed_content(changed_content).unwrap_or_default();

    Extractor::with_positions(&content[..], Default::default())
        .into_iter()
        .map(|(candidate, position)| {
            // File contents are read as raw bytes and never validated, and nothing in this
            // file establishes that the extractor only yields valid UTF-8. Convert with a
            // check instead of `from_utf8_unchecked`: valid input takes the same single
            // allocation, invalid input is replaced lossily rather than being undefined
            // behaviour.
            let candidate = String::from_utf8(candidate.to_vec())
                .unwrap_or_else(|err| String::from_utf8_lossy(err.as_bytes()).into_owned());
            (candidate, position)
        })
        .collect()
}
/// Returns the canonicalized form of every known file as a string.
///
/// Files that fail to canonicalize are left out of the result.
#[tracing::instrument(skip_all)]
pub fn get_files(&mut self) -> Vec<String> {
    self.prepare();

    self.files
        .iter()
        .filter_map(|file| {
            let canonical = Path::from(file.clone()).canonicalize().ok()?;
            Some(canonical.to_string())
        })
        .collect()
}
/// Returns a copy of the globs collected while resolving the sources.
///
/// Resolves the sources first if that has not happened yet.
#[tracing::instrument(skip_all)]
pub fn get_globs(&mut self) -> Vec<GlobEntry> {
    self.prepare();

    let globs = &self.globs;
    globs.to_vec()
}
/// Extracts candidates from every known file that is new or whose modification time
/// differs from the one recorded on the previous pass, and adds them to `self.candidates`.
///
/// The recorded modification time of each file is updated as a side effect.
#[tracing::instrument(skip_all)]
fn compute_candidates(&mut self) {
    let mut changed_content = Vec::new();

    for path in &self.files {
        // If the modification time cannot be read, fall back to "now": that never equals
        // the previously stored value, so the file is scanned. The fallback is evaluated
        // lazily so the clock is only read on the failure path.
        let current_time = fs::metadata(path)
            .and_then(|metadata| metadata.modified())
            .unwrap_or_else(|_| SystemTime::now());

        let previous_time = self.mtimes.insert(path.clone(), current_time);

        // Scan when the file was never seen (`None`) or its timestamp changed.
        if previous_time != Some(current_time) {
            changed_content.push(ChangedContent {
                file: Some(path.clone()),
                content: None,
            });
        }
    }

    if changed_content.is_empty() {
        return;
    }

    let candidates = parse_all_blobs(read_all_files(changed_content));
    self.candidates.extend(candidates);
}
/// Resolves the configured sources the first time it is called; later calls do nothing.
fn prepare(&mut self) {
    if !self.ready {
        self.scan_sources();
        self.ready = true;
    }
}
/// Discovers files and directories that appeared since the previous pass.
///
/// Every known directory whose modification time changed (or that has no recorded time yet)
/// is listed one level deep. Unknown files are appended to `self.files`; unknown directories
/// are appended to `self.dirs` and listed in turn, until no new directories turn up.
#[tracing::instrument(skip_all)]
fn check_for_new_files(&mut self) {
    let mut modified_dirs: Vec<PathBuf> = Vec::new();

    for path in &self.dirs {
        // An unreadable modification time falls back to "now", which never matches the
        // stored value, so the directory is re-listed. Evaluated lazily so the clock is
        // only read on the failure path.
        let current_time = fs::metadata(path)
            .and_then(|metadata| metadata.modified())
            .unwrap_or_else(|_| SystemTime::now());

        let previous_time = self.mtimes.insert(path.clone(), current_time);

        // Re-list when the directory was never seen (`None`) or its timestamp changed.
        if previous_time != Some(current_time) {
            modified_dirs.push(path.clone());
        }
    }

    // Everything already tracked, so only genuinely new entries are added below.
    let mut known = FxHashSet::from_iter(self.files.iter().chain(self.dirs.iter()).cloned());

    while !modified_dirs.is_empty() {
        let new_entries = modified_dirs
            .iter()
            .flat_map(|dir| read_dir(dir, Some(1)))
            .map(|entry| entry.path().to_owned())
            .filter(|path| !known.contains(path))
            .collect::<Vec<_>>();

        modified_dirs.clear();

        for path in new_entries {
            let is_file = path.is_file();
            if !is_file && !path.is_dir() {
                continue;
            }

            // The same entry can be listed more than once in a single pass (for example
            // when a directory occurs twice in `dirs`); `insert` returning `false` means it
            // was already handled, so it is not tracked a second time.
            if !known.insert(path.clone()) {
                continue;
            }

            if is_file {
                self.files.push(path);
            } else {
                self.dirs.push(path.clone());
                // Newly found directories are listed on the next loop iteration.
                modified_dirs.push(path);
            }
        }
    }
}
/// Resolves the configured sources into `self.files`, `self.dirs` and `self.globs`.
///
/// Does nothing when no sources were configured. Otherwise:
///
/// 1. Each pattern is parsed as a `bexpand` expression; when parsing succeeds the source is
///    replaced by one entry per successful expansion, otherwise it is kept unchanged.
/// 2. Sources whose pattern ends in `**/*`, or whose `base` + `pattern` is an existing
///    directory, are handed to `DetectSources` for automatic detection.
/// 3. The remaining sources go through `hoist_static_glob_parts` and are matched as explicit
///    globs against the files returned by `resolve_paths` for their base.
/// 4. All collected globs are finally passed through `optimize_patterns`.
#[tracing::instrument(skip_all)]
fn scan_sources(&mut self) {
    /// Joins `b` onto `a` after stripping a trailing `**/*` and trailing slashes from `b`.
    ///
    /// When the stripped `b` is already absolute — it starts with `/`, or its second and
    /// third characters are `:/` (a drive prefix such as `C:/`) — it is returned on its own.
    fn join_paths(a: &str, b: &str) -> PathBuf {
        let b = b.trim_end_matches("**/*").trim_end_matches('/');

        if b.starts_with('/') {
            return PathBuf::from(b);
        }

        if b.chars().nth(1) == Some(':') && b.chars().nth(2) == Some('/') {
            return PathBuf::from(b);
        }

        // Only allocate once it is known that the two parts really have to be joined.
        let mut joined = String::with_capacity(a.len() + 1 + b.len());
        joined.push_str(a);
        joined.push('/');
        joined.push_str(b);
        PathBuf::from(joined)
    }

    let Some(sources) = &self.sources else {
        return;
    };

    if sources.is_empty() {
        return;
    }

    // Expand brace expressions so that every alternative is handled as its own source.
    let sources = sources
        .iter()
        .flat_map(|source| {
            let expression: Result<Expression, _> = source.pattern[..].try_into();
            let Ok(expression) = expression else {
                // Not a valid expression: keep the source exactly as it was given.
                return vec![source.clone()];
            };

            expression
                .into_iter()
                .filter_map(Result::ok)
                .map(move |pattern| GlobEntry {
                    base: source.base.clone(),
                    pattern: pattern.into(),
                })
                .collect::<Vec<_>>()
        })
        .collect::<Vec<_>>();

    // Split into sources that get automatic detection and sources that are explicit globs.
    let (auto_sources, glob_sources): (Vec<_>, Vec<_>) = sources.iter().partition(|source| {
        source.pattern.ends_with("**/*")
            || PathBuf::from(&source.base).join(&source.pattern).is_dir()
    });

    // Automatic detection. Roots that cannot be canonicalized are skipped.
    for path in auto_sources.iter().filter_map(|source| {
        dunce::canonicalize(join_paths(&source.base, &source.pattern)).ok()
    }) {
        // Register a `*` glob for the root itself, next to whatever detection reports.
        self.globs.push(GlobEntry {
            base: path.to_string_lossy().into(),
            pattern: "*".into(),
        });

        let detect_sources = DetectSources::new(path);
        let (files, globs, dirs) = detect_sources.detect();
        self.files.extend(files);
        self.globs.extend(globs);
        self.dirs.extend(dirs);
    }

    // Explicit globs.
    let glob_sources: Vec<_> = glob_sources.into_iter().cloned().collect();
    let hoisted = hoist_static_glob_parts(&glob_sources);

    for source in &hoisted {
        // Match on forward slashes only. `replace` already returns a fresh `String`, so the
        // base does not need to be cloned first.
        let mut full_pattern = source.base.replace('\\', "/");

        if !source.pattern.is_empty() {
            full_pattern.push('/');
            full_pattern.push_str(&source.pattern);
        }

        let base = PathBuf::from(&source.base);
        for entry in resolve_paths(&base) {
            let Some(file_type) = entry.file_type() else {
                continue;
            };

            if !file_type.is_file() {
                continue;
            }

            let file_path = entry.into_path();

            // Paths that are not valid Unicode cannot be glob-matched and are skipped.
            let Some(file_path_str) = file_path.to_str() else {
                continue;
            };

            let file_path_str = file_path_str.replace('\\', "/");

            if glob_match(&full_pattern, &file_path_str) {
                self.files.push(file_path);
            }
        }
    }

    self.globs.extend(hoisted);
    self.globs = optimize_patterns(&self.globs);
}
}
/// Produces the bytes to scan for one `ChangedContent`.
///
/// Inline `content` wins and is returned unchanged. Otherwise `file` is read from disk; a
/// read failure is logged at `ERROR` level and yields `None`, as does an entry with neither
/// field set. For files with the `svelte` extension every ` class:` is replaced by a single
/// space before the bytes are returned.
fn read_changed_content(c: ChangedContent) -> Option<Vec<u8>> {
    if let Some(content) = c.content {
        return Some(content.into_bytes());
    }

    let file = c.file?;

    let content = match std::fs::read(&file) {
        Ok(bytes) => bytes,
        Err(e) => {
            event!(tracing::Level::ERROR, "Failed to read file: {:?}", e);
            return None;
        }
    };

    // A missing or non-Unicode extension is treated like any other non-svelte file.
    let extension = file.extension().and_then(|ext| ext.to_str());
    if extension == Some("svelte") {
        Some(content.replace(" class:", " "))
    } else {
        Some(content)
    }
}
/// Reads all changed content in parallel and returns the bytes of every entry that could
/// be read; entries for which `read_changed_content` yields `None` are dropped.
#[tracing::instrument(skip_all)]
fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
    let total = changed_content.len();
    event!(tracing::Level::INFO, "Reading {:?} file(s)", total);

    let blobs: Vec<Vec<u8>> = changed_content
        .into_par_iter()
        .filter_map(read_changed_content)
        .collect();
    blobs
}
#[tracing::instrument(skip_all)]
fn parse_all_blobs(blobs: Vec<Vec<u8>>) -> Vec<String> {
let input: Vec<_> = blobs.iter().map(|blob| &blob[..]).collect();
let input = &input[..];
let mut result: Vec<String> = input
.par_iter()
.map(|input| Extractor::unique(input, Default::default()))
.reduce(Default::default, |mut a, b| {
a.extend(b);
a
})
.into_iter()
.map(|s| {
unsafe { String::from_utf8_unchecked(s.to_vec()) }
})
.collect();
result.sort();
result
}