use crate::cursor;
use crate::extractor::bracket_stack;
use crate::extractor::pre_processors::pre_processor::PreProcessor;
use crate::scanner::pre_process_input;
use bstr::ByteVec;
use regex::{Regex, RegexBuilder};
use std::sync;
static TEMPLATE_START_REGEX: sync::LazyLock<Regex> = sync::LazyLock::new(|| {
RegexBuilder::new(r#"\s*([a-z0-9_-]+)_template\s*<<[-~]?([A-Z]+)$"#)
.multi_line(true)
.build()
.unwrap()
});
static TEMPLATE_END_REGEX: sync::LazyLock<Regex> = sync::LazyLock::new(|| {
RegexBuilder::new(r#"^\s*([A-Z]+)"#)
.multi_line(true)
.build()
.unwrap()
});
#[derive(Debug, Default)]
pub struct Ruby;
impl PreProcessor for Ruby {
fn process(&self, content: &[u8]) -> Vec<u8> {
let len = content.len();
let mut result = content.to_vec();
let mut cursor = cursor::Cursor::new(content);
let mut bracket_stack = bracket_stack::BracketStack::default();
let content_as_str = std::str::from_utf8(content).unwrap();
let starts = TEMPLATE_START_REGEX
.captures_iter(content_as_str)
.collect::<Vec<_>>();
let ends = TEMPLATE_END_REGEX
.captures_iter(content_as_str)
.collect::<Vec<_>>();
for start in starts.iter() {
let lang = start.get(1).unwrap().as_str();
let delimiter_start = start.get(2).unwrap().as_str();
let body_start = start.get(0).unwrap().end();
for end in ends.iter() {
let body_end = end.get(0).unwrap().start();
if body_end < body_start {
continue;
}
let delimiter_end = end.get(1).unwrap().as_str();
if delimiter_end != delimiter_start {
continue;
}
let body = &content_as_str[body_start..body_end];
let replaced = pre_process_input(body.as_bytes(), &lang.to_ascii_lowercase());
result.replace_range(body_start..body_end, replaced);
break;
}
}
while cursor.pos < len {
match cursor.curr {
b'"' => {
cursor.advance();
while cursor.pos < len {
match cursor.curr {
b'\\' => cursor.advance_twice(),
b'"' => break,
_ => cursor.advance(),
};
}
cursor.advance();
continue;
}
b'\'' => {
cursor.advance();
while cursor.pos < len {
match cursor.curr {
b'\\' => cursor.advance_twice(),
b'\'' => break,
_ => cursor.advance(),
};
}
cursor.advance();
continue;
}
b'#' if !matches!(cursor.prev, b'%') => {
result[cursor.pos] = b' ';
cursor.advance();
while cursor.pos < len {
match cursor.curr {
b'\n' => break,
_ => {
result[cursor.pos] = b' ';
cursor.advance();
}
};
}
cursor.advance();
continue;
}
_ => {}
}
if cursor.curr != b'%' || !matches!(cursor.next, b'w' | b'W' | b'p') {
cursor.advance();
continue;
}
cursor.advance_twice();
let boundary = match cursor.curr {
b'[' => b']',
b'(' => b')',
b'{' => b'}',
b'#' => b'#',
b' ' => b'\n',
_ => {
cursor.advance();
continue;
}
};
bracket_stack.reset();
result[cursor.pos] = b' ';
cursor.advance();
while cursor.pos < len {
match cursor.curr {
b'\\' => {
if cursor.next == b' ' {
result[cursor.pos] = b' ';
}
cursor.advance();
}
b'[' | b'(' | b'{' => {
bracket_stack.push(cursor.curr);
}
b']' | b')' | b'}' if !bracket_stack.is_empty() => {
if !bracket_stack.pop(cursor.curr) {
cursor.advance();
}
}
_ if cursor.curr == boundary => {
if boundary != b'\n' {
result[cursor.pos] = b' ';
}
break;
}
_ => {}
}
cursor.advance();
}
}
result
}
}
#[cfg(test)]
mod tests {
use super::Ruby;
use crate::extractor::pre_processors::pre_processor::PreProcessor;
#[test]
fn test_ruby_pre_processor() {
for (input, expected) in [
("%w[flex px-2.5]", "%w flex px-2.5 "),
(
"%w[flex data-[state=pending]:bg-[#0088cc] flex-col]",
"%w flex data-[state=pending]:bg-[#0088cc] flex-col ",
),
("%w{flex px-2.5}", "%w flex px-2.5 "),
(
"%w{flex data-[state=pending]:bg-(--my-color) flex-col}",
"%w flex data-[state=pending]:bg-(--my-color) flex-col ",
),
("%w(flex px-2.5)", "%w flex px-2.5 "),
(
"%w(flex data-[state=pending]:bg-(--my-color) flex-col)",
"%w flex data-[state=pending]:bg-(--my-color) flex-col ",
),
("%w flex px-2.5\n", "%w flex px-2.5\n"),
(r#"%w[foo\ bar baz\ bat]"#, r#"%w foo bar baz bat "#),
(r#"%W[foo\ bar baz\ bat]"#, r#"%W foo bar baz bat "#),
(r#"%w[foo[bar baz]qux]"#, r#"%w foo[bar baz]qux "#),
(
"# test\n# test\n# {ActiveRecord::Base#save!}[rdoc-ref:Persistence#save!]\n%w[flex px-2.5]",
" \n \n \n%w flex px-2.5 "
),
(r#""foo # bar""#, r#""foo # bar""#),
(r#"'foo # bar'"#, r#"'foo # bar'"#),
(
r#"def call = tag.span "Foo", class: %w[rounded-full h-0.75 w-0.75]"#,
r#"def call = tag.span "Foo", class: %w rounded-full h-0.75 w-0.75 "#
),
(r#"%w[foo ' bar]"#, r#"%w foo ' bar "#),
(r#"%w[foo " bar]"#, r#"%w foo " bar "#),
(r#"%W[foo ' bar]"#, r#"%W foo ' bar "#),
(r#"%W[foo " bar]"#, r#"%W foo " bar "#),
(r#"%p foo ' bar "#, r#"%p foo ' bar "#),
(r#"%p foo " bar "#, r#"%p foo " bar "#),
(
"%p has a ' quote\n# this should be removed\n%p has a ' quote",
"%p has a ' quote\n \n%p has a ' quote"
),
(
"%p has a \" quote\n# this should be removed\n%p has a \" quote",
"%p has a \" quote\n \n%p has a \" quote"
),
(
"%w#this text is kept# # this text is not",
"%w this text is kept ",
),
] {
Ruby::test(input, expected);
}
}
#[test]
fn test_ruby_extraction() {
for (input, expected) in [
("%w[flex px-2.5]", vec!["flex", "px-2.5"]),
("%w[px-2.5 flex]", vec!["flex", "px-2.5"]),
("%w[2xl:flex]", vec!["2xl:flex"]),
(
"%w[flex data-[state=pending]:bg-[#0088cc] flex-col]",
vec!["flex", "data-[state=pending]:bg-[#0088cc]", "flex-col"],
),
("%w{flex px-2.5}", vec!["flex", "px-2.5"]),
("%w{px-2.5 flex}", vec!["flex", "px-2.5"]),
("%w{2xl:flex}", vec!["2xl:flex"]),
(
"%w{flex data-[state=pending]:bg-(--my-color) flex-col}",
vec!["flex", "data-[state=pending]:bg-(--my-color)", "flex-col"],
),
("%w(flex px-2.5)", vec!["flex", "px-2.5"]),
("%w(px-2.5 flex)", vec!["flex", "px-2.5"]),
("%w(2xl:flex)", vec!["2xl:flex"]),
(
"%w(flex data-[state=pending]:bg-(--my-color) flex-col)",
vec!["flex", "data-[state=pending]:bg-(--my-color)", "flex-col"],
),
(
"# test\n# test\n# {ActiveRecord::Base#save!}[rdoc-ref:Persistence#save!]\n%w[flex px-2.5]",
vec!["flex", "px-2.5"],
),
(r#""foo # bar""#, vec!["foo", "bar"]),
(r#"'foo # bar'"#, vec!["foo", "bar"]),
(r#"%w[foo ' bar]"#, vec!["foo", "bar"]),
] {
Ruby::test_extract_contains(input, expected);
}
}
#[test]
fn test_embedded_slim_extraction() {
let input = r#"
class QweComponent < ApplicationComponent
slim_template <<~SLIM
button.rounded-full.bg-red-500
| Some text
button.rounded-full(
class="flex"
)
| Some text
SLIM
end
"#;
Ruby::test_extract_contains(input, vec!["rounded-full", "bg-red-500", "flex"]);
let input = r#"
class QweComponent < ApplicationComponent
svelte_template <<~HTML
<div class:flex="true"></div>
HTML
end
"#;
Ruby::test_extract_contains(input, vec!["flex"]);
let input = r#"
class QweComponent < ApplicationComponent
slim_template <<~SLIM
button.z-1.z-2
| Some text
SLIM
end
class QweComponent < ApplicationComponent
svelte_template <<~HTML
<div class:z-3="true"></div>
HTML
end
"#;
Ruby::test_extract_contains(input, vec!["z-1", "z-2", "z-3"]);
}
#[test]
fn test_skip_comments() {
let input = r#"
# From activerecord-8.1.1/lib/active_record/errors.rb:147
# Rails uses RDoc cross-reference syntax in inline documentation:
# {ActiveRecord::Base#save!}[rdoc-ref:Persistence#save!]
"#;
Ruby::test_extract_exact(input, vec![]);
}
#[test]
fn test_strict_locals() {
let input = r#"
<%# locals: (css: "text-amber-600") %>
<% more_css = "text-sky-500" %>
<p class="text-green-500">
In a partial
</p>
<p class="<%= css %>">
In a partial using explicit local variables
</p>
<p class="<%= more_css %>">
In a partial using explicit local variables
</p>
"#;
Ruby::test_extract_contains(
input,
vec!["text-amber-600", "text-sky-500", "text-green-500"],
);
}
}