sphinx_ultra/directives/validation/
parser.rs1use super::{ParsedDirective, ParsedRole, SourceLocation};
4use lazy_static::lazy_static;
5use regex::Regex;
6use std::collections::HashMap;
7
8lazy_static! {
9 static ref DIRECTIVE_REGEX: Regex = Regex::new(
11 r"(?m)^\.\. ([a-zA-Z][a-zA-Z0-9_-]*)::(.*?)$"
12 ).unwrap();
13
14 static ref OPTION_REGEX: Regex = Regex::new(
16 r"(?m)^\s+:([a-zA-Z][a-zA-Z0-9_-]*): ?(.*?)$"
17 ).unwrap();
18
19 static ref ROLE_REGEX: Regex = Regex::new(
21 r":([a-zA-Z][a-zA-Z0-9_-]*):(`[^`]+`|[^\s]+)"
22 ).unwrap();
23
24 static ref ROLE_WITH_TEXT_REGEX: Regex = Regex::new(
26 r"`([^<]+)<([^>]+)>`"
27 ).unwrap();
28}
29
/// Extracts directives and roles from reStructuredText content.
///
/// The parser itself is stateless apart from the name of the file being
/// parsed, so it is cheap to clone and safe to print in diagnostics.
#[derive(Debug, Clone)]
pub struct DirectiveRoleParser {
    /// File name copied into the `SourceLocation` of every parsed item.
    source_file: String,
}
35
36impl DirectiveRoleParser {
37 pub fn new(source_file: String) -> Self {
39 Self { source_file }
40 }
41
42 pub fn extract_directives(&self, content: &str) -> Vec<ParsedDirective> {
44 let mut directives = Vec::new();
45 let lines: Vec<&str> = content.lines().collect();
46
47 for (line_num, line) in lines.iter().enumerate() {
48 if let Some(captures) = DIRECTIVE_REGEX.captures(line) {
49 let directive_name = captures.get(1).unwrap().as_str().to_string();
50 let args_str = captures.get(2).unwrap().as_str().trim();
51
52 let arguments: Vec<String> = if args_str.is_empty() {
54 Vec::new()
55 } else {
56 args_str.split_whitespace().map(|s| s.to_string()).collect()
57 };
58
59 let (options, content, _content_end_line) =
61 self.parse_directive_body(&lines, line_num + 1);
62
63 let directive = ParsedDirective {
64 name: directive_name,
65 arguments,
66 options,
67 content,
68 location: SourceLocation {
69 file: self.source_file.clone(),
70 line: line_num + 1,
71 column: line.find("..").unwrap_or(0) + 1,
72 },
73 };
74
75 directives.push(directive);
76 }
77 }
78
79 directives
80 }
81
82 pub fn extract_roles(&self, content: &str) -> Vec<ParsedRole> {
84 let mut roles = Vec::new();
85 let lines: Vec<&str> = content.lines().collect();
86
87 for (line_num, line) in lines.iter().enumerate() {
88 for captures in ROLE_REGEX.captures_iter(line) {
89 let role_name = captures.get(1).unwrap().as_str().to_string();
90 let role_content = captures.get(2).unwrap().as_str();
91
92 let role_content = if role_content.starts_with('`') && role_content.ends_with('`') {
94 &role_content[1..role_content.len() - 1]
95 } else {
96 role_content
97 };
98
99 let (target, display_text) = if role_content.contains('<')
101 && role_content.contains('>')
102 {
103 if let Some(angle_start) = role_content.rfind('<') {
105 if let Some(angle_end) = role_content.rfind('>') {
106 if angle_start < angle_end {
107 let display = role_content[..angle_start].trim().to_string();
108 let target = role_content[angle_start + 1..angle_end].to_string();
109 (
110 target,
111 if display.is_empty() {
112 None
113 } else {
114 Some(display)
115 },
116 )
117 } else {
118 (role_content.to_string(), None)
119 }
120 } else {
121 (role_content.to_string(), None)
122 }
123 } else {
124 (role_content.to_string(), None)
125 }
126 } else {
127 (role_content.to_string(), None)
128 };
129
130 let role = ParsedRole {
131 name: role_name,
132 target,
133 display_text,
134 location: SourceLocation {
135 file: self.source_file.clone(),
136 line: line_num + 1,
137 column: line.find(':').unwrap_or(0) + 1,
138 },
139 };
140
141 roles.push(role);
142 }
143 }
144
145 roles
146 }
147
148 fn parse_directive_body(
150 &self,
151 lines: &[&str],
152 start_line: usize,
153 ) -> (HashMap<String, String>, String, usize) {
154 let mut options = HashMap::new();
155 let mut content_lines = Vec::new();
156 let mut current_line = start_line;
157 let mut in_content = false;
158
159 while current_line < lines.len() {
160 let line = lines[current_line];
161
162 if line.trim().is_empty() {
164 if in_content {
165 content_lines.push(String::new());
166 }
167 current_line += 1;
168 continue;
169 }
170
171 if let Some(option_captures) = OPTION_REGEX.captures(line) {
173 if !in_content {
174 let option_name = option_captures.get(1).unwrap().as_str().to_string();
175 let option_value = option_captures.get(2).unwrap().as_str().to_string();
176 options.insert(option_name, option_value);
177 current_line += 1;
178 continue;
179 }
180 }
181
182 if line.starts_with(" ") || line.starts_with('\t') {
184 in_content = true;
185 let content_line = if let Some(stripped) = line.strip_prefix(" ") {
187 stripped
188 } else if let Some(stripped) = line.strip_prefix('\t') {
189 stripped
190 } else {
191 line
192 };
193 content_lines.push(content_line.to_string());
194 current_line += 1;
195 continue;
196 }
197
198 if in_content {
200 break;
201 }
202
203 if !line.starts_with(':') {
205 break;
206 }
207
208 current_line += 1;
209 }
210
211 let content = content_lines.join("\n");
212 (options, content, current_line)
213 }
214
215 pub fn parse_content(&self, content: &str) -> (Vec<ParsedDirective>, Vec<ParsedRole>) {
217 let directives = self.extract_directives(content);
218 let roles = self.extract_roles(content);
219 (directives, roles)
220 }
221
222 pub fn is_directive_line(line: &str) -> bool {
224 DIRECTIVE_REGEX.is_match(line)
225 }
226
227 pub fn contains_role(text: &str) -> bool {
229 ROLE_REGEX.is_match(text)
230 }
231
232 pub fn count_directives(content: &str) -> usize {
234 DIRECTIVE_REGEX.find_iter(content).count()
235 }
236
237 pub fn count_roles(content: &str) -> usize {
239 ROLE_REGEX.find_iter(content).count()
240 }
241}
242
/// Counters collected while parsing directives and roles.
///
/// Equality compares every counter and both per-name maps, which makes it
/// possible to assert on whole statistics snapshots.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ParseStatistics {
    /// Total number of directives recorded.
    pub directive_count: usize,
    /// Total number of roles recorded.
    pub role_count: usize,
    /// Number of recorded directives per directive name.
    pub directives_by_type: HashMap<String, usize>,
    /// Number of recorded roles per role name.
    pub roles_by_type: HashMap<String, usize>,
    /// Line count stored by the most recent `set_lines_processed` call.
    pub lines_processed: usize,
}
257
258impl ParseStatistics {
259 pub fn new() -> Self {
261 Self::default()
262 }
263
264 pub fn record_directive(&mut self, directive: &ParsedDirective) {
266 self.directive_count += 1;
267 *self
268 .directives_by_type
269 .entry(directive.name.clone())
270 .or_insert(0) += 1;
271 }
272
273 pub fn record_role(&mut self, role: &ParsedRole) {
275 self.role_count += 1;
276 *self.roles_by_type.entry(role.name.clone()).or_insert(0) += 1;
277 }
278
279 pub fn set_lines_processed(&mut self, lines: usize) {
281 self.lines_processed = lines;
282 }
283
284 pub fn total_items(&self) -> usize {
286 self.directive_count + self.role_count
287 }
288}
289
290pub struct StatisticalDirectiveRoleParser {
292 parser: DirectiveRoleParser,
293 statistics: ParseStatistics,
294}
295
296impl StatisticalDirectiveRoleParser {
297 pub fn new(source_file: String) -> Self {
299 Self {
300 parser: DirectiveRoleParser::new(source_file),
301 statistics: ParseStatistics::new(),
302 }
303 }
304
305 pub fn parse_with_statistics(
307 &mut self,
308 content: &str,
309 ) -> (Vec<ParsedDirective>, Vec<ParsedRole>) {
310 let (directives, roles) = self.parser.parse_content(content);
311
312 self.statistics.set_lines_processed(content.lines().count());
314
315 for directive in &directives {
316 self.statistics.record_directive(directive);
317 }
318
319 for role in &roles {
320 self.statistics.record_role(role);
321 }
322
323 (directives, roles)
324 }
325
326 pub fn statistics(&self) -> &ParseStatistics {
328 &self.statistics
329 }
330
331 pub fn reset_statistics(&mut self) {
333 self.statistics = ParseStatistics::new();
334 }
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// Two directives in one document: one with inline text and a body, one
    /// with an argument, two options and a body.
    #[test]
    fn test_directive_parsing() {
        let parser = DirectiveRoleParser::new("test.rst".to_string());

        let content = r#"
.. note:: This is a note

 This is the content of the note.
 It can span multiple lines.

.. code-block:: python
 :linenos:
 :caption: Example code

 def hello():
 print("Hello, world!")
"#;

        let directives = parser.extract_directives(content);
        assert_eq!(directives.len(), 2);

        // The text after `::` is split on whitespace, one argument per word.
        assert_eq!(directives[0].name, "note");
        assert_eq!(directives[0].arguments.len(), 4);
        assert_eq!(directives[0].arguments[0], "This");
        assert_eq!(directives[0].arguments[1], "is");
        assert_eq!(directives[0].arguments[2], "a");
        assert_eq!(directives[0].arguments[3], "note");
        assert!(directives[0].content.contains("content of the note"));

        // Options directly below the marker are collected by name; a flag
        // option such as `:linenos:` is present with an empty value.
        assert_eq!(directives[1].name, "code-block");
        assert_eq!(directives[1].arguments.len(), 1);
        assert_eq!(directives[1].arguments[0], "python");
        assert_eq!(directives[1].options.len(), 2);
        assert!(directives[1].options.contains_key("linenos"));
        assert_eq!(
            directives[1].options.get("caption"),
            Some(&"Example code".to_string())
        );
        assert!(directives[1].content.contains("def hello()"));
    }

    /// Roles with a plain target, with LaTeX text, and with an explicit
    /// `title <target>` form.
    #[test]
    fn test_role_parsing() {
        let parser = DirectiveRoleParser::new("test.rst".to_string());

        let content = r#"
See :doc:`installation` for setup instructions.
Use :ref:`advanced-config` for configuration.
Download the :download:`example.pdf` file.
For math, use :math:`x = \frac{a}{b}`.
See :doc:`Custom Title <installation>` for details.
"#;

        let roles = parser.extract_roles(content);
        assert_eq!(roles.len(), 5);

        // A plain target has no display text.
        assert_eq!(roles[0].name, "doc");
        assert_eq!(roles[0].target, "installation");
        assert_eq!(roles[0].display_text, None);

        assert_eq!(roles[1].name, "ref");
        assert_eq!(roles[1].target, "advanced-config");

        assert_eq!(roles[2].name, "download");
        assert_eq!(roles[2].target, "example.pdf");

        // Spaces and braces inside the backticks belong to the target.
        assert_eq!(roles[3].name, "math");
        assert_eq!(roles[3].target, r"x = \frac{a}{b}");

        // `title <target>` is split into display text and target.
        assert_eq!(roles[4].name, "doc");
        assert_eq!(roles[4].target, "installation");
        assert_eq!(roles[4].display_text, Some("Custom Title".to_string()));
    }

    /// The statistics wrapper returns the parsed items and counts them, both
    /// in total and per name.
    #[test]
    fn test_statistical_parser() {
        let mut parser = StatisticalDirectiveRoleParser::new("test.rst".to_string());

        let content = r#"
.. note:: Test note

 Content here.

See :doc:`test` and :ref:`section`.
"#;

        let (directives, roles) = parser.parse_with_statistics(content);

        assert_eq!(directives.len(), 1);
        assert_eq!(roles.len(), 2);

        let stats = parser.statistics();
        assert_eq!(stats.directive_count, 1);
        assert_eq!(stats.role_count, 2);
        assert_eq!(stats.total_items(), 3);
        assert_eq!(stats.directives_by_type.get("note"), Some(&1));
        assert_eq!(stats.roles_by_type.get("doc"), Some(&1));
        assert_eq!(stats.roles_by_type.get("ref"), Some(&1));
    }

    /// The static helpers detect and count directives and roles without
    /// needing a parser instance.
    #[test]
    fn test_utility_functions() {
        assert!(DirectiveRoleParser::is_directive_line(".. note:: Test"));
        assert!(!DirectiveRoleParser::is_directive_line(
            "This is not a directive"
        ));

        assert!(DirectiveRoleParser::contains_role("See :doc:`test` here"));
        assert!(!DirectiveRoleParser::contains_role("No roles here"));

        // Counting works on multi-line content: two markers, two roles.
        let content = ".. note:: Test\n.. warning:: Another\nSee :doc:`test` and :ref:`section`.";
        assert_eq!(DirectiveRoleParser::count_directives(content), 2);
        assert_eq!(DirectiveRoleParser::count_roles(content), 2);
    }

    /// Several `:name: value` options followed by a blank line and a body.
    #[test]
    fn test_directive_options_parsing() {
        let parser = DirectiveRoleParser::new("test.rst".to_string());

        let content = r#"
.. figure:: image.png
 :width: 100px
 :alt: Test image
 :align: center

 This is the caption.
"#;

        let directives = parser.extract_directives(content);
        assert_eq!(directives.len(), 1);

        let directive = &directives[0];
        assert_eq!(directive.name, "figure");
        assert_eq!(directive.arguments.len(), 1);
        assert_eq!(directive.arguments[0], "image.png");
        assert_eq!(directive.options.len(), 3);
        assert_eq!(directive.options.get("width"), Some(&"100px".to_string()));
        assert_eq!(
            directive.options.get("alt"),
            Some(&"Test image".to_string())
        );
        assert_eq!(directive.options.get("align"), Some(&"center".to_string()));
        // Compared after trimming, so surrounding whitespace is not asserted.
        assert_eq!(directive.content.trim(), "This is the caption.");
    }
}