sphinx_ultra/domains/
parser.rs

1use crate::domains::{CrossReference, ReferenceLocation, ReferenceType};
2use lazy_static::lazy_static;
3/// Reference Parser for extracting cross-references from RST content
4///
5/// This module provides functionality to parse RST content and extract
6/// cross-references like :doc:, :ref:, :func:, :class:, etc.
7use regex::Regex;
8use std::collections::HashMap;
9
10lazy_static! {
11    /// Regex for matching Sphinx cross-references
12    /// Matches patterns like :ref:`target`, :doc:`target`, :func:`module.function`
13    static ref CROSS_REF_REGEX: Regex = Regex::new(
14        r":([a-zA-Z][a-zA-Z0-9_-]*):(`[^`]+`|[^\s]+)"
15    ).unwrap();
16
17    /// Regex for extracting target and display text from backtick format
18    /// Matches `target <display>` or just `target`
19    static ref TARGET_REGEX: Regex = Regex::new(
20        r"`([^<>]+?)(?:\s*<([^<>]+?)>)?`"
21    ).unwrap();
22}
23
24/// Parser for extracting cross-references from RST content
25pub struct ReferenceParser {
26    /// Map of role names to reference types
27    role_mapping: HashMap<String, ReferenceType>,
28}
29
30impl Default for ReferenceParser {
31    fn default() -> Self {
32        Self::new()
33    }
34}
35
36impl ReferenceParser {
37    /// Create a new reference parser
38    pub fn new() -> Self {
39        let mut role_mapping = HashMap::new();
40
41        // Standard RST roles
42        role_mapping.insert("doc".to_string(), ReferenceType::Document);
43        role_mapping.insert("ref".to_string(), ReferenceType::Section);
44
45        // Python domain roles
46        role_mapping.insert("func".to_string(), ReferenceType::Function);
47        role_mapping.insert("class".to_string(), ReferenceType::Class);
48        role_mapping.insert("mod".to_string(), ReferenceType::Module);
49        role_mapping.insert("meth".to_string(), ReferenceType::Method);
50        role_mapping.insert("attr".to_string(), ReferenceType::Attribute);
51        role_mapping.insert("data".to_string(), ReferenceType::Data);
52        role_mapping.insert("exc".to_string(), ReferenceType::Exception);
53
54        // Other common roles
55        role_mapping.insert(
56            "numref".to_string(),
57            ReferenceType::Custom("numref".to_string()),
58        );
59        role_mapping.insert(
60            "envvar".to_string(),
61            ReferenceType::Custom("envvar".to_string()),
62        );
63        role_mapping.insert(
64            "option".to_string(),
65            ReferenceType::Custom("option".to_string()),
66        );
67
68        Self { role_mapping }
69    }
70
71    /// Register a custom role mapping
72    pub fn register_role(&mut self, role: String, ref_type: ReferenceType) {
73        self.role_mapping.insert(role, ref_type);
74    }
75
76    /// Parse content and extract all cross-references
77    pub fn parse_content(
78        &self,
79        content: &str,
80        docname: &str,
81        source_path: Option<String>,
82    ) -> Vec<CrossReference> {
83        let mut references = Vec::new();
84
85        for (line_num, line) in content.lines().enumerate() {
86            let line_refs = self.parse_line(line, docname, line_num + 1, source_path.clone());
87            references.extend(line_refs);
88        }
89
90        references
91    }
92
93    /// Parse a single line and extract cross-references
94    pub fn parse_line(
95        &self,
96        line: &str,
97        docname: &str,
98        line_num: usize,
99        source_path: Option<String>,
100    ) -> Vec<CrossReference> {
101        let mut references = Vec::new();
102
103        for cap in CROSS_REF_REGEX.captures_iter(line) {
104            let role = cap.get(1).unwrap().as_str();
105            let target_text = cap.get(2).unwrap().as_str();
106
107            if let Some(cross_ref) = self.parse_reference(
108                role,
109                target_text,
110                docname,
111                line_num,
112                cap.get(0).unwrap().start(),
113                source_path.clone(),
114            ) {
115                references.push(cross_ref);
116            }
117        }
118
119        references
120    }
121
122    /// Parse a single reference
123    fn parse_reference(
124        &self,
125        role: &str,
126        target_text: &str,
127        docname: &str,
128        line_num: usize,
129        column: usize,
130        source_path: Option<String>,
131    ) -> Option<CrossReference> {
132        let ref_type = self
133            .role_mapping
134            .get(role)
135            .cloned()
136            .unwrap_or_else(|| ReferenceType::Custom(role.to_string()));
137
138        let (target, display_text) = self.extract_target_and_display(target_text);
139
140        // Check if this might be an external reference
141        let is_external = self.is_external_reference(&target, &ref_type);
142
143        Some(CrossReference {
144            ref_type,
145            target,
146            display_text,
147            source_location: ReferenceLocation {
148                docname: docname.to_string(),
149                lineno: Some(line_num),
150                column: Some(column),
151                source_path,
152            },
153            is_external,
154        })
155    }
156
157    /// Extract target and display text from target string
158    fn extract_target_and_display(&self, target_text: &str) -> (String, Option<String>) {
159        // Handle backtick format
160        if target_text.starts_with('`') && target_text.ends_with('`') {
161            if let Some(cap) = TARGET_REGEX.captures(target_text) {
162                let target = cap.get(1).unwrap().as_str().trim().to_string();
163                let display_text = cap.get(2).map(|m| m.as_str().trim().to_string());
164                return (target, display_text);
165            }
166        }
167
168        // Simple format without backticks
169        (target_text.trim().to_string(), None)
170    }
171
172    /// Determine if a reference is external
173    fn is_external_reference(&self, target: &str, ref_type: &ReferenceType) -> bool {
174        match ref_type {
175            ReferenceType::Document => {
176                // External if it contains a protocol or starts with http
177                target.starts_with("http://")
178                    || target.starts_with("https://")
179                    || target.starts_with("file://")
180            }
181            ReferenceType::Function | ReferenceType::Class | ReferenceType::Module => {
182                // External if it starts with a known external library
183                target.starts_with("builtins.")
184                    || target.starts_with("typing.")
185                    || target.starts_with("collections.")
186                    || target.starts_with("pathlib.")
187                    || target.starts_with("os.")
188                    || target.starts_with("sys.")
189                    || target.starts_with("json.")
190                    || target.starts_with("re.")
191                    || target.starts_with("datetime.")
192                    || target.starts_with("urllib.")
193                    || target.starts_with("http.")
194            }
195            _ => false,
196        }
197    }
198
199    /// Get statistics about parsed references
200    pub fn get_reference_stats(&self, references: &[CrossReference]) -> HashMap<String, usize> {
201        let mut stats = HashMap::new();
202
203        for reference in references {
204            let key = match &reference.ref_type {
205                ReferenceType::Custom(name) => name.clone(),
206                _ => format!("{:?}", reference.ref_type),
207            };
208            *stats.entry(key).or_insert(0) += 1;
209        }
210
211        stats
212    }
213}
214
215#[cfg(test)]
216mod tests {
217    use super::*;
218
219    #[test]
220    fn test_reference_parser_creation() {
221        let parser = ReferenceParser::new();
222        assert!(parser.role_mapping.contains_key("doc"));
223        assert!(parser.role_mapping.contains_key("func"));
224        assert!(parser.role_mapping.contains_key("class"));
225    }
226
227    #[test]
228    fn test_simple_reference_parsing() {
229        let parser = ReferenceParser::new();
230        let content = "See :doc:`installation` for details.";
231
232        let refs = parser.parse_content(content, "index", None);
233        assert_eq!(refs.len(), 1);
234
235        let ref_obj = &refs[0];
236        assert_eq!(ref_obj.ref_type, ReferenceType::Document);
237        assert_eq!(ref_obj.target, "installation");
238        assert_eq!(ref_obj.display_text, None);
239        assert!(!ref_obj.is_external);
240    }
241
242    #[test]
243    fn test_reference_with_display_text() {
244        let parser = ReferenceParser::new();
245        let content = "See :doc:`Installation Guide <installation>` for details.";
246
247        let refs = parser.parse_content(content, "index", None);
248        assert_eq!(refs.len(), 1);
249
250        let ref_obj = &refs[0];
251        assert_eq!(ref_obj.target, "Installation Guide");
252        assert_eq!(ref_obj.display_text, Some("installation".to_string()));
253    }
254
255    #[test]
256    fn test_python_function_reference() {
257        let parser = ReferenceParser::new();
258        let content = "Use :func:`mymodule.my_function` to process data.";
259
260        let refs = parser.parse_content(content, "api", None);
261        assert_eq!(refs.len(), 1);
262
263        let ref_obj = &refs[0];
264        assert_eq!(ref_obj.ref_type, ReferenceType::Function);
265        assert_eq!(ref_obj.target, "mymodule.my_function");
266        assert!(!ref_obj.is_external);
267    }
268
269    #[test]
270    fn test_external_reference_detection() {
271        let parser = ReferenceParser::new();
272
273        // External Python reference
274        let content1 = "Use :func:`os.path.join` for paths.";
275        let refs1 = parser.parse_content(content1, "test", None);
276        assert_eq!(refs1.len(), 1);
277        assert!(refs1[0].is_external);
278
279        // External document reference
280        let content2 = "See :doc:`https://docs.python.org/3/` for more.";
281        let refs2 = parser.parse_content(content2, "test", None);
282        assert_eq!(refs2.len(), 1);
283        assert!(refs2[0].is_external);
284    }
285
286    #[test]
287    fn test_multiple_references_in_line() {
288        let parser = ReferenceParser::new();
289        let content = "Use :func:`func1` and :class:`MyClass` together.";
290
291        let refs = parser.parse_content(content, "test", None);
292        assert_eq!(refs.len(), 2);
293
294        assert_eq!(refs[0].ref_type, ReferenceType::Function);
295        assert_eq!(refs[0].target, "func1");
296
297        assert_eq!(refs[1].ref_type, ReferenceType::Class);
298        assert_eq!(refs[1].target, "MyClass");
299    }
300
301    #[test]
302    fn test_section_reference() {
303        let parser = ReferenceParser::new();
304        let content = "See :ref:`installation-section` for setup instructions.";
305
306        let refs = parser.parse_content(content, "guide", None);
307        assert_eq!(refs.len(), 1);
308
309        let ref_obj = &refs[0];
310        assert_eq!(ref_obj.ref_type, ReferenceType::Section);
311        assert_eq!(ref_obj.target, "installation-section");
312    }
313
314    #[test]
315    fn test_custom_role() {
316        let mut parser = ReferenceParser::new();
317        parser.register_role(
318            "myref".to_string(),
319            ReferenceType::Custom("myref".to_string()),
320        );
321
322        let content = "See :myref:`custom-target` for details.";
323        let refs = parser.parse_content(content, "test", None);
324        assert_eq!(refs.len(), 1);
325
326        let ref_obj = &refs[0];
327        assert_eq!(ref_obj.ref_type, ReferenceType::Custom("myref".to_string()));
328        assert_eq!(ref_obj.target, "custom-target");
329    }
330
331    #[test]
332    fn test_multiline_content() {
333        let parser = ReferenceParser::new();
334        let content = r#"This is line 1 with :doc:`doc1`.
335This is line 2 with :func:`function1`.
336This is line 3 with :ref:`section1`."#;
337
338        let refs = parser.parse_content(content, "test", None);
339        assert_eq!(refs.len(), 3);
340
341        // Check line numbers
342        assert_eq!(refs[0].source_location.lineno, Some(1));
343        assert_eq!(refs[1].source_location.lineno, Some(2));
344        assert_eq!(refs[2].source_location.lineno, Some(3));
345    }
346
347    #[test]
348    fn test_reference_stats() {
349        let parser = ReferenceParser::new();
350        let content = r#"Use :doc:`doc1` and :doc:`doc2`.
351Also :func:`func1` and :class:`class1`."#;
352
353        let refs = parser.parse_content(content, "test", None);
354        let stats = parser.get_reference_stats(&refs);
355
356        assert_eq!(stats.get("Document"), Some(&2));
357        assert_eq!(stats.get("Function"), Some(&1));
358        assert_eq!(stats.get("Class"), Some(&1));
359    }
360}