1use crate::domains::{CrossReference, ReferenceLocation, ReferenceType};
2use lazy_static::lazy_static;
3use regex::Regex;
8use std::collections::HashMap;
9
10lazy_static! {
11 static ref CROSS_REF_REGEX: Regex = Regex::new(
14 r":([a-zA-Z][a-zA-Z0-9_-]*):(`[^`]+`|[^\s]+)"
15 ).unwrap();
16
17 static ref TARGET_REGEX: Regex = Regex::new(
20 r"`([^<>]+?)(?:\s*<([^<>]+?)>)?`"
21 ).unwrap();
22}
23
24pub struct ReferenceParser {
26 role_mapping: HashMap<String, ReferenceType>,
28}
29
30impl Default for ReferenceParser {
31 fn default() -> Self {
32 Self::new()
33 }
34}
35
36impl ReferenceParser {
37 pub fn new() -> Self {
39 let mut role_mapping = HashMap::new();
40
41 role_mapping.insert("doc".to_string(), ReferenceType::Document);
43 role_mapping.insert("ref".to_string(), ReferenceType::Section);
44
45 role_mapping.insert("func".to_string(), ReferenceType::Function);
47 role_mapping.insert("class".to_string(), ReferenceType::Class);
48 role_mapping.insert("mod".to_string(), ReferenceType::Module);
49 role_mapping.insert("meth".to_string(), ReferenceType::Method);
50 role_mapping.insert("attr".to_string(), ReferenceType::Attribute);
51 role_mapping.insert("data".to_string(), ReferenceType::Data);
52 role_mapping.insert("exc".to_string(), ReferenceType::Exception);
53
54 role_mapping.insert(
56 "numref".to_string(),
57 ReferenceType::Custom("numref".to_string()),
58 );
59 role_mapping.insert(
60 "envvar".to_string(),
61 ReferenceType::Custom("envvar".to_string()),
62 );
63 role_mapping.insert(
64 "option".to_string(),
65 ReferenceType::Custom("option".to_string()),
66 );
67
68 Self { role_mapping }
69 }
70
71 pub fn register_role(&mut self, role: String, ref_type: ReferenceType) {
73 self.role_mapping.insert(role, ref_type);
74 }
75
76 pub fn parse_content(
78 &self,
79 content: &str,
80 docname: &str,
81 source_path: Option<String>,
82 ) -> Vec<CrossReference> {
83 let mut references = Vec::new();
84
85 for (line_num, line) in content.lines().enumerate() {
86 let line_refs = self.parse_line(line, docname, line_num + 1, source_path.clone());
87 references.extend(line_refs);
88 }
89
90 references
91 }
92
93 pub fn parse_line(
95 &self,
96 line: &str,
97 docname: &str,
98 line_num: usize,
99 source_path: Option<String>,
100 ) -> Vec<CrossReference> {
101 let mut references = Vec::new();
102
103 for cap in CROSS_REF_REGEX.captures_iter(line) {
104 let role = cap.get(1).unwrap().as_str();
105 let target_text = cap.get(2).unwrap().as_str();
106
107 if let Some(cross_ref) = self.parse_reference(
108 role,
109 target_text,
110 docname,
111 line_num,
112 cap.get(0).unwrap().start(),
113 source_path.clone(),
114 ) {
115 references.push(cross_ref);
116 }
117 }
118
119 references
120 }
121
122 fn parse_reference(
124 &self,
125 role: &str,
126 target_text: &str,
127 docname: &str,
128 line_num: usize,
129 column: usize,
130 source_path: Option<String>,
131 ) -> Option<CrossReference> {
132 let ref_type = self
133 .role_mapping
134 .get(role)
135 .cloned()
136 .unwrap_or_else(|| ReferenceType::Custom(role.to_string()));
137
138 let (target, display_text) = self.extract_target_and_display(target_text);
139
140 let is_external = self.is_external_reference(&target, &ref_type);
142
143 Some(CrossReference {
144 ref_type,
145 target,
146 display_text,
147 source_location: ReferenceLocation {
148 docname: docname.to_string(),
149 lineno: Some(line_num),
150 column: Some(column),
151 source_path,
152 },
153 is_external,
154 })
155 }
156
157 fn extract_target_and_display(&self, target_text: &str) -> (String, Option<String>) {
159 if target_text.starts_with('`') && target_text.ends_with('`') {
161 if let Some(cap) = TARGET_REGEX.captures(target_text) {
162 let target = cap.get(1).unwrap().as_str().trim().to_string();
163 let display_text = cap.get(2).map(|m| m.as_str().trim().to_string());
164 return (target, display_text);
165 }
166 }
167
168 (target_text.trim().to_string(), None)
170 }
171
172 fn is_external_reference(&self, target: &str, ref_type: &ReferenceType) -> bool {
174 match ref_type {
175 ReferenceType::Document => {
176 target.starts_with("http://")
178 || target.starts_with("https://")
179 || target.starts_with("file://")
180 }
181 ReferenceType::Function | ReferenceType::Class | ReferenceType::Module => {
182 target.starts_with("builtins.")
184 || target.starts_with("typing.")
185 || target.starts_with("collections.")
186 || target.starts_with("pathlib.")
187 || target.starts_with("os.")
188 || target.starts_with("sys.")
189 || target.starts_with("json.")
190 || target.starts_with("re.")
191 || target.starts_with("datetime.")
192 || target.starts_with("urllib.")
193 || target.starts_with("http.")
194 }
195 _ => false,
196 }
197 }
198
199 pub fn get_reference_stats(&self, references: &[CrossReference]) -> HashMap<String, usize> {
201 let mut stats = HashMap::new();
202
203 for reference in references {
204 let key = match &reference.ref_type {
205 ReferenceType::Custom(name) => name.clone(),
206 _ => format!("{:?}", reference.ref_type),
207 };
208 *stats.entry(key).or_insert(0) += 1;
209 }
210
211 stats
212 }
213}
214
215#[cfg(test)]
216mod tests {
217 use super::*;
218
219 #[test]
220 fn test_reference_parser_creation() {
221 let parser = ReferenceParser::new();
222 assert!(parser.role_mapping.contains_key("doc"));
223 assert!(parser.role_mapping.contains_key("func"));
224 assert!(parser.role_mapping.contains_key("class"));
225 }
226
227 #[test]
228 fn test_simple_reference_parsing() {
229 let parser = ReferenceParser::new();
230 let content = "See :doc:`installation` for details.";
231
232 let refs = parser.parse_content(content, "index", None);
233 assert_eq!(refs.len(), 1);
234
235 let ref_obj = &refs[0];
236 assert_eq!(ref_obj.ref_type, ReferenceType::Document);
237 assert_eq!(ref_obj.target, "installation");
238 assert_eq!(ref_obj.display_text, None);
239 assert!(!ref_obj.is_external);
240 }
241
242 #[test]
243 fn test_reference_with_display_text() {
244 let parser = ReferenceParser::new();
245 let content = "See :doc:`Installation Guide <installation>` for details.";
246
247 let refs = parser.parse_content(content, "index", None);
248 assert_eq!(refs.len(), 1);
249
250 let ref_obj = &refs[0];
251 assert_eq!(ref_obj.target, "Installation Guide");
252 assert_eq!(ref_obj.display_text, Some("installation".to_string()));
253 }
254
255 #[test]
256 fn test_python_function_reference() {
257 let parser = ReferenceParser::new();
258 let content = "Use :func:`mymodule.my_function` to process data.";
259
260 let refs = parser.parse_content(content, "api", None);
261 assert_eq!(refs.len(), 1);
262
263 let ref_obj = &refs[0];
264 assert_eq!(ref_obj.ref_type, ReferenceType::Function);
265 assert_eq!(ref_obj.target, "mymodule.my_function");
266 assert!(!ref_obj.is_external);
267 }
268
269 #[test]
270 fn test_external_reference_detection() {
271 let parser = ReferenceParser::new();
272
273 let content1 = "Use :func:`os.path.join` for paths.";
275 let refs1 = parser.parse_content(content1, "test", None);
276 assert_eq!(refs1.len(), 1);
277 assert!(refs1[0].is_external);
278
279 let content2 = "See :doc:`https://docs.python.org/3/` for more.";
281 let refs2 = parser.parse_content(content2, "test", None);
282 assert_eq!(refs2.len(), 1);
283 assert!(refs2[0].is_external);
284 }
285
286 #[test]
287 fn test_multiple_references_in_line() {
288 let parser = ReferenceParser::new();
289 let content = "Use :func:`func1` and :class:`MyClass` together.";
290
291 let refs = parser.parse_content(content, "test", None);
292 assert_eq!(refs.len(), 2);
293
294 assert_eq!(refs[0].ref_type, ReferenceType::Function);
295 assert_eq!(refs[0].target, "func1");
296
297 assert_eq!(refs[1].ref_type, ReferenceType::Class);
298 assert_eq!(refs[1].target, "MyClass");
299 }
300
301 #[test]
302 fn test_section_reference() {
303 let parser = ReferenceParser::new();
304 let content = "See :ref:`installation-section` for setup instructions.";
305
306 let refs = parser.parse_content(content, "guide", None);
307 assert_eq!(refs.len(), 1);
308
309 let ref_obj = &refs[0];
310 assert_eq!(ref_obj.ref_type, ReferenceType::Section);
311 assert_eq!(ref_obj.target, "installation-section");
312 }
313
314 #[test]
315 fn test_custom_role() {
316 let mut parser = ReferenceParser::new();
317 parser.register_role(
318 "myref".to_string(),
319 ReferenceType::Custom("myref".to_string()),
320 );
321
322 let content = "See :myref:`custom-target` for details.";
323 let refs = parser.parse_content(content, "test", None);
324 assert_eq!(refs.len(), 1);
325
326 let ref_obj = &refs[0];
327 assert_eq!(ref_obj.ref_type, ReferenceType::Custom("myref".to_string()));
328 assert_eq!(ref_obj.target, "custom-target");
329 }
330
331 #[test]
332 fn test_multiline_content() {
333 let parser = ReferenceParser::new();
334 let content = r#"This is line 1 with :doc:`doc1`.
335This is line 2 with :func:`function1`.
336This is line 3 with :ref:`section1`."#;
337
338 let refs = parser.parse_content(content, "test", None);
339 assert_eq!(refs.len(), 3);
340
341 assert_eq!(refs[0].source_location.lineno, Some(1));
343 assert_eq!(refs[1].source_location.lineno, Some(2));
344 assert_eq!(refs[2].source_location.lineno, Some(3));
345 }
346
347 #[test]
348 fn test_reference_stats() {
349 let parser = ReferenceParser::new();
350 let content = r#"Use :doc:`doc1` and :doc:`doc2`.
351Also :func:`func1` and :class:`class1`."#;
352
353 let refs = parser.parse_content(content, "test", None);
354 let stats = parser.get_reference_stats(&refs);
355
356 assert_eq!(stats.get("Document"), Some(&2));
357 assert_eq!(stats.get("Function"), Some(&1));
358 assert_eq!(stats.get("Class"), Some(&1));
359 }
360}