sphinx_ultra/builder.rs

use anyhow::Result;
use log::{debug, info};
use rayon::prelude::*;
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};

use crate::cache::BuildCache;
use crate::config::BuildConfig;
use crate::document::Document;
use crate::error::{BuildErrorReport, BuildWarning};
use crate::extensions::{ExtensionLoader, SphinxApp};
use crate::matching;
use crate::parser::Parser;
use crate::utils;

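/// Aggregate statistics for a completed build, as returned by
/// [`SphinxBuilder::build`].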
#[derive(Debug, Clone)]
pub struct BuildStats {
    pub files_processed: usize,
    pub files_skipped: usize,
    pub build_time: Duration,
    pub output_size_mb: f64,
    pub cache_hits: usize,
    pub errors: usize,
    pub warnings: usize,
    pub warning_details: Vec<BuildWarning>,
    pub error_details: Vec<BuildErrorReport>,
}

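/// Orchestrates the full build pipeline: file discovery, parallel parsing and
/// rendering, document validation, index generation, and static-asset copying.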
pub struct SphinxBuilder {
    config: BuildConfig,
    source_dir: PathBuf,
    output_dir: PathBuf,
    cache: BuildCache,
    parser: Parser,
    parallel_jobs: usize,
    incremental: bool,
    warnings: Arc<Mutex<Vec<BuildWarning>>>,
    errors: Arc<Mutex<Vec<BuildErrorReport>>>,
    #[allow(dead_code)]
    sphinx_app: Option<SphinxApp>,
    #[allow(dead_code)]
    extension_loader: ExtensionLoader,
}

impl SphinxBuilder {
    pub fn new(config: BuildConfig, source_dir: PathBuf, output_dir: PathBuf) -> Result<Self> {
        let cache_dir = output_dir.join(".sphinx-ultra-cache");
        let cache = BuildCache::new(cache_dir)?;

        let parser = Parser::new(&config)?;

        let parallel_jobs = config.parallel_jobs.unwrap_or_else(|| {
            std::thread::available_parallelism()
                .map(|n| n.get())
                .unwrap_or(4)
        });

        // Initialize the Sphinx app with extensions
        let mut sphinx_app = SphinxApp::new(config.clone())?;
        let mut extension_loader = ExtensionLoader::new()?;

        // Load configured extensions; failures are logged and skipped, not fatal
        for extension_name in &config.extensions {
            match extension_loader.load_extension(extension_name) {
                Ok(extension) => {
                    if let Err(e) = sphinx_app.add_extension(extension) {
                        log::warn!("Failed to add extension '{}': {}", extension_name, e);
                    }
                }
                Err(e) => {
                    log::warn!("Failed to load extension '{}': {}", extension_name, e);
                }
            }
        }

        Ok(Self {
            config,
            source_dir,
            output_dir,
            cache,
            parser,
            parallel_jobs,
            incremental: false,
            warnings: Arc::new(Mutex::new(Vec::new())),
            errors: Arc::new(Mutex::new(Vec::new())),
            sphinx_app: Some(sphinx_app),
            extension_loader,
        })
    }

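    // A minimal usage sketch (hypothetical: assumes `BuildConfig` implements
    // `Default` and that the caller runs inside a tokio runtime; adapt to how
    // the surrounding crate actually constructs its configuration):
    //
    //     let config = BuildConfig::default();
    //     let mut builder =
    //         SphinxBuilder::new(config, "docs".into(), "docs/_build".into())?;
    //     builder.enable_incremental();
    //     let stats = builder.build().await?;
    //     println!("{} files in {:?}", stats.files_processed, stats.build_time);
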
    /// Override the number of parallel worker threads used during the build.
    pub fn set_parallel_jobs(&mut self, jobs: usize) {
        self.parallel_jobs = jobs;
    }

    /// Enable incremental rebuilds backed by the on-disk build cache.
    pub fn enable_incremental(&mut self) {
        self.incremental = true;
    }

    /// Add a warning to the collection
    #[allow(dead_code)]
    pub fn add_warning(&self, warning: BuildWarning) {
        self.warnings.lock().unwrap().push(warning);
    }

    /// Add an error to the collection
    #[allow(dead_code)]
    pub fn add_error(&self, error: BuildErrorReport) {
        self.errors.lock().unwrap().push(error);
    }

    /// Check if warnings should be treated as errors
    #[allow(dead_code)]
    pub fn should_fail_on_warning(&self) -> bool {
        self.config.fail_on_warning
    }

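    /// Remove the output directory entirely, forcing a full rebuild next time.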
    pub async fn clean(&self) -> Result<()> {
        if self.output_dir.exists() {
            tokio::fs::remove_dir_all(&self.output_dir).await?;
        }
        Ok(())
    }

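    /// Run the complete build pipeline and return aggregate statistics.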
    pub async fn build(&self) -> Result<BuildStats> {
        let start_time = Instant::now();
        info!("Starting build process...");

        // Ensure output directory exists
        tokio::fs::create_dir_all(&self.output_dir).await?;

        // Discover all source files
        let source_files = self.discover_source_files().await?;
        info!("Discovered {} source files", source_files.len());

        // Build dependency graph
        let dependency_graph = self.build_dependency_graph(&source_files).await?;
        debug!(
            "Built dependency graph with {} nodes",
            dependency_graph.len()
        );

        // Process files in dependency order
        let processed_docs = self
            .process_files_parallel(&source_files, &dependency_graph)
            .await?;

        // Validate documents and collect warnings/errors
        self.validate_documents(&processed_docs, &source_files)
            .await?;

        // Generate cross-references and indices
        self.generate_indices(&processed_docs).await?;

        // Copy static assets
        self.copy_static_assets().await?;

        // Generate sitemap and search index
        self.generate_search_index(&processed_docs).await?;

        let build_time = start_time.elapsed();
        let output_size = utils::calculate_directory_size(&self.output_dir).await?;

        let warnings = self.warnings.lock().unwrap();
        let errors = self.errors.lock().unwrap();

        let stats = BuildStats {
            files_processed: processed_docs.len(),
            files_skipped: 0, // TODO: Track skipped files
            build_time,
            output_size_mb: output_size as f64 / 1024.0 / 1024.0,
            cache_hits: self.cache.hit_count(),
            errors: errors.len(),
            warnings: warnings.len(),
            warning_details: warnings.clone(),
            error_details: errors.clone(),
        };

        info!("Build completed in {:?}", build_time);
        Ok(stats)
    }

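    /// Discover source files via the configured include/exclude glob patterns,
    /// mirroring Sphinx's source discovery. Falls back to a simple recursive
    /// walk if pattern matching fails.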
    async fn discover_source_files(&self) -> Result<Vec<PathBuf>> {
        // Use pattern-based file discovery like Sphinx
        let mut include_patterns = self.config.include_patterns.clone();
        let exclude_patterns = &self.config.exclude_patterns;

        // Add default source file patterns if no specific patterns are configured
        if include_patterns == vec!["**"] {
            include_patterns = vec![
                "**/*.rst".to_string(),
                "**/*.md".to_string(),
                "**/*.txt".to_string(),
            ];
        }

        // Add built-in exclude patterns for common build artifacts and hidden files
        let mut all_exclude_patterns = exclude_patterns.clone();
        all_exclude_patterns.extend_from_slice(&[
            "_build/**".to_string(),
            "__pycache__/**".to_string(),
            ".git/**".to_string(),
            ".svn/**".to_string(),
            ".hg/**".to_string(),
            ".*/**".to_string(), // Skip all hidden directories
            "Thumbs.db".to_string(),
            ".DS_Store".to_string(),
        ]);

        match matching::get_matching_files(
            &self.source_dir,
            &include_patterns,
            &all_exclude_patterns,
        ) {
            Ok(files) => Ok(files),
            Err(e) => {
                log::warn!(
                    "Pattern matching failed, falling back to simple discovery: {}",
                    e
                );
                // Fall back to the simple recursive walk
                let mut files = Vec::new();
                self.discover_files_sync(&self.source_dir, &mut files)?;
                Ok(files)
            }
        }
    }

    /// Fallback file discovery, used when pattern matching fails
    fn discover_files_sync(&self, dir: &Path, files: &mut Vec<PathBuf>) -> Result<()> {
        for entry in std::fs::read_dir(dir)? {
            let entry = entry?;
            let path = entry.path();

            if path.is_dir() {
                // Skip hidden directories and build artifacts
                if let Some(name) = path.file_name() {
                    if name.to_string_lossy().starts_with('.')
                        || name == "_build"
                        || name == "__pycache__"
                    {
                        continue;
                    }
                }

                self.discover_files_sync(&path, files)?;
            } else if self.is_source_file(&path) {
                files.push(path);
            }
        }
        Ok(())
    }

    /// Extension-based check for source files, used by the fallback discovery
    fn is_source_file(&self, path: &Path) -> bool {
        if let Some(ext) = path.extension() {
            matches!(ext.to_string_lossy().as_ref(), "rst" | "md" | "txt")
        } else {
            false
        }
    }

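    /// Build the document dependency graph used to order processing.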
    async fn build_dependency_graph(
        &self,
        files: &[PathBuf],
    ) -> Result<HashMap<PathBuf, Vec<PathBuf>>> {
        let mut graph = HashMap::new();

        // For now, a flat graph: every file is a node with no edges
        // TODO: Parse files to find actual dependencies (includes, references, etc.)
        for file in files {
            graph.insert(file.clone(), Vec::new());
        }

        Ok(graph)
    }

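    /// Parse and render all source files on a dedicated rayon thread pool
    /// sized to `parallel_jobs`.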
    async fn process_files_parallel(
        &self,
        files: &[PathBuf],
        _dependency_graph: &HashMap<PathBuf, Vec<PathBuf>>,
    ) -> Result<Vec<Document>> {
        info!(
            "Processing {} files with {} parallel jobs",
            files.len(),
            self.parallel_jobs
        );

        // Configure rayon thread pool
        let pool = rayon::ThreadPoolBuilder::new()
            .num_threads(self.parallel_jobs)
            .build()?;

        let documents: Result<Vec<_>, _> = pool.install(|| {
            files
                .par_iter()
                .map(|file_path| self.process_single_file(file_path))
                .collect()
        });

        documents
    }

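    /// Parse, render, and write a single source file, consulting the cache
    /// first when incremental builds are enabled.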
    fn process_single_file(&self, file_path: &Path) -> Result<Document> {
        let relative_path = file_path.strip_prefix(&self.source_dir)?;
        debug!("Processing file: {}", relative_path.display());

        // Check cache if incremental build is enabled
        if self.incremental {
            if let Ok(cached_doc) = self.cache.get_document(file_path) {
                let file_mtime = utils::get_file_mtime(file_path)?;
                if cached_doc.source_mtime >= file_mtime {
                    debug!("Using cached version of {}", relative_path.display());
                    return Ok(cached_doc);
                }
            }
        }

        // Read and parse the file
        let content = std::fs::read_to_string(file_path)?;
        let document = self.parser.parse(file_path, &content)?;

        // Simple document rendering (placeholder)
        let rendered_html = format!(
            "<html><body>{}</body></html>",
            html_escape::encode_text(&document.content.to_string())
        );

        // Write output file
        let output_path = self.get_output_path(file_path)?;
        if let Some(parent) = output_path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        std::fs::write(&output_path, &rendered_html)?;

        // Cache the document
        if self.incremental {
            self.cache.store_document(file_path, &document)?;
        }

        Ok(document)
    }

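    /// Map a source path to its mirrored output path with an `.html` extension,
    /// e.g. `<source>/guide/intro.rst` -> `<output>/guide/intro.html`.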
    fn get_output_path(&self, source_path: &Path) -> Result<PathBuf> {
        let relative_path = source_path.strip_prefix(&self.source_dir)?;
        let mut output_path = self.output_dir.join(relative_path);

        // Change extension to .html
        output_path.set_extension("html");

        Ok(output_path)
    }

    async fn generate_indices(&self, _documents: &[Document]) -> Result<()> {
        info!("Generating indices and cross-references");
        // TODO: Implement index generation
        Ok(())
    }

    async fn copy_static_assets(&self) -> Result<()> {
        info!("Copying static assets");

        // Create _static directory
        let static_output_dir = self.output_dir.join("_static");
        tokio::fs::create_dir_all(&static_output_dir).await?;

        // Copy built-in static assets, resolved relative to the binary location
        let exe_dir = std::env::current_exe()?
            .parent()
            .ok_or_else(|| anyhow::anyhow!("Could not determine executable directory"))?
            .to_path_buf();

        // Try multiple possible locations for static assets
        let possible_static_dirs = [
            exe_dir.join("../static"),                      // Release build
            exe_dir.join("../../static"),                   // Debug build
            exe_dir.join("../../../static"),                // Deep build
            Path::new("rust-builder/static").to_path_buf(), // Local development
        ];

        let mut static_assets_copied = false;
        for builtin_static_dir in &possible_static_dirs {
            if builtin_static_dir.exists() {
                debug!("Found static assets at: {:?}", builtin_static_dir);
                for entry in std::fs::read_dir(builtin_static_dir)? {
                    let entry = entry?;
                    let file_path = entry.path();
                    if file_path.is_file() {
                        let file_name = file_path.file_name().unwrap();
                        let dest_path = static_output_dir.join(file_name);
                        tokio::fs::copy(&file_path, &dest_path).await?;
                        debug!("Copied static asset: {:?}", file_name);
                    }
                }
                static_assets_copied = true;
                break;
            }
        }

        if !static_assets_copied {
            debug!("No built-in static assets found, creating basic ones");
            // Create minimal CSS/JS files if none were found on disk
            self.create_default_static_assets(&static_output_dir)
                .await?;
        }

        // Copy project-specific static assets
        let static_dirs = [
            self.source_dir.join("_static"),
            self.source_dir.join("_templates"),
        ];

        for static_dir in &static_dirs {
            if static_dir.exists() {
                let dest = self.output_dir.join(static_dir.file_name().unwrap());
                utils::copy_dir_recursive(static_dir, &dest).await?;
                debug!("Copied static directory: {:?}", static_dir);
            }
        }

        Ok(())
    }

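    /// Write fallback assets that are embedded in the binary at compile time
    /// via `include_str!`.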
    async fn create_default_static_assets(&self, static_dir: &Path) -> Result<()> {
        // Create basic pygments.css
        let pygments_css = include_str!("../static/pygments.css");
        tokio::fs::write(static_dir.join("pygments.css"), pygments_css).await?;

        // Create basic theme.css
        let theme_css = include_str!("../static/theme.css");
        tokio::fs::write(static_dir.join("theme.css"), theme_css).await?;

        // Create basic JavaScript files
        let jquery_js = include_str!("../static/jquery.js");
        tokio::fs::write(static_dir.join("jquery.js"), jquery_js).await?;

        let doctools_js = include_str!("../static/doctools.js");
        tokio::fs::write(static_dir.join("doctools.js"), doctools_js).await?;

        let sphinx_highlight_js = include_str!("../static/sphinx_highlight.js");
        tokio::fs::write(static_dir.join("sphinx_highlight.js"), sphinx_highlight_js).await?;

        debug!("Created default static assets");
        Ok(())
    }

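    /// Cross-check toctree references against the discovered documents,
    /// recording warnings for missing references and orphaned pages.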
    async fn validate_documents(
        &self,
        processed_docs: &[Document],
        _source_files: &[PathBuf],
    ) -> Result<()> {
        info!("Validating documents and checking for warnings...");

        let mut toctree_references = HashSet::new();
        let mut referenced_files = HashSet::new();
        let mut all_documents = HashSet::new();

        // Collect all documents and their toctree references
        for doc in processed_docs {
            // Get the source-relative path, without extension, for comparison
            let doc_path_relative = doc
                .source_path
                .strip_prefix(&self.source_dir)
                .unwrap_or(&doc.source_path);
            let doc_path_no_ext = doc_path_relative.with_extension("");
            all_documents.insert(doc_path_no_ext.to_string_lossy().to_string());

            // Check for toctree directives and collect their references
            if let Some(toctree_refs) = self.extract_toctree_references(doc) {
                for toc_ref in toctree_refs {
                    toctree_references.insert((doc.source_path.clone(), toc_ref.clone()));
                    referenced_files.insert(toc_ref);
                }
            }
        }

        // Warn about toctree entries that resolve to no known document
        for (source_file, reference) in &toctree_references {
            let ref_path = format!("{}/index", reference);
            let alt_ref_path = reference.clone();

            if !all_documents.contains(&ref_path) && !all_documents.contains(&alt_ref_path) {
                let warning = BuildWarning::missing_toctree_ref(
                    source_file.clone(),
                    Some(10), // TODO: Extract actual line number
                    reference,
                );
                self.warnings.lock().unwrap().push(warning);
            }
        }

        // Warn about documents that no toctree references (orphans)
        for doc in processed_docs {
            let doc_path_relative = doc
                .source_path
                .strip_prefix(&self.source_dir)
                .unwrap_or(&doc.source_path);
            let doc_path_no_ext = doc_path_relative.with_extension("");
            let doc_path_str = doc_path_no_ext.to_string_lossy().to_string();

            // Skip the main index file
            if doc_path_str == "index" {
                continue;
            }

            // A document counts as referenced if a toctree names it directly,
            // names its directory index, or names an ancestor directory
            let is_referenced = referenced_files.iter().any(|ref_path| {
                ref_path == &doc_path_str
                    || ref_path == &format!("{}/index", doc_path_str)
                    || doc_path_str.starts_with(&format!("{}/", ref_path))
            });

            if !is_referenced {
                let warning = BuildWarning::orphaned_document(doc.source_path.clone());
                self.warnings.lock().unwrap().push(warning);
            }
        }

        let warning_count = self.warnings.lock().unwrap().len();
        info!("Validation completed. Found {} warnings", warning_count);

        Ok(())
    }

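    /// Extract entry names from every `toctree` directive in an RST document,
    /// skipping option lines (e.g. `:maxdepth: 2`) and comment lines (`..`).
    /// Returns `None` when the document contains no toctree entries.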
    fn extract_toctree_references(&self, doc: &Document) -> Option<Vec<String>> {
        use crate::document::DocumentContent;

        let mut references = Vec::new();

        if let DocumentContent::RestructuredText(rst_content) = &doc.content {
            for node in &rst_content.ast {
                if let crate::document::RstNode::Directive { name, content, .. } = node {
                    if name == "toctree" {
                        // Keep entry lines from the toctree body; skip option
                        // lines starting with ':' and comments starting with '..'
                        for line in content.lines() {
                            let trimmed = line.trim();
                            if !trimmed.is_empty()
                                && !trimmed.starts_with(':')
                                && !trimmed.starts_with("..")
                            {
                                references.push(trimmed.to_string());
                            }
                        }
                    }
                }
            }
        }

        if references.is_empty() {
            None
        } else {
            Some(references)
        }
    }

    async fn generate_search_index(&self, _documents: &[Document]) -> Result<()> {
        info!("Generating search index");
        // TODO: Implement search index generation
        Ok(())
    }
}