use anyhow::Result;
use log::{debug, info};
use rayon::prelude::*;
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};

use crate::cache::BuildCache;
use crate::config::BuildConfig;
use crate::document::Document;
use crate::error::{BuildErrorReport, BuildWarning};
use crate::extensions::{ExtensionLoader, SphinxApp};
use crate::matching;
use crate::parser::Parser;
use crate::utils;

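/// Aggregate metrics for a single build run: file counts, wall-clock
/// time, total output size, cache hits, and the collected warning and
/// error details.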
#[derive(Debug, Clone)]
pub struct BuildStats {
    pub files_processed: usize,
    pub files_skipped: usize,
    pub build_time: Duration,
    pub output_size_mb: f64,
    pub cache_hits: usize,
    pub errors: usize,
    pub warnings: usize,
    pub warning_details: Vec<BuildWarning>,
    pub error_details: Vec<BuildErrorReport>,
}

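/// Orchestrates the whole documentation build: source discovery,
/// parallel parsing and rendering, validation, index generation, and
/// static-asset copying.
///
/// A minimal usage sketch. This is hedged: how a `BuildConfig` is
/// obtained lives elsewhere in the crate, so `builder_config` below is
/// a placeholder for whatever constructor the crate actually provides.
///
/// ```ignore
/// let builder_config: BuildConfig = /* from the crate's config loader */;
/// let mut builder = SphinxBuilder::new(
///     builder_config,
///     PathBuf::from("docs"),
///     PathBuf::from("_build/html"),
/// )?;
/// builder.set_parallel_jobs(8);
/// builder.enable_incremental();
/// let stats = builder.build().await?;
/// println!("{} files in {:?}", stats.files_processed, stats.build_time);
/// ```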
pub struct SphinxBuilder {
    config: BuildConfig,
    source_dir: PathBuf,
    output_dir: PathBuf,
    cache: BuildCache,
    parser: Parser,
    parallel_jobs: usize,
    incremental: bool,
    warnings: Arc<Mutex<Vec<BuildWarning>>>,
    errors: Arc<Mutex<Vec<BuildErrorReport>>>,
    #[allow(dead_code)]
    sphinx_app: Option<SphinxApp>,
    #[allow(dead_code)]
    extension_loader: ExtensionLoader,
}

impl SphinxBuilder {
    pub fn new(config: BuildConfig, source_dir: PathBuf, output_dir: PathBuf) -> Result<Self> {
        let cache_dir = output_dir.join(".sphinx-ultra-cache");
        let cache = BuildCache::new(cache_dir)?;

        let parser = Parser::new(&config)?;

        let parallel_jobs = config.parallel_jobs.unwrap_or_else(|| {
            std::thread::available_parallelism()
                .map(|n| n.get())
                .unwrap_or(4)
        });

        // Set up the Sphinx application shim and load configured extensions.
        let mut sphinx_app = SphinxApp::new(config.clone())?;
        let mut extension_loader = ExtensionLoader::new()?;

        for extension_name in &config.extensions {
            // Extension failures are non-fatal: log a warning and continue.
            match extension_loader.load_extension(extension_name) {
                Ok(extension) => {
                    if let Err(e) = sphinx_app.add_extension(extension) {
                        log::warn!("Failed to add extension '{}': {}", extension_name, e);
                    }
                }
                Err(e) => {
                    log::warn!("Failed to load extension '{}': {}", extension_name, e);
                }
            }
        }

        Ok(Self {
            config,
            source_dir,
            output_dir,
            cache,
            parser,
            parallel_jobs,
            incremental: false,
            warnings: Arc::new(Mutex::new(Vec::new())),
            errors: Arc::new(Mutex::new(Vec::new())),
            sphinx_app: Some(sphinx_app),
            extension_loader,
        })
    }

    pub fn set_parallel_jobs(&mut self, jobs: usize) {
        self.parallel_jobs = jobs;
    }

    pub fn enable_incremental(&mut self) {
        self.incremental = true;
    }

    #[allow(dead_code)]
    pub fn add_warning(&self, warning: BuildWarning) {
        self.warnings.lock().unwrap().push(warning);
    }

    #[allow(dead_code)]
    pub fn add_error(&self, error: BuildErrorReport) {
        self.errors.lock().unwrap().push(error);
    }

    #[allow(dead_code)]
    pub fn should_fail_on_warning(&self) -> bool {
        self.config.fail_on_warning
    }

    pub async fn clean(&self) -> Result<()> {
        if self.output_dir.exists() {
            tokio::fs::remove_dir_all(&self.output_dir).await?;
        }
        Ok(())
    }

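    /// Runs the full pipeline and returns the resulting [`BuildStats`].
    ///
    /// Steps, in order: create the output directory, discover sources,
    /// build the dependency graph, process files in parallel, validate
    /// documents, generate indices, copy static assets, and generate
    /// the search index.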
    pub async fn build(&self) -> Result<BuildStats> {
        let start_time = Instant::now();
        info!("Starting build process...");

        tokio::fs::create_dir_all(&self.output_dir).await?;

        // Discover all source files under the source directory.
        let source_files = self.discover_source_files().await?;
        info!("Discovered {} source files", source_files.len());

        // Build the inter-document dependency graph.
        let dependency_graph = self.build_dependency_graph(&source_files).await?;
        debug!(
            "Built dependency graph with {} nodes",
            dependency_graph.len()
        );

        // Parse and render all files in parallel.
        let processed_docs = self
            .process_files_parallel(&source_files, &dependency_graph)
            .await?;

        // Check for missing toctree references and orphaned documents.
        self.validate_documents(&processed_docs, &source_files)
            .await?;

        self.generate_indices(&processed_docs).await?;

        self.copy_static_assets().await?;

        self.generate_search_index(&processed_docs).await?;

        let build_time = start_time.elapsed();
        let output_size = utils::calculate_directory_size(&self.output_dir).await?;

        let warnings = self.warnings.lock().unwrap();
        let errors = self.errors.lock().unwrap();

        let stats = BuildStats {
            files_processed: processed_docs.len(),
            files_skipped: 0, // skipped-file tracking is not implemented yet
            build_time,
            output_size_mb: output_size as f64 / 1024.0 / 1024.0,
            cache_hits: self.cache.hit_count(),
            errors: errors.len(),
            warnings: warnings.len(),
            warning_details: warnings.clone(),
            error_details: errors.clone(),
        };

        info!("Build completed in {:?}", build_time);
        Ok(stats)
    }

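    /// Finds source files under `source_dir` using the configured
    /// include/exclude patterns, falling back to a simple recursive
    /// walk if pattern matching fails. With the catch-all config
    /// (`["**"]`), this matches `**/*.rst`, `**/*.md`, and `**/*.txt`.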
    async fn discover_source_files(&self) -> Result<Vec<PathBuf>> {
        let mut include_patterns = self.config.include_patterns.clone();
        let exclude_patterns = &self.config.exclude_patterns;

        // Expand the catch-all pattern to the supported source extensions.
        if include_patterns == vec!["**"] {
            include_patterns = vec![
                "**/*.rst".to_string(),
                "**/*.md".to_string(),
                "**/*.txt".to_string(),
            ];
        }

        // Always exclude build output, VCS metadata, hidden directories,
        // and OS metadata files.
        let mut all_exclude_patterns = exclude_patterns.clone();
        all_exclude_patterns.extend_from_slice(&[
            "_build/**".to_string(),
            "__pycache__/**".to_string(),
            ".git/**".to_string(),
            ".svn/**".to_string(),
            ".hg/**".to_string(),
            ".*/**".to_string(),
            "Thumbs.db".to_string(),
            ".DS_Store".to_string(),
        ]);

        match matching::get_matching_files(
            &self.source_dir,
            &include_patterns,
            &all_exclude_patterns,
        ) {
            Ok(files) => Ok(files),
            Err(e) => {
                log::warn!(
                    "Pattern matching failed, falling back to simple discovery: {}",
                    e
                );
                let mut files = Vec::new();
                self.discover_files_sync(&self.source_dir, &mut files)?;
                Ok(files)
            }
        }
    }

    fn discover_files_sync(&self, dir: &Path, files: &mut Vec<PathBuf>) -> Result<()> {
        for entry in std::fs::read_dir(dir)? {
            let entry = entry?;
            let path = entry.path();

            if path.is_dir() {
                // Skip hidden directories and known build/cache directories.
                if let Some(name) = path.file_name() {
                    if name.to_string_lossy().starts_with('.')
                        || name == "_build"
                        || name == "__pycache__"
                    {
                        continue;
                    }
                }

                self.discover_files_sync(&path, files)?;
            } else if self.is_source_file(&path) {
                files.push(path);
            }
        }
        Ok(())
    }

    fn is_source_file(&self, path: &Path) -> bool {
        if let Some(ext) = path.extension() {
            matches!(ext.to_string_lossy().as_ref(), "rst" | "md" | "txt")
        } else {
            false
        }
    }

    async fn build_dependency_graph(
        &self,
        files: &[PathBuf],
    ) -> Result<HashMap<PathBuf, Vec<PathBuf>>> {
        let mut graph = HashMap::new();

        // Dependency extraction is not implemented yet; every file starts
        // with an empty dependency list.
        for file in files {
            graph.insert(file.clone(), Vec::new());
        }

        Ok(graph)
    }

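    /// Renders all files on a dedicated rayon thread pool sized to
    /// `parallel_jobs`. The per-file `Result`s are collected into one,
    /// so a single failing file fails the whole batch.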
    async fn process_files_parallel(
        &self,
        files: &[PathBuf],
        _dependency_graph: &HashMap<PathBuf, Vec<PathBuf>>,
    ) -> Result<Vec<Document>> {
        info!(
            "Processing {} files with {} parallel jobs",
            files.len(),
            self.parallel_jobs
        );

        let pool = rayon::ThreadPoolBuilder::new()
            .num_threads(self.parallel_jobs)
            .build()?;

        let documents: Result<Vec<_>, _> = pool.install(|| {
            files
                .par_iter()
                .map(|file_path| self.process_single_file(file_path))
                .collect()
        });

        documents
    }

    fn process_single_file(&self, file_path: &Path) -> Result<Document> {
        let relative_path = file_path.strip_prefix(&self.source_dir)?;
        debug!("Processing file: {}", relative_path.display());

        // In incremental mode, reuse the cached document if the source
        // file has not been modified since it was cached.
        if self.incremental {
            if let Ok(cached_doc) = self.cache.get_document(file_path) {
                let file_mtime = utils::get_file_mtime(file_path)?;
                if cached_doc.source_mtime >= file_mtime {
                    debug!("Using cached version of {}", relative_path.display());
                    return Ok(cached_doc);
                }
            }
        }

        let content = std::fs::read_to_string(file_path)?;
        let document = self.parser.parse(file_path, &content)?;

        // Minimal HTML rendering: escape the parsed content and wrap it
        // in a bare HTML document.
        let rendered_html = format!(
            "<html><body>{}</body></html>",
            html_escape::encode_text(&document.content.to_string())
        );

        let output_path = self.get_output_path(file_path)?;
        if let Some(parent) = output_path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        std::fs::write(&output_path, &rendered_html)?;

        if self.incremental {
            self.cache.store_document(file_path, &document)?;
        }

        Ok(document)
    }

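    /// Maps a source path to its output path by re-rooting it under
    /// `output_dir` and swapping the extension for `.html`, e.g.
    /// `<source_dir>/guide/intro.rst` -> `<output_dir>/guide/intro.html`.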
    fn get_output_path(&self, source_path: &Path) -> Result<PathBuf> {
        let relative_path = source_path.strip_prefix(&self.source_dir)?;
        let mut output_path = self.output_dir.join(relative_path);

        output_path.set_extension("html");

        Ok(output_path)
    }

    async fn generate_indices(&self, _documents: &[Document]) -> Result<()> {
        info!("Generating indices and cross-references");
        // Placeholder: index and cross-reference generation is not
        // implemented yet.
        Ok(())
    }

    async fn copy_static_assets(&self) -> Result<()> {
        info!("Copying static assets");

        let static_output_dir = self.output_dir.join("_static");
        tokio::fs::create_dir_all(&static_output_dir).await?;

        // Look for bundled static assets relative to the executable,
        // checking several candidate locations.
        let exe_dir = std::env::current_exe()?
            .parent()
            .ok_or_else(|| anyhow::anyhow!("Could not determine executable directory"))?
            .to_path_buf();

        let possible_static_dirs = [
            exe_dir.join("../static"),
            exe_dir.join("../../static"),
            exe_dir.join("../../../static"),
            Path::new("rust-builder/static").to_path_buf(),
        ];

        let mut static_assets_copied = false;
        for builtin_static_dir in &possible_static_dirs {
            if builtin_static_dir.exists() {
                debug!("Found static assets at: {:?}", builtin_static_dir);
                for entry in std::fs::read_dir(builtin_static_dir)? {
                    let entry = entry?;
                    let file_path = entry.path();
                    if file_path.is_file() {
                        let file_name = file_path.file_name().unwrap();
                        let dest_path = static_output_dir.join(file_name);
                        tokio::fs::copy(&file_path, &dest_path).await?;
                        debug!("Copied static asset: {:?}", file_name);
                    }
                }
                static_assets_copied = true;
                break;
            }
        }

        if !static_assets_copied {
            debug!("No built-in static assets found, creating basic ones");
            self.create_default_static_assets(&static_output_dir)
                .await?;
        }

        // Also copy the project's own _static and _templates directories
        // if they exist.
        let static_dirs = [
            self.source_dir.join("_static"),
            self.source_dir.join("_templates"),
        ];

        for static_dir in &static_dirs {
            if static_dir.exists() {
                let dest = self.output_dir.join(static_dir.file_name().unwrap());
                utils::copy_dir_recursive(static_dir, &dest).await?;
                debug!("Copied static directory: {:?}", static_dir);
            }
        }

        Ok(())
    }

    async fn create_default_static_assets(&self, static_dir: &Path) -> Result<()> {
        // These fallback assets are embedded in the binary at compile time.
        let pygments_css = include_str!("../static/pygments.css");
        tokio::fs::write(static_dir.join("pygments.css"), pygments_css).await?;

        let theme_css = include_str!("../static/theme.css");
        tokio::fs::write(static_dir.join("theme.css"), theme_css).await?;

        let jquery_js = include_str!("../static/jquery.js");
        tokio::fs::write(static_dir.join("jquery.js"), jquery_js).await?;

        let doctools_js = include_str!("../static/doctools.js");
        tokio::fs::write(static_dir.join("doctools.js"), doctools_js).await?;

        let sphinx_highlight_js = include_str!("../static/sphinx_highlight.js");
        tokio::fs::write(static_dir.join("sphinx_highlight.js"), sphinx_highlight_js).await?;

        debug!("Created default static assets");
        Ok(())
    }

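    /// Emits two classes of warnings: toctree entries that resolve to
    /// no known document, and documents (other than the root `index`)
    /// that no toctree references.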
    async fn validate_documents(
        &self,
        processed_docs: &[Document],
        _source_files: &[PathBuf],
    ) -> Result<()> {
        info!("Validating documents and checking for warnings...");

        let mut toctree_references = HashSet::new();
        let mut referenced_files = HashSet::new();
        let mut all_documents = HashSet::new();

        // Collect every document (keyed by its extension-less relative
        // path) and every toctree reference it makes.
        for doc in processed_docs {
            let doc_path_relative = doc
                .source_path
                .strip_prefix(&self.source_dir)
                .unwrap_or(&doc.source_path);
            let doc_path_no_ext = doc_path_relative.with_extension("");
            all_documents.insert(doc_path_no_ext.to_string_lossy().to_string());

            if let Some(toctree_refs) = self.extract_toctree_references(doc) {
                for toc_ref in toctree_refs {
                    toctree_references.insert((doc.source_path.clone(), toc_ref.clone()));
                    referenced_files.insert(toc_ref);
                }
            }
        }

        // Warn about toctree entries that do not resolve to a known
        // document, either directly or as a `<ref>/index` page.
        for (source_file, reference) in &toctree_references {
            let ref_path = format!("{}/index", reference);
            let alt_ref_path = reference.clone();

            if !all_documents.contains(&ref_path) && !all_documents.contains(&alt_ref_path) {
                let warning = BuildWarning::missing_toctree_ref(
                    source_file.clone(),
                    Some(10), // approximate line; toctree positions are not tracked yet
                    reference,
                );
                self.warnings.lock().unwrap().push(warning);
            }
        }

        // Warn about documents that no toctree references; the root
        // `index` document is exempt.
        for doc in processed_docs {
            let doc_path_relative = doc
                .source_path
                .strip_prefix(&self.source_dir)
                .unwrap_or(&doc.source_path);
            let doc_path_no_ext = doc_path_relative.with_extension("");
            let doc_path_str = doc_path_no_ext.to_string_lossy().to_string();

            if doc_path_str == "index" {
                continue;
            }

            let is_referenced = referenced_files.iter().any(|ref_path| {
                ref_path == &doc_path_str
                    || ref_path == &format!("{}/index", doc_path_str)
                    || doc_path_str.starts_with(&format!("{}/", ref_path))
            });

            if !is_referenced {
                let warning = BuildWarning::orphaned_document(doc.source_path.clone());
                self.warnings.lock().unwrap().push(warning);
            }
        }

        let warning_count = self.warnings.lock().unwrap().len();
        info!("Validation completed. Found {} warnings", warning_count);

        Ok(())
    }

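    /// Pulls document references out of `toctree` directives in an RST
    /// AST. For directive content such as:
    ///
    /// ```text
    /// :maxdepth: 2
    ///
    /// intro
    /// usage/quickstart
    /// ```
    ///
    /// this returns the entries `intro` and `usage/quickstart`; option
    /// lines (starting with `:`) and comment lines (starting with `..`)
    /// are skipped. Returns `None` when no references are found.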
    fn extract_toctree_references(&self, doc: &Document) -> Option<Vec<String>> {
        use crate::document::DocumentContent;

        let mut references = Vec::new();

        if let DocumentContent::RestructuredText(rst_content) = &doc.content {
            for node in &rst_content.ast {
                if let crate::document::RstNode::Directive { name, content, .. } = node {
                    if name == "toctree" {
                        // Keep entry lines; skip options and comments.
                        for line in content.lines() {
                            let trimmed = line.trim();
                            if !trimmed.is_empty()
                                && !trimmed.starts_with(':')
                                && !trimmed.starts_with("..")
                            {
                                references.push(trimmed.to_string());
                            }
                        }
                    }
                }
            }
        }

        if references.is_empty() {
            None
        } else {
            Some(references)
        }
    }

    async fn generate_search_index(&self, _documents: &[Document]) -> Result<()> {
        info!("Generating search index");
        // Placeholder: search index generation is not implemented yet.
        Ok(())
    }
}