1use anyhow::Result;
2use blake3::Hasher;
3use chrono::{DateTime, Utc};
4use dashmap::DashMap;
5use log::{debug, warn};
6use parking_lot::RwLock;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11use std::time::{Duration, UNIX_EPOCH};
12
13use crate::document::Document;
14use crate::error::BuildError;
15
16pub struct BuildCache {
17 cache_dir: PathBuf,
18 documents: Arc<DashMap<PathBuf, CachedDocument>>,
19 file_hashes: Arc<RwLock<HashMap<PathBuf, String>>>,
20 hit_count: Arc<RwLock<usize>>,
21 miss_count: Arc<RwLock<usize>>,
22 max_size_mb: usize,
23 expiration_duration: Duration,
24}
25
26#[derive(Debug, Clone, Serialize, Deserialize)]
27struct CachedDocument {
28 document: Document,
29 hash: String,
30 cached_at: DateTime<Utc>,
31 access_count: usize,
32 size_bytes: usize,
33}
34
35impl BuildCache {
36 pub fn new(cache_dir: PathBuf) -> Result<Self> {
37 std::fs::create_dir_all(&cache_dir)?;
38
39 let cache = Self {
40 cache_dir,
41 documents: Arc::new(DashMap::new()),
42 file_hashes: Arc::new(RwLock::new(HashMap::new())),
43 hit_count: Arc::new(RwLock::new(0)),
44 miss_count: Arc::new(RwLock::new(0)),
45 max_size_mb: 500, expiration_duration: Duration::from_secs(24 * 60 * 60), };
48
49 cache.load_from_disk()?;
51
52 Ok(cache)
53 }
54
55 pub fn get_document(&self, file_path: &Path) -> Result<Document> {
56 let hash = self.calculate_file_hash(file_path)?;
57
58 if let Some(cached) = self.documents.get(file_path) {
59 if cached.hash == hash && !self.is_expired(&cached.cached_at) {
60 self.documents.alter(file_path, |_, mut cached| {
62 cached.access_count += 1;
63 cached
64 });
65
66 *self.hit_count.write() += 1;
67 debug!("Cache hit for {}", file_path.display());
68 return Ok(cached.document.clone());
69 }
70 self.documents.remove(file_path);
72 }
73
74 *self.miss_count.write() += 1;
75 debug!("Cache miss for {}", file_path.display());
76 Err(BuildError::Cache("Document not found in cache".to_string()).into())
77 }
78
79 pub fn store_document(&self, file_path: &Path, document: &Document) -> Result<()> {
80 let hash = self.calculate_file_hash(file_path)?;
81 let size_bytes = self.estimate_document_size(document);
82
83 let cached_doc = CachedDocument {
84 document: document.clone(),
85 hash: hash.clone(),
86 cached_at: Utc::now(),
87 access_count: 1,
88 size_bytes,
89 };
90
91 self.evict_if_needed(size_bytes)?;
93
94 self.documents.insert(file_path.to_path_buf(), cached_doc);
95 self.file_hashes
96 .write()
97 .insert(file_path.to_path_buf(), hash.clone());
98
99 debug!(
100 "Cached document: {} ({} bytes)",
101 file_path.display(),
102 size_bytes
103 );
104
105 self.persist_to_disk(file_path, document)?;
107
108 Ok(())
109 }
110
111 #[allow(dead_code)]
112 pub fn invalidate(&self, file_path: &Path) {
113 self.documents.remove(file_path);
114 self.file_hashes.write().remove(file_path);
115
116 let cache_file = self.get_cache_file_path(file_path);
118 if cache_file.exists() {
119 if let Err(e) = std::fs::remove_file(&cache_file) {
120 warn!(
121 "Failed to remove cache file {}: {}",
122 cache_file.display(),
123 e
124 );
125 }
126 }
127
128 debug!("Invalidated cache for {}", file_path.display());
129 }
130
131 #[allow(dead_code)]
132 pub fn clear(&self) -> Result<()> {
133 self.documents.clear();
134 self.file_hashes.write().clear();
135 *self.hit_count.write() = 0;
136 *self.miss_count.write() = 0;
137
138 if self.cache_dir.exists() {
139 std::fs::remove_dir_all(&self.cache_dir)?;
140 std::fs::create_dir_all(&self.cache_dir)?;
141 }
142
143 debug!("Cleared all cache");
144 Ok(())
145 }
146
147 pub fn hit_count(&self) -> usize {
148 *self.hit_count.read()
149 }
150
151 #[allow(dead_code)]
152 pub fn miss_count(&self) -> usize {
153 *self.miss_count.read()
154 }
155
156 #[allow(dead_code)]
157 pub fn hit_ratio(&self) -> f64 {
158 let hits = *self.hit_count.read() as f64;
159 let misses = *self.miss_count.read() as f64;
160 if hits + misses > 0.0 {
161 hits / (hits + misses)
162 } else {
163 0.0
164 }
165 }
166
167 pub fn size_mb(&self) -> f64 {
168 let total_bytes: usize = self
169 .documents
170 .iter()
171 .map(|entry| entry.value().size_bytes)
172 .sum();
173 total_bytes as f64 / 1024.0 / 1024.0
174 }
175
176 fn calculate_file_hash(&self, file_path: &Path) -> Result<String> {
177 let content = std::fs::read(file_path)?;
178 let metadata = std::fs::metadata(file_path)?;
179
180 let mut hasher = Hasher::new();
181 hasher.update(&content);
182
183 if let Ok(modified) = metadata.modified() {
185 if let Ok(duration) = modified.duration_since(UNIX_EPOCH) {
186 hasher.update(&duration.as_secs().to_le_bytes());
187 }
188 }
189
190 Ok(hasher.finalize().to_hex().to_string())
191 }
192
193 fn is_expired(&self, cached_at: &DateTime<Utc>) -> bool {
194 let now = Utc::now();
195 let elapsed = now.signed_duration_since(*cached_at);
196 elapsed.num_seconds() > self.expiration_duration.as_secs() as i64
197 }
198
199 fn estimate_document_size(&self, document: &Document) -> usize {
200 document.html.len()
202 + document.title.len()
203 + document.source_path.to_string_lossy().len()
204 + document.output_path.to_string_lossy().len()
205 + 1024 }
207
208 fn evict_if_needed(&self, new_size: usize) -> Result<()> {
209 let current_size_mb = self.size_mb();
210 let new_size_mb = (new_size as f64) / 1024.0 / 1024.0;
211
212 if current_size_mb + new_size_mb > self.max_size_mb as f64 {
213 self.evict_lru_entries(new_size_mb)?;
214 }
215
216 Ok(())
217 }
218
219 fn evict_lru_entries(&self, space_needed_mb: f64) -> Result<()> {
220 let mut entries: Vec<_> = self
221 .documents
222 .iter()
223 .map(|entry| {
224 (
225 entry.key().clone(),
226 entry.value().access_count,
227 entry.value().size_bytes,
228 )
229 })
230 .collect();
231
232 entries.sort_by_key(|(_, access_count, _)| *access_count);
234
235 let mut space_freed_mb = 0.0;
236 for (path, _, size_bytes) in entries {
237 if space_freed_mb >= space_needed_mb {
238 break;
239 }
240
241 self.documents.remove(&path);
242 self.file_hashes.write().remove(&path);
243 space_freed_mb += (size_bytes as f64) / 1024.0 / 1024.0;
244
245 debug!(
246 "Evicted {} from cache ({} MB)",
247 path.display(),
248 size_bytes as f64 / 1024.0 / 1024.0
249 );
250 }
251
252 Ok(())
253 }
254
255 fn load_from_disk(&self) -> Result<()> {
256 if !self.cache_dir.exists() {
257 return Ok(());
258 }
259
260 for entry in std::fs::read_dir(&self.cache_dir)? {
261 let entry = entry?;
262 if entry.file_type()?.is_file()
263 && entry.path().extension().is_some_and(|ext| ext == "json")
264 {
265 if let Err(e) = self.load_cache_file(&entry.path()) {
266 warn!(
267 "Failed to load cache file {}: {}",
268 entry.path().display(),
269 e
270 );
271 }
272 }
273 }
274
275 debug!("Loaded {} documents from disk cache", self.documents.len());
276 Ok(())
277 }
278
279 fn load_cache_file(&self, cache_file: &Path) -> Result<()> {
280 let content = std::fs::read_to_string(cache_file)?;
281 let cached_doc: CachedDocument = serde_json::from_str(&content)?;
282
283 if !self.is_expired(&cached_doc.cached_at) {
285 let source_path = &cached_doc.document.source_path;
286 if source_path.exists() {
287 let current_hash = self.calculate_file_hash(source_path)?;
288 if current_hash == cached_doc.hash {
289 self.documents.insert(source_path.clone(), cached_doc);
290 }
291 }
292 }
293
294 Ok(())
295 }
296
297 fn persist_to_disk(&self, file_path: &Path, _document: &Document) -> Result<()> {
298 let cache_file = self.get_cache_file_path(file_path);
299 if let Some(parent) = cache_file.parent() {
300 std::fs::create_dir_all(parent)?;
301 }
302
303 if let Some(cached_doc) = self.documents.get(file_path) {
304 let content = serde_json::to_string_pretty(&*cached_doc)?;
305 std::fs::write(&cache_file, content)?;
306 }
307
308 Ok(())
309 }
310
311 fn get_cache_file_path(&self, file_path: &Path) -> PathBuf {
312 let hash = blake3::hash(file_path.to_string_lossy().as_bytes());
313 let filename = format!("{}.json", hash.to_hex());
314 self.cache_dir.join(filename)
315 }
316}