1use std::collections::HashMap;
8use std::sync::Arc;
9
10use sheetkit_xml::shared_strings::{Si, Sst, T};
11
12use crate::rich_text::{xml_to_run, RichTextRun};
13
/// In-memory shared string table: strings addressed by index, with a reverse
/// lookup from text to index.
///
/// `strings` and `si_items` are parallel: element `i` of each describes the
/// same entry, and every method that appends an entry pushes to both.
#[derive(Debug)]
pub struct SharedStringTable {
    /// Plain text of every entry, in index order.
    strings: Vec<Arc<str>>,
    /// Reverse lookup from entry text to an index in `strings`; consulted by
    /// the `add*` methods to avoid storing the same text twice.
    index_map: HashMap<Arc<str>, usize>,
    /// The original `Si` for entries that are written back unchanged by
    /// `to_sst` (rich-text runs, or a `t` carrying an `xml_space` value);
    /// `None` for entries that are rebuilt from their plain text.
    si_items: Vec<Option<Si>>,
}
29
30impl SharedStringTable {
31 pub fn new() -> Self {
33 Self {
34 strings: Vec::new(),
35 index_map: HashMap::new(),
36 si_items: Vec::new(),
37 }
38 }
39
40 pub fn from_sst(sst: Sst) -> Self {
46 let cap = sst.items.len();
47 let mut strings = Vec::with_capacity(cap);
48 let mut index_map = HashMap::with_capacity(cap);
49 let mut si_items: Vec<Option<Si>> = Vec::with_capacity(cap);
50
51 for mut si in sst.items {
52 let is_rich = si.t.is_none() && !si.r.is_empty();
53 let has_space_attr = si.t.as_ref().is_some_and(|t| t.xml_space.is_some());
54 let preserve_si = is_rich || has_space_attr;
55
56 let text: Arc<str> = if preserve_si {
57 si_to_string(&si).into()
60 } else if let Some(ref mut t) = si.t {
61 std::mem::take(&mut t.value).into()
63 } else {
64 Arc::from("")
66 };
67
68 let idx = strings.len();
69 index_map.entry(Arc::clone(&text)).or_insert(idx);
70 if preserve_si {
71 si_items.push(Some(si));
72 } else {
73 si_items.push(None);
74 }
75 strings.push(text);
76 }
77
78 Self {
79 strings,
80 index_map,
81 si_items,
82 }
83 }
84
85 pub fn to_sst(&self) -> Sst {
90 let items: Vec<Si> = self
91 .strings
92 .iter()
93 .enumerate()
94 .map(|(idx, s)| {
95 if let Some(ref si) = self.si_items[idx] {
96 si.clone()
97 } else {
98 Si {
99 t: Some(T {
100 xml_space: if needs_space_preserve(s) {
101 Some("preserve".to_string())
102 } else {
103 None
104 },
105 value: s.to_string(),
106 }),
107 r: vec![],
108 }
109 }
110 })
111 .collect();
112
113 let len = items.len() as u32;
114 Sst {
115 xmlns: sheetkit_xml::namespaces::SPREADSHEET_ML.to_string(),
116 count: Some(len),
117 unique_count: Some(len),
118 items,
119 }
120 }
121
122 pub fn get(&self, index: usize) -> Option<&str> {
124 self.strings.get(index).map(|s| &**s)
125 }
126
127 pub fn add(&mut self, s: &str) -> usize {
131 if let Some(&idx) = self.index_map.get(s) {
132 return idx;
133 }
134 let idx = self.strings.len();
135 let rc: Arc<str> = s.into();
136 self.strings.push(Arc::clone(&rc));
137 self.index_map.insert(rc, idx);
138 self.si_items.push(None);
139 idx
140 }
141
142 pub fn add_owned(&mut self, s: String) -> usize {
147 if let Some(&idx) = self.index_map.get(s.as_str()) {
148 return idx;
149 }
150 let idx = self.strings.len();
151 let rc: Arc<str> = s.into();
152 self.index_map.insert(Arc::clone(&rc), idx);
153 self.strings.push(rc);
154 self.si_items.push(None);
155 idx
156 }
157
158 pub fn add_rich_text(&mut self, runs: &[RichTextRun]) -> usize {
162 let plain: String = runs.iter().map(|r| r.text.as_str()).collect();
163 if let Some(&idx) = self.index_map.get(plain.as_str()) {
164 return idx;
165 }
166 let idx = self.strings.len();
167 let rc: Arc<str> = plain.into();
168 self.index_map.insert(Arc::clone(&rc), idx);
169 self.strings.push(rc);
170 let si = crate::rich_text::runs_to_si(runs);
171 self.si_items.push(Some(si));
172 idx
173 }
174
175 pub fn get_rich_text(&self, index: usize) -> Option<Vec<RichTextRun>> {
179 self.si_items
180 .get(index)
181 .and_then(|opt| opt.as_ref())
182 .filter(|si| !si.r.is_empty())
183 .map(|si| si.r.iter().map(xml_to_run).collect())
184 }
185
186 pub fn len(&self) -> usize {
188 self.strings.len()
189 }
190
191 pub fn is_empty(&self) -> bool {
193 self.strings.is_empty()
194 }
195
196 pub fn clone_for_read(&self) -> Self {
202 Self {
203 strings: self.strings.clone(),
204 index_map: HashMap::new(),
205 si_items: self.si_items.clone(),
206 }
207 }
208}
209
210impl Default for SharedStringTable {
211 fn default() -> Self {
212 Self::new()
213 }
214}
215
/// Decides whether a plain string should be written with
/// `xml_space = "preserve"`.
///
/// Returns `true` when `s` starts or ends with a space, matches the
/// `contains(" ")` check below, or contains a newline or a tab.
///
/// NOTE(review): as written, `contains(" ")` already matches a space anywhere
/// in the string, which makes the leading/trailing checks redundant. Confirm
/// whether a run of consecutive spaces was the intended pattern.
fn needs_space_preserve(s: &str) -> bool {
    let edge_space = s.starts_with(' ') || s.ends_with(' ');
    if edge_space || s.contains(" ") {
        return true;
    }
    s.contains(['\n', '\t'])
}
224
225fn si_to_string(si: &Si) -> String {
230 if let Some(ref t) = si.t {
231 t.value.clone()
232 } else {
233 si.r.iter().map(|r| r.t.value.as_str()).collect()
235 }
236}
237
#[cfg(test)]
mod tests {
    use super::*;
    use sheetkit_xml::shared_strings::{Si, Sst, R, T};

    /// Builds a `T` with the given text and optional `xml_space` value.
    fn text_node(value: &str, xml_space: Option<&str>) -> T {
        T {
            xml_space: xml_space.map(String::from),
            value: value.to_string(),
        }
    }

    /// Plain item: a `t` without `xml_space` and no runs.
    fn plain_si(value: &str) -> Si {
        Si {
            t: Some(text_node(value, None)),
            r: vec![],
        }
    }

    /// Plain item whose `t` carries `xml_space = "preserve"`.
    fn preserved_si(value: &str) -> Si {
        Si {
            t: Some(text_node(value, Some("preserve"))),
            r: vec![],
        }
    }

    /// Rich item: no `t`, one unformatted run per element of `parts`.
    fn rich_si(parts: &[&str]) -> Si {
        Si {
            t: None,
            r: parts
                .iter()
                .map(|&part| R {
                    r_pr: None,
                    t: text_node(part, None),
                })
                .collect(),
        }
    }

    /// Wraps `items` in an `Sst` whose counts equal the number of items.
    fn sst_of(items: Vec<Si>) -> Sst {
        let total = items.len() as u32;
        Sst {
            xmlns: sheetkit_xml::namespaces::SPREADSHEET_ML.to_string(),
            count: Some(total),
            unique_count: Some(total),
            items,
        }
    }

    /// Rich-text run with only the text and bold flag set.
    fn run(text: &str, bold: bool) -> RichTextRun {
        RichTextRun {
            text: text.to_string(),
            font: None,
            size: None,
            bold,
            italic: false,
            color: None,
        }
    }

    #[test]
    fn test_sst_new_is_empty() {
        let table = SharedStringTable::new();
        assert!(table.is_empty());
        assert_eq!(table.len(), 0);
    }

    #[test]
    fn test_sst_add_returns_index() {
        let mut table = SharedStringTable::new();
        for (expected, word) in ["hello", "world", "foo"].iter().enumerate() {
            assert_eq!(table.add(word), expected);
        }
        assert_eq!(table.len(), 3);
    }

    #[test]
    fn test_sst_add_deduplicates() {
        let mut table = SharedStringTable::new();
        assert_eq!(table.add("hello"), 0);
        assert_eq!(table.add("world"), 1);
        // Adding an existing string returns its original index.
        assert_eq!(table.add("hello"), 0);
        assert_eq!(table.len(), 2);
    }

    #[test]
    fn test_sst_add_owned() {
        let mut table = SharedStringTable::new();
        assert_eq!(table.add_owned(String::from("hello")), 0);
        assert_eq!(table.add_owned(String::from("world")), 1);
        // Adding an existing string returns its original index.
        assert_eq!(table.add_owned(String::from("hello")), 0);
        assert_eq!(table.len(), 2);
        assert_eq!(table.get(0), Some("hello"));
        assert_eq!(table.get(1), Some("world"));
    }

    #[test]
    fn test_sst_get() {
        let mut table = SharedStringTable::new();
        table.add("alpha");
        table.add("beta");

        assert_eq!(table.get(0), Some("alpha"));
        assert_eq!(table.get(1), Some("beta"));
        assert_eq!(table.get(2), None);
    }

    #[test]
    fn test_sst_from_xml_and_back() {
        let xml_sst = sst_of(vec![plain_si("Name"), plain_si("Age"), plain_si("City")]);

        let table = SharedStringTable::from_sst(xml_sst);
        assert_eq!(table.len(), 3);
        assert_eq!(table.get(0), Some("Name"));
        assert_eq!(table.get(1), Some("Age"));
        assert_eq!(table.get(2), Some("City"));

        let back = table.to_sst();
        assert_eq!(back.items.len(), 3);
        assert_eq!(back.items[0].t.as_ref().unwrap().value, "Name");
        assert_eq!(back.items[1].t.as_ref().unwrap().value, "Age");
        assert_eq!(back.items[2].t.as_ref().unwrap().value, "City");
        assert_eq!(back.count, Some(3));
        assert_eq!(back.unique_count, Some(3));
    }

    #[test]
    fn test_sst_from_xml_rich_text() {
        let xml_sst = sst_of(vec![rich_si(&["Bold", " Normal"])]);

        let table = SharedStringTable::from_sst(xml_sst);
        assert_eq!(table.len(), 1);
        assert_eq!(table.get(0), Some("Bold Normal"));
    }

    #[test]
    fn test_sst_default() {
        let table = SharedStringTable::default();
        assert!(table.is_empty());
    }

    #[test]
    fn test_add_rich_text() {
        let mut table = SharedStringTable::new();
        let runs = vec![run("Hello ", true), run("World", false)];
        let idx = table.add_rich_text(&runs);
        assert_eq!(idx, 0);
        assert_eq!(table.get(0), Some("Hello World"));
        assert!(table.get_rich_text(0).is_some());
    }

    #[test]
    fn test_get_rich_text_none_for_plain() {
        let mut table = SharedStringTable::new();
        table.add("plain");
        assert!(table.get_rich_text(0).is_none());
    }

    #[test]
    fn test_rich_text_roundtrip_through_sst() {
        let xml_sst = sst_of(vec![rich_si(&["Bold", " Normal"])]);
        let table = SharedStringTable::from_sst(xml_sst);
        let back = table.to_sst();
        assert!(back.items[0].t.is_none());
        assert_eq!(back.items[0].r.len(), 2);
    }

    #[test]
    fn test_space_preserve_roundtrip() {
        let xml_sst = sst_of(vec![preserved_si(" leading space")]);
        let table = SharedStringTable::from_sst(xml_sst);
        let back = table.to_sst();
        assert_eq!(
            back.items[0].t.as_ref().unwrap().xml_space,
            Some("preserve".to_string())
        );
    }

    #[test]
    fn test_add_owned_then_to_sst() {
        let mut table = SharedStringTable::new();
        table.add_owned(String::from("test"));
        let sst = table.to_sst();
        assert_eq!(sst.items.len(), 1);
        assert_eq!(sst.items[0].t.as_ref().unwrap().value, "test");
    }

    #[test]
    fn test_from_sst_zero_copy_plain_text() {
        let xml_sst = sst_of(vec![plain_si("Alpha"), plain_si("Beta"), plain_si("Gamma")]);
        let table = SharedStringTable::from_sst(xml_sst);
        assert_eq!(table.len(), 3);
        assert_eq!(table.get(0), Some("Alpha"));
        assert_eq!(table.get(1), Some("Beta"));
        assert_eq!(table.get(2), Some("Gamma"));
        let back = table.to_sst();
        assert_eq!(back.items[0].t.as_ref().unwrap().value, "Alpha");
        assert_eq!(back.items[1].t.as_ref().unwrap().value, "Beta");
        assert_eq!(back.items[2].t.as_ref().unwrap().value, "Gamma");
    }

    #[test]
    fn test_from_sst_mixed_plain_and_rich_text() {
        let xml_sst = sst_of(vec![
            plain_si("Plain"),
            rich_si(&["Rich", " Text"]),
            preserved_si(" spaced "),
        ]);
        let table = SharedStringTable::from_sst(xml_sst);
        assert_eq!(table.len(), 3);
        assert_eq!(table.get(0), Some("Plain"));
        assert_eq!(table.get(1), Some("Rich Text"));
        assert_eq!(table.get(2), Some(" spaced "));
        assert!(table.get_rich_text(0).is_none());
        assert!(table.get_rich_text(1).is_some());
    }

    #[test]
    fn test_from_sst_empty_items() {
        let xml_sst = sst_of(vec![]);
        let table = SharedStringTable::from_sst(xml_sst);
        assert!(table.is_empty());
    }
}