sheetkit_core/
raw_transfer.rs

1//! Read-direction buffer serializer for bulk data transfer.
2//!
3//! Converts a `WorksheetXml` and `SharedStringTable` into a compact binary
4//! buffer that can be transferred to JavaScript as a single `Buffer` object,
5//! avoiding per-cell napi object creation overhead.
6//!
7//! Binary format (little-endian throughout):
8//!
9//! ```text
10//! HEADER (16 bytes)
11//!   magic:     u32  = 0x534B5244 ("SKRD")
12//!   version:   u16  = 1
13//!   row_count: u32  = number of rows
14//!   col_count: u16  = number of columns
//!   flags:     u32  = bit 0: 1=sparse, 0=dense; bits 16-31: low 16 bits of the first populated column number
16//!
17//! ROW INDEX (row_count * 8 bytes)
18//!   per row: row_number (u32) + offset (u32) into CELL DATA
19//!   offset = 0xFFFFFFFF for empty rows
20//!
21//! STRING TABLE
22//!   count:     u32
23//!   blob_size: u32
24//!   offsets:   u32[count] (byte offset within blob)
25//!   blob:      concatenated UTF-8 strings (blob_size bytes)
26//!
27//! CELL DATA
28//!   Dense:  row_count * col_count * 9 bytes
29//!     per cell: type (u8) + payload (8 bytes)
30//!   Sparse: variable length
31//!     per row: cell_count (u16) + cell_count * 11 bytes
32//!       per cell: col (u16) + type (u8) + payload (8 bytes)
33//! ```
34
35use std::collections::HashMap;
36
37use crate::error::Result;
38use crate::sst::SharedStringTable;
39use crate::utils::cell_ref::cell_name_to_coordinates;
40use sheetkit_xml::worksheet::{CellTypeTag, WorksheetXml};
41
/// Magic number stored in the first header field ("SKRD" per the format description).
pub const MAGIC: u32 = 0x534B5244;
/// Format version written to the header.
pub const VERSION: u16 = 1;
/// Size of the fixed header in bytes: magic (4) + version (2) + row_count (4) + col_count (2) + flags (4).
pub const HEADER_SIZE: usize = 16;
/// Bytes per cell in the dense layout: type (1) + payload (8).
pub const CELL_STRIDE: usize = 9;

// Cell type tags stored in the one-byte `type` field of every cell record.

/// No value; also used when a cell's value is missing or cannot be parsed.
pub const TYPE_EMPTY: u8 = 0x00;
/// Numeric cell; the payload is the value as a little-endian `f64`.
pub const TYPE_NUMBER: u8 = 0x01;
/// String cell; the first four payload bytes are a string-table index.
pub const TYPE_STRING: u8 = 0x02;
/// Boolean cell; the first payload byte is 1 for true and 0 for false.
pub const TYPE_BOOL: u8 = 0x03;
/// Date cell; the payload is the value as a little-endian `f64`.
pub const TYPE_DATE: u8 = 0x04;
/// Error cell; the first four payload bytes index the error text in the string table.
pub const TYPE_ERROR: u8 = 0x05;
/// Formula cell; the first four payload bytes index the formula expression in the string table.
pub const TYPE_FORMULA: u8 = 0x06;
/// Shared-string cell whose SST entry has rich text; the payload indexes its plain text.
pub const TYPE_RICH_STRING: u8 = 0x07;

/// Header flag bit 0: set when the cell data uses the sparse layout.
pub const FLAG_SPARSE: u32 = 0x01;

/// Sheets whose populated fraction of the bounding rectangle is below this use the sparse layout.
const SPARSE_DENSITY_THRESHOLD: f64 = 0.30;
/// Row-index offset written for rows that contain no cells.
const EMPTY_ROW_OFFSET: u32 = 0xFFFF_FFFF;
/// Bytes per cell in the sparse layout: col (2) + type (1) + payload (8).
const SPARSE_CELL_STRIDE: usize = 11;
61
62/// Serialize a worksheet's cell data into a compact binary buffer.
63///
64/// Reads cell data directly from `WorksheetXml` sheet data, resolving shared
65/// string references via `sst`. The resulting buffer uses either dense or
66/// sparse layout depending on cell density relative to the bounding rectangle.
67pub fn sheet_to_raw_buffer(ws: &WorksheetXml, sst: &SharedStringTable) -> Result<Vec<u8>> {
68    let rows = &ws.sheet_data.rows;
69
70    if rows.is_empty() {
71        return Ok(write_empty_buffer());
72    }
73
74    let (min_row, max_row, min_col, max_col, total_cells) = scan_dimensions(ws)?;
75
76    if total_cells == 0 {
77        return Ok(write_empty_buffer());
78    }
79
80    let row_count = (max_row - min_row + 1) as usize;
81    let col_count = (max_col - min_col + 1) as usize;
82
83    let total_grid = row_count * col_count;
84    let density = total_cells as f64 / total_grid as f64;
85    let sparse = density < SPARSE_DENSITY_THRESHOLD;
86
87    let mut string_table = StringTableBuilder::from_sst(sst);
88
89    let cell_entries = collect_cell_entries(ws, sst, min_col, &mut string_table)?;
90
91    let flags: u32 = (if sparse { FLAG_SPARSE } else { 0 }) | ((min_col & 0xFFFF) << 16);
92
93    let row_index_size = row_count * 8;
94    let string_section = string_table.encode();
95    let cell_data = if sparse {
96        encode_sparse_cells(&cell_entries, min_row, max_row)
97    } else {
98        encode_dense_cells(&cell_entries, min_row, row_count, col_count)
99    };
100    let row_index = build_row_index(&cell_entries, min_row, max_row, col_count, sparse);
101
102    let total_size = HEADER_SIZE + row_index_size + string_section.len() + cell_data.len();
103    let mut buf = Vec::with_capacity(total_size);
104
105    buf.extend_from_slice(&MAGIC.to_le_bytes());
106    buf.extend_from_slice(&VERSION.to_le_bytes());
107    buf.extend_from_slice(&(row_count as u32).to_le_bytes());
108    buf.extend_from_slice(&(col_count as u16).to_le_bytes());
109    buf.extend_from_slice(&flags.to_le_bytes());
110
111    buf.extend_from_slice(&row_index);
112    buf.extend_from_slice(&string_section);
113    buf.extend_from_slice(&cell_data);
114
115    Ok(buf)
116}
117
118fn write_empty_buffer() -> Vec<u8> {
119    let mut buf = Vec::with_capacity(HEADER_SIZE);
120    buf.extend_from_slice(&MAGIC.to_le_bytes());
121    buf.extend_from_slice(&VERSION.to_le_bytes());
122    buf.extend_from_slice(&0u32.to_le_bytes());
123    buf.extend_from_slice(&0u16.to_le_bytes());
124    buf.extend_from_slice(&0u32.to_le_bytes());
125    buf
126}
127
/// One encoded cell, ready to be written into the CELL DATA section.
struct CellEntry {
    /// Zero-based column offset relative to the sheet's minimum column.
    col: u32,
    /// One of the `TYPE_*` tags.
    type_tag: u8,
    /// Eight payload bytes whose interpretation depends on `type_tag`.
    payload: [u8; 8],
}
133
/// The encoded cells of a single worksheet row.
struct RowEntries {
    /// Row number as stored on the worksheet row (`row.r`).
    row_num: u32,
    /// Encoded cells of this row, in the order they appear in the worksheet.
    cells: Vec<CellEntry>,
}
138
139fn scan_dimensions(ws: &WorksheetXml) -> Result<(u32, u32, u32, u32, usize)> {
140    let mut min_row = u32::MAX;
141    let mut max_row = 0u32;
142    let mut min_col = u32::MAX;
143    let mut max_col = 0u32;
144    let mut total_cells = 0usize;
145
146    for row in &ws.sheet_data.rows {
147        if row.cells.is_empty() {
148            continue;
149        }
150        min_row = min_row.min(row.r);
151        max_row = max_row.max(row.r);
152
153        for cell in &row.cells {
154            let col = resolve_col(cell)?;
155            min_col = min_col.min(col);
156            max_col = max_col.max(col);
157            total_cells += 1;
158        }
159    }
160
161    if total_cells == 0 {
162        return Ok((1, 1, 1, 1, 0));
163    }
164
165    Ok((min_row, max_row, min_col, max_col, total_cells))
166}
167
168fn resolve_col(cell: &sheetkit_xml::worksheet::Cell) -> Result<u32> {
169    if cell.col > 0 {
170        return Ok(cell.col);
171    }
172    let (col, _row) = cell_name_to_coordinates(cell.r.as_str())?;
173    Ok(col)
174}
175
176fn collect_cell_entries(
177    ws: &WorksheetXml,
178    sst: &SharedStringTable,
179    min_col: u32,
180    string_table: &mut StringTableBuilder,
181) -> Result<Vec<RowEntries>> {
182    let mut result = Vec::with_capacity(ws.sheet_data.rows.len());
183
184    for row in &ws.sheet_data.rows {
185        if row.cells.is_empty() {
186            continue;
187        }
188
189        let mut cells = Vec::with_capacity(row.cells.len());
190        for cell in &row.cells {
191            let col = resolve_col(cell)?;
192            let relative_col = col - min_col;
193            let (type_tag, payload) = encode_cell_value(cell, sst, string_table)?;
194            cells.push(CellEntry {
195                col: relative_col,
196                type_tag,
197                payload,
198            });
199        }
200
201        result.push(RowEntries {
202            row_num: row.r,
203            cells,
204        });
205    }
206
207    Ok(result)
208}
209
210fn encode_cell_value(
211    cell: &sheetkit_xml::worksheet::Cell,
212    sst: &SharedStringTable,
213    string_table: &mut StringTableBuilder,
214) -> Result<(u8, [u8; 8])> {
215    let mut payload = [0u8; 8];
216
217    if cell.f.is_some() {
218        let formula_expr = cell
219            .f
220            .as_ref()
221            .and_then(|f| f.value.as_deref())
222            .unwrap_or("");
223        let idx = string_table.intern(formula_expr);
224        payload[..4].copy_from_slice(&(idx as u32).to_le_bytes());
225        return Ok((TYPE_FORMULA, payload));
226    }
227
228    match cell.t {
229        CellTypeTag::SharedString => {
230            if let Some(ref v) = cell.v {
231                if let Ok(sst_idx) = v.parse::<usize>() {
232                    let text = sst.get(sst_idx).unwrap_or("");
233                    let idx = string_table.intern(text);
234                    payload[..4].copy_from_slice(&(idx as u32).to_le_bytes());
235                    if sst.get_rich_text(sst_idx).is_some() {
236                        return Ok((TYPE_RICH_STRING, payload));
237                    }
238                    return Ok((TYPE_STRING, payload));
239                }
240            }
241            Ok((TYPE_EMPTY, payload))
242        }
243        CellTypeTag::Boolean => {
244            if let Some(ref v) = cell.v {
245                payload[0] = if v == "1" || v.eq_ignore_ascii_case("true") {
246                    1
247                } else {
248                    0
249                };
250            }
251            Ok((TYPE_BOOL, payload))
252        }
253        CellTypeTag::Error => {
254            let error_text = cell.v.as_deref().unwrap_or("#VALUE!");
255            let idx = string_table.intern(error_text);
256            payload[..4].copy_from_slice(&(idx as u32).to_le_bytes());
257            Ok((TYPE_ERROR, payload))
258        }
259        CellTypeTag::InlineString => {
260            let text = cell
261                .is
262                .as_ref()
263                .and_then(|is| is.t.as_deref())
264                .or(cell.v.as_deref())
265                .unwrap_or("");
266            let idx = string_table.intern(text);
267            payload[..4].copy_from_slice(&(idx as u32).to_le_bytes());
268            Ok((TYPE_STRING, payload))
269        }
270        CellTypeTag::Date => {
271            if let Some(ref v) = cell.v {
272                if let Ok(n) = v.parse::<f64>() {
273                    payload.copy_from_slice(&n.to_le_bytes());
274                    return Ok((TYPE_DATE, payload));
275                }
276            }
277            Ok((TYPE_EMPTY, payload))
278        }
279        CellTypeTag::FormulaString => {
280            if let Some(ref v) = cell.v {
281                let idx = string_table.intern(v);
282                payload[..4].copy_from_slice(&(idx as u32).to_le_bytes());
283                return Ok((TYPE_STRING, payload));
284            }
285            Ok((TYPE_EMPTY, payload))
286        }
287        CellTypeTag::None | CellTypeTag::Number => {
288            if let Some(ref v) = cell.v {
289                if let Ok(n) = v.parse::<f64>() {
290                    payload.copy_from_slice(&n.to_le_bytes());
291                    return Ok((TYPE_NUMBER, payload));
292                }
293            }
294            Ok((TYPE_EMPTY, payload))
295        }
296    }
297}
298
299struct StringTableBuilder {
300    strings: Vec<String>,
301    index_map: HashMap<String, usize>,
302}
303
304impl StringTableBuilder {
305    fn from_sst(sst: &SharedStringTable) -> Self {
306        let count = sst.len();
307        let mut strings = Vec::with_capacity(count);
308        let mut index_map = HashMap::with_capacity(count);
309
310        for i in 0..count {
311            if let Some(s) = sst.get(i) {
312                let owned = s.to_string();
313                index_map.entry(owned.clone()).or_insert(i);
314                strings.push(owned);
315            }
316        }
317
318        Self { strings, index_map }
319    }
320
321    fn intern(&mut self, s: &str) -> usize {
322        if let Some(&idx) = self.index_map.get(s) {
323            return idx;
324        }
325        let idx = self.strings.len();
326        self.strings.push(s.to_string());
327        self.index_map.insert(s.to_string(), idx);
328        idx
329    }
330
331    /// Encode the string table section: count(u32) + blob_size(u32) + offsets(u32[count]) + blob.
332    fn encode(&self) -> Vec<u8> {
333        let count = self.strings.len() as u32;
334        if count == 0 {
335            let mut buf = Vec::with_capacity(8);
336            buf.extend_from_slice(&0u32.to_le_bytes());
337            buf.extend_from_slice(&0u32.to_le_bytes());
338            return buf;
339        }
340
341        let mut blob = Vec::new();
342        let mut offsets = Vec::with_capacity(self.strings.len());
343        for s in &self.strings {
344            offsets.push(blob.len() as u32);
345            blob.extend_from_slice(s.as_bytes());
346        }
347        let blob_size = blob.len() as u32;
348
349        let total = 4 + 4 + self.strings.len() * 4 + blob.len();
350        let mut buf = Vec::with_capacity(total);
351        buf.extend_from_slice(&count.to_le_bytes());
352        buf.extend_from_slice(&blob_size.to_le_bytes());
353        for off in &offsets {
354            buf.extend_from_slice(&off.to_le_bytes());
355        }
356        buf.extend_from_slice(&blob);
357        buf
358    }
359}
360
361fn encode_dense_cells(
362    row_entries: &[RowEntries],
363    min_row: u32,
364    row_count: usize,
365    col_count: usize,
366) -> Vec<u8> {
367    let total = row_count * col_count * CELL_STRIDE;
368    let mut buf = vec![0u8; total];
369
370    for re in row_entries {
371        let row_offset = (re.row_num - min_row) as usize;
372        for ce in &re.cells {
373            let cell_idx = row_offset * col_count + ce.col as usize;
374            let pos = cell_idx * CELL_STRIDE;
375            buf[pos] = ce.type_tag;
376            buf[pos + 1..pos + 9].copy_from_slice(&ce.payload);
377        }
378    }
379
380    buf
381}
382
383fn encode_sparse_cells(row_entries: &[RowEntries], min_row: u32, max_row: u32) -> Vec<u8> {
384    let total_rows = (max_row - min_row + 1) as usize;
385    let mut entries_by_row: Vec<Option<&RowEntries>> = vec![None; total_rows];
386    for re in row_entries {
387        let idx = (re.row_num - min_row) as usize;
388        entries_by_row[idx] = Some(re);
389    }
390
391    let mut buf = Vec::new();
392    for entry in &entries_by_row {
393        match entry {
394            Some(re) => {
395                let count = re.cells.len() as u16;
396                buf.extend_from_slice(&count.to_le_bytes());
397                for ce in &re.cells {
398                    buf.extend_from_slice(&(ce.col as u16).to_le_bytes());
399                    buf.push(ce.type_tag);
400                    buf.extend_from_slice(&ce.payload);
401                }
402            }
403            None => {
404                buf.extend_from_slice(&0u16.to_le_bytes());
405            }
406        }
407    }
408
409    buf
410}
411
412fn build_row_index(
413    row_entries: &[RowEntries],
414    min_row: u32,
415    max_row: u32,
416    col_count: usize,
417    sparse: bool,
418) -> Vec<u8> {
419    let total_rows = (max_row - min_row + 1) as usize;
420    let mut index = Vec::with_capacity(total_rows * 8);
421
422    let mut entries_map: HashMap<u32, &RowEntries> = HashMap::new();
423    for re in row_entries {
424        entries_map.insert(re.row_num, re);
425    }
426
427    if sparse {
428        let mut sparse_offset = 0u32;
429        for row_num in min_row..=max_row {
430            index.extend_from_slice(&row_num.to_le_bytes());
431            if let Some(re) = entries_map.get(&row_num) {
432                if re.cells.is_empty() {
433                    index.extend_from_slice(&EMPTY_ROW_OFFSET.to_le_bytes());
434                    sparse_offset += 2;
435                } else {
436                    index.extend_from_slice(&sparse_offset.to_le_bytes());
437                    sparse_offset += 2 + (re.cells.len() as u32) * SPARSE_CELL_STRIDE as u32;
438                }
439            } else {
440                index.extend_from_slice(&EMPTY_ROW_OFFSET.to_le_bytes());
441                sparse_offset += 2;
442            }
443        }
444    } else {
445        for row_num in min_row..=max_row {
446            index.extend_from_slice(&row_num.to_le_bytes());
447            if entries_map.contains_key(&row_num) {
448                let offset = ((row_num - min_row) as usize * col_count * CELL_STRIDE) as u32;
449                index.extend_from_slice(&offset.to_le_bytes());
450            } else {
451                index.extend_from_slice(&EMPTY_ROW_OFFSET.to_le_bytes());
452            }
453        }
454    }
455
456    index
457}
458
459#[cfg(test)]
460#[allow(
461    clippy::field_reassign_with_default,
462    clippy::needless_lifetimes,
463    clippy::approx_constant
464)]
465mod tests {
466    use super::*;
467    use sheetkit_xml::worksheet::{
468        Cell, CellFormula, CellTypeTag, CompactCellRef, InlineString, Row, SheetData, WorksheetXml,
469    };
470
471    fn make_cell(col_ref: &str, col_num: u32, t: CellTypeTag, v: Option<&str>) -> Cell {
472        Cell {
473            r: CompactCellRef::new(col_ref),
474            col: col_num,
475            s: None,
476            t,
477            v: v.map(|s| s.to_string()),
478            f: None,
479            is: None,
480        }
481    }
482
    /// Build a test row numbered `row_num` that holds `cells`; every optional
    /// row attribute is left as `None`.
    fn make_row(row_num: u32, cells: Vec<Cell>) -> Row {
        Row {
            r: row_num,
            spans: None,
            s: None,
            custom_format: None,
            ht: None,
            hidden: None,
            custom_height: None,
            outline_level: None,
            cells,
        }
    }
496
497    fn make_worksheet(rows: Vec<Row>) -> WorksheetXml {
498        let mut ws = WorksheetXml::default();
499        ws.sheet_data = SheetData { rows };
500        ws
501    }
502
503    fn make_sst(strings: &[&str]) -> SharedStringTable {
504        let mut sst = SharedStringTable::new();
505        for s in strings {
506            sst.add(s);
507        }
508        sst
509    }
510
511    fn read_u32_le(buf: &[u8], offset: usize) -> u32 {
512        u32::from_le_bytes(buf[offset..offset + 4].try_into().unwrap())
513    }
514
515    fn read_u16_le(buf: &[u8], offset: usize) -> u16 {
516        u16::from_le_bytes(buf[offset..offset + 2].try_into().unwrap())
517    }
518
519    /// Parse a buffer and return (row_index_end, string_section_end, cell_data_start, flags).
520    fn parse_sections(buf: &[u8]) -> (usize, usize, usize, u32) {
521        let row_count = read_u32_le(buf, 6) as usize;
522        let flags = read_u32_le(buf, 12);
523        let row_index_end = HEADER_SIZE + row_count * 8;
524        let string_count = read_u32_le(buf, row_index_end) as usize;
525        let blob_size = read_u32_le(buf, row_index_end + 4) as usize;
526        let string_section_end = row_index_end + 8 + string_count * 4 + blob_size;
527        (row_index_end, string_section_end, string_section_end, flags)
528    }
529
530    /// Read a string from the string table by index.
531    fn read_string(buf: &[u8], string_section_start: usize, idx: usize) -> String {
532        let count = read_u32_le(buf, string_section_start) as usize;
533        let blob_size = read_u32_le(buf, string_section_start + 4) as usize;
534        assert!(
535            idx < count,
536            "string index {idx} out of range (count={count})"
537        );
538        let offsets_start = string_section_start + 8;
539        let blob_start = offsets_start + count * 4;
540
541        let start = read_u32_le(buf, offsets_start + idx * 4) as usize;
542        let end = if idx + 1 < count {
543            read_u32_le(buf, offsets_start + (idx + 1) * 4) as usize
544        } else {
545            blob_size
546        };
547        String::from_utf8(buf[blob_start + start..blob_start + end].to_vec()).unwrap()
548    }
549
550    /// Read the cell type tag and payload from cell data at a given position.
551    fn read_cell_at(
552        buf: &[u8],
553        cell_data_start: usize,
554        is_sparse: bool,
555        cell_index: usize,
556    ) -> (u8, &[u8]) {
557        if is_sparse {
558            panic!("use read_sparse_row for sparse format");
559        }
560        let pos = cell_data_start + cell_index * CELL_STRIDE;
561        (buf[pos], &buf[pos + 1..pos + 9])
562    }
563
564    /// Read sparse row: returns vec of (col, type_tag, payload_slice).
565    fn read_sparse_row<'a>(buf: &'a [u8], row_offset: usize) -> Vec<(u16, u8, &'a [u8])> {
566        let cell_count = read_u16_le(buf, row_offset) as usize;
567        let mut result = Vec::with_capacity(cell_count);
568        let mut pos = row_offset + 2;
569        for _ in 0..cell_count {
570            let col = read_u16_le(buf, pos);
571            let type_tag = buf[pos + 2];
572            let payload = &buf[pos + 3..pos + 11];
573            result.push((col, type_tag, payload));
574            pos += SPARSE_CELL_STRIDE;
575        }
576        result
577    }
578
579    #[test]
580    fn test_empty_sheet() {
581        let ws = make_worksheet(vec![]);
582        let sst = SharedStringTable::new();
583        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
584
585        assert_eq!(buf.len(), HEADER_SIZE);
586        assert_eq!(read_u32_le(&buf, 0), MAGIC);
587        assert_eq!(read_u16_le(&buf, 4), VERSION);
588        assert_eq!(read_u32_le(&buf, 6), 0);
589        assert_eq!(read_u16_le(&buf, 10), 0);
590        assert_eq!(read_u32_le(&buf, 12), 0);
591    }
592
593    #[test]
594    fn test_single_number_cell() {
595        let ws = make_worksheet(vec![make_row(
596            1,
597            vec![make_cell("A1", 1, CellTypeTag::None, Some("42.5"))],
598        )]);
599        let sst = SharedStringTable::new();
600        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
601
602        assert_eq!(read_u32_le(&buf, 0), MAGIC);
603        let row_count = read_u32_le(&buf, 6);
604        let col_count = read_u16_le(&buf, 10) as usize;
605        assert_eq!(row_count, 1);
606        assert_eq!(col_count, 1);
607
608        let (st_start, _, cd_start, flags) = parse_sections(&buf);
609        let is_sparse = flags & FLAG_SPARSE != 0;
610
611        if is_sparse {
612            let cells = read_sparse_row(&buf, cd_start);
613            assert_eq!(cells.len(), 1);
614            assert_eq!(cells[0].1, TYPE_NUMBER);
615            let val = f64::from_le_bytes(cells[0].2.try_into().unwrap());
616            assert!((val - 42.5).abs() < f64::EPSILON);
617        } else {
618            let (tag, payload) = read_cell_at(&buf, cd_start, false, 0);
619            assert_eq!(tag, TYPE_NUMBER);
620            let val = f64::from_le_bytes(payload.try_into().unwrap());
621            assert!((val - 42.5).abs() < f64::EPSILON);
622        }
623
624        let _ = st_start;
625    }
626
627    #[test]
628    fn test_string_cell_sst() {
629        let sst = make_sst(&["Hello", "World"]);
630        let ws = make_worksheet(vec![make_row(
631            1,
632            vec![make_cell("A1", 1, CellTypeTag::SharedString, Some("1"))],
633        )]);
634        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
635
636        let (st_start, _, cd_start, flags) = parse_sections(&buf);
637        let is_sparse = flags & FLAG_SPARSE != 0;
638
639        let string_count = read_u32_le(&buf, st_start);
640        assert!(string_count >= 2);
641
642        let s0 = read_string(&buf, st_start, 0);
643        assert_eq!(s0, "Hello");
644        let s1 = read_string(&buf, st_start, 1);
645        assert_eq!(s1, "World");
646
647        let str_idx = if is_sparse {
648            let cells = read_sparse_row(&buf, cd_start);
649            assert_eq!(cells[0].1, TYPE_STRING);
650            u32::from_le_bytes(cells[0].2[..4].try_into().unwrap()) as usize
651        } else {
652            let (tag, payload) = read_cell_at(&buf, cd_start, false, 0);
653            assert_eq!(tag, TYPE_STRING);
654            u32::from_le_bytes(payload[..4].try_into().unwrap()) as usize
655        };
656        assert_eq!(str_idx, 1, "should reference SST index 1 = 'World'");
657    }
658
659    #[test]
660    fn test_bool_cell() {
661        let sst = SharedStringTable::new();
662        let ws = make_worksheet(vec![make_row(
663            1,
664            vec![
665                make_cell("A1", 1, CellTypeTag::Boolean, Some("1")),
666                make_cell("B1", 2, CellTypeTag::Boolean, Some("0")),
667            ],
668        )]);
669        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
670
671        let col_count = read_u16_le(&buf, 10) as usize;
672        assert_eq!(col_count, 2);
673
674        let (_, _, cd_start, flags) = parse_sections(&buf);
675        let is_sparse = flags & FLAG_SPARSE != 0;
676
677        if is_sparse {
678            let cells = read_sparse_row(&buf, cd_start);
679            assert_eq!(cells.len(), 2);
680            assert_eq!(cells[0].1, TYPE_BOOL);
681            assert_eq!(cells[0].2[0], 1);
682            assert_eq!(cells[1].1, TYPE_BOOL);
683            assert_eq!(cells[1].2[0], 0);
684        } else {
685            let (tag0, payload0) = read_cell_at(&buf, cd_start, false, 0);
686            assert_eq!(tag0, TYPE_BOOL);
687            assert_eq!(payload0[0], 1);
688            let (tag1, payload1) = read_cell_at(&buf, cd_start, false, 1);
689            assert_eq!(tag1, TYPE_BOOL);
690            assert_eq!(payload1[0], 0);
691        }
692    }
693
694    #[test]
695    fn test_error_cell() {
696        let sst = SharedStringTable::new();
697        let ws = make_worksheet(vec![make_row(
698            1,
699            vec![make_cell("A1", 1, CellTypeTag::Error, Some("#DIV/0!"))],
700        )]);
701        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
702
703        let (st_start, _, cd_start, flags) = parse_sections(&buf);
704        let is_sparse = flags & FLAG_SPARSE != 0;
705
706        let string_count = read_u32_le(&buf, st_start);
707        assert!(string_count >= 1);
708
709        let type_tag = if is_sparse {
710            let cells = read_sparse_row(&buf, cd_start);
711            cells[0].1
712        } else {
713            let (tag, _) = read_cell_at(&buf, cd_start, false, 0);
714            tag
715        };
716        assert_eq!(type_tag, TYPE_ERROR);
717
718        let error_str = read_string(&buf, st_start, 0);
719        assert_eq!(error_str, "#DIV/0!");
720    }
721
722    #[test]
723    fn test_formula_cell() {
724        let sst = SharedStringTable::new();
725        let mut cell = make_cell("A1", 1, CellTypeTag::None, Some("84"));
726        cell.f = Some(Box::new(CellFormula {
727            t: None,
728            reference: None,
729            si: None,
730            value: Some("A2+B2".to_string()),
731        }));
732        let ws = make_worksheet(vec![make_row(1, vec![cell])]);
733        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
734
735        let (st_start, _, cd_start, flags) = parse_sections(&buf);
736        let is_sparse = flags & FLAG_SPARSE != 0;
737
738        let (type_tag, str_idx) = if is_sparse {
739            let cells = read_sparse_row(&buf, cd_start);
740            let idx = u32::from_le_bytes(cells[0].2[..4].try_into().unwrap()) as usize;
741            (cells[0].1, idx)
742        } else {
743            let (tag, payload) = read_cell_at(&buf, cd_start, false, 0);
744            let idx = u32::from_le_bytes(payload[..4].try_into().unwrap()) as usize;
745            (tag, idx)
746        };
747
748        assert_eq!(type_tag, TYPE_FORMULA);
749        let formula = read_string(&buf, st_start, str_idx);
750        assert_eq!(formula, "A2+B2");
751    }
752
753    #[test]
754    fn test_inline_string_cell() {
755        let sst = SharedStringTable::new();
756        let mut cell = make_cell("A1", 1, CellTypeTag::InlineString, None);
757        cell.is = Some(Box::new(InlineString {
758            t: Some("Inline Text".to_string()),
759        }));
760        let ws = make_worksheet(vec![make_row(1, vec![cell])]);
761        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
762
763        let (st_start, _, cd_start, flags) = parse_sections(&buf);
764        let is_sparse = flags & FLAG_SPARSE != 0;
765
766        let (type_tag, str_idx) = if is_sparse {
767            let cells = read_sparse_row(&buf, cd_start);
768            let idx = u32::from_le_bytes(cells[0].2[..4].try_into().unwrap()) as usize;
769            (cells[0].1, idx)
770        } else {
771            let (tag, payload) = read_cell_at(&buf, cd_start, false, 0);
772            let idx = u32::from_le_bytes(payload[..4].try_into().unwrap()) as usize;
773            (tag, idx)
774        };
775
776        assert_eq!(type_tag, TYPE_STRING);
777        let text = read_string(&buf, st_start, str_idx);
778        assert_eq!(text, "Inline Text");
779    }
780
781    #[test]
782    fn test_date_cell() {
783        let sst = SharedStringTable::new();
784        let ws = make_worksheet(vec![make_row(
785            1,
786            vec![make_cell("A1", 1, CellTypeTag::Date, Some("44927.0"))],
787        )]);
788        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
789
790        let (_, _, cd_start, flags) = parse_sections(&buf);
791        let is_sparse = flags & FLAG_SPARSE != 0;
792
793        if is_sparse {
794            let cells = read_sparse_row(&buf, cd_start);
795            assert_eq!(cells[0].1, TYPE_DATE);
796            let val = f64::from_le_bytes(cells[0].2.try_into().unwrap());
797            assert!((val - 44927.0).abs() < f64::EPSILON);
798        } else {
799            let (tag, payload) = read_cell_at(&buf, cd_start, false, 0);
800            assert_eq!(tag, TYPE_DATE);
801            let val = f64::from_le_bytes(payload.try_into().unwrap());
802            assert!((val - 44927.0).abs() < f64::EPSILON);
803        }
804    }
805
806    #[test]
807    fn test_mixed_types_row() {
808        let sst = make_sst(&["Hello"]);
809        let ws = make_worksheet(vec![make_row(
810            1,
811            vec![
812                make_cell("A1", 1, CellTypeTag::None, Some("3.14")),
813                make_cell("B1", 2, CellTypeTag::SharedString, Some("0")),
814                make_cell("C1", 3, CellTypeTag::Boolean, Some("1")),
815            ],
816        )]);
817        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
818
819        let col_count = read_u16_le(&buf, 10) as usize;
820        assert_eq!(col_count, 3);
821
822        let (_, _, cd_start, flags) = parse_sections(&buf);
823        let is_sparse = flags & FLAG_SPARSE != 0;
824
825        if is_sparse {
826            let cells = read_sparse_row(&buf, cd_start);
827            assert_eq!(cells.len(), 3);
828            assert_eq!(cells[0].1, TYPE_NUMBER);
829            assert_eq!(cells[1].1, TYPE_STRING);
830            assert_eq!(cells[2].1, TYPE_BOOL);
831        } else {
832            let (t0, p0) = read_cell_at(&buf, cd_start, false, 0);
833            assert_eq!(t0, TYPE_NUMBER);
834            let val = f64::from_le_bytes(p0.try_into().unwrap());
835            assert!((val - 3.14).abs() < f64::EPSILON);
836
837            let (t1, _) = read_cell_at(&buf, cd_start, false, 1);
838            assert_eq!(t1, TYPE_STRING);
839
840            let (t2, p2) = read_cell_at(&buf, cd_start, false, 2);
841            assert_eq!(t2, TYPE_BOOL);
842            assert_eq!(p2[0], 1);
843        }
844    }
845
846    #[test]
847    fn test_dense_format() {
848        let sst = SharedStringTable::new();
849        let mut rows = Vec::new();
850        for r in 1..=5u32 {
851            let mut cells = Vec::new();
852            for c in 1..=5u32 {
853                let col_letter = match c {
854                    1 => "A",
855                    2 => "B",
856                    3 => "C",
857                    4 => "D",
858                    5 => "E",
859                    _ => unreachable!(),
860                };
861                let cell_ref = format!("{col_letter}{r}");
862                cells.push(make_cell(
863                    &cell_ref,
864                    c,
865                    CellTypeTag::None,
866                    Some(&format!("{}", r * 10 + c)),
867                ));
868            }
869            rows.push(make_row(r, cells));
870        }
871        let ws = make_worksheet(rows);
872        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
873
874        let flags = read_u32_le(&buf, 12);
875        assert_eq!(
876            flags & FLAG_SPARSE,
877            0,
878            "5x5 fully populated should be dense"
879        );
880
881        let row_count = read_u32_le(&buf, 6) as usize;
882        let col_count = read_u16_le(&buf, 10) as usize;
883        assert_eq!(row_count, 5);
884        assert_eq!(col_count, 5);
885
886        let (_, _, cd_start, _) = parse_sections(&buf);
887        let cell_data_size = row_count * col_count * CELL_STRIDE;
888        assert_eq!(
889            buf.len() - cd_start,
890            cell_data_size,
891            "dense cell data should be exactly row_count * col_count * CELL_STRIDE"
892        );
893
894        for r in 0..5usize {
895            for c in 0..5usize {
896                let idx = r * col_count + c;
897                let (tag, payload) = read_cell_at(&buf, cd_start, false, idx);
898                assert_eq!(tag, TYPE_NUMBER);
899                let val = f64::from_le_bytes(payload.try_into().unwrap());
900                let expected = ((r + 1) * 10 + (c + 1)) as f64;
901                assert!(
902                    (val - expected).abs() < f64::EPSILON,
903                    "cell ({r},{c}) expected {expected}, got {val}"
904                );
905            }
906        }
907    }
908
909    #[test]
910    fn test_sparse_format() {
911        let sst = SharedStringTable::new();
912        let rows = vec![
913            make_row(1, vec![make_cell("A1", 1, CellTypeTag::None, Some("1"))]),
914            make_row(
915                100,
916                vec![make_cell("T100", 20, CellTypeTag::None, Some("2"))],
917            ),
918        ];
919        let ws = make_worksheet(rows);
920        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
921
922        let flags = read_u32_le(&buf, 12);
923        assert_ne!(
924            flags & FLAG_SPARSE,
925            0,
926            "2 cells in 100x20 grid should be sparse"
927        );
928
929        let row_count = read_u32_le(&buf, 6) as usize;
930        assert_eq!(row_count, 100);
931        let col_count = read_u16_le(&buf, 10) as usize;
932        assert_eq!(col_count, 20);
933    }
934
935    #[test]
936    fn test_header_format() {
937        let sst = SharedStringTable::new();
938        let ws = make_worksheet(vec![
939            make_row(
940                2,
941                vec![
942                    make_cell("B2", 2, CellTypeTag::None, Some("10")),
943                    make_cell("D2", 4, CellTypeTag::None, Some("20")),
944                ],
945            ),
946            make_row(5, vec![make_cell("C5", 3, CellTypeTag::None, Some("30"))]),
947        ]);
948        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
949
950        assert_eq!(read_u32_le(&buf, 0), MAGIC);
951        assert_eq!(read_u16_le(&buf, 4), VERSION);
952        let row_count = read_u32_le(&buf, 6);
953        assert_eq!(row_count, 4, "rows 2-5 = 4 rows");
954        let col_count = read_u16_le(&buf, 10);
955        assert_eq!(col_count, 3, "cols B-D = 3 columns");
956    }
957
958    #[test]
959    fn test_string_table_format() {
960        let sst = make_sst(&["Alpha", "Beta", "Gamma"]);
961        let ws = make_worksheet(vec![make_row(
962            1,
963            vec![
964                make_cell("A1", 1, CellTypeTag::SharedString, Some("0")),
965                make_cell("B1", 2, CellTypeTag::SharedString, Some("2")),
966            ],
967        )]);
968        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
969
970        let (st_start, _, _, _) = parse_sections(&buf);
971        let string_count = read_u32_le(&buf, st_start) as usize;
972        assert_eq!(string_count, 3, "all SST strings should be in table");
973
974        let s0 = read_string(&buf, st_start, 0);
975        let s1 = read_string(&buf, st_start, 1);
976        let s2 = read_string(&buf, st_start, 2);
977        assert_eq!(s0, "Alpha");
978        assert_eq!(s1, "Beta");
979        assert_eq!(s2, "Gamma");
980    }
981
982    #[test]
983    fn test_large_sheet_dimensions() {
984        let sst = SharedStringTable::new();
985        let mut rows = Vec::new();
986        for r in [1u32, 500, 1000] {
987            let mut cells = Vec::new();
988            for c in [1u32, 10, 50] {
989                let col_name = crate::utils::cell_ref::column_number_to_name(c).unwrap();
990                let cell_ref = format!("{col_name}{r}");
991                cells.push(make_cell(
992                    &cell_ref,
993                    c,
994                    CellTypeTag::None,
995                    Some(&format!("{}", r + c)),
996                ));
997            }
998            rows.push(make_row(r, cells));
999        }
1000        let ws = make_worksheet(rows);
1001        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1002
1003        let row_count = read_u32_le(&buf, 6);
1004        assert_eq!(row_count, 1000, "rows 1-1000 = 1000 rows");
1005        let col_count = read_u16_le(&buf, 10);
1006        assert_eq!(col_count, 50, "cols A-AX = 50 columns");
1007
1008        let flags = read_u32_le(&buf, 12);
1009        assert_ne!(
1010            flags & FLAG_SPARSE,
1011            0,
1012            "9 cells in 1000x50 should be sparse"
1013        );
1014    }
1015
1016    #[test]
1017    fn test_row_index_entries() {
1018        let sst = SharedStringTable::new();
1019        let ws = make_worksheet(vec![
1020            make_row(1, vec![make_cell("A1", 1, CellTypeTag::None, Some("1"))]),
1021            make_row(3, vec![make_cell("A3", 1, CellTypeTag::None, Some("3"))]),
1022        ]);
1023        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1024
1025        let row_count = read_u32_le(&buf, 6) as usize;
1026        assert_eq!(row_count, 3);
1027
1028        let row_index_start = HEADER_SIZE;
1029        let row1_num = read_u32_le(&buf, row_index_start);
1030        let row1_offset = read_u32_le(&buf, row_index_start + 4);
1031        assert_eq!(row1_num, 1);
1032        assert_ne!(row1_offset, EMPTY_ROW_OFFSET);
1033
1034        let row2_num = read_u32_le(&buf, row_index_start + 8);
1035        let row2_offset = read_u32_le(&buf, row_index_start + 12);
1036        assert_eq!(row2_num, 2);
1037        assert_eq!(row2_offset, EMPTY_ROW_OFFSET, "row 2 has no data");
1038
1039        let row3_num = read_u32_le(&buf, row_index_start + 16);
1040        let row3_offset = read_u32_le(&buf, row_index_start + 20);
1041        assert_eq!(row3_num, 3);
1042        assert_ne!(row3_offset, EMPTY_ROW_OFFSET);
1043    }
1044
1045    #[test]
1046    fn test_string_deduplication() {
1047        let sst = SharedStringTable::new();
1048        let ws = make_worksheet(vec![make_row(
1049            1,
1050            vec![
1051                make_cell("A1", 1, CellTypeTag::Error, Some("#N/A")),
1052                make_cell("B1", 2, CellTypeTag::Error, Some("#N/A")),
1053            ],
1054        )]);
1055        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1056
1057        let (st_start, _, _, _) = parse_sections(&buf);
1058        let string_count = read_u32_le(&buf, st_start) as usize;
1059        assert_eq!(
1060            string_count, 1,
1061            "duplicate error strings should be deduplicated"
1062        );
1063        let s = read_string(&buf, st_start, 0);
1064        assert_eq!(s, "#N/A");
1065    }
1066
1067    #[test]
1068    fn test_formula_string_type() {
1069        let sst = SharedStringTable::new();
1070        let ws = make_worksheet(vec![make_row(
1071            1,
1072            vec![make_cell(
1073                "A1",
1074                1,
1075                CellTypeTag::FormulaString,
1076                Some("computed"),
1077            )],
1078        )]);
1079        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1080
1081        let (_, _, cd_start, flags) = parse_sections(&buf);
1082        let is_sparse = flags & FLAG_SPARSE != 0;
1083
1084        if is_sparse {
1085            let cells = read_sparse_row(&buf, cd_start);
1086            assert_eq!(cells[0].1, TYPE_STRING);
1087        } else {
1088            let (tag, _) = read_cell_at(&buf, cd_start, false, 0);
1089            assert_eq!(tag, TYPE_STRING);
1090        }
1091    }
1092
1093    #[test]
1094    fn test_number_with_explicit_type() {
1095        let sst = SharedStringTable::new();
1096        let ws = make_worksheet(vec![make_row(
1097            1,
1098            vec![make_cell("A1", 1, CellTypeTag::Number, Some("99.9"))],
1099        )]);
1100        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1101
1102        let (_, _, cd_start, flags) = parse_sections(&buf);
1103        let is_sparse = flags & FLAG_SPARSE != 0;
1104
1105        if is_sparse {
1106            let cells = read_sparse_row(&buf, cd_start);
1107            assert_eq!(cells[0].1, TYPE_NUMBER);
1108            let val = f64::from_le_bytes(cells[0].2.try_into().unwrap());
1109            assert!((val - 99.9).abs() < f64::EPSILON);
1110        } else {
1111            let (tag, payload) = read_cell_at(&buf, cd_start, false, 0);
1112            assert_eq!(tag, TYPE_NUMBER);
1113            let val = f64::from_le_bytes(payload.try_into().unwrap());
1114            assert!((val - 99.9).abs() < f64::EPSILON);
1115        }
1116    }
1117
1118    #[test]
1119    fn test_cell_without_col_uses_ref_parsing() {
1120        let sst = SharedStringTable::new();
1121        let mut cell = make_cell("C5", 0, CellTypeTag::None, Some("42"));
1122        cell.col = 0;
1123        let ws = make_worksheet(vec![make_row(5, vec![cell])]);
1124        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1125
1126        let row_count = read_u32_le(&buf, 6);
1127        assert_eq!(row_count, 1);
1128        let col_count = read_u16_le(&buf, 10);
1129        assert_eq!(col_count, 1);
1130    }
1131
1132    #[test]
1133    fn test_rich_string_from_sst() {
1134        use crate::rich_text::RichTextRun;
1135
1136        let mut sst = SharedStringTable::new();
1137        sst.add("plain");
1138        sst.add_rich_text(&[
1139            RichTextRun {
1140                text: "Bold".to_string(),
1141                font: None,
1142                size: None,
1143                bold: true,
1144                italic: false,
1145                color: None,
1146            },
1147            RichTextRun {
1148                text: " Normal".to_string(),
1149                font: None,
1150                size: None,
1151                bold: false,
1152                italic: false,
1153                color: None,
1154            },
1155        ]);
1156
1157        let ws = make_worksheet(vec![make_row(
1158            1,
1159            vec![make_cell("A1", 1, CellTypeTag::SharedString, Some("1"))],
1160        )]);
1161        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1162
1163        let (st_start, _, cd_start, flags) = parse_sections(&buf);
1164        let is_sparse = flags & FLAG_SPARSE != 0;
1165
1166        let (type_tag, str_idx) = if is_sparse {
1167            let cells = read_sparse_row(&buf, cd_start);
1168            let idx = u32::from_le_bytes(cells[0].2[..4].try_into().unwrap()) as usize;
1169            (cells[0].1, idx)
1170        } else {
1171            let (tag, payload) = read_cell_at(&buf, cd_start, false, 0);
1172            let idx = u32::from_le_bytes(payload[..4].try_into().unwrap()) as usize;
1173            (tag, idx)
1174        };
1175
1176        assert_eq!(type_tag, TYPE_RICH_STRING);
1177        let text = read_string(&buf, st_start, str_idx);
1178        assert_eq!(text, "Bold Normal");
1179    }
1180}