sheetkit_core/
raw_transfer_write.rs

1//! Write-direction buffer serialization and deserialization for raw FFI transfer.
2//!
3//! Provides [`cells_to_raw_buffer`] to encode structured cell data into a
4//! compact binary buffer, and [`raw_buffer_to_cells`] to decode a buffer
5//! back into cell values. These are used for the JS-to-Rust write path
6//! (e.g., `setSheetData`) and for round-trip testing.
7//!
8//! The binary format matches the specification in `raw_transfer.rs` so that
9//! buffers produced by either module can be consumed by the other.
10
11use std::collections::HashMap;
12
13use crate::cell::CellValue;
14use crate::error::{Error, Result};
15use crate::rich_text;
16
/// Magic number stored little-endian in header bytes 0..4; its hex digits
/// spell ASCII "SKRD".
const MAGIC: u32 = 0x534B5244;
/// Format version written to header bytes 4..6.
const VERSION: u16 = 1;
/// Header size in bytes: magic(4) + version(2) + row_count(4) + col_count(2) + flags(4).
const HEADER_SIZE: usize = 16;
/// Size of one row index entry: row number (u32) + cell data offset (u32).
const ROW_INDEX_ENTRY_SIZE: usize = 8;
/// Size of one dense cell: type tag (1 byte) + payload (8 bytes).
const CELL_STRIDE: usize = 9;
/// Size of one sparse cell entry: column (u16) + type tag (1 byte) + payload (8 bytes).
const SPARSE_ENTRY_SIZE: usize = 11;
/// Row index offset value marking a row that has no cell data to read.
const EMPTY_ROW_SENTINEL: u32 = 0xFFFF_FFFF;
/// Header flag bit set when the cell data uses the sparse layout.
const FLAG_SPARSE: u32 = 1;
/// The encoder picks the sparse layout when the fraction of provided cells
/// in the `rows x cols` grid is below this value.
const DENSITY_THRESHOLD: f64 = 0.3;

// One-byte type tags stored with every encoded cell. The tag selects how the
// 8-byte payload is interpreted.
/// No value; payload is unused.
const TYPE_EMPTY: u8 = 0x00;
/// Payload is a little-endian `f64`.
const TYPE_NUMBER: u8 = 0x01;
/// Payload bytes 0..4 are a little-endian `u32` string table index.
const TYPE_STRING: u8 = 0x02;
/// Payload byte 0 is non-zero for `true`.
const TYPE_BOOL: u8 = 0x03;
/// Payload is a little-endian `f64` (decoded into `CellValue::Date`).
const TYPE_DATE: u8 = 0x04;
/// Payload bytes 0..4 index the error text in the string table.
const TYPE_ERROR: u8 = 0x05;
/// Payload bytes 0..4 index the formula expression in the string table.
const TYPE_FORMULA: u8 = 0x06;
/// Payload bytes 0..4 index the flattened plain text of a rich string.
const TYPE_RICH_STRING: u8 = 0x07;
35
/// A row of cell data: (1-based row number, cells in that row), where each
/// cell is (1-based column number, value).
type CellRow = (u32, Vec<(u32, CellValue)>);

/// Intermediate encoded cell: (0-based column index, type tag, 8-byte payload).
type EncodedCell = (u16, u8, [u8; 8]);

/// Intermediate encoded row: (row number as supplied by the caller, encoded
/// cells in caller order).
type EncodedRow = (u32, Vec<EncodedCell>);
44
/// Decoded form of the fixed 16-byte header at the start of a raw buffer.
struct BufferHeader {
    /// Format version from header bytes 4..6. It is parsed but not checked
    /// by the decode path in this module.
    _version: u16,
    /// Number of entries in the row index (header bytes 6..10).
    row_count: u32,
    /// Number of columns per dense row (header bytes 10..12).
    col_count: u16,
    /// Bit flags (header bytes 12..16); see `FLAG_SPARSE`.
    flags: u32,
}
51
52fn read_header(buf: &[u8]) -> Result<BufferHeader> {
53    if buf.len() < HEADER_SIZE {
54        return Err(Error::Internal(format!(
55            "buffer too short for header: {} bytes (need {})",
56            buf.len(),
57            HEADER_SIZE
58        )));
59    }
60    let magic = u32::from_le_bytes(buf[0..4].try_into().unwrap());
61    if magic != MAGIC {
62        return Err(Error::Internal(format!(
63            "invalid buffer magic: expected 0x{MAGIC:08X}, got 0x{magic:08X}"
64        )));
65    }
66    let version = u16::from_le_bytes(buf[4..6].try_into().unwrap());
67    let row_count = u32::from_le_bytes(buf[6..10].try_into().unwrap());
68    let col_count = u16::from_le_bytes(buf[10..12].try_into().unwrap());
69    let flags = u32::from_le_bytes(buf[12..16].try_into().unwrap());
70    Ok(BufferHeader {
71        _version: version,
72        row_count,
73        col_count,
74        flags,
75    })
76}
77
78fn read_row_index(buf: &[u8], row_count: u32) -> Result<Vec<(u32, u32)>> {
79    let start = HEADER_SIZE;
80    let end = start + row_count as usize * ROW_INDEX_ENTRY_SIZE;
81    if buf.len() < end {
82        return Err(Error::Internal(format!(
83            "buffer too short for row index: {} bytes (need {})",
84            buf.len(),
85            end
86        )));
87    }
88    let mut entries = Vec::with_capacity(row_count as usize);
89    for i in 0..row_count as usize {
90        let offset = start + i * ROW_INDEX_ENTRY_SIZE;
91        let row_num = u32::from_le_bytes(buf[offset..offset + 4].try_into().unwrap());
92        let row_off = u32::from_le_bytes(buf[offset + 4..offset + 8].try_into().unwrap());
93        entries.push((row_num, row_off));
94    }
95    Ok(entries)
96}
97
98/// Read the string table. Returns (strings, byte position after string table).
99fn read_string_table(buf: &[u8], offset: usize) -> Result<(Vec<String>, usize)> {
100    if buf.len() < offset + 8 {
101        return Err(Error::Internal(
102            "buffer too short for string table header".to_string(),
103        ));
104    }
105    let count = u32::from_le_bytes(buf[offset..offset + 4].try_into().unwrap()) as usize;
106    let blob_size = u32::from_le_bytes(buf[offset + 4..offset + 8].try_into().unwrap()) as usize;
107
108    let offsets_start = offset + 8;
109    let offsets_end = offsets_start + count * 4;
110    let blob_start = offsets_end;
111    let blob_end = blob_start + blob_size;
112
113    if buf.len() < blob_end {
114        return Err(Error::Internal(format!(
115            "buffer too short for string table: {} bytes (need {})",
116            buf.len(),
117            blob_end
118        )));
119    }
120
121    let mut string_offsets = Vec::with_capacity(count);
122    for i in 0..count {
123        let pos = offsets_start + i * 4;
124        let off = u32::from_le_bytes(buf[pos..pos + 4].try_into().unwrap()) as usize;
125        string_offsets.push(off);
126    }
127
128    let mut strings = Vec::with_capacity(count);
129    for i in 0..count {
130        let start = blob_start + string_offsets[i];
131        let end = if i + 1 < count {
132            blob_start + string_offsets[i + 1]
133        } else {
134            blob_end
135        };
136        let s = std::str::from_utf8(&buf[start..end])
137            .map_err(|e| Error::Internal(format!("invalid UTF-8 in string table: {e}")))?;
138        strings.push(s.to_string());
139    }
140
141    Ok((strings, blob_end))
142}
143
144fn decode_cell_payload(type_tag: u8, payload: &[u8], strings: &[String]) -> Result<CellValue> {
145    match type_tag {
146        TYPE_EMPTY => Ok(CellValue::Empty),
147        TYPE_NUMBER => {
148            let n = f64::from_le_bytes(payload[0..8].try_into().unwrap());
149            Ok(CellValue::Number(n))
150        }
151        TYPE_STRING => {
152            let idx = u32::from_le_bytes(payload[0..4].try_into().unwrap()) as usize;
153            let s = strings
154                .get(idx)
155                .cloned()
156                .ok_or_else(|| Error::Internal(format!("string index {idx} out of range")))?;
157            Ok(CellValue::String(s))
158        }
159        TYPE_BOOL => Ok(CellValue::Bool(payload[0] != 0)),
160        TYPE_DATE => {
161            let n = f64::from_le_bytes(payload[0..8].try_into().unwrap());
162            Ok(CellValue::Date(n))
163        }
164        TYPE_ERROR => {
165            let idx = u32::from_le_bytes(payload[0..4].try_into().unwrap()) as usize;
166            let s = strings
167                .get(idx)
168                .cloned()
169                .ok_or_else(|| Error::Internal(format!("string index {idx} out of range")))?;
170            Ok(CellValue::Error(s))
171        }
172        TYPE_FORMULA => {
173            let idx = u32::from_le_bytes(payload[0..4].try_into().unwrap()) as usize;
174            let expr = strings
175                .get(idx)
176                .cloned()
177                .ok_or_else(|| Error::Internal(format!("string index {idx} out of range")))?;
178            Ok(CellValue::Formula { expr, result: None })
179        }
180        TYPE_RICH_STRING => {
181            let idx = u32::from_le_bytes(payload[0..4].try_into().unwrap()) as usize;
182            let s = strings
183                .get(idx)
184                .cloned()
185                .ok_or_else(|| Error::Internal(format!("string index {idx} out of range")))?;
186            Ok(CellValue::String(s))
187        }
188        _ => Ok(CellValue::Empty),
189    }
190}
191
192fn read_dense_cells(
193    buf: &[u8],
194    cell_data_start: usize,
195    row_index: &[(u32, u32)],
196    col_count: u16,
197    strings: &[String],
198) -> Result<Vec<CellRow>> {
199    let mut result = Vec::new();
200    for &(row_num, offset) in row_index {
201        if offset == EMPTY_ROW_SENTINEL {
202            continue;
203        }
204        let row_start = cell_data_start + offset as usize;
205        let row_end = row_start + col_count as usize * CELL_STRIDE;
206        if buf.len() < row_end {
207            return Err(Error::Internal(format!(
208                "buffer too short for dense row data at offset {}",
209                row_start
210            )));
211        }
212        let mut cells = Vec::new();
213        for c in 0..col_count as usize {
214            let cell_offset = row_start + c * CELL_STRIDE;
215            let type_tag = buf[cell_offset];
216            if type_tag == TYPE_EMPTY {
217                continue;
218            }
219            let payload = &buf[cell_offset + 1..cell_offset + 9];
220            let value = decode_cell_payload(type_tag, payload, strings)?;
221            cells.push((c as u32 + 1, value));
222        }
223        if !cells.is_empty() {
224            result.push((row_num, cells));
225        }
226    }
227    Ok(result)
228}
229
230fn read_sparse_cells(
231    buf: &[u8],
232    cell_data_start: usize,
233    row_index: &[(u32, u32)],
234    strings: &[String],
235) -> Result<Vec<CellRow>> {
236    let mut result = Vec::new();
237    for &(row_num, offset) in row_index {
238        if offset == EMPTY_ROW_SENTINEL {
239            continue;
240        }
241        let pos = cell_data_start + offset as usize;
242        if buf.len() < pos + 2 {
243            return Err(Error::Internal(
244                "buffer too short for sparse row cell count".to_string(),
245            ));
246        }
247        let cell_count = u16::from_le_bytes(buf[pos..pos + 2].try_into().unwrap()) as usize;
248        let entries_start = pos + 2;
249        let entries_end = entries_start + cell_count * SPARSE_ENTRY_SIZE;
250        if buf.len() < entries_end {
251            return Err(Error::Internal(format!(
252                "buffer too short for sparse row entries at offset {}",
253                entries_start
254            )));
255        }
256        let mut cells = Vec::with_capacity(cell_count);
257        for i in 0..cell_count {
258            let entry_off = entries_start + i * SPARSE_ENTRY_SIZE;
259            let col = u16::from_le_bytes(buf[entry_off..entry_off + 2].try_into().unwrap());
260            let type_tag = buf[entry_off + 2];
261            let payload = &buf[entry_off + 3..entry_off + 11];
262            let value = decode_cell_payload(type_tag, payload, strings)?;
263            cells.push((col as u32 + 1, value));
264        }
265        if !cells.is_empty() {
266            result.push((row_num, cells));
267        }
268    }
269    Ok(result)
270}
271
272/// Decode a raw buffer into cell values for applying to a worksheet.
273///
274/// Returns rows as `(row_number, cells)` where each cell is
275/// `(col_number, CellValue)`. Both row and column numbers are 1-based.
276pub fn raw_buffer_to_cells(buf: &[u8]) -> Result<Vec<CellRow>> {
277    let header = read_header(buf)?;
278    if header.row_count == 0 {
279        return Ok(Vec::new());
280    }
281
282    let row_index = read_row_index(buf, header.row_count)?;
283    let string_table_offset = HEADER_SIZE + header.row_count as usize * ROW_INDEX_ENTRY_SIZE;
284    let (strings, cell_data_start) = read_string_table(buf, string_table_offset)?;
285
286    let is_sparse = header.flags & FLAG_SPARSE != 0;
287    if is_sparse {
288        read_sparse_cells(buf, cell_data_start, &row_index, &strings)
289    } else {
290        read_dense_cells(buf, cell_data_start, &row_index, header.col_count, &strings)
291    }
292}
293
/// Deduplicating pool of strings collected while encoding cells.
struct StringTable {
    /// Unique strings in first-seen order; a string's position is its index.
    strings: Vec<String>,
    /// Reverse lookup from string content to its position in `strings`.
    index_map: HashMap<String, u32>,
}

impl StringTable {
    /// Create an empty table.
    fn new() -> Self {
        StringTable {
            strings: Vec::new(),
            index_map: HashMap::new(),
        }
    }

    /// Return the index of `s`, adding it to the table on first sight.
    fn intern(&mut self, s: &str) -> u32 {
        match self.index_map.get(s).copied() {
            Some(existing) => existing,
            None => {
                let next = self.strings.len() as u32;
                let owned = s.to_string();
                self.index_map.insert(owned.clone(), next);
                self.strings.push(owned);
                next
            }
        }
    }
}
317
318fn cell_type_tag(value: &CellValue) -> u8 {
319    match value {
320        CellValue::Empty => TYPE_EMPTY,
321        CellValue::Number(_) => TYPE_NUMBER,
322        CellValue::String(_) => TYPE_STRING,
323        CellValue::Bool(_) => TYPE_BOOL,
324        CellValue::Date(_) => TYPE_DATE,
325        CellValue::Error(_) => TYPE_ERROR,
326        CellValue::Formula { .. } => TYPE_FORMULA,
327        CellValue::RichString(_) => TYPE_RICH_STRING,
328    }
329}
330
331fn encode_cell_payload(value: &CellValue, st: &mut StringTable) -> [u8; 8] {
332    let mut payload = [0u8; 8];
333    match value {
334        CellValue::Empty => {}
335        CellValue::Number(n) => {
336            payload[0..8].copy_from_slice(&n.to_le_bytes());
337        }
338        CellValue::String(s) => {
339            let idx = st.intern(s);
340            payload[0..4].copy_from_slice(&idx.to_le_bytes());
341        }
342        CellValue::Bool(b) => {
343            payload[0] = u8::from(*b);
344        }
345        CellValue::Date(n) => {
346            payload[0..8].copy_from_slice(&n.to_le_bytes());
347        }
348        CellValue::Error(s) => {
349            let idx = st.intern(s);
350            payload[0..4].copy_from_slice(&idx.to_le_bytes());
351        }
352        CellValue::Formula { expr, .. } => {
353            let idx = st.intern(expr);
354            payload[0..4].copy_from_slice(&idx.to_le_bytes());
355        }
356        CellValue::RichString(runs) => {
357            let plain = rich_text::rich_text_to_plain(runs);
358            let idx = st.intern(&plain);
359            payload[0..4].copy_from_slice(&idx.to_le_bytes());
360        }
361    }
362    payload
363}
364
365/// Encode cell values into a raw buffer for transfer.
366///
367/// Takes rows as `(row_number, cells)` where each cell is
368/// `(col_number, CellValue)`. Both row and column numbers are 1-based.
369/// Returns the encoded binary buffer.
370pub fn cells_to_raw_buffer(rows: &[(u32, Vec<(u32, CellValue)>)]) -> Result<Vec<u8>> {
371    if rows.is_empty() {
372        return write_empty_buffer();
373    }
374
375    let mut max_col: u32 = 0;
376    let mut total_cells: usize = 0;
377    for (_, cells) in rows {
378        for &(col, _) in cells {
379            if col > max_col {
380                max_col = col;
381            }
382        }
383        total_cells += cells.len();
384    }
385
386    let row_count = rows.len() as u32;
387    let col_count = max_col as u16;
388
389    let grid_size = row_count as usize * col_count as usize;
390    let density = if grid_size > 0 {
391        total_cells as f64 / grid_size as f64
392    } else {
393        0.0
394    };
395    let is_sparse = density < DENSITY_THRESHOLD;
396
397    let mut st = StringTable::new();
398    let mut row_payloads: Vec<EncodedRow> = Vec::with_capacity(rows.len());
399    for &(row_num, ref cells) in rows {
400        let mut encoded_cells = Vec::with_capacity(cells.len());
401        for &(col, ref value) in cells {
402            let tag = cell_type_tag(value);
403            let payload = encode_cell_payload(value, &mut st);
404            encoded_cells.push((col as u16 - 1, tag, payload));
405        }
406        row_payloads.push((row_num, encoded_cells));
407    }
408
409    let row_index_size = row_count as usize * ROW_INDEX_ENTRY_SIZE;
410    let string_table_size = compute_string_table_size(&st);
411    let cell_data_size = if is_sparse {
412        compute_sparse_size(&row_payloads)
413    } else {
414        compute_dense_size(row_count, col_count)
415    };
416
417    let total_size = HEADER_SIZE + row_index_size + string_table_size + cell_data_size;
418    let mut buf = vec![0u8; total_size];
419
420    write_header(
421        &mut buf,
422        row_count,
423        col_count,
424        if is_sparse { FLAG_SPARSE } else { 0 },
425    );
426
427    let cell_data_start = HEADER_SIZE + row_index_size + string_table_size;
428
429    if is_sparse {
430        write_sparse_data(&mut buf, &row_payloads, cell_data_start);
431    } else {
432        write_dense_data(&mut buf, &row_payloads, col_count, cell_data_start);
433    }
434
435    write_row_index(&mut buf, &row_payloads, is_sparse, col_count);
436    write_string_table(&mut buf, HEADER_SIZE + row_index_size, &st);
437
438    Ok(buf)
439}
440
441fn write_empty_buffer() -> Result<Vec<u8>> {
442    let st_size = 8; // count(4) + blob_size(4), both zero
443    let total = HEADER_SIZE + st_size;
444    let mut buf = vec![0u8; total];
445    write_header(&mut buf, 0, 0, 0);
446    // String table: count=0, blob_size=0
447    buf[HEADER_SIZE..HEADER_SIZE + 4].copy_from_slice(&0u32.to_le_bytes());
448    buf[HEADER_SIZE + 4..HEADER_SIZE + 8].copy_from_slice(&0u32.to_le_bytes());
449    Ok(buf)
450}
451
452fn write_header(buf: &mut [u8], row_count: u32, col_count: u16, flags: u32) {
453    buf[0..4].copy_from_slice(&MAGIC.to_le_bytes());
454    buf[4..6].copy_from_slice(&VERSION.to_le_bytes());
455    buf[6..10].copy_from_slice(&row_count.to_le_bytes());
456    buf[10..12].copy_from_slice(&col_count.to_le_bytes());
457    buf[12..16].copy_from_slice(&flags.to_le_bytes());
458}
459
460fn compute_string_table_size(st: &StringTable) -> usize {
461    let blob_size: usize = st.strings.iter().map(|s| s.len()).sum();
462    8 + st.strings.len() * 4 + blob_size // count(4) + blob_size(4) + offsets + blob
463}
464
465fn write_string_table(buf: &mut [u8], offset: usize, st: &StringTable) {
466    let count = st.strings.len() as u32;
467    let blob_size: usize = st.strings.iter().map(|s| s.len()).sum();
468
469    buf[offset..offset + 4].copy_from_slice(&count.to_le_bytes());
470    buf[offset + 4..offset + 8].copy_from_slice(&(blob_size as u32).to_le_bytes());
471
472    let offsets_start = offset + 8;
473    let blob_start = offsets_start + st.strings.len() * 4;
474
475    let mut blob_offset: u32 = 0;
476    for (i, s) in st.strings.iter().enumerate() {
477        let pos = offsets_start + i * 4;
478        buf[pos..pos + 4].copy_from_slice(&blob_offset.to_le_bytes());
479        let dst = blob_start + blob_offset as usize;
480        buf[dst..dst + s.len()].copy_from_slice(s.as_bytes());
481        blob_offset += s.len() as u32;
482    }
483}
484
485fn compute_dense_size(row_count: u32, col_count: u16) -> usize {
486    row_count as usize * col_count as usize * CELL_STRIDE
487}
488
489fn compute_sparse_size(row_payloads: &[EncodedRow]) -> usize {
490    let mut size = 0;
491    for (_, cells) in row_payloads {
492        size += 2 + cells.len() * SPARSE_ENTRY_SIZE; // cell_count(u16) + entries
493    }
494    size
495}
496
497fn write_row_index(buf: &mut [u8], row_payloads: &[EncodedRow], is_sparse: bool, col_count: u16) {
498    let base = HEADER_SIZE;
499    if is_sparse {
500        let mut data_offset: u32 = 0;
501        for (i, (row_num, cells)) in row_payloads.iter().enumerate() {
502            let pos = base + i * ROW_INDEX_ENTRY_SIZE;
503            buf[pos..pos + 4].copy_from_slice(&row_num.to_le_bytes());
504            if cells.is_empty() {
505                buf[pos + 4..pos + 8].copy_from_slice(&EMPTY_ROW_SENTINEL.to_le_bytes());
506            } else {
507                buf[pos + 4..pos + 8].copy_from_slice(&data_offset.to_le_bytes());
508            }
509            let row_size = 2 + cells.len() * SPARSE_ENTRY_SIZE;
510            data_offset += row_size as u32;
511        }
512    } else {
513        for (i, (row_num, _)) in row_payloads.iter().enumerate() {
514            let pos = base + i * ROW_INDEX_ENTRY_SIZE;
515            buf[pos..pos + 4].copy_from_slice(&row_num.to_le_bytes());
516            let offset = i as u32 * col_count as u32 * CELL_STRIDE as u32;
517            buf[pos + 4..pos + 8].copy_from_slice(&offset.to_le_bytes());
518        }
519    }
520}
521
522fn write_dense_data(
523    buf: &mut [u8],
524    row_payloads: &[EncodedRow],
525    col_count: u16,
526    cell_data_start: usize,
527) {
528    for (i, (_, cells)) in row_payloads.iter().enumerate() {
529        let row_start = cell_data_start + i * col_count as usize * CELL_STRIDE;
530        for &(col_idx, tag, ref payload) in cells {
531            let cell_off = row_start + col_idx as usize * CELL_STRIDE;
532            buf[cell_off] = tag;
533            buf[cell_off + 1..cell_off + 9].copy_from_slice(payload);
534        }
535    }
536}
537
538fn write_sparse_data(buf: &mut [u8], row_payloads: &[EncodedRow], cell_data_start: usize) {
539    let mut offset = cell_data_start;
540    for (_, cells) in row_payloads {
541        let cell_count = cells.len() as u16;
542        buf[offset..offset + 2].copy_from_slice(&cell_count.to_le_bytes());
543        offset += 2;
544        for &(col_idx, tag, ref payload) in cells {
545            buf[offset..offset + 2].copy_from_slice(&col_idx.to_le_bytes());
546            buf[offset + 2] = tag;
547            buf[offset + 3..offset + 11].copy_from_slice(payload);
548            offset += SPARSE_ENTRY_SIZE;
549        }
550    }
551}
552
553#[cfg(test)]
554#[allow(clippy::needless_range_loop)]
555mod tests {
556    use super::*;
557    use crate::rich_text::RichTextRun;
558
559    #[test]
560    fn test_decode_empty_buffer() {
561        let buf = cells_to_raw_buffer(&[]).unwrap();
562        let result = raw_buffer_to_cells(&buf).unwrap();
563        assert!(result.is_empty());
564    }
565
566    #[test]
567    fn test_decode_single_number() {
568        let rows = vec![(1, vec![(1, CellValue::Number(42.5))])];
569        let buf = cells_to_raw_buffer(&rows).unwrap();
570        let result = raw_buffer_to_cells(&buf).unwrap();
571        assert_eq!(result.len(), 1);
572        assert_eq!(result[0].0, 1);
573        assert_eq!(result[0].1.len(), 1);
574        assert_eq!(result[0].1[0].0, 1);
575        assert_eq!(result[0].1[0].1, CellValue::Number(42.5));
576    }
577
578    #[test]
579    fn test_decode_string_with_table() {
580        let rows = vec![(1, vec![(1, CellValue::String("hello world".to_string()))])];
581        let buf = cells_to_raw_buffer(&rows).unwrap();
582        let result = raw_buffer_to_cells(&buf).unwrap();
583        assert_eq!(result.len(), 1);
584        assert_eq!(
585            result[0].1[0].1,
586            CellValue::String("hello world".to_string())
587        );
588    }
589
590    #[test]
591    fn test_decode_bool_true_false() {
592        let rows = vec![(
593            1,
594            vec![(1, CellValue::Bool(true)), (2, CellValue::Bool(false))],
595        )];
596        let buf = cells_to_raw_buffer(&rows).unwrap();
597        let result = raw_buffer_to_cells(&buf).unwrap();
598        assert_eq!(result[0].1[0].1, CellValue::Bool(true));
599        assert_eq!(result[0].1[1].1, CellValue::Bool(false));
600    }
601
602    #[test]
603    fn test_decode_error() {
604        let rows = vec![(1, vec![(1, CellValue::Error("#DIV/0!".to_string()))])];
605        let buf = cells_to_raw_buffer(&rows).unwrap();
606        let result = raw_buffer_to_cells(&buf).unwrap();
607        assert_eq!(result[0].1[0].1, CellValue::Error("#DIV/0!".to_string()));
608    }
609
610    #[test]
611    fn test_decode_formula() {
612        let rows = vec![(
613            1,
614            vec![(
615                1,
616                CellValue::Formula {
617                    expr: "SUM(A1:A10)".to_string(),
618                    result: None,
619                },
620            )],
621        )];
622        let buf = cells_to_raw_buffer(&rows).unwrap();
623        let result = raw_buffer_to_cells(&buf).unwrap();
624        assert_eq!(
625            result[0].1[0].1,
626            CellValue::Formula {
627                expr: "SUM(A1:A10)".to_string(),
628                result: None,
629            }
630        );
631    }
632
633    #[test]
634    fn test_decode_date() {
635        let serial = 44927.0; // 2023-01-01
636        let rows = vec![(1, vec![(1, CellValue::Date(serial))])];
637        let buf = cells_to_raw_buffer(&rows).unwrap();
638        let result = raw_buffer_to_cells(&buf).unwrap();
639        assert_eq!(result[0].1[0].1, CellValue::Date(serial));
640    }
641
642    #[test]
643    fn test_decode_mixed_row() {
644        let rows = vec![(
645            3,
646            vec![
647                (1, CellValue::Number(1.0)),
648                (2, CellValue::String("text".to_string())),
649                (3, CellValue::Bool(true)),
650                (4, CellValue::Date(44927.0)),
651                (5, CellValue::Error("#N/A".to_string())),
652                (
653                    6,
654                    CellValue::Formula {
655                        expr: "A3+B3".to_string(),
656                        result: None,
657                    },
658                ),
659            ],
660        )];
661        let buf = cells_to_raw_buffer(&rows).unwrap();
662        let result = raw_buffer_to_cells(&buf).unwrap();
663
664        assert_eq!(result.len(), 1);
665        assert_eq!(result[0].0, 3);
666        let cells = &result[0].1;
667        assert_eq!(cells.len(), 6);
668        assert_eq!(cells[0], (1, CellValue::Number(1.0)));
669        assert_eq!(cells[1], (2, CellValue::String("text".to_string())));
670        assert_eq!(cells[2], (3, CellValue::Bool(true)));
671        assert_eq!(cells[3], (4, CellValue::Date(44927.0)));
672        assert_eq!(cells[4], (5, CellValue::Error("#N/A".to_string())));
673        assert_eq!(
674            cells[5],
675            (
676                6,
677                CellValue::Formula {
678                    expr: "A3+B3".to_string(),
679                    result: None,
680                }
681            )
682        );
683    }
684
685    #[test]
686    fn test_round_trip_cells_to_buffer() {
687        let rows = vec![
688            (
689                1,
690                vec![
691                    (1, CellValue::String("Name".to_string())),
692                    (2, CellValue::String("Age".to_string())),
693                    (3, CellValue::String("Active".to_string())),
694                ],
695            ),
696            (
697                2,
698                vec![
699                    (1, CellValue::String("Alice".to_string())),
700                    (2, CellValue::Number(30.0)),
701                    (3, CellValue::Bool(true)),
702                ],
703            ),
704            (
705                3,
706                vec![
707                    (1, CellValue::String("Bob".to_string())),
708                    (2, CellValue::Number(25.0)),
709                    (3, CellValue::Bool(false)),
710                ],
711            ),
712        ];
713        let buf = cells_to_raw_buffer(&rows).unwrap();
714        let result = raw_buffer_to_cells(&buf).unwrap();
715        assert_eq!(result, rows);
716    }
717
718    #[test]
719    fn test_invalid_magic() {
720        let mut buf = vec![0u8; 24];
721        buf[0..4].copy_from_slice(&0xDEADBEEFu32.to_le_bytes());
722        let err = raw_buffer_to_cells(&buf).unwrap_err();
723        assert!(err.to_string().contains("invalid buffer magic"));
724    }
725
726    #[test]
727    fn test_buffer_too_short() {
728        let buf = vec![0u8; 4];
729        let err = raw_buffer_to_cells(&buf).unwrap_err();
730        assert!(err.to_string().contains("buffer too short"));
731    }
732
733    #[test]
734    fn test_rich_string_degrades_to_string() {
735        let runs = vec![
736            RichTextRun {
737                text: "bold ".to_string(),
738                font: None,
739                size: None,
740                bold: true,
741                italic: false,
742                color: None,
743            },
744            RichTextRun {
745                text: "text".to_string(),
746                font: None,
747                size: None,
748                bold: false,
749                italic: false,
750                color: None,
751            },
752        ];
753        let rows = vec![(1, vec![(1, CellValue::RichString(runs))])];
754        let buf = cells_to_raw_buffer(&rows).unwrap();
755        let result = raw_buffer_to_cells(&buf).unwrap();
756        assert_eq!(result[0].1[0].1, CellValue::String("bold text".to_string()));
757    }
758
759    #[test]
760    fn test_multiple_rows_and_columns() {
761        let rows = vec![
762            (
763                1,
764                vec![(1, CellValue::Number(1.0)), (5, CellValue::Number(5.0))],
765            ),
766            (10, vec![(3, CellValue::String("mid".to_string()))]),
767            (
768                100,
769                vec![(1, CellValue::Bool(true)), (5, CellValue::Date(45000.0))],
770            ),
771        ];
772        let buf = cells_to_raw_buffer(&rows).unwrap();
773        let result = raw_buffer_to_cells(&buf).unwrap();
774        assert_eq!(result.len(), 3);
775        assert_eq!(result[0].0, 1);
776        assert_eq!(result[1].0, 10);
777        assert_eq!(result[2].0, 100);
778        assert_eq!(result[0].1.len(), 2);
779        assert_eq!(result[0].1[0], (1, CellValue::Number(1.0)));
780        assert_eq!(result[0].1[1], (5, CellValue::Number(5.0)));
781        assert_eq!(result[1].1[0], (3, CellValue::String("mid".to_string())));
782        assert_eq!(result[2].1[0], (1, CellValue::Bool(true)));
783        assert_eq!(result[2].1[1], (5, CellValue::Date(45000.0)));
784    }
785
786    #[test]
787    fn test_sparse_format_selected_for_sparse_data() {
788        // 10 rows with 1 cell each, but col ranges up to 100 -> density = 10/(10*100) = 1%
789        let mut rows = Vec::new();
790        for i in 1..=10 {
791            rows.push((i, vec![(100, CellValue::Number(i as f64))]));
792        }
793        let buf = cells_to_raw_buffer(&rows).unwrap();
794        let header = read_header(&buf).unwrap();
795        assert_ne!(header.flags & FLAG_SPARSE, 0, "sparse flag should be set");
796
797        let result = raw_buffer_to_cells(&buf).unwrap();
798        assert_eq!(result.len(), 10);
799        for (i, (row_num, cells)) in result.iter().enumerate() {
800            assert_eq!(*row_num, i as u32 + 1);
801            assert_eq!(cells[0], (100, CellValue::Number((i + 1) as f64)));
802        }
803    }
804
805    #[test]
806    fn test_dense_format_selected_for_dense_data() {
807        let mut rows = Vec::new();
808        for r in 1..=5 {
809            let cells: Vec<(u32, CellValue)> = (1..=5)
810                .map(|c| (c, CellValue::Number((r * 10 + c) as f64)))
811                .collect();
812            rows.push((r, cells));
813        }
814        let buf = cells_to_raw_buffer(&rows).unwrap();
815        let header = read_header(&buf).unwrap();
816        assert_eq!(
817            header.flags & FLAG_SPARSE,
818            0,
819            "sparse flag should not be set"
820        );
821
822        let result = raw_buffer_to_cells(&buf).unwrap();
823        assert_eq!(result.len(), 5);
824        for r in 0..5 {
825            assert_eq!(result[r].0, (r + 1) as u32);
826            assert_eq!(result[r].1.len(), 5);
827            for c in 0..5 {
828                let expected = ((r + 1) * 10 + (c + 1)) as f64;
829                assert_eq!(
830                    result[r].1[c],
831                    ((c + 1) as u32, CellValue::Number(expected))
832                );
833            }
834        }
835    }
836
837    #[test]
838    fn test_string_deduplication() {
839        let rows = vec![(
840            1,
841            vec![
842                (1, CellValue::String("repeated".to_string())),
843                (2, CellValue::String("repeated".to_string())),
844                (3, CellValue::String("unique".to_string())),
845            ],
846        )];
847        let buf = cells_to_raw_buffer(&rows).unwrap();
848        let header = read_header(&buf).unwrap();
849        let st_offset = HEADER_SIZE + header.row_count as usize * ROW_INDEX_ENTRY_SIZE;
850        let count = u32::from_le_bytes(buf[st_offset..st_offset + 4].try_into().unwrap());
851        assert_eq!(count, 2, "string table should have 2 unique strings, not 3");
852
853        let result = raw_buffer_to_cells(&buf).unwrap();
854        assert_eq!(result[0].1[0].1, CellValue::String("repeated".to_string()));
855        assert_eq!(result[0].1[1].1, CellValue::String("repeated".to_string()));
856        assert_eq!(result[0].1[2].1, CellValue::String("unique".to_string()));
857    }
858
859    #[test]
860    fn test_header_fields() {
861        let rows = vec![
862            (
863                1,
864                vec![(1, CellValue::Number(1.0)), (3, CellValue::Number(3.0))],
865            ),
866            (2, vec![(2, CellValue::Number(2.0))]),
867        ];
868        let buf = cells_to_raw_buffer(&rows).unwrap();
869        let header = read_header(&buf).unwrap();
870        assert_eq!(header._version, VERSION);
871        assert_eq!(header.row_count, 2);
872        assert_eq!(header.col_count, 3);
873    }
874
875    #[test]
876    fn test_formula_result_not_preserved() {
877        let rows = vec![(
878            1,
879            vec![(
880                1,
881                CellValue::Formula {
882                    expr: "1+1".to_string(),
883                    result: Some(Box::new(CellValue::Number(2.0))),
884                },
885            )],
886        )];
887        let buf = cells_to_raw_buffer(&rows).unwrap();
888        let result = raw_buffer_to_cells(&buf).unwrap();
889        assert_eq!(
890            result[0].1[0].1,
891            CellValue::Formula {
892                expr: "1+1".to_string(),
893                result: None,
894            }
895        );
896    }
897
898    #[test]
899    fn test_hand_constructed_dense_buffer() {
900        // Manually construct a buffer with 1 row, 2 cols, dense, 1 number + 1 bool
901        let row_count: u32 = 1;
902        let col_count: u16 = 2;
903
904        let st_size = 8; // count(4) + blob_size(4) + 0 offsets + 0 blob
905        let cell_data_size = 2 * CELL_STRIDE; // 2 cols * 9 bytes
906        let total = HEADER_SIZE + ROW_INDEX_ENTRY_SIZE + st_size + cell_data_size;
907
908        let mut buf = vec![0u8; total];
909        // Header
910        buf[0..4].copy_from_slice(&MAGIC.to_le_bytes());
911        buf[4..6].copy_from_slice(&1u16.to_le_bytes()); // version
912        buf[6..10].copy_from_slice(&row_count.to_le_bytes());
913        buf[10..12].copy_from_slice(&col_count.to_le_bytes());
914        buf[12..16].copy_from_slice(&0u32.to_le_bytes()); // flags (dense)
915
916        // Row index: row 1 at offset 0
917        let ri_start = HEADER_SIZE;
918        buf[ri_start..ri_start + 4].copy_from_slice(&1u32.to_le_bytes());
919        buf[ri_start + 4..ri_start + 8].copy_from_slice(&0u32.to_le_bytes());
920
921        // String table: count=0, blob_size=0
922        let st_start = ri_start + ROW_INDEX_ENTRY_SIZE;
923        buf[st_start..st_start + 4].copy_from_slice(&0u32.to_le_bytes());
924        buf[st_start + 4..st_start + 8].copy_from_slice(&0u32.to_le_bytes());
925
926        // Cell data
927        let cd_start = st_start + st_size;
928        // Col 0: Number 99.0
929        buf[cd_start] = TYPE_NUMBER;
930        buf[cd_start + 1..cd_start + 9].copy_from_slice(&99.0f64.to_le_bytes());
931        // Col 1: Bool true
932        buf[cd_start + CELL_STRIDE] = TYPE_BOOL;
933        buf[cd_start + CELL_STRIDE + 1] = 1;
934
935        let result = raw_buffer_to_cells(&buf).unwrap();
936        assert_eq!(result.len(), 1);
937        assert_eq!(result[0].0, 1);
938        assert_eq!(result[0].1[0], (1, CellValue::Number(99.0)));
939        assert_eq!(result[0].1[1], (2, CellValue::Bool(true)));
940    }
941
942    #[test]
943    fn test_hand_constructed_sparse_buffer() {
944        // Manually construct a sparse buffer: 1 row, col_count=100, 1 cell at col 50
945        let row_count: u32 = 1;
946        let col_count: u16 = 100;
947
948        let st_size = 8; // count=0, blob_size=0
949        let cell_data_size = 2 + SPARSE_ENTRY_SIZE; // cell_count(2) + 1 entry(11)
950        let total = HEADER_SIZE + ROW_INDEX_ENTRY_SIZE + st_size + cell_data_size;
951
952        let mut buf = vec![0u8; total];
953        // Header
954        buf[0..4].copy_from_slice(&MAGIC.to_le_bytes());
955        buf[4..6].copy_from_slice(&1u16.to_le_bytes());
956        buf[6..10].copy_from_slice(&row_count.to_le_bytes());
957        buf[10..12].copy_from_slice(&col_count.to_le_bytes());
958        buf[12..16].copy_from_slice(&FLAG_SPARSE.to_le_bytes());
959
960        // Row index
961        let ri_start = HEADER_SIZE;
962        buf[ri_start..ri_start + 4].copy_from_slice(&5u32.to_le_bytes()); // row 5
963        buf[ri_start + 4..ri_start + 8].copy_from_slice(&0u32.to_le_bytes()); // offset 0
964
965        // String table
966        let st_start = ri_start + ROW_INDEX_ENTRY_SIZE;
967        buf[st_start..st_start + 4].copy_from_slice(&0u32.to_le_bytes());
968        buf[st_start + 4..st_start + 8].copy_from_slice(&0u32.to_le_bytes());
969
970        // Sparse cell data
971        let cd_start = st_start + st_size;
972        buf[cd_start..cd_start + 2].copy_from_slice(&1u16.to_le_bytes()); // 1 cell
973        let entry = cd_start + 2;
974        buf[entry..entry + 2].copy_from_slice(&49u16.to_le_bytes()); // col index 49 (0-based)
975        buf[entry + 2] = TYPE_NUMBER;
976        buf[entry + 3..entry + 11].copy_from_slice(&7.77f64.to_le_bytes());
977
978        let result = raw_buffer_to_cells(&buf).unwrap();
979        assert_eq!(result.len(), 1);
980        assert_eq!(result[0].0, 5);
981        assert_eq!(result[0].1[0], (50, CellValue::Number(7.77))); // 1-based col 50
982    }
983}