sheetkit_core/
raw_transfer_v2.rs

1//! Read-direction buffer serializer v2 for bulk data transfer.
2//!
3//! Converts a `WorksheetXml` and `SharedStringTable` into a compact binary
4//! buffer with inline strings, eliminating the global string table that v1
5//! required. This allows incremental (row-by-row) decoding on the JS side
6//! without eagerly materializing all strings upfront.
7//!
8//! Binary format (little-endian throughout):
9//!
10//! ```text
11//! HEADER (16 bytes)
12//!   magic:     u32  = 0x534B5232 ("SKR2")
13//!   version:   u16  = 2
14//!   row_count: u32  = number of rows
15//!   col_count: u16  = number of columns
16//!   flags:     u32  = bit 0: always 0 (v2 is always sparse)
17//!                      bits 16..31: min_col (1-based)
18//!
19//! ROW INDEX (row_count * 8 bytes)
20//!   per row: row_number (u32) + byte_offset (u32) into CELL DATA section
//!   offset = 0xFFFFFFFF for empty rows (each still occupies a 2-byte zero cell_count in CELL DATA)
22//!
23//! CELL DATA (variable length)
24//!   per row: cell_count (u16) + cells
25//!   per cell: col (u16) + type (u8) + payload
26//!     0x00 (empty):  0 bytes
27//!     0x01 (number): 8 bytes (f64 LE)
28//!     0x02 (string): 4 bytes (len u32 LE) + N bytes (UTF-8)
29//!     0x03 (bool):   1 byte
30//!     0x04 (date):   8 bytes (f64 LE, Excel serial)
31//!     0x05 (error):  4 bytes (len u32 LE) + N bytes (UTF-8)
//!     0x06 (formula): 4 bytes (len u32 LE) + N bytes (UTF-8, formula expression text)
33//!     0x07 (rich):   4 bytes (len u32 LE) + N bytes (UTF-8)
34//! ```
35
36use crate::error::Result;
37use crate::sst::SharedStringTable;
38use crate::utils::cell_ref::cell_name_to_coordinates;
39use sheetkit_xml::worksheet::{CellTypeTag, WorksheetXml};
40
/// Magic number at the start of every v2 buffer: the ASCII bytes `"SKR2"` read as a
/// big-endian `u32` (`0x534B5232`). It is written to the buffer little-endian.
pub const MAGIC_V2: u32 = u32::from_be_bytes(*b"SKR2");
/// Format version stored in the header.
pub const VERSION_V2: u16 = 2;
/// Header size in bytes: magic (4) + version (2) + row_count (4) + col_count (2) + flags (4).
pub const HEADER_SIZE: usize = 4 + 2 + 4 + 2 + 4;

/// Cell type tag: no payload.
pub const TYPE_EMPTY: u8 = 0x00;
/// Cell type tag: 8-byte little-endian `f64`.
pub const TYPE_NUMBER: u8 = 0x01;
/// Cell type tag: length-prefixed UTF-8 string.
pub const TYPE_STRING: u8 = 0x02;
/// Cell type tag: single byte, `1` for true and `0` for false.
pub const TYPE_BOOL: u8 = 0x03;
/// Cell type tag: 8-byte little-endian `f64` holding a date value.
pub const TYPE_DATE: u8 = 0x04;
/// Cell type tag: length-prefixed UTF-8 error text.
pub const TYPE_ERROR: u8 = 0x05;
/// Cell type tag: length-prefixed UTF-8 formula expression.
pub const TYPE_FORMULA: u8 = 0x06;
/// Cell type tag: length-prefixed UTF-8 text of a rich-text shared string.
pub const TYPE_RICH_STRING: u8 = 0x07;

/// Row-index offset sentinel marking a row that has no cells.
const EMPTY_ROW_OFFSET: u32 = u32::MAX;
55
/// One encoded cell: where it sits in the row, how it is tagged, and its inline value.
struct CellEntryV2 {
    /// Column written to the buffer (0-based, relative to the sheet's minimum column).
    col: u16,
    /// One of the `TYPE_*` tags, written immediately before the payload.
    type_tag: u8,
    /// Value bytes that follow the tag.
    payload: CellPayload,
}

/// Variable-length value carried by a cell entry.
enum CellPayload {
    /// No bytes.
    Empty,
    /// Written as 8 little-endian bytes.
    Number(f64),
    /// Written as a single byte.
    Bool(u8),
    /// Written as a little-endian `u32` byte length followed by the UTF-8 bytes.
    Str(String),
}

impl CellPayload {
    /// Number of bytes that [`CellPayload::write_to`] appends for this payload.
    fn byte_size(&self) -> usize {
        match self {
            Self::Empty => 0,
            Self::Number(_) => std::mem::size_of::<f64>(),
            Self::Bool(_) => std::mem::size_of::<u8>(),
            Self::Str(text) => std::mem::size_of::<u32>() + text.len(),
        }
    }

    /// Append this payload's wire bytes to `buf`.
    fn write_to(&self, buf: &mut Vec<u8>) {
        match self {
            Self::Empty => {}
            Self::Number(value) => buf.extend_from_slice(&value.to_le_bytes()),
            Self::Bool(flag) => buf.push(*flag),
            Self::Str(text) => {
                // The length prefix is a u32; `as` keeps only the low 32 bits of the length.
                let len_prefix = text.len() as u32;
                buf.extend_from_slice(&len_prefix.to_le_bytes());
                buf.extend_from_slice(text.as_bytes());
            }
        }
    }
}
96
97struct RowEntriesV2 {
98    row_num: u32,
99    cells: Vec<CellEntryV2>,
100}
101
102impl RowEntriesV2 {
103    /// Byte size of the cell data for this row (cell_count u16 + all cells).
104    fn byte_size(&self) -> usize {
105        // cell_count (2 bytes) + each cell (col u16 + type u8 + payload)
106        let mut size = 2usize;
107        for cell in &self.cells {
108            size += 2 + 1 + cell.payload.byte_size();
109        }
110        size
111    }
112}
113
114/// Serialize a worksheet's cell data into a compact binary buffer using v2 format.
115///
116/// The v2 format inlines all string data with each cell, eliminating the global
117/// string table. This enables incremental row-by-row decoding without eagerly
118/// decoding all strings.
119pub fn sheet_to_raw_buffer_v2(ws: &WorksheetXml, sst: &SharedStringTable) -> Result<Vec<u8>> {
120    let rows = &ws.sheet_data.rows;
121
122    if rows.is_empty() {
123        return Ok(write_empty_buffer());
124    }
125
126    let (min_row, max_row, min_col, max_col, total_cells) = scan_dimensions(ws)?;
127
128    if total_cells == 0 {
129        return Ok(write_empty_buffer());
130    }
131
132    let row_count = (max_row - min_row + 1) as usize;
133    let col_count = (max_col - min_col + 1) as usize;
134
135    let cell_entries = collect_cell_entries_v2(ws, sst, min_col)?;
136
137    let flags: u32 = (min_col & 0xFFFF) << 16;
138
139    let row_index_size = row_count * 8;
140
141    let cell_data = encode_cell_data_v2(&cell_entries, min_row, max_row);
142    let row_index = build_row_index_v2(&cell_entries, min_row, max_row);
143
144    let total_size = HEADER_SIZE + row_index_size + cell_data.len();
145    let mut buf = Vec::with_capacity(total_size);
146
147    buf.extend_from_slice(&MAGIC_V2.to_le_bytes());
148    buf.extend_from_slice(&VERSION_V2.to_le_bytes());
149    buf.extend_from_slice(&(row_count as u32).to_le_bytes());
150    buf.extend_from_slice(&(col_count as u16).to_le_bytes());
151    buf.extend_from_slice(&flags.to_le_bytes());
152
153    buf.extend_from_slice(&row_index);
154    buf.extend_from_slice(&cell_data);
155
156    Ok(buf)
157}
158
159fn write_empty_buffer() -> Vec<u8> {
160    let mut buf = Vec::with_capacity(HEADER_SIZE);
161    buf.extend_from_slice(&MAGIC_V2.to_le_bytes());
162    buf.extend_from_slice(&VERSION_V2.to_le_bytes());
163    buf.extend_from_slice(&0u32.to_le_bytes());
164    buf.extend_from_slice(&0u16.to_le_bytes());
165    buf.extend_from_slice(&0u32.to_le_bytes());
166    buf
167}
168
169fn scan_dimensions(ws: &WorksheetXml) -> Result<(u32, u32, u32, u32, usize)> {
170    let mut min_row = u32::MAX;
171    let mut max_row = 0u32;
172    let mut min_col = u32::MAX;
173    let mut max_col = 0u32;
174    let mut total_cells = 0usize;
175
176    for row in &ws.sheet_data.rows {
177        if row.cells.is_empty() {
178            continue;
179        }
180        min_row = min_row.min(row.r);
181        max_row = max_row.max(row.r);
182
183        for cell in &row.cells {
184            let col = resolve_col(cell)?;
185            min_col = min_col.min(col);
186            max_col = max_col.max(col);
187            total_cells += 1;
188        }
189    }
190
191    if total_cells == 0 {
192        return Ok((1, 1, 1, 1, 0));
193    }
194
195    Ok((min_row, max_row, min_col, max_col, total_cells))
196}
197
198fn resolve_col(cell: &sheetkit_xml::worksheet::Cell) -> Result<u32> {
199    if cell.col > 0 {
200        return Ok(cell.col);
201    }
202    let (col, _row) = cell_name_to_coordinates(cell.r.as_str())?;
203    Ok(col)
204}
205
206fn collect_cell_entries_v2(
207    ws: &WorksheetXml,
208    sst: &SharedStringTable,
209    min_col: u32,
210) -> Result<Vec<RowEntriesV2>> {
211    let mut result = Vec::with_capacity(ws.sheet_data.rows.len());
212
213    for row in &ws.sheet_data.rows {
214        if row.cells.is_empty() {
215            continue;
216        }
217
218        let mut cells = Vec::with_capacity(row.cells.len());
219        for cell in &row.cells {
220            let col = resolve_col(cell)?;
221            let relative_col = (col - min_col) as u16;
222            let (type_tag, payload) = encode_cell_value_v2(cell, sst)?;
223            cells.push(CellEntryV2 {
224                col: relative_col,
225                type_tag,
226                payload,
227            });
228        }
229
230        result.push(RowEntriesV2 {
231            row_num: row.r,
232            cells,
233        });
234    }
235
236    Ok(result)
237}
238
239fn encode_cell_value_v2(
240    cell: &sheetkit_xml::worksheet::Cell,
241    sst: &SharedStringTable,
242) -> Result<(u8, CellPayload)> {
243    if cell.f.is_some() {
244        let formula_expr = cell
245            .f
246            .as_ref()
247            .and_then(|f| f.value.as_deref())
248            .unwrap_or("");
249        return Ok((TYPE_FORMULA, CellPayload::Str(formula_expr.to_string())));
250    }
251
252    match cell.t {
253        CellTypeTag::SharedString => {
254            if let Some(ref v) = cell.v {
255                if let Ok(sst_idx) = v.parse::<usize>() {
256                    let text = sst.get(sst_idx).unwrap_or("");
257                    if sst.get_rich_text(sst_idx).is_some() {
258                        return Ok((TYPE_RICH_STRING, CellPayload::Str(text.to_string())));
259                    }
260                    return Ok((TYPE_STRING, CellPayload::Str(text.to_string())));
261                }
262            }
263            Ok((TYPE_EMPTY, CellPayload::Empty))
264        }
265        CellTypeTag::Boolean => {
266            let b = if let Some(ref v) = cell.v {
267                if v == "1" || v.eq_ignore_ascii_case("true") {
268                    1u8
269                } else {
270                    0u8
271                }
272            } else {
273                0u8
274            };
275            Ok((TYPE_BOOL, CellPayload::Bool(b)))
276        }
277        CellTypeTag::Error => {
278            let error_text = cell.v.as_deref().unwrap_or("#VALUE!");
279            Ok((TYPE_ERROR, CellPayload::Str(error_text.to_string())))
280        }
281        CellTypeTag::InlineString => {
282            let text = cell
283                .is
284                .as_ref()
285                .and_then(|is| is.t.as_deref())
286                .or(cell.v.as_deref())
287                .unwrap_or("");
288            Ok((TYPE_STRING, CellPayload::Str(text.to_string())))
289        }
290        CellTypeTag::Date => {
291            if let Some(ref v) = cell.v {
292                if let Ok(n) = v.parse::<f64>() {
293                    return Ok((TYPE_DATE, CellPayload::Number(n)));
294                }
295            }
296            Ok((TYPE_EMPTY, CellPayload::Empty))
297        }
298        CellTypeTag::FormulaString => {
299            if let Some(ref v) = cell.v {
300                return Ok((TYPE_STRING, CellPayload::Str(v.clone())));
301            }
302            Ok((TYPE_EMPTY, CellPayload::Empty))
303        }
304        CellTypeTag::None | CellTypeTag::Number => {
305            if let Some(ref v) = cell.v {
306                if let Ok(n) = v.parse::<f64>() {
307                    return Ok((TYPE_NUMBER, CellPayload::Number(n)));
308                }
309            }
310            Ok((TYPE_EMPTY, CellPayload::Empty))
311        }
312    }
313}
314
315fn encode_cell_data_v2(row_entries: &[RowEntriesV2], min_row: u32, max_row: u32) -> Vec<u8> {
316    let total_rows = (max_row - min_row + 1) as usize;
317    let mut entries_by_row: Vec<Option<&RowEntriesV2>> = vec![None; total_rows];
318    for re in row_entries {
319        let idx = (re.row_num - min_row) as usize;
320        entries_by_row[idx] = Some(re);
321    }
322
323    let mut buf = Vec::new();
324    for entry in &entries_by_row {
325        match entry {
326            Some(re) => {
327                let count = re.cells.len() as u16;
328                buf.extend_from_slice(&count.to_le_bytes());
329                for cell in &re.cells {
330                    buf.extend_from_slice(&cell.col.to_le_bytes());
331                    buf.push(cell.type_tag);
332                    cell.payload.write_to(&mut buf);
333                }
334            }
335            None => {
336                buf.extend_from_slice(&0u16.to_le_bytes());
337            }
338        }
339    }
340
341    buf
342}
343
344fn build_row_index_v2(row_entries: &[RowEntriesV2], min_row: u32, max_row: u32) -> Vec<u8> {
345    let total_rows = (max_row - min_row + 1) as usize;
346    let mut index = Vec::with_capacity(total_rows * 8);
347
348    let mut entries_by_row: Vec<Option<&RowEntriesV2>> = vec![None; total_rows];
349    for re in row_entries {
350        let idx = (re.row_num - min_row) as usize;
351        entries_by_row[idx] = Some(re);
352    }
353
354    let mut byte_offset = 0u32;
355    for (i, entry) in entries_by_row.iter().enumerate() {
356        let row_num = min_row + i as u32;
357        index.extend_from_slice(&row_num.to_le_bytes());
358        match entry {
359            Some(re) => {
360                if re.cells.is_empty() {
361                    index.extend_from_slice(&EMPTY_ROW_OFFSET.to_le_bytes());
362                    byte_offset += 2; // just the cell_count u16
363                } else {
364                    index.extend_from_slice(&byte_offset.to_le_bytes());
365                    byte_offset += re.byte_size() as u32;
366                }
367            }
368            None => {
369                index.extend_from_slice(&EMPTY_ROW_OFFSET.to_le_bytes());
370                byte_offset += 2; // empty row writes cell_count=0
371            }
372        }
373    }
374
375    index
376}
377
#[cfg(test)]
// `make_worksheet` assigns `sheet_data` on a default-constructed `WorksheetXml`.
#[allow(clippy::field_reassign_with_default)]
mod tests {
    use super::*;
    use sheetkit_xml::worksheet::{
        Cell, CellFormula, CellTypeTag, CompactCellRef, InlineString, Row, SheetData, WorksheetXml,
    };

    /// A decoded cell: `(relative column, type tag, value rendered as text)`.
    type DecodedCell = (u16, u8, String);

    /// Build a cell with the given reference, cached column, type and raw value.
    fn make_cell(col_ref: &str, col_num: u32, t: CellTypeTag, v: Option<&str>) -> Cell {
        Cell {
            r: CompactCellRef::new(col_ref),
            col: col_num,
            s: None,
            t,
            v: v.map(|s| s.to_string()),
            f: None,
            is: None,
        }
    }

    /// Build a row with the given number and cells; all other attributes are unset.
    fn make_row(row_num: u32, cells: Vec<Cell>) -> Row {
        Row {
            r: row_num,
            spans: None,
            s: None,
            custom_format: None,
            ht: None,
            hidden: None,
            custom_height: None,
            outline_level: None,
            cells,
        }
    }

    /// Build a default worksheet whose sheet data holds `rows`.
    fn make_worksheet(rows: Vec<Row>) -> WorksheetXml {
        let mut ws = WorksheetXml::default();
        ws.sheet_data = SheetData { rows };
        ws
    }

    /// Build a shared string table containing `strings` in order.
    fn make_sst(strings: &[&str]) -> SharedStringTable {
        let mut sst = SharedStringTable::new();
        for s in strings {
            sst.add(s);
        }
        sst
    }

    /// Serialize `rows` against `sst`, panicking on error.
    fn serialize(rows: Vec<Row>, sst: &SharedStringTable) -> Vec<u8> {
        sheet_to_raw_buffer_v2(&make_worksheet(rows), sst).unwrap()
    }

    /// Shorthand for building an expected decoded cell.
    fn decoded(col: u16, type_tag: u8, text: &str) -> DecodedCell {
        (col, type_tag, text.to_string())
    }

    fn read_u32_le(buf: &[u8], offset: usize) -> u32 {
        u32::from_le_bytes(buf[offset..offset + 4].try_into().unwrap())
    }

    fn read_u16_le(buf: &[u8], offset: usize) -> u16 {
        u16::from_le_bytes(buf[offset..offset + 2].try_into().unwrap())
    }

    fn read_f64_le(buf: &[u8], offset: usize) -> f64 {
        f64::from_le_bytes(buf[offset..offset + 8].try_into().unwrap())
    }

    /// Read a length-prefixed string; returns the string and the bytes consumed.
    fn read_inline_string(buf: &[u8], offset: usize) -> (String, usize) {
        let len = read_u32_le(buf, offset) as usize;
        let s = std::str::from_utf8(&buf[offset + 4..offset + 4 + len])
            .unwrap()
            .to_string();
        (s, 4 + len)
    }

    /// Minimum column stored in the upper 16 bits of the header flags.
    fn header_min_col(buf: &[u8]) -> u32 {
        read_u32_le(buf, 12) >> 16
    }

    /// Read the `(row_number, byte_offset)` pair stored in row-index slot `slot`.
    fn row_index_entry(buf: &[u8], slot: usize) -> (u32, u32) {
        let base = HEADER_SIZE + slot * 8;
        (read_u32_le(buf, base), read_u32_le(buf, base + 4))
    }

    /// Parse v2 buffer cell data for a single row at the given absolute offset.
    fn read_v2_row_cells(buf: &[u8], abs_offset: usize) -> Vec<DecodedCell> {
        let cell_count = read_u16_le(buf, abs_offset) as usize;
        let mut pos = abs_offset + 2;
        let mut result = Vec::with_capacity(cell_count);

        for _ in 0..cell_count {
            let col = read_u16_le(buf, pos);
            let type_tag = buf[pos + 2];
            pos += 3;

            let val = match type_tag {
                TYPE_EMPTY => String::new(),
                TYPE_NUMBER | TYPE_DATE => {
                    let n = read_f64_le(buf, pos);
                    pos += 8;
                    format!("{n}")
                }
                TYPE_BOOL => {
                    let b = buf[pos];
                    pos += 1;
                    format!("{b}")
                }
                TYPE_STRING | TYPE_ERROR | TYPE_FORMULA | TYPE_RICH_STRING => {
                    let (s, consumed) = read_inline_string(buf, pos);
                    pos += consumed;
                    s
                }
                _ => panic!("unknown type tag: {type_tag}"),
            };

            result.push((col, type_tag, val));
        }

        result
    }

    /// Decode the cells of the row in row-index slot `slot` by following its offset.
    /// Panics if the slot is marked empty.
    fn decode_row(buf: &[u8], slot: usize) -> Vec<DecodedCell> {
        let row_count = read_u32_le(buf, 6) as usize;
        let cell_data_start = HEADER_SIZE + row_count * 8;
        let (_, offset) = row_index_entry(buf, slot);
        assert_ne!(offset, EMPTY_ROW_OFFSET, "row index slot {slot} is marked empty");
        read_v2_row_cells(buf, cell_data_start + offset as usize)
    }

    #[test]
    fn test_empty_sheet() {
        let buf = serialize(vec![], &SharedStringTable::new());

        assert_eq!(buf.len(), HEADER_SIZE);
        assert_eq!(read_u32_le(&buf, 0), MAGIC_V2);
        assert_eq!(read_u16_le(&buf, 4), VERSION_V2);
        assert_eq!(read_u32_le(&buf, 6), 0);
        assert_eq!(read_u16_le(&buf, 10), 0);
        assert_eq!(read_u32_le(&buf, 12), 0);
    }

    #[test]
    fn test_single_number_cell() {
        let buf = serialize(
            vec![make_row(
                1,
                vec![make_cell("A1", 1, CellTypeTag::None, Some("42.5"))],
            )],
            &SharedStringTable::new(),
        );

        assert_eq!(read_u32_le(&buf, 0), MAGIC_V2);
        assert_eq!(read_u32_le(&buf, 6), 1); // row_count
        assert_eq!(read_u16_le(&buf, 10), 1); // col_count

        let (row1_num, row1_offset) = row_index_entry(&buf, 0);
        assert_eq!(row1_num, 1);
        assert_ne!(row1_offset, EMPTY_ROW_OFFSET);

        // Column 0 is relative to the sheet's minimum column.
        assert_eq!(decode_row(&buf, 0), vec![decoded(0, TYPE_NUMBER, "42.5")]);
    }

    #[test]
    fn test_string_cell_sst() {
        let sst = make_sst(&["Hello", "World"]);
        let buf = serialize(
            vec![make_row(
                1,
                vec![make_cell("A1", 1, CellTypeTag::SharedString, Some("1"))],
            )],
            &sst,
        );

        // SST index 1 = "World"
        assert_eq!(decode_row(&buf, 0), vec![decoded(0, TYPE_STRING, "World")]);
    }

    #[test]
    fn test_bool_cell() {
        let buf = serialize(
            vec![make_row(
                1,
                vec![
                    make_cell("A1", 1, CellTypeTag::Boolean, Some("1")),
                    make_cell("B1", 2, CellTypeTag::Boolean, Some("0")),
                ],
            )],
            &SharedStringTable::new(),
        );

        assert_eq!(
            decode_row(&buf, 0),
            vec![decoded(0, TYPE_BOOL, "1"), decoded(1, TYPE_BOOL, "0")]
        );
    }

    #[test]
    fn test_error_cell() {
        let buf = serialize(
            vec![make_row(
                1,
                vec![make_cell("A1", 1, CellTypeTag::Error, Some("#DIV/0!"))],
            )],
            &SharedStringTable::new(),
        );

        assert_eq!(decode_row(&buf, 0), vec![decoded(0, TYPE_ERROR, "#DIV/0!")]);
    }

    #[test]
    fn test_formula_cell() {
        let mut cell = make_cell("A1", 1, CellTypeTag::None, Some("84"));
        cell.f = Some(Box::new(CellFormula {
            t: None,
            reference: None,
            si: None,
            value: Some("A2+B2".to_string()),
        }));
        let buf = serialize(vec![make_row(1, vec![cell])], &SharedStringTable::new());

        assert_eq!(decode_row(&buf, 0), vec![decoded(0, TYPE_FORMULA, "A2+B2")]);
    }

    #[test]
    fn test_inline_string_cell() {
        let mut cell = make_cell("A1", 1, CellTypeTag::InlineString, None);
        cell.is = Some(Box::new(InlineString {
            t: Some("Inline Text".to_string()),
        }));
        let buf = serialize(vec![make_row(1, vec![cell])], &SharedStringTable::new());

        assert_eq!(
            decode_row(&buf, 0),
            vec![decoded(0, TYPE_STRING, "Inline Text")]
        );
    }

    #[test]
    fn test_date_cell() {
        let buf = serialize(
            vec![make_row(
                1,
                vec![make_cell("A1", 1, CellTypeTag::Date, Some("44927.0"))],
            )],
            &SharedStringTable::new(),
        );

        let cells = decode_row(&buf, 0);
        assert_eq!(cells.len(), 1);
        assert_eq!(cells[0].1, TYPE_DATE);
        let val: f64 = cells[0].2.parse().unwrap();
        assert!((val - 44927.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_mixed_types_row() {
        let sst = make_sst(&["Hello"]);
        let buf = serialize(
            vec![make_row(
                1,
                vec![
                    make_cell("A1", 1, CellTypeTag::None, Some("3.14")),
                    make_cell("B1", 2, CellTypeTag::SharedString, Some("0")),
                    make_cell("C1", 3, CellTypeTag::Boolean, Some("1")),
                ],
            )],
            &sst,
        );

        let cells = decode_row(&buf, 0);
        assert_eq!(cells.len(), 3);
        assert_eq!(cells[0].1, TYPE_NUMBER);
        assert_eq!(cells[1].1, TYPE_STRING);
        assert_eq!(cells[1].2, "Hello");
        assert_eq!(cells[2].1, TYPE_BOOL);
    }

    #[test]
    fn test_sparse_rows_with_gaps() {
        let buf = serialize(
            vec![
                make_row(1, vec![make_cell("A1", 1, CellTypeTag::None, Some("1"))]),
                make_row(
                    100,
                    vec![make_cell("T100", 20, CellTypeTag::None, Some("2"))],
                ),
            ],
            &SharedStringTable::new(),
        );

        let row_count = read_u32_le(&buf, 6) as usize;
        assert_eq!(row_count, 100);
        let col_count = read_u16_le(&buf, 10) as usize;
        assert_eq!(col_count, 20);

        // Row 1 has data.
        let (_, row1_offset) = row_index_entry(&buf, 0);
        assert_ne!(row1_offset, EMPTY_ROW_OFFSET);

        // Row 2 is empty.
        let (_, row2_offset) = row_index_entry(&buf, 1);
        assert_eq!(row2_offset, EMPTY_ROW_OFFSET);

        // Row 100 has data.
        let (_, row100_offset) = row_index_entry(&buf, 99);
        assert_ne!(row100_offset, EMPTY_ROW_OFFSET);

        // Following the offsets across the 98 empty rows must land on the right cells.
        assert_eq!(decode_row(&buf, 0), vec![decoded(0, TYPE_NUMBER, "1")]);
        assert_eq!(decode_row(&buf, 99), vec![decoded(19, TYPE_NUMBER, "2")]);
    }

    #[test]
    fn test_row_index_entries() {
        let buf = serialize(
            vec![
                make_row(1, vec![make_cell("A1", 1, CellTypeTag::None, Some("1"))]),
                make_row(3, vec![make_cell("A3", 1, CellTypeTag::None, Some("3"))]),
            ],
            &SharedStringTable::new(),
        );

        let row_count = read_u32_le(&buf, 6) as usize;
        assert_eq!(row_count, 3);

        let (row1_num, row1_offset) = row_index_entry(&buf, 0);
        assert_eq!(row1_num, 1);
        assert_ne!(row1_offset, EMPTY_ROW_OFFSET);

        let (row2_num, row2_offset) = row_index_entry(&buf, 1);
        assert_eq!(row2_num, 2);
        assert_eq!(row2_offset, EMPTY_ROW_OFFSET);

        let (row3_num, row3_offset) = row_index_entry(&buf, 2);
        assert_eq!(row3_num, 3);
        assert_ne!(row3_offset, EMPTY_ROW_OFFSET);
    }

    #[test]
    fn test_header_format() {
        let buf = serialize(
            vec![
                make_row(
                    2,
                    vec![
                        make_cell("B2", 2, CellTypeTag::None, Some("10")),
                        make_cell("D2", 4, CellTypeTag::None, Some("20")),
                    ],
                ),
                make_row(5, vec![make_cell("C5", 3, CellTypeTag::None, Some("30"))]),
            ],
            &SharedStringTable::new(),
        );

        assert_eq!(read_u32_le(&buf, 0), MAGIC_V2);
        assert_eq!(read_u16_le(&buf, 4), VERSION_V2);
        let row_count = read_u32_le(&buf, 6);
        assert_eq!(row_count, 4, "rows 2-5 = 4 rows");
        let col_count = read_u16_le(&buf, 10);
        assert_eq!(col_count, 3, "cols B-D = 3 columns");

        assert_eq!(header_min_col(&buf), 2, "min_col (B) lives in flag bits 16..31");
        assert_eq!(read_u32_le(&buf, 12) & 0xFFFF, 0, "low flag bits stay clear");
    }

    #[test]
    fn test_no_string_table_section() {
        let sst = make_sst(&["Alpha", "Beta"]);
        let buf = serialize(
            vec![make_row(
                1,
                vec![
                    make_cell("A1", 1, CellTypeTag::SharedString, Some("0")),
                    make_cell("B1", 2, CellTypeTag::SharedString, Some("1")),
                ],
            )],
            &sst,
        );

        // v2 has no string table section: cell data follows the row index directly,
        // which is exactly where `decode_row` reads from.
        assert_eq!(
            decode_row(&buf, 0),
            vec![
                decoded(0, TYPE_STRING, "Alpha"),
                decoded(1, TYPE_STRING, "Beta")
            ]
        );
    }

    #[test]
    fn test_large_string_inline() {
        let long_str = "A".repeat(10000);
        // No `is` element is set, so the inline-string encoder falls back to `v`.
        let buf = serialize(
            vec![make_row(
                1,
                vec![make_cell(
                    "A1",
                    1,
                    CellTypeTag::InlineString,
                    Some(&long_str),
                )],
            )],
            &SharedStringTable::new(),
        );

        let cells = decode_row(&buf, 0);
        assert_eq!(cells.len(), 1);
        assert_eq!(cells[0].1, TYPE_STRING);
        assert_eq!(cells[0].2, long_str);
    }

    #[test]
    fn test_formula_string_type() {
        let buf = serialize(
            vec![make_row(
                1,
                vec![make_cell(
                    "A1",
                    1,
                    CellTypeTag::FormulaString,
                    Some("computed"),
                )],
            )],
            &SharedStringTable::new(),
        );

        let cells = decode_row(&buf, 0);
        assert_eq!(cells[0].1, TYPE_STRING);
        assert_eq!(cells[0].2, "computed");
    }

    #[test]
    fn test_rich_string_from_sst() {
        use crate::rich_text::RichTextRun;

        let mut sst = SharedStringTable::new();
        sst.add("plain");
        sst.add_rich_text(&[
            RichTextRun {
                text: "Bold".to_string(),
                font: None,
                size: None,
                bold: true,
                italic: false,
                color: None,
            },
            RichTextRun {
                text: " Normal".to_string(),
                font: None,
                size: None,
                bold: false,
                italic: false,
                color: None,
            },
        ]);

        let buf = serialize(
            vec![make_row(
                1,
                vec![make_cell("A1", 1, CellTypeTag::SharedString, Some("1"))],
            )],
            &sst,
        );

        let cells = decode_row(&buf, 0);
        assert_eq!(cells[0].1, TYPE_RICH_STRING);
        assert_eq!(cells[0].2, "Bold Normal");
    }

    #[test]
    fn test_multiple_rows_multiple_types() {
        let sst = make_sst(&["Hello", "World"]);
        let buf = serialize(
            vec![
                make_row(
                    1,
                    vec![
                        make_cell("A1", 1, CellTypeTag::None, Some("42")),
                        make_cell("B1", 2, CellTypeTag::SharedString, Some("0")),
                    ],
                ),
                make_row(
                    2,
                    vec![
                        make_cell("A2", 1, CellTypeTag::Boolean, Some("1")),
                        make_cell("B2", 2, CellTypeTag::Error, Some("#N/A")),
                    ],
                ),
            ],
            &sst,
        );

        let row_count = read_u32_le(&buf, 6) as usize;
        assert_eq!(row_count, 2);

        let cells1 = decode_row(&buf, 0);
        assert_eq!(cells1.len(), 2);
        assert_eq!(cells1[0].1, TYPE_NUMBER);
        assert_eq!(cells1[1].1, TYPE_STRING);
        assert_eq!(cells1[1].2, "Hello");

        let cells2 = decode_row(&buf, 1);
        assert_eq!(cells2.len(), 2);
        assert_eq!(cells2[0].1, TYPE_BOOL);
        assert_eq!(cells2[1].1, TYPE_ERROR);
        assert_eq!(cells2[1].2, "#N/A");
    }

    #[test]
    fn test_cell_without_col_uses_ref_parsing() {
        // `col == 0` means the cached column is unset, forcing a parse of "C5".
        let cell = make_cell("C5", 0, CellTypeTag::None, Some("42"));
        let buf = serialize(vec![make_row(5, vec![cell])], &SharedStringTable::new());

        let row_count = read_u32_le(&buf, 6);
        assert_eq!(row_count, 1);
        let col_count = read_u16_le(&buf, 10);
        assert_eq!(col_count, 1);

        // Only a successful parse of the reference yields column C (3) as min_col.
        assert_eq!(header_min_col(&buf), 3);
        assert_eq!(decode_row(&buf, 0), vec![decoded(0, TYPE_NUMBER, "42")]);
    }
}