sheetkit_core/
stream_reader.rs

1//! Forward-only streaming worksheet reader.
2//!
3//! [`SheetStreamReader`] reads worksheet XML row-by-row using event-driven
4//! parsing (`quick_xml::Reader`) without materializing the full DOM. This
5//! enables processing large worksheets with bounded memory by reading rows
6//! in batches.
7//!
8//! Shared string indices are resolved through a reference to the workbook's
9//! [`SharedStringTable`]. Cell types (string, number, boolean, date, formula,
10//! error, inline string) are handled according to the OOXML specification.
11
12use std::io::BufRead;
13
14use quick_xml::events::Event;
15use quick_xml::name::QName;
16
17use crate::cell::CellValue;
18use crate::error::{Error, Result};
19use crate::sst::SharedStringTable;
20use crate::utils::cell_ref::cell_name_to_coordinates;
21
22/// A single row produced by the streaming reader.
23#[derive(Debug, Clone)]
24pub struct StreamRow {
25    /// 1-based row number.
26    pub row_number: u32,
27    /// Cells in this row as (1-based column index, value) pairs.
28    pub cells: Vec<(u32, CellValue)>,
29}
30
31/// Forward-only streaming reader for worksheet XML.
32///
33/// Reads rows in batches without deserializing the entire worksheet into
34/// memory. The reader borrows the shared string table for resolving string
35/// cell references.
36pub struct SheetStreamReader<'a, R: BufRead> {
37    reader: quick_xml::Reader<R>,
38    sst: &'a SharedStringTable,
39    done: bool,
40    row_limit: Option<u32>,
41    rows_emitted: u32,
42}
43
44impl<'a, R: BufRead> SheetStreamReader<'a, R> {
45    /// Create a new streaming reader over the given `BufRead` source.
46    ///
47    /// `sst` is a reference to the shared string table for resolving
48    /// shared string cell values. `row_limit` optionally caps the number
49    /// of rows returned.
50    pub fn new(source: R, sst: &'a SharedStringTable, row_limit: Option<u32>) -> Self {
51        let mut reader = quick_xml::Reader::from_reader(source);
52        reader.config_mut().trim_text(false);
53        Self {
54            reader,
55            sst,
56            done: false,
57            row_limit,
58            rows_emitted: 0,
59        }
60    }
61
62    /// Read the next batch of rows. Returns an empty `Vec` when there are no
63    /// more rows to read.
64    pub fn next_batch(&mut self, batch_size: usize) -> Result<Vec<StreamRow>> {
65        if self.done {
66            return Ok(Vec::new());
67        }
68
69        let mut rows = Vec::with_capacity(batch_size);
70        let mut buf = Vec::with_capacity(4096);
71
72        loop {
73            if rows.len() >= batch_size {
74                break;
75            }
76            if let Some(limit) = self.row_limit {
77                if self.rows_emitted >= limit {
78                    self.done = true;
79                    break;
80                }
81            }
82
83            buf.clear();
84            match self
85                .reader
86                .read_event_into(&mut buf)
87                .map_err(|e| Error::XmlParse(e.to_string()))?
88            {
89                Event::Start(ref e) if e.name() == QName(b"row") => {
90                    let row_number = extract_row_number(e)?;
91                    let row = self.parse_row_body(row_number)?;
92                    self.rows_emitted += 1;
93                    if !row.cells.is_empty() {
94                        rows.push(row);
95                    }
96                }
97                Event::Eof => {
98                    self.done = true;
99                    break;
100                }
101                _ => {}
102            }
103        }
104
105        Ok(rows)
106    }
107
108    /// Returns `true` if there are potentially more rows to read.
109    pub fn has_more(&self) -> bool {
110        !self.done
111    }
112
113    /// Close the reader and release resources.
114    pub fn close(self) {
115        drop(self);
116    }
117
118    /// Parse the body of a `<row>` element (its child `<c>` elements) after
119    /// the row number has been extracted from the opening tag.
120    fn parse_row_body(&mut self, row_number: u32) -> Result<StreamRow> {
121        let mut cells = Vec::new();
122        let mut buf = Vec::with_capacity(1024);
123
124        loop {
125            buf.clear();
126            match self
127                .reader
128                .read_event_into(&mut buf)
129                .map_err(|e| Error::XmlParse(e.to_string()))?
130            {
131                Event::Start(ref e) if e.name() == QName(b"c") => {
132                    let (col, cell_type) = extract_cell_attrs(e)?;
133                    if let Some(col) = col {
134                        let cv = self.parse_cell_body(cell_type.as_deref())?;
135                        cells.push((col, cv));
136                    } else {
137                        self.skip_to_end_of(b"c")?;
138                    }
139                }
140                Event::Empty(ref e) if e.name() == QName(b"c") => {
141                    let (col, cell_type) = extract_cell_attrs(e)?;
142                    if let Some(col) = col {
143                        let cv =
144                            resolve_cell_value(self.sst, cell_type.as_deref(), None, None, None)?;
145                        cells.push((col, cv));
146                    }
147                }
148                Event::End(ref e) if e.name() == QName(b"row") => break,
149                Event::Eof => {
150                    self.done = true;
151                    break;
152                }
153                _ => {}
154            }
155        }
156
157        Ok(StreamRow { row_number, cells })
158    }
159
160    /// Parse the body of a `<c>` element (its child `<v>`, `<f>`, `<is>`
161    /// elements) after the cell attributes have been extracted.
162    fn parse_cell_body(&mut self, cell_type: Option<&str>) -> Result<CellValue> {
163        let mut value_text: Option<String> = None;
164        let mut formula_text: Option<String> = None;
165        let mut inline_string: Option<String> = None;
166        let mut buf = Vec::with_capacity(512);
167        let mut in_is = false;
168
169        loop {
170            buf.clear();
171            match self
172                .reader
173                .read_event_into(&mut buf)
174                .map_err(|e| Error::XmlParse(e.to_string()))?
175            {
176                Event::Start(ref e) => {
177                    let local = e.local_name();
178                    if local.as_ref() == b"v" {
179                        value_text = Some(self.read_text_content(b"v")?);
180                    } else if local.as_ref() == b"f" {
181                        formula_text = Some(self.read_text_content(b"f")?);
182                    } else if local.as_ref() == b"is" {
183                        in_is = true;
184                        inline_string = Some(String::new());
185                    } else if local.as_ref() == b"t" && in_is {
186                        let t = self.read_text_content(b"t")?;
187                        if let Some(ref mut is) = inline_string {
188                            is.push_str(&t);
189                        }
190                    }
191                }
192                Event::End(ref e) => {
193                    let local = e.local_name();
194                    if local.as_ref() == b"c" {
195                        break;
196                    }
197                    if local.as_ref() == b"is" {
198                        in_is = false;
199                    }
200                }
201                Event::Eof => {
202                    self.done = true;
203                    break;
204                }
205                _ => {}
206            }
207        }
208
209        resolve_cell_value(
210            self.sst,
211            cell_type,
212            value_text.as_deref(),
213            formula_text,
214            inline_string,
215        )
216    }
217
218    /// Read text content between an opening tag and its matching closing tag.
219    fn read_text_content(&mut self, end_tag: &[u8]) -> Result<String> {
220        let mut text = String::new();
221        let mut buf = Vec::with_capacity(256);
222        loop {
223            buf.clear();
224            match self
225                .reader
226                .read_event_into(&mut buf)
227                .map_err(|e| Error::XmlParse(e.to_string()))?
228            {
229                Event::Text(ref e) => {
230                    let decoded = e.unescape().map_err(|e| Error::XmlParse(e.to_string()))?;
231                    text.push_str(&decoded);
232                }
233                Event::End(ref e) if e.local_name().as_ref() == end_tag => break,
234                Event::Eof => {
235                    self.done = true;
236                    break;
237                }
238                _ => {}
239            }
240        }
241        Ok(text)
242    }
243
244    /// Skip all events until the matching end tag for the given element.
245    fn skip_to_end_of(&mut self, tag: &[u8]) -> Result<()> {
246        let mut buf = Vec::with_capacity(256);
247        let mut depth: u32 = 1;
248        loop {
249            buf.clear();
250            match self
251                .reader
252                .read_event_into(&mut buf)
253                .map_err(|e| Error::XmlParse(e.to_string()))?
254            {
255                Event::Start(ref e) if e.local_name().as_ref() == tag => {
256                    depth += 1;
257                }
258                Event::End(ref e) if e.local_name().as_ref() == tag => {
259                    depth -= 1;
260                    if depth == 0 {
261                        break;
262                    }
263                }
264                Event::Eof => {
265                    self.done = true;
266                    break;
267                }
268                _ => {}
269            }
270        }
271        Ok(())
272    }
273}
274
275/// Owning variant of [`SheetStreamReader`] for use in FFI contexts where
276/// lifetime parameters are not supported (e.g., napi classes).
277///
278/// Stores its own copy of the shared string table and the XML byte source,
279/// avoiding any borrowed references. The parsing logic delegates to the same
280/// free functions used by the borrowed reader.
281pub struct OwnedSheetStreamReader {
282    reader: quick_xml::Reader<std::io::BufReader<std::io::Cursor<Vec<u8>>>>,
283    sst: SharedStringTable,
284    done: bool,
285    row_limit: Option<u32>,
286    rows_emitted: u32,
287}
288
289impl OwnedSheetStreamReader {
290    /// Create a new owned streaming reader.
291    ///
292    /// `xml_bytes` is the raw worksheet XML. `sst` is a read-only clone of
293    /// the shared string table. `row_limit` optionally caps the total number
294    /// of rows returned.
295    pub fn new(xml_bytes: Vec<u8>, sst: SharedStringTable, row_limit: Option<u32>) -> Self {
296        let cursor = std::io::Cursor::new(xml_bytes);
297        let buf_reader = std::io::BufReader::new(cursor);
298        let mut reader = quick_xml::Reader::from_reader(buf_reader);
299        reader.config_mut().trim_text(false);
300        Self {
301            reader,
302            sst,
303            done: false,
304            row_limit,
305            rows_emitted: 0,
306        }
307    }
308
309    /// Read the next batch of rows. Returns an empty `Vec` when there are no
310    /// more rows to read.
311    pub fn next_batch(&mut self, batch_size: usize) -> Result<Vec<StreamRow>> {
312        if self.done {
313            return Ok(Vec::new());
314        }
315
316        let mut rows = Vec::with_capacity(batch_size);
317        let mut buf = Vec::with_capacity(4096);
318
319        loop {
320            if rows.len() >= batch_size {
321                break;
322            }
323            if let Some(limit) = self.row_limit {
324                if self.rows_emitted >= limit {
325                    self.done = true;
326                    break;
327                }
328            }
329
330            buf.clear();
331            match self
332                .reader
333                .read_event_into(&mut buf)
334                .map_err(|e| Error::XmlParse(e.to_string()))?
335            {
336                Event::Start(ref e) if e.name() == QName(b"row") => {
337                    let row_number = extract_row_number(e)?;
338                    let row = self.parse_row_body(row_number)?;
339                    self.rows_emitted += 1;
340                    if !row.cells.is_empty() {
341                        rows.push(row);
342                    }
343                }
344                Event::Eof => {
345                    self.done = true;
346                    break;
347                }
348                _ => {}
349            }
350        }
351
352        Ok(rows)
353    }
354
355    /// Returns `true` if there are potentially more rows to read.
356    pub fn has_more(&self) -> bool {
357        !self.done
358    }
359
360    /// Close the reader and release resources.
361    pub fn close(self) {
362        drop(self);
363    }
364
365    fn parse_row_body(&mut self, row_number: u32) -> Result<StreamRow> {
366        let mut cells = Vec::new();
367        let mut buf = Vec::with_capacity(1024);
368
369        loop {
370            buf.clear();
371            match self
372                .reader
373                .read_event_into(&mut buf)
374                .map_err(|e| Error::XmlParse(e.to_string()))?
375            {
376                Event::Start(ref e) if e.name() == QName(b"c") => {
377                    let (col, cell_type) = extract_cell_attrs(e)?;
378                    if let Some(col) = col {
379                        let cv = self.parse_cell_body(cell_type.as_deref())?;
380                        cells.push((col, cv));
381                    } else {
382                        self.skip_to_end_of(b"c")?;
383                    }
384                }
385                Event::Empty(ref e) if e.name() == QName(b"c") => {
386                    let (col, cell_type) = extract_cell_attrs(e)?;
387                    if let Some(col) = col {
388                        let cv =
389                            resolve_cell_value(&self.sst, cell_type.as_deref(), None, None, None)?;
390                        cells.push((col, cv));
391                    }
392                }
393                Event::End(ref e) if e.name() == QName(b"row") => break,
394                Event::Eof => {
395                    self.done = true;
396                    break;
397                }
398                _ => {}
399            }
400        }
401
402        Ok(StreamRow { row_number, cells })
403    }
404
405    fn parse_cell_body(&mut self, cell_type: Option<&str>) -> Result<CellValue> {
406        let mut value_text: Option<String> = None;
407        let mut formula_text: Option<String> = None;
408        let mut inline_string: Option<String> = None;
409        let mut buf = Vec::with_capacity(512);
410        let mut in_is = false;
411
412        loop {
413            buf.clear();
414            match self
415                .reader
416                .read_event_into(&mut buf)
417                .map_err(|e| Error::XmlParse(e.to_string()))?
418            {
419                Event::Start(ref e) => {
420                    let local = e.local_name();
421                    if local.as_ref() == b"v" {
422                        value_text = Some(self.read_text_content(b"v")?);
423                    } else if local.as_ref() == b"f" {
424                        formula_text = Some(self.read_text_content(b"f")?);
425                    } else if local.as_ref() == b"is" {
426                        in_is = true;
427                        inline_string = Some(String::new());
428                    } else if local.as_ref() == b"t" && in_is {
429                        let t = self.read_text_content(b"t")?;
430                        if let Some(ref mut is) = inline_string {
431                            is.push_str(&t);
432                        }
433                    }
434                }
435                Event::End(ref e) => {
436                    let local = e.local_name();
437                    if local.as_ref() == b"c" {
438                        break;
439                    }
440                    if local.as_ref() == b"is" {
441                        in_is = false;
442                    }
443                }
444                Event::Eof => {
445                    self.done = true;
446                    break;
447                }
448                _ => {}
449            }
450        }
451
452        resolve_cell_value(
453            &self.sst,
454            cell_type,
455            value_text.as_deref(),
456            formula_text,
457            inline_string,
458        )
459    }
460
461    fn read_text_content(&mut self, end_tag: &[u8]) -> Result<String> {
462        let mut text = String::new();
463        let mut buf = Vec::with_capacity(256);
464        loop {
465            buf.clear();
466            match self
467                .reader
468                .read_event_into(&mut buf)
469                .map_err(|e| Error::XmlParse(e.to_string()))?
470            {
471                Event::Text(ref e) => {
472                    let decoded = e.unescape().map_err(|e| Error::XmlParse(e.to_string()))?;
473                    text.push_str(&decoded);
474                }
475                Event::End(ref e) if e.local_name().as_ref() == end_tag => break,
476                Event::Eof => {
477                    self.done = true;
478                    break;
479                }
480                _ => {}
481            }
482        }
483        Ok(text)
484    }
485
486    fn skip_to_end_of(&mut self, tag: &[u8]) -> Result<()> {
487        let mut buf = Vec::with_capacity(256);
488        let mut depth: u32 = 1;
489        loop {
490            buf.clear();
491            match self
492                .reader
493                .read_event_into(&mut buf)
494                .map_err(|e| Error::XmlParse(e.to_string()))?
495            {
496                Event::Start(ref e) if e.local_name().as_ref() == tag => {
497                    depth += 1;
498                }
499                Event::End(ref e) if e.local_name().as_ref() == tag => {
500                    depth -= 1;
501                    if depth == 0 {
502                        break;
503                    }
504                }
505                Event::Eof => {
506                    self.done = true;
507                    break;
508                }
509                _ => {}
510            }
511        }
512        Ok(())
513    }
514}
515
516/// Extract the `r` (row number) attribute from a `<row>` element.
517fn extract_row_number(start: &quick_xml::events::BytesStart<'_>) -> Result<u32> {
518    for attr in start.attributes().flatten() {
519        if attr.key == QName(b"r") {
520            let val =
521                std::str::from_utf8(&attr.value).map_err(|e| Error::XmlParse(e.to_string()))?;
522            return val
523                .parse::<u32>()
524                .map_err(|e| Error::XmlParse(format!("invalid row number: {e}")));
525        }
526    }
527    Err(Error::XmlParse(
528        "row element missing r attribute".to_string(),
529    ))
530}
531
532/// Extract the cell reference (column index) and type attribute from a `<c>` element.
533fn extract_cell_attrs(
534    start: &quick_xml::events::BytesStart<'_>,
535) -> Result<(Option<u32>, Option<String>)> {
536    let mut cell_ref: Option<String> = None;
537    let mut cell_type: Option<String> = None;
538
539    for attr in start.attributes().flatten() {
540        match attr.key {
541            QName(b"r") => {
542                cell_ref = Some(
543                    std::str::from_utf8(&attr.value)
544                        .map_err(|e| Error::XmlParse(e.to_string()))?
545                        .to_string(),
546                );
547            }
548            QName(b"t") => {
549                cell_type = Some(
550                    std::str::from_utf8(&attr.value)
551                        .map_err(|e| Error::XmlParse(e.to_string()))?
552                        .to_string(),
553                );
554            }
555            _ => {}
556        }
557    }
558
559    let col = match &cell_ref {
560        Some(r) => Some(cell_name_to_coordinates(r)?.0),
561        None => None,
562    };
563
564    Ok((col, cell_type))
565}
566
567/// Resolve cell type, value text, formula, and inline string into a `CellValue`.
568fn resolve_cell_value(
569    sst: &SharedStringTable,
570    cell_type: Option<&str>,
571    value_text: Option<&str>,
572    formula_text: Option<String>,
573    inline_string: Option<String>,
574) -> Result<CellValue> {
575    if let Some(formula) = formula_text {
576        let cached = match (cell_type, value_text) {
577            (Some("b"), Some(v)) => Some(Box::new(CellValue::Bool(v == "1"))),
578            (Some("e"), Some(v)) => Some(Box::new(CellValue::Error(v.to_string()))),
579            (Some("str"), Some(v)) => Some(Box::new(CellValue::String(v.to_string()))),
580            (_, Some(v)) => v
581                .parse::<f64>()
582                .ok()
583                .map(|n| Box::new(CellValue::Number(n))),
584            _ => None,
585        };
586        return Ok(CellValue::Formula {
587            expr: formula,
588            result: cached,
589        });
590    }
591
592    match (cell_type, value_text) {
593        (Some("s"), Some(v)) => {
594            let idx: usize = v
595                .parse()
596                .map_err(|_| Error::Internal(format!("invalid SST index: {v}")))?;
597            let s = sst
598                .get(idx)
599                .ok_or_else(|| Error::Internal(format!("SST index {idx} out of bounds")))?;
600            Ok(CellValue::String(s.to_string()))
601        }
602        (Some("b"), Some(v)) => Ok(CellValue::Bool(v == "1")),
603        (Some("e"), Some(v)) => Ok(CellValue::Error(v.to_string())),
604        (Some("inlineStr"), _) => Ok(CellValue::String(inline_string.unwrap_or_default())),
605        (Some("str"), Some(v)) => Ok(CellValue::String(v.to_string())),
606        (Some("d"), Some(v)) => {
607            let n: f64 = v
608                .parse()
609                .map_err(|_| Error::Internal(format!("invalid date value: {v}")))?;
610            Ok(CellValue::Date(n))
611        }
612        (Some("n") | None, Some(v)) => {
613            let n: f64 = v
614                .parse()
615                .map_err(|_| Error::Internal(format!("invalid number: {v}")))?;
616            Ok(CellValue::Number(n))
617        }
618        _ => Ok(CellValue::Empty),
619    }
620}
621
622#[cfg(test)]
623mod tests {
624    use super::*;
625    use std::io::Cursor;
626
627    fn make_sst(strings: &[&str]) -> SharedStringTable {
628        let mut sst = SharedStringTable::new();
629        for s in strings {
630            sst.add(s);
631        }
632        sst
633    }
634
635    fn worksheet_xml(sheet_data: &str) -> String {
636        format!(
637            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
638<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
639<sheetData>
640{sheet_data}
641</sheetData>
642</worksheet>"#
643        )
644    }
645
646    fn read_all(xml: &str, sst: &SharedStringTable, row_limit: Option<u32>) -> Vec<StreamRow> {
647        let cursor = Cursor::new(xml.as_bytes().to_vec());
648        let mut reader = SheetStreamReader::new(cursor, sst, row_limit);
649        let mut all = Vec::new();
650        loop {
651            let batch = reader.next_batch(100).unwrap();
652            if batch.is_empty() {
653                break;
654            }
655            all.extend(batch);
656        }
657        all
658    }
659
660    #[test]
661    fn test_basic_batch_reading() {
662        let sst = make_sst(&["Name", "Age"]);
663        let xml = worksheet_xml(
664            r#"
665<row r="1"><c r="A1" t="s"><v>0</v></c><c r="B1" t="s"><v>1</v></c></row>
666<row r="2"><c r="A2" t="s"><v>0</v></c><c r="B2"><v>30</v></c></row>
667<row r="3"><c r="A3" t="s"><v>0</v></c><c r="B3"><v>25</v></c></row>
668"#,
669        );
670
671        let cursor = Cursor::new(xml.as_bytes().to_vec());
672        let mut reader = SheetStreamReader::new(cursor, &sst, None);
673
674        let batch1 = reader.next_batch(2).unwrap();
675        assert_eq!(batch1.len(), 2);
676        assert!(reader.has_more());
677
678        let batch2 = reader.next_batch(2).unwrap();
679        assert_eq!(batch2.len(), 1);
680
681        let batch3 = reader.next_batch(2).unwrap();
682        assert!(batch3.is_empty());
683        assert!(!reader.has_more());
684    }
685
686    #[test]
687    fn test_sparse_rows() {
688        let sst = SharedStringTable::new();
689        let xml = worksheet_xml(
690            r#"
691<row r="1"><c r="A1"><v>1</v></c></row>
692<row r="5"><c r="C5"><v>5</v></c></row>
693<row r="100"><c r="A100"><v>100</v></c></row>
694"#,
695        );
696
697        let rows = read_all(&xml, &sst, None);
698        assert_eq!(rows.len(), 3);
699        assert_eq!(rows[0].row_number, 1);
700        assert_eq!(rows[1].row_number, 5);
701        assert_eq!(rows[1].cells[0].0, 3);
702        assert_eq!(rows[2].row_number, 100);
703    }
704
705    #[test]
706    fn test_all_cell_types() {
707        let sst = make_sst(&["Hello"]);
708        let xml = worksheet_xml(
709            r#"
710<row r="1">
711  <c r="A1" t="s"><v>0</v></c>
712  <c r="B1"><v>42.5</v></c>
713  <c r="C1" t="b"><v>1</v></c>
714  <c r="D1" t="e"><v>#DIV/0!</v></c>
715  <c r="E1" t="inlineStr"><is><t>Inline</t></is></c>
716  <c r="F1" t="n"><v>99</v></c>
717  <c r="G1" t="d"><v>45000</v></c>
718</row>
719"#,
720        );
721
722        let rows = read_all(&xml, &sst, None);
723        assert_eq!(rows.len(), 1);
724        let cells = &rows[0].cells;
725
726        assert_eq!(cells[0], (1, CellValue::String("Hello".to_string())));
727        assert_eq!(cells[1], (2, CellValue::Number(42.5)));
728        assert_eq!(cells[2], (3, CellValue::Bool(true)));
729        assert_eq!(cells[3], (4, CellValue::Error("#DIV/0!".to_string())));
730        assert_eq!(cells[4], (5, CellValue::String("Inline".to_string())));
731        assert_eq!(cells[5], (6, CellValue::Number(99.0)));
732        assert_eq!(cells[6], (7, CellValue::Date(45000.0)));
733    }
734
735    #[test]
736    fn test_boolean_false() {
737        let sst = SharedStringTable::new();
738        let xml = worksheet_xml(r#"<row r="1"><c r="A1" t="b"><v>0</v></c></row>"#);
739        let rows = read_all(&xml, &sst, None);
740        assert_eq!(rows[0].cells[0].1, CellValue::Bool(false));
741    }
742
743    #[test]
744    fn test_shared_string_resolution() {
745        let sst = make_sst(&["First", "Second", "Third"]);
746        let xml = worksheet_xml(
747            r#"
748<row r="1">
749  <c r="A1" t="s"><v>0</v></c>
750  <c r="B1" t="s"><v>1</v></c>
751  <c r="C1" t="s"><v>2</v></c>
752</row>
753"#,
754        );
755
756        let rows = read_all(&xml, &sst, None);
757        assert_eq!(rows[0].cells[0].1, CellValue::String("First".to_string()));
758        assert_eq!(rows[0].cells[1].1, CellValue::String("Second".to_string()));
759        assert_eq!(rows[0].cells[2].1, CellValue::String("Third".to_string()));
760    }
761
762    #[test]
763    fn test_shared_string_out_of_bounds() {
764        let sst = make_sst(&["Only"]);
765        let xml = worksheet_xml(r#"<row r="1"><c r="A1" t="s"><v>999</v></c></row>"#);
766
767        let cursor = Cursor::new(xml.as_bytes().to_vec());
768        let mut reader = SheetStreamReader::new(cursor, &sst, None);
769        let result = reader.next_batch(10);
770        assert!(result.is_err());
771    }
772
773    #[test]
774    fn test_row_limit() {
775        let sst = SharedStringTable::new();
776        let xml = worksheet_xml(
777            r#"
778<row r="1"><c r="A1"><v>1</v></c></row>
779<row r="2"><c r="A2"><v>2</v></c></row>
780<row r="3"><c r="A3"><v>3</v></c></row>
781<row r="4"><c r="A4"><v>4</v></c></row>
782<row r="5"><c r="A5"><v>5</v></c></row>
783"#,
784        );
785
786        let rows = read_all(&xml, &sst, Some(3));
787        assert_eq!(rows.len(), 3);
788        assert_eq!(rows[0].row_number, 1);
789        assert_eq!(rows[2].row_number, 3);
790    }
791
792    #[test]
793    fn test_row_limit_zero() {
794        let sst = SharedStringTable::new();
795        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><v>1</v></c></row>"#);
796
797        let rows = read_all(&xml, &sst, Some(0));
798        assert!(rows.is_empty());
799    }
800
801    #[test]
802    fn test_empty_sheet() {
803        let sst = SharedStringTable::new();
804        let xml = worksheet_xml("");
805
806        let rows = read_all(&xml, &sst, None);
807        assert!(rows.is_empty());
808    }
809
810    #[test]
811    fn test_empty_rows_are_skipped() {
812        let sst = SharedStringTable::new();
813        let xml = worksheet_xml(
814            r#"
815<row r="1"></row>
816<row r="2"><c r="A2"><v>42</v></c></row>
817<row r="3"></row>
818"#,
819        );
820
821        let rows = read_all(&xml, &sst, None);
822        assert_eq!(rows.len(), 1);
823        assert_eq!(rows[0].row_number, 2);
824    }
825
826    #[test]
827    fn test_empty_rows_count_against_limit() {
828        let sst = SharedStringTable::new();
829        let xml = worksheet_xml(
830            r#"
831<row r="1"></row>
832<row r="2"></row>
833<row r="3"><c r="A3"><v>3</v></c></row>
834<row r="4"><c r="A4"><v>4</v></c></row>
835"#,
836        );
837
838        let rows = read_all(&xml, &sst, Some(2));
839        assert!(
840            rows.is_empty(),
841            "with limit=2 and 2 empty rows, no data rows should be returned"
842        );
843
844        let rows2 = read_all(&xml, &sst, Some(3));
845        assert_eq!(rows2.len(), 1);
846        assert_eq!(rows2[0].row_number, 3);
847    }
848
849    #[test]
850    fn test_formula_with_cached_number() {
851        let sst = SharedStringTable::new();
852        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><f>SUM(B1:B10)</f><v>42</v></c></row>"#);
853
854        let rows = read_all(&xml, &sst, None);
855        match &rows[0].cells[0].1 {
856            CellValue::Formula { expr, result } => {
857                assert_eq!(expr, "SUM(B1:B10)");
858                assert_eq!(result.as_deref(), Some(&CellValue::Number(42.0)));
859            }
860            other => panic!("expected Formula, got {:?}", other),
861        }
862    }
863
864    #[test]
865    fn test_formula_with_cached_string() {
866        let sst = SharedStringTable::new();
867        let xml = worksheet_xml(
868            r#"<row r="1"><c r="A1" t="str"><f>CONCAT("a","b")</f><v>ab</v></c></row>"#,
869        );
870
871        let rows = read_all(&xml, &sst, None);
872        match &rows[0].cells[0].1 {
873            CellValue::Formula { expr, result } => {
874                assert_eq!(expr, r#"CONCAT("a","b")"#);
875                assert_eq!(
876                    result.as_deref(),
877                    Some(&CellValue::String("ab".to_string()))
878                );
879            }
880            other => panic!("expected Formula, got {:?}", other),
881        }
882    }
883
884    #[test]
885    fn test_formula_with_cached_boolean() {
886        let sst = SharedStringTable::new();
887        let xml = worksheet_xml(r#"<row r="1"><c r="A1" t="b"><f>TRUE()</f><v>1</v></c></row>"#);
888
889        let rows = read_all(&xml, &sst, None);
890        match &rows[0].cells[0].1 {
891            CellValue::Formula { expr, result } => {
892                assert_eq!(expr, "TRUE()");
893                assert_eq!(result.as_deref(), Some(&CellValue::Bool(true)));
894            }
895            other => panic!("expected Formula, got {:?}", other),
896        }
897    }
898
899    #[test]
900    fn test_formula_with_cached_error() {
901        let sst = SharedStringTable::new();
902        let xml = worksheet_xml(r#"<row r="1"><c r="A1" t="e"><f>1/0</f><v>#DIV/0!</v></c></row>"#);
903
904        let rows = read_all(&xml, &sst, None);
905        match &rows[0].cells[0].1 {
906            CellValue::Formula { expr, result } => {
907                assert_eq!(expr, "1/0");
908                assert_eq!(
909                    result.as_deref(),
910                    Some(&CellValue::Error("#DIV/0!".to_string()))
911                );
912            }
913            other => panic!("expected Formula, got {:?}", other),
914        }
915    }
916
917    #[test]
918    fn test_formula_without_cached_value() {
919        let sst = SharedStringTable::new();
920        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><f>A2+A3</f></c></row>"#);
921
922        let rows = read_all(&xml, &sst, None);
923        match &rows[0].cells[0].1 {
924            CellValue::Formula { expr, result } => {
925                assert_eq!(expr, "A2+A3");
926                assert!(result.is_none());
927            }
928            other => panic!("expected Formula, got {:?}", other),
929        }
930    }
931
932    #[test]
933    fn test_inline_string_with_rich_text_runs() {
934        let sst = SharedStringTable::new();
935        let xml = worksheet_xml(
936            r#"<row r="1"><c r="A1" t="inlineStr"><is><r><t>Bold</t></r><r><t> Normal</t></r></is></c></row>"#,
937        );
938
939        let rows = read_all(&xml, &sst, None);
940        assert_eq!(
941            rows[0].cells[0].1,
942            CellValue::String("Bold Normal".to_string())
943        );
944    }
945
946    #[test]
947    fn test_reader_close() {
948        let sst = SharedStringTable::new();
949        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><v>1</v></c></row>"#);
950        let cursor = Cursor::new(xml.as_bytes().to_vec());
951        let reader = SheetStreamReader::new(cursor, &sst, None);
952        reader.close();
953    }
954
955    #[test]
956    fn test_reader_drop_without_reading_all() {
957        let sst = SharedStringTable::new();
958        let xml = worksheet_xml(
959            r#"
960<row r="1"><c r="A1"><v>1</v></c></row>
961<row r="2"><c r="A2"><v>2</v></c></row>
962"#,
963        );
964        let cursor = Cursor::new(xml.as_bytes().to_vec());
965        let mut reader = SheetStreamReader::new(cursor, &sst, None);
966        let batch = reader.next_batch(1).unwrap();
967        assert_eq!(batch.len(), 1);
968        drop(reader);
969    }
970
971    #[test]
972    fn test_has_more_transitions() {
973        let sst = SharedStringTable::new();
974        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><v>1</v></c></row>"#);
975
976        let cursor = Cursor::new(xml.as_bytes().to_vec());
977        let mut reader = SheetStreamReader::new(cursor, &sst, None);
978        assert!(reader.has_more());
979
980        let batch = reader.next_batch(100).unwrap();
981        assert_eq!(batch.len(), 1);
982
983        let batch2 = reader.next_batch(100).unwrap();
984        assert!(batch2.is_empty());
985        assert!(!reader.has_more());
986    }
987
988    #[test]
989    fn test_batch_size_one() {
990        let sst = SharedStringTable::new();
991        let xml = worksheet_xml(
992            r#"
993<row r="1"><c r="A1"><v>1</v></c></row>
994<row r="2"><c r="A2"><v>2</v></c></row>
995<row r="3"><c r="A3"><v>3</v></c></row>
996"#,
997        );
998
999        let cursor = Cursor::new(xml.as_bytes().to_vec());
1000        let mut reader = SheetStreamReader::new(cursor, &sst, None);
1001
1002        for expected_row in 1..=3 {
1003            let batch = reader.next_batch(1).unwrap();
1004            assert_eq!(batch.len(), 1);
1005            assert_eq!(batch[0].row_number, expected_row);
1006        }
1007
1008        let batch = reader.next_batch(1).unwrap();
1009        assert!(batch.is_empty());
1010    }
1011
1012    #[test]
1013    fn test_cell_with_no_value() {
1014        let sst = SharedStringTable::new();
1015        let xml = worksheet_xml(r#"<row r="1"><c r="A1"></c><c r="B1"><v>42</v></c></row>"#);
1016
1017        let rows = read_all(&xml, &sst, None);
1018        assert_eq!(rows[0].cells.len(), 2);
1019        assert_eq!(rows[0].cells[0].1, CellValue::Empty);
1020        assert_eq!(rows[0].cells[1].1, CellValue::Number(42.0));
1021    }
1022
1023    #[test]
1024    fn test_self_closing_cell_element() {
1025        let sst = SharedStringTable::new();
1026        let xml = worksheet_xml(
1027            r#"<row r="1"><c r="A1"/><c r="B1"><v>42</v></c><c r="C1" t="b"/></row>"#,
1028        );
1029
1030        let rows = read_all(&xml, &sst, None);
1031        assert_eq!(rows[0].cells.len(), 3);
1032        assert_eq!(rows[0].cells[0], (1, CellValue::Empty));
1033        assert_eq!(rows[0].cells[1], (2, CellValue::Number(42.0)));
1034        assert_eq!(rows[0].cells[2], (3, CellValue::Empty));
1035    }
1036
1037    #[test]
1038    fn test_integration_with_saved_workbook() {
1039        let mut wb = crate::workbook::Workbook::new();
1040        wb.set_cell_value("Sheet1", "A1", "Name").unwrap();
1041        wb.set_cell_value("Sheet1", "B1", "Score").unwrap();
1042        wb.set_cell_value("Sheet1", "A2", "Alice").unwrap();
1043        wb.set_cell_value("Sheet1", "B2", 95.5f64).unwrap();
1044        wb.set_cell_value("Sheet1", "A3", "Bob").unwrap();
1045        wb.set_cell_value("Sheet1", "B3", 87.0f64).unwrap();
1046
1047        let dir = tempfile::TempDir::new().unwrap();
1048        let path = dir.path().join("stream_reader_test.xlsx");
1049        wb.save(&path).unwrap();
1050
1051        let wb2 = crate::workbook::Workbook::open_with_options(
1052            &path,
1053            &crate::workbook::OpenOptions::new().read_mode(crate::workbook::ReadMode::Lazy),
1054        )
1055        .unwrap();
1056
1057        let mut reader = wb2.open_sheet_reader("Sheet1").unwrap();
1058        let rows = reader.next_batch(100).unwrap();
1059
1060        assert_eq!(rows.len(), 3);
1061        assert_eq!(rows[0].row_number, 1);
1062        assert_eq!(rows[0].cells[0].1, CellValue::String("Name".to_string()));
1063        assert_eq!(rows[0].cells[1].1, CellValue::String("Score".to_string()));
1064        assert_eq!(rows[1].cells[0].1, CellValue::String("Alice".to_string()));
1065        assert_eq!(rows[1].cells[1].1, CellValue::Number(95.5));
1066        assert_eq!(rows[2].cells[0].1, CellValue::String("Bob".to_string()));
1067        assert_eq!(rows[2].cells[1].1, CellValue::Number(87.0));
1068    }
1069
1070    #[test]
1071    fn test_integration_with_row_limit() {
1072        let mut wb = crate::workbook::Workbook::new();
1073        for i in 1..=10 {
1074            let cell = format!("A{i}");
1075            wb.set_cell_value("Sheet1", &cell, i as f64).unwrap();
1076        }
1077
1078        let dir = tempfile::TempDir::new().unwrap();
1079        let path = dir.path().join("stream_limit_test.xlsx");
1080        wb.save(&path).unwrap();
1081
1082        let wb2 = crate::workbook::Workbook::open_with_options(
1083            &path,
1084            &crate::workbook::OpenOptions::new()
1085                .read_mode(crate::workbook::ReadMode::Lazy)
1086                .sheet_rows(5),
1087        )
1088        .unwrap();
1089
1090        let mut reader = wb2.open_sheet_reader("Sheet1").unwrap();
1091        let mut all_rows = Vec::new();
1092        loop {
1093            let batch = reader.next_batch(3).unwrap();
1094            if batch.is_empty() {
1095                break;
1096            }
1097            all_rows.extend(batch);
1098        }
1099
1100        assert_eq!(all_rows.len(), 5);
1101        assert_eq!(all_rows[4].row_number, 5);
1102    }
1103
1104    #[test]
1105    fn test_integration_sheet_not_found() {
1106        let wb = crate::workbook::Workbook::new();
1107        let result = wb.open_sheet_reader("NonExistent");
1108        assert!(result.is_err());
1109    }
1110}