1use std::io::BufRead;
13
14use quick_xml::events::Event;
15use quick_xml::name::QName;
16
17use crate::cell::CellValue;
18use crate::error::{Error, Result};
19use crate::sst::SharedStringTable;
20use crate::utils::cell_ref::cell_name_to_coordinates;
21
22#[derive(Debug, Clone)]
24pub struct StreamRow {
25 pub row_number: u32,
27 pub cells: Vec<(u32, CellValue)>,
29}
30
31pub struct SheetStreamReader<'a, R: BufRead> {
37 reader: quick_xml::Reader<R>,
38 sst: &'a SharedStringTable,
39 done: bool,
40 row_limit: Option<u32>,
41 rows_emitted: u32,
42}
43
44impl<'a, R: BufRead> SheetStreamReader<'a, R> {
45 pub fn new(source: R, sst: &'a SharedStringTable, row_limit: Option<u32>) -> Self {
51 let mut reader = quick_xml::Reader::from_reader(source);
52 reader.config_mut().trim_text(false);
53 Self {
54 reader,
55 sst,
56 done: false,
57 row_limit,
58 rows_emitted: 0,
59 }
60 }
61
62 pub fn next_batch(&mut self, batch_size: usize) -> Result<Vec<StreamRow>> {
65 if self.done {
66 return Ok(Vec::new());
67 }
68
69 let mut rows = Vec::with_capacity(batch_size);
70 let mut buf = Vec::with_capacity(4096);
71
72 loop {
73 if rows.len() >= batch_size {
74 break;
75 }
76 if let Some(limit) = self.row_limit {
77 if self.rows_emitted >= limit {
78 self.done = true;
79 break;
80 }
81 }
82
83 buf.clear();
84 match self
85 .reader
86 .read_event_into(&mut buf)
87 .map_err(|e| Error::XmlParse(e.to_string()))?
88 {
89 Event::Start(ref e) if e.name() == QName(b"row") => {
90 let row_number = extract_row_number(e)?;
91 let row = self.parse_row_body(row_number)?;
92 self.rows_emitted += 1;
93 if !row.cells.is_empty() {
94 rows.push(row);
95 }
96 }
97 Event::Eof => {
98 self.done = true;
99 break;
100 }
101 _ => {}
102 }
103 }
104
105 Ok(rows)
106 }
107
108 pub fn has_more(&self) -> bool {
110 !self.done
111 }
112
113 pub fn close(self) {
115 drop(self);
116 }
117
118 fn parse_row_body(&mut self, row_number: u32) -> Result<StreamRow> {
121 let mut cells = Vec::new();
122 let mut buf = Vec::with_capacity(1024);
123
124 loop {
125 buf.clear();
126 match self
127 .reader
128 .read_event_into(&mut buf)
129 .map_err(|e| Error::XmlParse(e.to_string()))?
130 {
131 Event::Start(ref e) if e.name() == QName(b"c") => {
132 let (col, cell_type) = extract_cell_attrs(e)?;
133 if let Some(col) = col {
134 let cv = self.parse_cell_body(cell_type.as_deref())?;
135 cells.push((col, cv));
136 } else {
137 self.skip_to_end_of(b"c")?;
138 }
139 }
140 Event::Empty(ref e) if e.name() == QName(b"c") => {
141 let (col, cell_type) = extract_cell_attrs(e)?;
142 if let Some(col) = col {
143 let cv =
144 resolve_cell_value(self.sst, cell_type.as_deref(), None, None, None)?;
145 cells.push((col, cv));
146 }
147 }
148 Event::End(ref e) if e.name() == QName(b"row") => break,
149 Event::Eof => {
150 self.done = true;
151 break;
152 }
153 _ => {}
154 }
155 }
156
157 Ok(StreamRow { row_number, cells })
158 }
159
160 fn parse_cell_body(&mut self, cell_type: Option<&str>) -> Result<CellValue> {
163 let mut value_text: Option<String> = None;
164 let mut formula_text: Option<String> = None;
165 let mut inline_string: Option<String> = None;
166 let mut buf = Vec::with_capacity(512);
167 let mut in_is = false;
168
169 loop {
170 buf.clear();
171 match self
172 .reader
173 .read_event_into(&mut buf)
174 .map_err(|e| Error::XmlParse(e.to_string()))?
175 {
176 Event::Start(ref e) => {
177 let local = e.local_name();
178 if local.as_ref() == b"v" {
179 value_text = Some(self.read_text_content(b"v")?);
180 } else if local.as_ref() == b"f" {
181 formula_text = Some(self.read_text_content(b"f")?);
182 } else if local.as_ref() == b"is" {
183 in_is = true;
184 inline_string = Some(String::new());
185 } else if local.as_ref() == b"t" && in_is {
186 let t = self.read_text_content(b"t")?;
187 if let Some(ref mut is) = inline_string {
188 is.push_str(&t);
189 }
190 }
191 }
192 Event::End(ref e) => {
193 let local = e.local_name();
194 if local.as_ref() == b"c" {
195 break;
196 }
197 if local.as_ref() == b"is" {
198 in_is = false;
199 }
200 }
201 Event::Eof => {
202 self.done = true;
203 break;
204 }
205 _ => {}
206 }
207 }
208
209 resolve_cell_value(
210 self.sst,
211 cell_type,
212 value_text.as_deref(),
213 formula_text,
214 inline_string,
215 )
216 }
217
218 fn read_text_content(&mut self, end_tag: &[u8]) -> Result<String> {
220 let mut text = String::new();
221 let mut buf = Vec::with_capacity(256);
222 loop {
223 buf.clear();
224 match self
225 .reader
226 .read_event_into(&mut buf)
227 .map_err(|e| Error::XmlParse(e.to_string()))?
228 {
229 Event::Text(ref e) => {
230 let decoded = e.unescape().map_err(|e| Error::XmlParse(e.to_string()))?;
231 text.push_str(&decoded);
232 }
233 Event::End(ref e) if e.local_name().as_ref() == end_tag => break,
234 Event::Eof => {
235 self.done = true;
236 break;
237 }
238 _ => {}
239 }
240 }
241 Ok(text)
242 }
243
244 fn skip_to_end_of(&mut self, tag: &[u8]) -> Result<()> {
246 let mut buf = Vec::with_capacity(256);
247 let mut depth: u32 = 1;
248 loop {
249 buf.clear();
250 match self
251 .reader
252 .read_event_into(&mut buf)
253 .map_err(|e| Error::XmlParse(e.to_string()))?
254 {
255 Event::Start(ref e) if e.local_name().as_ref() == tag => {
256 depth += 1;
257 }
258 Event::End(ref e) if e.local_name().as_ref() == tag => {
259 depth -= 1;
260 if depth == 0 {
261 break;
262 }
263 }
264 Event::Eof => {
265 self.done = true;
266 break;
267 }
268 _ => {}
269 }
270 }
271 Ok(())
272 }
273}
274
275pub struct OwnedSheetStreamReader {
282 reader: quick_xml::Reader<std::io::BufReader<std::io::Cursor<Vec<u8>>>>,
283 sst: SharedStringTable,
284 done: bool,
285 row_limit: Option<u32>,
286 rows_emitted: u32,
287}
288
289impl OwnedSheetStreamReader {
290 pub fn new(xml_bytes: Vec<u8>, sst: SharedStringTable, row_limit: Option<u32>) -> Self {
296 let cursor = std::io::Cursor::new(xml_bytes);
297 let buf_reader = std::io::BufReader::new(cursor);
298 let mut reader = quick_xml::Reader::from_reader(buf_reader);
299 reader.config_mut().trim_text(false);
300 Self {
301 reader,
302 sst,
303 done: false,
304 row_limit,
305 rows_emitted: 0,
306 }
307 }
308
309 pub fn next_batch(&mut self, batch_size: usize) -> Result<Vec<StreamRow>> {
312 if self.done {
313 return Ok(Vec::new());
314 }
315
316 let mut rows = Vec::with_capacity(batch_size);
317 let mut buf = Vec::with_capacity(4096);
318
319 loop {
320 if rows.len() >= batch_size {
321 break;
322 }
323 if let Some(limit) = self.row_limit {
324 if self.rows_emitted >= limit {
325 self.done = true;
326 break;
327 }
328 }
329
330 buf.clear();
331 match self
332 .reader
333 .read_event_into(&mut buf)
334 .map_err(|e| Error::XmlParse(e.to_string()))?
335 {
336 Event::Start(ref e) if e.name() == QName(b"row") => {
337 let row_number = extract_row_number(e)?;
338 let row = self.parse_row_body(row_number)?;
339 self.rows_emitted += 1;
340 if !row.cells.is_empty() {
341 rows.push(row);
342 }
343 }
344 Event::Eof => {
345 self.done = true;
346 break;
347 }
348 _ => {}
349 }
350 }
351
352 Ok(rows)
353 }
354
355 pub fn has_more(&self) -> bool {
357 !self.done
358 }
359
360 pub fn close(self) {
362 drop(self);
363 }
364
365 fn parse_row_body(&mut self, row_number: u32) -> Result<StreamRow> {
366 let mut cells = Vec::new();
367 let mut buf = Vec::with_capacity(1024);
368
369 loop {
370 buf.clear();
371 match self
372 .reader
373 .read_event_into(&mut buf)
374 .map_err(|e| Error::XmlParse(e.to_string()))?
375 {
376 Event::Start(ref e) if e.name() == QName(b"c") => {
377 let (col, cell_type) = extract_cell_attrs(e)?;
378 if let Some(col) = col {
379 let cv = self.parse_cell_body(cell_type.as_deref())?;
380 cells.push((col, cv));
381 } else {
382 self.skip_to_end_of(b"c")?;
383 }
384 }
385 Event::Empty(ref e) if e.name() == QName(b"c") => {
386 let (col, cell_type) = extract_cell_attrs(e)?;
387 if let Some(col) = col {
388 let cv =
389 resolve_cell_value(&self.sst, cell_type.as_deref(), None, None, None)?;
390 cells.push((col, cv));
391 }
392 }
393 Event::End(ref e) if e.name() == QName(b"row") => break,
394 Event::Eof => {
395 self.done = true;
396 break;
397 }
398 _ => {}
399 }
400 }
401
402 Ok(StreamRow { row_number, cells })
403 }
404
405 fn parse_cell_body(&mut self, cell_type: Option<&str>) -> Result<CellValue> {
406 let mut value_text: Option<String> = None;
407 let mut formula_text: Option<String> = None;
408 let mut inline_string: Option<String> = None;
409 let mut buf = Vec::with_capacity(512);
410 let mut in_is = false;
411
412 loop {
413 buf.clear();
414 match self
415 .reader
416 .read_event_into(&mut buf)
417 .map_err(|e| Error::XmlParse(e.to_string()))?
418 {
419 Event::Start(ref e) => {
420 let local = e.local_name();
421 if local.as_ref() == b"v" {
422 value_text = Some(self.read_text_content(b"v")?);
423 } else if local.as_ref() == b"f" {
424 formula_text = Some(self.read_text_content(b"f")?);
425 } else if local.as_ref() == b"is" {
426 in_is = true;
427 inline_string = Some(String::new());
428 } else if local.as_ref() == b"t" && in_is {
429 let t = self.read_text_content(b"t")?;
430 if let Some(ref mut is) = inline_string {
431 is.push_str(&t);
432 }
433 }
434 }
435 Event::End(ref e) => {
436 let local = e.local_name();
437 if local.as_ref() == b"c" {
438 break;
439 }
440 if local.as_ref() == b"is" {
441 in_is = false;
442 }
443 }
444 Event::Eof => {
445 self.done = true;
446 break;
447 }
448 _ => {}
449 }
450 }
451
452 resolve_cell_value(
453 &self.sst,
454 cell_type,
455 value_text.as_deref(),
456 formula_text,
457 inline_string,
458 )
459 }
460
461 fn read_text_content(&mut self, end_tag: &[u8]) -> Result<String> {
462 let mut text = String::new();
463 let mut buf = Vec::with_capacity(256);
464 loop {
465 buf.clear();
466 match self
467 .reader
468 .read_event_into(&mut buf)
469 .map_err(|e| Error::XmlParse(e.to_string()))?
470 {
471 Event::Text(ref e) => {
472 let decoded = e.unescape().map_err(|e| Error::XmlParse(e.to_string()))?;
473 text.push_str(&decoded);
474 }
475 Event::End(ref e) if e.local_name().as_ref() == end_tag => break,
476 Event::Eof => {
477 self.done = true;
478 break;
479 }
480 _ => {}
481 }
482 }
483 Ok(text)
484 }
485
486 fn skip_to_end_of(&mut self, tag: &[u8]) -> Result<()> {
487 let mut buf = Vec::with_capacity(256);
488 let mut depth: u32 = 1;
489 loop {
490 buf.clear();
491 match self
492 .reader
493 .read_event_into(&mut buf)
494 .map_err(|e| Error::XmlParse(e.to_string()))?
495 {
496 Event::Start(ref e) if e.local_name().as_ref() == tag => {
497 depth += 1;
498 }
499 Event::End(ref e) if e.local_name().as_ref() == tag => {
500 depth -= 1;
501 if depth == 0 {
502 break;
503 }
504 }
505 Event::Eof => {
506 self.done = true;
507 break;
508 }
509 _ => {}
510 }
511 }
512 Ok(())
513 }
514}
515
516fn extract_row_number(start: &quick_xml::events::BytesStart<'_>) -> Result<u32> {
518 for attr in start.attributes().flatten() {
519 if attr.key == QName(b"r") {
520 let val =
521 std::str::from_utf8(&attr.value).map_err(|e| Error::XmlParse(e.to_string()))?;
522 return val
523 .parse::<u32>()
524 .map_err(|e| Error::XmlParse(format!("invalid row number: {e}")));
525 }
526 }
527 Err(Error::XmlParse(
528 "row element missing r attribute".to_string(),
529 ))
530}
531
532fn extract_cell_attrs(
534 start: &quick_xml::events::BytesStart<'_>,
535) -> Result<(Option<u32>, Option<String>)> {
536 let mut cell_ref: Option<String> = None;
537 let mut cell_type: Option<String> = None;
538
539 for attr in start.attributes().flatten() {
540 match attr.key {
541 QName(b"r") => {
542 cell_ref = Some(
543 std::str::from_utf8(&attr.value)
544 .map_err(|e| Error::XmlParse(e.to_string()))?
545 .to_string(),
546 );
547 }
548 QName(b"t") => {
549 cell_type = Some(
550 std::str::from_utf8(&attr.value)
551 .map_err(|e| Error::XmlParse(e.to_string()))?
552 .to_string(),
553 );
554 }
555 _ => {}
556 }
557 }
558
559 let col = match &cell_ref {
560 Some(r) => Some(cell_name_to_coordinates(r)?.0),
561 None => None,
562 };
563
564 Ok((col, cell_type))
565}
566
567fn resolve_cell_value(
569 sst: &SharedStringTable,
570 cell_type: Option<&str>,
571 value_text: Option<&str>,
572 formula_text: Option<String>,
573 inline_string: Option<String>,
574) -> Result<CellValue> {
575 if let Some(formula) = formula_text {
576 let cached = match (cell_type, value_text) {
577 (Some("b"), Some(v)) => Some(Box::new(CellValue::Bool(v == "1"))),
578 (Some("e"), Some(v)) => Some(Box::new(CellValue::Error(v.to_string()))),
579 (Some("str"), Some(v)) => Some(Box::new(CellValue::String(v.to_string()))),
580 (_, Some(v)) => v
581 .parse::<f64>()
582 .ok()
583 .map(|n| Box::new(CellValue::Number(n))),
584 _ => None,
585 };
586 return Ok(CellValue::Formula {
587 expr: formula,
588 result: cached,
589 });
590 }
591
592 match (cell_type, value_text) {
593 (Some("s"), Some(v)) => {
594 let idx: usize = v
595 .parse()
596 .map_err(|_| Error::Internal(format!("invalid SST index: {v}")))?;
597 let s = sst
598 .get(idx)
599 .ok_or_else(|| Error::Internal(format!("SST index {idx} out of bounds")))?;
600 Ok(CellValue::String(s.to_string()))
601 }
602 (Some("b"), Some(v)) => Ok(CellValue::Bool(v == "1")),
603 (Some("e"), Some(v)) => Ok(CellValue::Error(v.to_string())),
604 (Some("inlineStr"), _) => Ok(CellValue::String(inline_string.unwrap_or_default())),
605 (Some("str"), Some(v)) => Ok(CellValue::String(v.to_string())),
606 (Some("d"), Some(v)) => {
607 let n: f64 = v
608 .parse()
609 .map_err(|_| Error::Internal(format!("invalid date value: {v}")))?;
610 Ok(CellValue::Date(n))
611 }
612 (Some("n") | None, Some(v)) => {
613 let n: f64 = v
614 .parse()
615 .map_err(|_| Error::Internal(format!("invalid number: {v}")))?;
616 Ok(CellValue::Number(n))
617 }
618 _ => Ok(CellValue::Empty),
619 }
620}
621
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Builds a shared string table containing `strings` in the given order.
    fn make_sst(strings: &[&str]) -> SharedStringTable {
        let mut table = SharedStringTable::new();
        for entry in strings {
            table.add(entry);
        }
        table
    }

    /// Wraps `sheet_data` in a minimal worksheet document.
    fn worksheet_xml(sheet_data: &str) -> String {
        format!(
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
<sheetData>
{sheet_data}
</sheetData>
</worksheet>"#
        )
    }

    /// Opens a borrowing stream reader over an in-memory copy of `xml`.
    fn open_reader<'a>(
        xml: &str,
        sst: &'a SharedStringTable,
        row_limit: Option<u32>,
    ) -> SheetStreamReader<'a, Cursor<Vec<u8>>> {
        SheetStreamReader::new(Cursor::new(xml.as_bytes().to_vec()), sst, row_limit)
    }

    /// Drains the reader in batches of 100 until an empty batch comes back.
    fn read_all(xml: &str, sst: &SharedStringTable, row_limit: Option<u32>) -> Vec<StreamRow> {
        let mut reader = open_reader(xml, sst, row_limit);
        let mut collected = Vec::new();
        loop {
            let batch = reader.next_batch(100).unwrap();
            if batch.is_empty() {
                return collected;
            }
            collected.extend(batch);
        }
    }

    /// Reads `sheet_data` with an empty string table and no row limit.
    fn read_plain(sheet_data: &str) -> Vec<StreamRow> {
        read_all(&worksheet_xml(sheet_data), &SharedStringTable::new(), None)
    }

    /// Unpacks a formula cell into its expression and cached result,
    /// panicking for any other kind of cell.
    fn expect_formula(cell: &CellValue) -> (&str, Option<&CellValue>) {
        match cell {
            CellValue::Formula { expr, result } => (expr.as_str(), result.as_deref()),
            other => panic!("expected Formula, got {:?}", other),
        }
    }

    /// Shorthand for a string cell value.
    fn text(s: &str) -> CellValue {
        CellValue::String(s.to_string())
    }

    #[test]
    fn test_basic_batch_reading() {
        let sst = make_sst(&["Name", "Age"]);
        let xml = worksheet_xml(
            r#"
<row r="1"><c r="A1" t="s"><v>0</v></c><c r="B1" t="s"><v>1</v></c></row>
<row r="2"><c r="A2" t="s"><v>0</v></c><c r="B2"><v>30</v></c></row>
<row r="3"><c r="A3" t="s"><v>0</v></c><c r="B3"><v>25</v></c></row>
"#,
        );
        let mut reader = open_reader(&xml, &sst, None);

        let first = reader.next_batch(2).unwrap();
        assert_eq!(first.len(), 2);
        assert!(reader.has_more());

        let second = reader.next_batch(2).unwrap();
        assert_eq!(second.len(), 1);

        let third = reader.next_batch(2).unwrap();
        assert!(third.is_empty());
        assert!(!reader.has_more());
    }

    #[test]
    fn test_sparse_rows() {
        let rows = read_plain(
            r#"
<row r="1"><c r="A1"><v>1</v></c></row>
<row r="5"><c r="C5"><v>5</v></c></row>
<row r="100"><c r="A100"><v>100</v></c></row>
"#,
        );

        assert_eq!(rows.len(), 3);
        assert_eq!(rows[0].row_number, 1);
        assert_eq!(rows[1].row_number, 5);
        assert_eq!(rows[1].cells[0].0, 3);
        assert_eq!(rows[2].row_number, 100);
    }

    #[test]
    fn test_all_cell_types() {
        let sst = make_sst(&["Hello"]);
        let xml = worksheet_xml(
            r#"
<row r="1">
  <c r="A1" t="s"><v>0</v></c>
  <c r="B1"><v>42.5</v></c>
  <c r="C1" t="b"><v>1</v></c>
  <c r="D1" t="e"><v>#DIV/0!</v></c>
  <c r="E1" t="inlineStr"><is><t>Inline</t></is></c>
  <c r="F1" t="n"><v>99</v></c>
  <c r="G1" t="d"><v>45000</v></c>
</row>
"#,
        );

        let rows = read_all(&xml, &sst, None);
        assert_eq!(rows.len(), 1);

        let expected = [
            (1, text("Hello")),
            (2, CellValue::Number(42.5)),
            (3, CellValue::Bool(true)),
            (4, CellValue::Error("#DIV/0!".to_string())),
            (5, text("Inline")),
            (6, CellValue::Number(99.0)),
            (7, CellValue::Date(45000.0)),
        ];
        let cells = &rows[0].cells;
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(&cells[idx], want);
        }
    }

    #[test]
    fn test_boolean_false() {
        let rows = read_plain(r#"<row r="1"><c r="A1" t="b"><v>0</v></c></row>"#);
        assert_eq!(rows[0].cells[0].1, CellValue::Bool(false));
    }

    #[test]
    fn test_shared_string_resolution() {
        let sst = make_sst(&["First", "Second", "Third"]);
        let xml = worksheet_xml(
            r#"
<row r="1">
  <c r="A1" t="s"><v>0</v></c>
  <c r="B1" t="s"><v>1</v></c>
  <c r="C1" t="s"><v>2</v></c>
</row>
"#,
        );

        let rows = read_all(&xml, &sst, None);
        let cells = &rows[0].cells;
        assert_eq!(cells[0].1, text("First"));
        assert_eq!(cells[1].1, text("Second"));
        assert_eq!(cells[2].1, text("Third"));
    }

    #[test]
    fn test_shared_string_out_of_bounds() {
        let sst = make_sst(&["Only"]);
        let xml = worksheet_xml(r#"<row r="1"><c r="A1" t="s"><v>999</v></c></row>"#);

        let mut reader = open_reader(&xml, &sst, None);
        assert!(reader.next_batch(10).is_err());
    }

    #[test]
    fn test_row_limit() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"
<row r="1"><c r="A1"><v>1</v></c></row>
<row r="2"><c r="A2"><v>2</v></c></row>
<row r="3"><c r="A3"><v>3</v></c></row>
<row r="4"><c r="A4"><v>4</v></c></row>
<row r="5"><c r="A5"><v>5</v></c></row>
"#,
        );

        let rows = read_all(&xml, &sst, Some(3));
        assert_eq!(rows.len(), 3);
        assert_eq!(rows[0].row_number, 1);
        assert_eq!(rows[2].row_number, 3);
    }

    #[test]
    fn test_row_limit_zero() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><v>1</v></c></row>"#);

        assert!(read_all(&xml, &sst, Some(0)).is_empty());
    }

    #[test]
    fn test_empty_sheet() {
        assert!(read_plain("").is_empty());
    }

    #[test]
    fn test_empty_rows_are_skipped() {
        let rows = read_plain(
            r#"
<row r="1"></row>
<row r="2"><c r="A2"><v>42</v></c></row>
<row r="3"></row>
"#,
        );

        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0].row_number, 2);
    }

    #[test]
    fn test_empty_rows_count_against_limit() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"
<row r="1"></row>
<row r="2"></row>
<row r="3"><c r="A3"><v>3</v></c></row>
<row r="4"><c r="A4"><v>4</v></c></row>
"#,
        );

        let limited_to_two = read_all(&xml, &sst, Some(2));
        assert!(
            limited_to_two.is_empty(),
            "with limit=2 and 2 empty rows, no data rows should be returned"
        );

        let limited_to_three = read_all(&xml, &sst, Some(3));
        assert_eq!(limited_to_three.len(), 1);
        assert_eq!(limited_to_three[0].row_number, 3);
    }

    #[test]
    fn test_formula_with_cached_number() {
        let rows = read_plain(r#"<row r="1"><c r="A1"><f>SUM(B1:B10)</f><v>42</v></c></row>"#);

        let (expr, cached) = expect_formula(&rows[0].cells[0].1);
        assert_eq!(expr, "SUM(B1:B10)");
        assert_eq!(cached, Some(&CellValue::Number(42.0)));
    }

    #[test]
    fn test_formula_with_cached_string() {
        let rows = read_plain(
            r#"<row r="1"><c r="A1" t="str"><f>CONCAT("a","b")</f><v>ab</v></c></row>"#,
        );

        let (expr, cached) = expect_formula(&rows[0].cells[0].1);
        assert_eq!(expr, r#"CONCAT("a","b")"#);
        assert_eq!(cached, Some(&CellValue::String("ab".to_string())));
    }

    #[test]
    fn test_formula_with_cached_boolean() {
        let rows = read_plain(r#"<row r="1"><c r="A1" t="b"><f>TRUE()</f><v>1</v></c></row>"#);

        let (expr, cached) = expect_formula(&rows[0].cells[0].1);
        assert_eq!(expr, "TRUE()");
        assert_eq!(cached, Some(&CellValue::Bool(true)));
    }

    #[test]
    fn test_formula_with_cached_error() {
        let rows =
            read_plain(r#"<row r="1"><c r="A1" t="e"><f>1/0</f><v>#DIV/0!</v></c></row>"#);

        let (expr, cached) = expect_formula(&rows[0].cells[0].1);
        assert_eq!(expr, "1/0");
        assert_eq!(cached, Some(&CellValue::Error("#DIV/0!".to_string())));
    }

    #[test]
    fn test_formula_without_cached_value() {
        let rows = read_plain(r#"<row r="1"><c r="A1"><f>A2+A3</f></c></row>"#);

        let (expr, cached) = expect_formula(&rows[0].cells[0].1);
        assert_eq!(expr, "A2+A3");
        assert!(cached.is_none());
    }

    #[test]
    fn test_inline_string_with_rich_text_runs() {
        let rows = read_plain(
            r#"<row r="1"><c r="A1" t="inlineStr"><is><r><t>Bold</t></r><r><t> Normal</t></r></is></c></row>"#,
        );

        assert_eq!(rows[0].cells[0].1, text("Bold Normal"));
    }

    #[test]
    fn test_reader_close() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><v>1</v></c></row>"#);

        open_reader(&xml, &sst, None).close();
    }

    #[test]
    fn test_reader_drop_without_reading_all() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"
<row r="1"><c r="A1"><v>1</v></c></row>
<row r="2"><c r="A2"><v>2</v></c></row>
"#,
        );

        let mut reader = open_reader(&xml, &sst, None);
        let batch = reader.next_batch(1).unwrap();
        assert_eq!(batch.len(), 1);
        drop(reader);
    }

    #[test]
    fn test_has_more_transitions() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><v>1</v></c></row>"#);

        let mut reader = open_reader(&xml, &sst, None);
        assert!(reader.has_more());

        let first = reader.next_batch(100).unwrap();
        assert_eq!(first.len(), 1);

        let second = reader.next_batch(100).unwrap();
        assert!(second.is_empty());
        assert!(!reader.has_more());
    }

    #[test]
    fn test_batch_size_one() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"
<row r="1"><c r="A1"><v>1</v></c></row>
<row r="2"><c r="A2"><v>2</v></c></row>
<row r="3"><c r="A3"><v>3</v></c></row>
"#,
        );

        let mut reader = open_reader(&xml, &sst, None);

        for expected_row in 1..=3 {
            let batch = reader.next_batch(1).unwrap();
            assert_eq!(batch.len(), 1);
            assert_eq!(batch[0].row_number, expected_row);
        }

        assert!(reader.next_batch(1).unwrap().is_empty());
    }

    #[test]
    fn test_cell_with_no_value() {
        let rows = read_plain(r#"<row r="1"><c r="A1"></c><c r="B1"><v>42</v></c></row>"#);

        let cells = &rows[0].cells;
        assert_eq!(cells.len(), 2);
        assert_eq!(cells[0].1, CellValue::Empty);
        assert_eq!(cells[1].1, CellValue::Number(42.0));
    }

    #[test]
    fn test_self_closing_cell_element() {
        let rows = read_plain(
            r#"<row r="1"><c r="A1"/><c r="B1"><v>42</v></c><c r="C1" t="b"/></row>"#,
        );

        let cells = &rows[0].cells;
        assert_eq!(cells.len(), 3);
        assert_eq!(cells[0], (1, CellValue::Empty));
        assert_eq!(cells[1], (2, CellValue::Number(42.0)));
        assert_eq!(cells[2], (3, CellValue::Empty));
    }

    #[test]
    fn test_integration_with_saved_workbook() {
        let mut wb = crate::workbook::Workbook::new();
        wb.set_cell_value("Sheet1", "A1", "Name").unwrap();
        wb.set_cell_value("Sheet1", "B1", "Score").unwrap();
        wb.set_cell_value("Sheet1", "A2", "Alice").unwrap();
        wb.set_cell_value("Sheet1", "B2", 95.5f64).unwrap();
        wb.set_cell_value("Sheet1", "A3", "Bob").unwrap();
        wb.set_cell_value("Sheet1", "B3", 87.0f64).unwrap();

        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join("stream_reader_test.xlsx");
        wb.save(&path).unwrap();

        let options =
            crate::workbook::OpenOptions::new().read_mode(crate::workbook::ReadMode::Lazy);
        let reopened = crate::workbook::Workbook::open_with_options(&path, &options).unwrap();

        let mut reader = reopened.open_sheet_reader("Sheet1").unwrap();
        let rows = reader.next_batch(100).unwrap();

        assert_eq!(rows.len(), 3);
        assert_eq!(rows[0].row_number, 1);

        // Expected (column A, column B) values, one pair per row.
        let expected = [
            (text("Name"), text("Score")),
            (text("Alice"), CellValue::Number(95.5)),
            (text("Bob"), CellValue::Number(87.0)),
        ];
        for (row, (first, second)) in rows.iter().zip(expected.iter()) {
            assert_eq!(&row.cells[0].1, first);
            assert_eq!(&row.cells[1].1, second);
        }
    }

    #[test]
    fn test_integration_with_row_limit() {
        let mut wb = crate::workbook::Workbook::new();
        for i in 1..=10 {
            let cell = format!("A{i}");
            wb.set_cell_value("Sheet1", &cell, i as f64).unwrap();
        }

        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join("stream_limit_test.xlsx");
        wb.save(&path).unwrap();

        let options = crate::workbook::OpenOptions::new()
            .read_mode(crate::workbook::ReadMode::Lazy)
            .sheet_rows(5);
        let reopened = crate::workbook::Workbook::open_with_options(&path, &options).unwrap();

        let mut reader = reopened.open_sheet_reader("Sheet1").unwrap();
        let mut all_rows = Vec::new();
        loop {
            let batch = reader.next_batch(3).unwrap();
            if batch.is_empty() {
                break;
            }
            all_rows.extend(batch);
        }

        assert_eq!(all_rows.len(), 5);
        assert_eq!(all_rows[4].row_number, 5);
    }

    #[test]
    fn test_integration_sheet_not_found() {
        let wb = crate::workbook::Workbook::new();
        assert!(wb.open_sheet_reader("NonExistent").is_err());
    }
}