sheetkit_core/
merge.rs

1//! Merge cell operations.
2//!
3//! Provides functions for merging and unmerging ranges of cells in a worksheet.
4
5use crate::error::{Error, Result};
6use crate::utils::cell_ref::cell_name_to_coordinates;
7use sheetkit_xml::worksheet::{MergeCell, MergeCells, WorksheetXml};
8
9/// Parse a range reference like "A1:C3" into ((col1, row1), (col2, row2)) coordinates,
10/// both 1-based. Ensures the returned rectangle is normalized so that
11/// (col1, row1) is the top-left and (col2, row2) is the bottom-right.
12fn parse_range(reference: &str) -> Result<(u32, u32, u32, u32)> {
13    let parts: Vec<&str> = reference.split(':').collect();
14    if parts.len() != 2 {
15        return Err(Error::InvalidCellReference(format!(
16            "expected range like 'A1:C3', got '{reference}'"
17        )));
18    }
19    let (c1, r1) = cell_name_to_coordinates(parts[0])?;
20    let (c2, r2) = cell_name_to_coordinates(parts[1])?;
21    let min_col = c1.min(c2);
22    let max_col = c1.max(c2);
23    let min_row = r1.min(r2);
24    let max_row = r1.max(r2);
25    Ok((min_col, min_row, max_col, max_row))
26}
27
/// Report whether two inclusive rectangles share at least one cell.
///
/// Both arguments are `(min_col, min_row, max_col, max_row)` tuples. Rectangles
/// that only sit next to each other (for example columns 1-2 beside columns
/// 3-4) are not considered overlapping.
fn ranges_overlap(a: (u32, u32, u32, u32), b: (u32, u32, u32, u32)) -> bool {
    let (a_left, a_top, a_right, a_bottom) = a;
    let (b_left, b_top, b_right, b_bottom) = b;

    // The rectangles are disjoint exactly when one ends before the other
    // begins on either axis; overlap is the negation of that.
    let apart_horizontally = a_right < b_left || b_right < a_left;
    let apart_vertically = a_bottom < b_top || b_bottom < a_top;

    !(apart_horizontally || apart_vertically)
}
37
38/// Populate the coordinate cache from the reference strings if it is stale.
39/// This handles worksheets deserialized from XML where the cache starts empty.
40fn ensure_cache(mc: &mut MergeCells) -> Result<()> {
41    if mc.cached_coords.len() == mc.merge_cells.len() {
42        return Ok(());
43    }
44    mc.cached_coords.clear();
45    mc.cached_coords.reserve(mc.merge_cells.len());
46    for entry in &mc.merge_cells {
47        mc.cached_coords.push(parse_range(&entry.reference)?);
48    }
49    Ok(())
50}
51
52/// Merge a range of cells on the given worksheet.
53///
54/// `top_left` and `bottom_right` are cell references like "A1" and "C3".
55/// Returns an error if the new range overlaps with any existing merge region.
56pub fn merge_cells(ws: &mut WorksheetXml, top_left: &str, bottom_right: &str) -> Result<()> {
57    let (tl_col, tl_row) = cell_name_to_coordinates(top_left)?;
58    let (br_col, br_row) = cell_name_to_coordinates(bottom_right)?;
59
60    let min_col = tl_col.min(br_col);
61    let max_col = tl_col.max(br_col);
62    let min_row = tl_row.min(br_row);
63    let max_row = tl_row.max(br_row);
64    let new_range = (min_col, min_row, max_col, max_row);
65
66    let reference = format!("{top_left}:{bottom_right}");
67
68    // Check for overlaps using cached coordinates (no string parsing per check).
69    if let Some(ref mut mc) = ws.merge_cells {
70        ensure_cache(mc)?;
71        for (i, coords) in mc.cached_coords.iter().enumerate() {
72            if ranges_overlap(new_range, *coords) {
73                return Err(Error::MergeCellOverlap {
74                    new: reference,
75                    existing: mc.merge_cells[i].reference.clone(),
76                });
77            }
78        }
79    }
80
81    // Add the merge cell entry and its cached coordinates.
82    let merge_cells = ws.merge_cells.get_or_insert_with(|| MergeCells {
83        count: None,
84        merge_cells: Vec::new(),
85        cached_coords: Vec::new(),
86    });
87    merge_cells.merge_cells.push(MergeCell { reference });
88    merge_cells.cached_coords.push(new_range);
89    merge_cells.count = Some(merge_cells.merge_cells.len() as u32);
90
91    Ok(())
92}
93
94/// Remove a specific merge cell range from the worksheet.
95///
96/// `reference` is the exact range string like "A1:C3" that was previously merged.
97/// Returns an error if the range is not found.
98pub fn unmerge_cell(ws: &mut WorksheetXml, reference: &str) -> Result<()> {
99    let mc = ws
100        .merge_cells
101        .as_mut()
102        .ok_or_else(|| Error::MergeCellNotFound(reference.to_string()))?;
103
104    let pos = mc.merge_cells.iter().position(|m| m.reference == reference);
105
106    match pos {
107        Some(idx) => {
108            mc.merge_cells.remove(idx);
109            if mc.cached_coords.len() > idx {
110                mc.cached_coords.remove(idx);
111            }
112        }
113        None => return Err(Error::MergeCellNotFound(reference.to_string())),
114    }
115
116    if mc.merge_cells.is_empty() {
117        ws.merge_cells = None;
118    } else {
119        mc.count = Some(mc.merge_cells.len() as u32);
120    }
121
122    Ok(())
123}
124
125/// Get all merge cell references in the worksheet.
126///
127/// Returns a list of range strings like `["A1:B2", "D1:F3"]`.
128pub fn get_merge_cells(ws: &WorksheetXml) -> Vec<String> {
129    ws.merge_cells
130        .as_ref()
131        .map(|mc| mc.merge_cells.iter().map(|m| m.reference.clone()).collect())
132        .unwrap_or_default()
133}
134
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh worksheet with no merge regions.
    fn blank_sheet() -> WorksheetXml {
        WorksheetXml::default()
    }

    /// The stored `count` value; panics if the worksheet has no merge block.
    fn stored_count(ws: &WorksheetXml) -> Option<u32> {
        ws.merge_cells.as_ref().unwrap().count
    }

    /// Assert that merging the given corners is rejected as an overlap.
    fn assert_overlap_rejected(ws: &mut WorksheetXml, top_left: &str, bottom_right: &str) {
        let err = merge_cells(ws, top_left, bottom_right).unwrap_err();
        assert!(err.to_string().contains("overlaps"));
    }

    /// Build a `MergeCells` holding `references`, with `count` set to their
    /// number and the coordinate cache preset to `cached_coords`.
    fn merge_block(references: &[&str], cached_coords: Vec<(u32, u32, u32, u32)>) -> MergeCells {
        MergeCells {
            count: Some(references.len() as u32),
            merge_cells: references
                .iter()
                .map(|reference| MergeCell {
                    reference: reference.to_string(),
                })
                .collect(),
            cached_coords,
        }
    }

    #[test]
    fn test_merge_cells_basic() {
        let mut ws = blank_sheet();
        merge_cells(&mut ws, "A1", "B2").unwrap();

        assert_eq!(get_merge_cells(&ws), vec!["A1:B2"]);
        assert_eq!(stored_count(&ws), Some(1));
    }

    #[test]
    fn test_merge_cells_multiple() {
        let mut ws = blank_sheet();
        for (top_left, bottom_right) in [("A1", "B2"), ("D1", "F3"), ("A5", "C7")] {
            merge_cells(&mut ws, top_left, bottom_right).unwrap();
        }

        // Regions come back in insertion order.
        assert_eq!(get_merge_cells(&ws), vec!["A1:B2", "D1:F3", "A5:C7"]);
        assert_eq!(stored_count(&ws), Some(3));
    }

    #[test]
    fn test_merge_cells_overlap_detection() {
        let mut ws = blank_sheet();
        merge_cells(&mut ws, "A1", "C3").unwrap();

        // Same rectangle again.
        assert_overlap_rejected(&mut ws, "A1", "C3");
        // B2:D4 partially covers A1:C3.
        assert_overlap_rejected(&mut ws, "B2", "D4");
        // B2:B2 lies entirely inside A1:C3.
        assert_overlap_rejected(&mut ws, "B2", "B2");

        // A disjoint rectangle is accepted.
        merge_cells(&mut ws, "D1", "F3").unwrap();
    }

    #[test]
    fn test_merge_cells_overlap_adjacent_no_overlap() {
        let mut ws = blank_sheet();
        merge_cells(&mut ws, "A1", "B2").unwrap();
        // Directly to the right of A1:B2.
        merge_cells(&mut ws, "C1", "D2").unwrap();
        // Directly below A1:B2.
        merge_cells(&mut ws, "A3", "B4").unwrap();

        assert_eq!(get_merge_cells(&ws).len(), 3);
    }

    #[test]
    fn test_unmerge_cell() {
        let mut ws = blank_sheet();
        merge_cells(&mut ws, "A1", "B2").unwrap();
        merge_cells(&mut ws, "D1", "F3").unwrap();

        unmerge_cell(&mut ws, "A1:B2").unwrap();

        assert_eq!(get_merge_cells(&ws), vec!["D1:F3"]);
        assert_eq!(stored_count(&ws), Some(1));
    }

    #[test]
    fn test_unmerge_cell_last_removes_element() {
        let mut ws = blank_sheet();
        merge_cells(&mut ws, "A1", "B2").unwrap();
        unmerge_cell(&mut ws, "A1:B2").unwrap();

        assert!(ws.merge_cells.is_none());
        assert!(get_merge_cells(&ws).is_empty());
    }

    #[test]
    fn test_unmerge_cell_not_found() {
        let mut ws = blank_sheet();

        // Nothing merged yet.
        let err = unmerge_cell(&mut ws, "A1:B2").unwrap_err();
        assert!(err.to_string().contains("not found"));

        // One region present, but a different one is requested.
        merge_cells(&mut ws, "A1", "B2").unwrap();
        let err = unmerge_cell(&mut ws, "C1:D2").unwrap_err();
        assert!(err.to_string().contains("not found"));
    }

    #[test]
    fn test_get_merge_cells_empty() {
        assert!(get_merge_cells(&blank_sheet()).is_empty());
    }

    #[test]
    fn test_merge_cells_invalid_reference() {
        let mut ws = blank_sheet();

        let first_bad = merge_cells(&mut ws, "!!!", "B2").unwrap_err();
        assert!(first_bad.to_string().contains("invalid cell reference"));

        let second_bad = merge_cells(&mut ws, "A1", "ZZZ").unwrap_err();
        assert!(second_bad.to_string().contains("no row number"));
    }

    #[test]
    fn test_parse_range_valid() {
        assert_eq!(parse_range("A1:C3").unwrap(), (1, 1, 3, 3));
    }

    #[test]
    fn test_parse_range_reversed() {
        // Corners given bottom-right first are normalized to the same bounds.
        assert_eq!(parse_range("C3:A1").unwrap(), (1, 1, 3, 3));
    }

    #[test]
    fn test_parse_range_invalid() {
        for bad in ["A1", "A1:B2:C3", ""] {
            assert!(parse_range(bad).is_err());
        }
    }

    #[test]
    fn test_ranges_overlap_function() {
        let cases = [
            // Overlapping rectangles.
            ((1, 1, 3, 3), (2, 2, 4, 4), true),
            // Identical.
            ((1, 1, 3, 3), (1, 1, 3, 3), true),
            // Contained.
            ((1, 1, 5, 5), (2, 2, 3, 3), true),
            // Adjacent horizontally.
            ((1, 1, 2, 2), (3, 1, 4, 2), false),
            // Adjacent vertically.
            ((1, 1, 2, 2), (1, 3, 2, 4), false),
            // Completely disjoint.
            ((1, 1, 2, 2), (5, 5, 6, 6), false),
        ];
        for (a, b, expected) in cases {
            assert_eq!(ranges_overlap(a, b), expected, "{a:?} vs {b:?}");
        }
    }

    #[test]
    fn test_merge_cache_stays_in_sync_after_add_and_remove() {
        let mut ws = blank_sheet();
        merge_cells(&mut ws, "A1", "B2").unwrap();
        merge_cells(&mut ws, "D1", "F3").unwrap();
        merge_cells(&mut ws, "A5", "C7").unwrap();

        let cache = &ws.merge_cells.as_ref().unwrap().cached_coords;
        assert_eq!(*cache, vec![(1, 1, 2, 2), (4, 1, 6, 3), (1, 5, 3, 7)]);

        unmerge_cell(&mut ws, "D1:F3").unwrap();

        let cache = &ws.merge_cells.as_ref().unwrap().cached_coords;
        assert_eq!(*cache, vec![(1, 1, 2, 2), (1, 5, 3, 7)]);
    }

    #[test]
    fn test_merge_cache_lazy_init_from_deserialized_data() {
        let mut ws = blank_sheet();
        // Mimic a worksheet loaded from XML: references present, cache empty.
        ws.merge_cells = Some(merge_block(&["A1:B2", "D5:F8"], Vec::new()));

        // A disjoint merge goes through once the cache has been filled lazily.
        merge_cells(&mut ws, "H1", "J3").unwrap();
        let regions = ws.merge_cells.as_ref().unwrap();
        assert_eq!(regions.merge_cells.len(), 3);
        assert_eq!(regions.cached_coords.len(), 3);

        // An overlap with a pre-existing region is still caught.
        assert_overlap_rejected(&mut ws, "A1", "A1");
    }

    #[test]
    fn test_merge_many_non_overlapping_regions() {
        let mut ws = blank_sheet();
        // 500 single-row merges, one per row, never touching each other.
        for row in 1..=500u32 {
            merge_cells(&mut ws, &format!("A{row}"), &format!("C{row}")).unwrap();
        }

        assert_eq!(get_merge_cells(&ws).len(), 500);
        let regions = ws.merge_cells.as_ref().unwrap();
        assert_eq!(regions.cached_coords.len(), 500);
        assert_eq!(regions.count, Some(500));
    }

    #[test]
    fn test_unmerge_then_add_reuses_cache_correctly() {
        let mut ws = blank_sheet();
        merge_cells(&mut ws, "A1", "B2").unwrap();
        merge_cells(&mut ws, "D1", "E2").unwrap();
        unmerge_cell(&mut ws, "A1:B2").unwrap();

        // The freed A1:B2 area can be merged again.
        merge_cells(&mut ws, "A1", "B2").unwrap();
        assert_eq!(get_merge_cells(&ws).len(), 2);

        // D1:E2 is still occupied.
        assert_overlap_rejected(&mut ws, "D1", "D1");
    }

    #[test]
    fn test_equality_ignores_cache_state() {
        let with_cache = merge_block(&["A1:B2"], vec![(1, 1, 2, 2)]);
        let without_cache = merge_block(&["A1:B2"], Vec::new());

        assert_eq!(with_cache, without_cache);
    }

    #[test]
    fn test_cache_not_serialized() {
        let regions = merge_block(&["A1:B2"], vec![(1, 1, 2, 2)]);

        let xml = quick_xml::se::to_string(&regions).unwrap();
        assert!(!xml.contains("cached"));
        assert!(xml.contains("A1:B2"));
    }
}