rusqlite_gpkg/gpkg/
batch_iterator.rs

1use crate::gpkg::GpkgFeature;
2use crate::types::ColumnSpec;
3use crate::{GpkgLayer, Result};
4use std::collections::HashMap;
5use std::rc::Rc;
6
7/// Iterator that yields batches of features from a layer.
8///
9/// Each call to `next()` returns a `Result<Vec<GpkgFeature>>` containing up to
10/// `batch_size` features. This provides a chunked alternative to `features()`,
11/// which always allocates a single vector for the whole layer.
12pub struct GpkgFeatureBatchIterator<'a> {
13    pub(super) stmt: rusqlite::Statement<'a>,
14    pub(super) property_columns: Vec<ColumnSpec>,
15    pub(super) geometry_column: String,
16    pub(super) primary_key_column: String,
17    pub(super) property_index_by_name: Rc<HashMap<String, usize>>,
18    pub(super) batch_size: u32,
19    pub(super) offset: u32,
20    pub(super) end_or_invalid_state: bool,
21}
22
23impl<'a> GpkgFeatureBatchIterator<'a> {
24    pub(crate) fn new(stmt: rusqlite::Statement<'a>, layer: &GpkgLayer, batch_size: u32) -> Self {
25        Self {
26            stmt,
27            batch_size,
28            property_columns: layer.property_columns.clone(),
29            geometry_column: layer.geometry_column.clone(),
30            primary_key_column: layer.primary_key_column.clone(),
31            property_index_by_name: layer.property_index_by_name.clone(),
32            offset: 0,
33            end_or_invalid_state: false,
34        }
35    }
36}
37
38impl<'a> Iterator for GpkgFeatureBatchIterator<'a> {
39    type Item = Result<Vec<GpkgFeature>>;
40
41    fn next(&mut self) -> Option<Self::Item> {
42        if self.end_or_invalid_state {
43            return None;
44        }
45
46        let result = self.stmt.query_map([self.offset], |row| {
47            super::layer::row_to_feature(
48                row,
49                &self.property_columns,
50                &self.geometry_column,
51                &self.primary_key_column,
52                &self.property_index_by_name,
53            )
54        });
55
56        let collected_result = match result {
57            Ok(mapped_rows) => mapped_rows.collect::<rusqlite::Result<Vec<GpkgFeature>>>(),
58            Err(e) => {
59                // I don't know in what case some error happens, but I bet it's unrecoverable.
60                self.end_or_invalid_state = true;
61                return Some(Err(e.into()));
62            }
63        };
64
65        let features = match collected_result {
66            Ok(features) => features,
67            Err(e) => {
68                // I don't know in what case some error happens, but I bet it's unrecoverable.
69                self.end_or_invalid_state = true;
70                return Some(Err(e.into()));
71            }
72        };
73
74        // If the result is less than the batch size, it means it reached the end.
75        let result_size = features.len();
76        if result_size < self.batch_size as usize {
77            self.end_or_invalid_state = true;
78            if result_size == 0 {
79                return None;
80            }
81        }
82
83        self.offset += result_size as u32;
84
85        Some(Ok(features))
86    }
87}
88
#[cfg(test)]
mod tests {
    use crate::Result;
    use crate::Value;
    use crate::gpkg::Gpkg;
    use crate::types::ColumnSpec;
    use geo_types::Point;
    use wkb::reader::GeometryType;

    /// Inserts `total` point features into a fresh in-memory layer, iterates
    /// them with `features_batch(batch_size)`, and checks the size of every
    /// batch: all batches are full except a possible shorter final one, and
    /// an empty layer yields no batch at all.
    fn assert_batch_iteration(total: usize, batch_size: u32) -> Result<()> {
        let gpkg = Gpkg::open_in_memory()?;
        let no_columns: Vec<ColumnSpec> = Vec::new();
        let layer = gpkg.create_layer(
            "batch_points",
            "geom",
            GeometryType::Point,
            wkb::reader::Dimension::Xy,
            4326,
            &no_columns,
        )?;

        for i in 0..total {
            let coord = i as f64;
            layer.insert(Point::new(coord, coord), std::iter::empty::<&Value>())?;
        }

        // Record only the length of each batch; stop at the first error.
        let counts = layer
            .features_batch(batch_size)?
            .map(|batch| batch.map(|features| features.len()))
            .collect::<Result<Vec<usize>>>()?;

        assert_eq!(counts.iter().sum::<usize>(), total);

        if total == 0 {
            assert!(counts.is_empty());
            return Ok(());
        }

        let full_size = batch_size as usize;
        let full_batches = total / full_size;
        let remainder = total % full_size;

        for (idx, &count) in counts.iter().enumerate() {
            // Only the final batch may be short, and only when the total is
            // not an exact multiple of the batch size.
            let is_partial_tail = remainder != 0 && idx + 1 == counts.len();
            let expected = if is_partial_tail { remainder } else { full_size };
            assert_eq!(count, expected);
        }

        assert_eq!(counts.len(), full_batches + usize::from(remainder != 0));

        Ok(())
    }

    #[test]
    fn batch_iterator_handles_empty_layer() -> Result<()> {
        assert_batch_iteration(0, 3)
    }

    #[test]
    fn batch_iterator_handles_smaller_than_batch() -> Result<()> {
        assert_batch_iteration(2, 5)
    }

    #[test]
    fn batch_iterator_handles_exact_multiple() -> Result<()> {
        assert_batch_iteration(6, 3)
    }

    #[test]
    fn batch_iterator_handles_remainder() -> Result<()> {
        assert_batch_iteration(7, 3)
    }

    #[test]
    fn batch_iterator_handles_single_item_batches() -> Result<()> {
        assert_batch_iteration(4, 1)
    }
}