19 #include <parquet/schema.h>
20 #include <parquet/types.h>
25 namespace foreign_storage {
38 const bool geo_validate_geometry)
53 chunks.begin()->getBuffer());
85 const int16_t* rep_levels,
86 const int64_t values_read,
87 const int64_t levels_read,
92 appendData(def_levels, rep_levels, values_read, levels_read, values);
97 if (invalid_indices.empty()) {
114 const int16_t* rep_levels,
115 const int64_t values_read,
116 const int64_t levels_read,
117 int8_t* values)
override {
118 auto parquet_data_ptr =
reinterpret_cast<const parquet::ByteArray*
>(values);
122 for (int64_t i = 0, j = 0; i < levels_read; ++i) {
124 if (def_levels[i] == 0) {
127 CHECK(j < values_read);
128 auto& byte_array = parquet_data_ptr[j++];
129 auto geo_string_view = std::string_view{
130 reinterpret_cast<const char*
>(byte_array.ptr), byte_array.len};
133 }
catch (
const std::runtime_error& error) {
151 const int16_t* rep_levels,
152 const int64_t values_read,
153 const int64_t levels_read,
154 int8_t* values)
override {
155 UNREACHABLE() <<
"unexpected call to appendDataTrackErrors from unsupported encoder";
160 const std::vector<ArrayDatum>& datum_buffer) {
162 for (
const auto& datum : datum_buffer) {
166 CHECK(datum_buffer.empty());
179 for (int64_t i = 0; i < row_count; ++i) {
188 auto buffer = chunk->getBuffer();
AbstractBuffer * getBuffer(std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
void eraseInvalidData(const FindContainer &invalid_indices)
int64_t current_batch_offset_
void validateAndAppendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices) override
void processNullGeoElement()
void appendArrayDatumsToBuffer()
std::vector< ArrayDatum > coords_datum_buffer_
void appendDataTrackErrors(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
std::vector< ArrayDatum > ring_or_line_sizes_datum_buffer_
TypedParquetStorageBuffer< ArrayDatum > * coords_column_buffer_
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
TypedParquetStorageBuffer< std::string > * base_column_buffer_
std::vector< ArrayDatum > bounds_datum_buffer_
HOST DEVICE SQLTypes get_type() const
ParquetGeospatialImportEncoder(std::list< Chunk_NS::Chunk > &chunks, const bool geo_validate_geometry)
void appendArrayDatumsIfApplicable(TypedParquetStorageBuffer< ArrayDatum > *column_buffer, const std::vector< ArrayDatum > &datum_buffer)
bool hasRingOrLineSizesColumn() const
std::set< int64_t > InvalidRowGroupIndices
An AbstractBuffer is a unit of data management for a data manager.
bool hasBoundsColumn() const
void processGeoElement(std::string_view geo_string_view)
ParquetGeospatialImportEncoder(const bool geo_validate_geometry)
void appendBaseData(const int64_t row_count)
bool hasPolyRingsColumn() const
TypedParquetStorageBuffer< ArrayDatum > * ring_or_line_sizes_column_buffer_
const ColumnDescriptor * geo_column_descriptor_
TypedParquetStorageBuffer< ArrayDatum > * bounds_column_buffer_
std::vector< ArrayDatum > poly_rings_datum_buffer_
TypedParquetStorageBuffer< ArrayDatum > * poly_rings_column_buffer_
void appendElement(const Type &element)
void eraseInvalidIndicesInBuffer(const InvalidRowGroupIndices &invalid_indices) override
std::list< T >::iterator getIteratorForGeoColumnType(std::list< T > &list, const SQLTypes column_type, const GeoColumnType geo_column)
InvalidRowGroupIndices * invalid_indices_