public class ParquetSplitReaderUtil extends Object
Util for generating ParquetColumnarRowSplitReader.
Constructor and Description |
---|
ParquetSplitReaderUtil() |
Modifier and Type | Method and Description |
---|---|
static List<ParquetField> |
buildFieldsList(List<RowType.RowField> childrens,
List<String> fieldNames,
org.apache.parquet.io.MessageColumnIO columnIO) |
static ColumnReader |
createColumnReader(boolean isUtcTimestamp,
LogicalType fieldType,
org.apache.parquet.schema.Type type,
List<org.apache.parquet.column.ColumnDescriptor> columnDescriptors,
org.apache.parquet.column.page.PageReadStore pages,
ParquetField field,
int depth) |
static ColumnVector |
createVectorFromConstant(LogicalType type,
Object value,
int batchSize) |
static WritableColumnVector |
createWritableColumnVector(int batchSize,
LogicalType fieldType,
org.apache.parquet.schema.Type type,
List<org.apache.parquet.column.ColumnDescriptor> columnDescriptors,
int depth) |
static ParquetColumnarRowSplitReader |
genPartColumnarRowReader(boolean utcTimestamp,
boolean caseSensitive,
org.apache.hadoop.conf.Configuration conf,
String[] fullFieldNames,
DataType[] fullFieldTypes,
Map<String,Object> partitionSpec,
int[] selectedFields,
int batchSize,
Path path,
long splitStart,
long splitLength)
Util for generating a partitioned ParquetColumnarRowSplitReader. |
static org.apache.parquet.io.ColumnIO |
getArrayElementColumn(org.apache.parquet.io.ColumnIO columnIO) |
static org.apache.parquet.io.GroupColumnIO |
getMapKeyValueColumn(org.apache.parquet.io.GroupColumnIO groupColumnIO) |
static org.apache.parquet.io.ColumnIO |
lookupColumnByName(org.apache.parquet.io.GroupColumnIO groupColumnIO,
String columnName)
Parquet's column names are case insensitive.
|
public static ParquetColumnarRowSplitReader genPartColumnarRowReader(boolean utcTimestamp, boolean caseSensitive, org.apache.hadoop.conf.Configuration conf, String[] fullFieldNames, DataType[] fullFieldTypes, Map<String,Object> partitionSpec, int[] selectedFields, int batchSize, Path path, long splitStart, long splitLength) throws IOException
Util for generating a partitioned ParquetColumnarRowSplitReader.
Throws: IOException
public static ColumnVector createVectorFromConstant(LogicalType type, Object value, int batchSize)
public static ColumnReader createColumnReader(boolean isUtcTimestamp, LogicalType fieldType, org.apache.parquet.schema.Type type, List<org.apache.parquet.column.ColumnDescriptor> columnDescriptors, org.apache.parquet.column.page.PageReadStore pages, ParquetField field, int depth) throws IOException
Throws: IOException
public static WritableColumnVector createWritableColumnVector(int batchSize, LogicalType fieldType, org.apache.parquet.schema.Type type, List<org.apache.parquet.column.ColumnDescriptor> columnDescriptors, int depth)
public static List<ParquetField> buildFieldsList(List<RowType.RowField> childrens, List<String> fieldNames, org.apache.parquet.io.MessageColumnIO columnIO)
public static org.apache.parquet.io.ColumnIO lookupColumnByName(org.apache.parquet.io.GroupColumnIO groupColumnIO, String columnName)
public static org.apache.parquet.io.GroupColumnIO getMapKeyValueColumn(org.apache.parquet.io.GroupColumnIO groupColumnIO)
public static org.apache.parquet.io.ColumnIO getArrayElementColumn(org.apache.parquet.io.ColumnIO columnIO)
Copyright © 2014–2024 The Apache Software Foundation. All rights reserved.