@Internal public abstract class GenericCsvInputFormat<OT> extends DelimitedInputFormat<OT>
FileInputFormat.FileBaseStatistics, FileInputFormat.InputSplitOpenThread
Modifier and Type | Field and Description |
---|---|
protected int |
commentCount |
protected byte[] |
commentPrefix |
protected boolean[] |
fieldIncluded |
protected int |
invalidLineCount |
protected boolean |
lineDelimiterIsLinebreak |
currBuffer, currLen, currOffset, RECORD_DELIMITER
currentSplit, ENUMERATE_NESTED_FILES_FLAG, enumerateNestedFiles, filePath, INFLATER_INPUT_STREAM_FACTORIES, minSplitSize, numSplits, openTimeout, READ_WHOLE_SPLIT_FLAG, splitLength, splitStart, stream, unsplittable
Modifier | Constructor and Description |
---|---|
protected |
GenericCsvInputFormat() |
protected |
GenericCsvInputFormat(Path filePath) |
Modifier and Type | Method and Description |
---|---|
protected static void |
checkAndCoSort(int[] positions,
Class<?>[] types) |
protected static void |
checkForMonotonousOrder(int[] positions,
Class<?>[] types) |
void |
close()
Closes the input by releasing all buffers and closing the file input stream.
|
void |
enableQuotedStringParsing(char quoteCharacter) |
byte[] |
getCommentPrefix() |
byte[] |
getFieldDelimiter() |
protected FieldParser<?>[] |
getFieldParsers() |
protected Class<?>[] |
getGenericFieldTypes() |
int |
getNumberOfFieldsTotal() |
int |
getNumberOfNonNullFields() |
protected void |
initializeSplit(FileInputSplit split,
Long offset)
Initialization method that is called after opening or reopening an input split.
|
boolean |
isLenient() |
boolean |
isSkippingFirstLineAsHeader() |
protected boolean |
parseRecord(Object[] holders,
byte[] bytes,
int offset,
int numBytes) |
void |
setCharset(String charset)
Set the name of the character set used for the row delimiter.
|
void |
setCommentPrefix(String commentPrefix) |
void |
setFieldDelimiter(String delimiter) |
protected void |
setFieldsGeneric(boolean[] includedMask,
Class<?>[] fieldTypes) |
protected void |
setFieldsGeneric(int[] sourceFieldIndices,
Class<?>[] fieldTypes) |
protected void |
setFieldTypesGeneric(Class<?>... fieldTypes) |
void |
setLenient(boolean lenient) |
void |
setSkipFirstLineAsHeader(boolean skipFirstLine) |
protected int |
skipFields(byte[] bytes,
int startPos,
int limit,
byte[] delim) |
boolean |
supportsMultiPaths()
Override this method to support multiple paths.
|
configure, getBufferSize, getCharset, getCurrentState, getDelimiter, getLineLengthLimit, getNumLineSamples, getStatistics, loadConfigParameters, loadGlobalConfigParams, nextRecord, open, reachedEnd, readLine, readRecord, reopen, setBufferSize, setDelimiter, setDelimiter, setDelimiter, setLineLengthLimit, setNumLineSamples
acceptFile, createInputSplits, decorateInputStream, extractFileExtension, getFilePath, getFilePaths, getFileStats, getFileStats, getInflaterInputStreamFactory, getInputSplitAssigner, getMinSplitSize, getNestedFileEnumeration, getNumSplits, getOpenTimeout, getSplitLength, getSplitStart, registerInflaterInputStreamFactory, setFilePath, setFilePath, setFilePaths, setFilePaths, setFilesFilter, setMinSplitSize, setNestedFileEnumeration, setNumSplits, setOpenTimeout, testForUnsplittable, toString
closeInputFormat, getRuntimeContext, openInputFormat, setRuntimeContext
protected boolean lineDelimiterIsLinebreak
protected transient int commentCount
protected transient int invalidLineCount
protected boolean[] fieldIncluded
protected byte[] commentPrefix
protected GenericCsvInputFormat()
protected GenericCsvInputFormat(Path filePath)
public boolean supportsMultiPaths()
FileInputFormat
supportsMultiPaths
in class FileInputFormat<OT>
public int getNumberOfFieldsTotal()
public int getNumberOfNonNullFields()
public void setCharset(String charset)
DelimitedInputFormat
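Set the name of the character set used for the row delimiter. This is also used by subclasses to interpret field delimiters, comment strings, and for configuring FieldParsers.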
These fields are interpreted when set. Changing the charset thereafter may cause unexpected results.
setCharset
in class DelimitedInputFormat<OT>
charset
- name of the charset
public byte[] getCommentPrefix()
public void setCommentPrefix(String commentPrefix)
public byte[] getFieldDelimiter()
public void setFieldDelimiter(String delimiter)
public boolean isLenient()
public void setLenient(boolean lenient)
public boolean isSkippingFirstLineAsHeader()
public void setSkipFirstLineAsHeader(boolean skipFirstLine)
public void enableQuotedStringParsing(char quoteCharacter)
protected FieldParser<?>[] getFieldParsers()
protected Class<?>[] getGenericFieldTypes()
protected void setFieldTypesGeneric(Class<?>... fieldTypes)
protected void setFieldsGeneric(int[] sourceFieldIndices, Class<?>[] fieldTypes)
protected void setFieldsGeneric(boolean[] includedMask, Class<?>[] fieldTypes)
protected void initializeSplit(FileInputSplit split, Long offset) throws IOException
DelimitedInputFormat
initializeSplit
in class DelimitedInputFormat<OT>
split
- Split that was opened or reopened
offset
- Checkpointed state if the split was reopened
IOException
public void close() throws IOException
DelimitedInputFormat
close
in interface InputFormat<OT,FileInputSplit>
close
in class DelimitedInputFormat<OT>
IOException
- Thrown if closing the file stream causes an I/O error.
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException
ParseException
protected int skipFields(byte[] bytes, int startPos, int limit, byte[] delim)
protected static void checkAndCoSort(int[] positions, Class<?>[] types)
protected static void checkForMonotonousOrder(int[] positions, Class<?>[] types)
Copyright © 2014–2024 The Apache Software Foundation. All rights reserved.