@Public public abstract class DelimitedInputFormat<OT> extends FileInputFormat<OT>
The parsing of the record bytes into the record has to be implemented in the readRecord(Object, byte[], int, int) method.
The default delimiter is the newline character '\n'.
FileInputFormat.FileBaseStatistics, FileInputFormat.InputSplitOpenThread
Modifier and Type | Field and Description |
---|---|
protected static String |
RECORD_DELIMITER
The configuration key to set the record delimiter.
|
currentSplit, ENUMERATE_NESTED_FILES_FLAG, enumerateNestedFiles, filePath, INFLATER_INPUT_STREAM_FACTORIES, minSplitSize, numSplits, openTimeout, READ_WHOLE_SPLIT_FLAG, splitLength, splitStart, stream, unsplittable
Modifier | Constructor and Description |
---|---|
|
DelimitedInputFormat() |
protected |
DelimitedInputFormat(Path filePath) |
Modifier and Type | Method and Description |
---|---|
void |
close()
Closes the input by releasing all buffers and closing the file input stream.
|
void |
configure(Configuration parameters)
Configures this input format by reading the path to the file from the configuration and the string that
defines the record delimiter.
|
int |
getBufferSize() |
byte[] |
getDelimiter() |
int |
getLineLengthLimit() |
int |
getNumLineSamples() |
FileInputFormat.FileBaseStatistics |
getStatistics(BaseStatistics cachedStats)
Obtains basic file statistics containing only file size.
|
protected static void |
loadGlobalConfigParams() |
OT |
nextRecord(OT record)
Reads the next record from the input.
|
void |
open(FileInputSplit split)
Opens the given input split.
|
boolean |
reachedEnd()
Checks whether the current split is at its end.
|
protected boolean |
readLine() |
abstract OT |
readRecord(OT reuse,
byte[] bytes,
int offset,
int numBytes)
This function parses the given byte array which represents a serialized record.
|
void |
setBufferSize(int bufferSize) |
void |
setDelimiter(byte[] delimiter) |
void |
setDelimiter(char delimiter) |
void |
setDelimiter(String delimiter) |
void |
setLineLengthLimit(int lineLengthLimit) |
void |
setNumLineSamples(int numLineSamples) |
acceptFile, createInputSplits, decorateInputStream, extractFileExtension, getFilePath, getFileStats, getInflaterInputStreamFactory, getInputSplitAssigner, getMinSplitSize, getNumSplits, getOpenTimeout, getSplitLength, getSplitStart, registerInflaterInputStreamFactory, setFilePath, setFilePath, setMinSplitSize, setNumSplits, setOpenTimeout, testForUnsplittable, toString
getRuntimeContext, setRuntimeContext
protected static final String RECORD_DELIMITER
public DelimitedInputFormat()
protected DelimitedInputFormat(Path filePath)
protected static void loadGlobalConfigParams()
public byte[] getDelimiter()
public void setDelimiter(byte[] delimiter)
public void setDelimiter(char delimiter)
public void setDelimiter(String delimiter)
public int getLineLengthLimit()
public void setLineLengthLimit(int lineLengthLimit)
public int getBufferSize()
public void setBufferSize(int bufferSize)
public int getNumLineSamples()
public void setNumLineSamples(int numLineSamples)
public abstract OT readRecord(OT reuse, byte[] bytes, int offset, int numBytes) throws IOException
reuse - An optionally reusable object.
bytes - Binary data of serialized records.
offset - The offset where to start to read the record data.
numBytes - The number of bytes that can be read starting at the offset position.
IOException - if the record could not be read.
public void configure(Configuration parameters)
configure
in interface InputFormat<OT,FileInputSplit>
configure
in class FileInputFormat<OT>
parameters - The configuration object to read the parameters from.
See also: InputFormat.configure(org.apache.flink.configuration.Configuration)
public FileInputFormat.FileBaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException
FileInputFormat
getStatistics
in interface InputFormat<OT,FileInputSplit>
getStatistics
in class FileInputFormat<OT>
cachedStats - The statistics that were cached. May be null.
Throws: IOException
See also: InputFormat.getStatistics(org.apache.flink.api.common.io.statistics.BaseStatistics)
public void open(FileInputSplit split) throws IOException
open
in interface InputFormat<OT,FileInputSplit>
open
in class FileInputFormat<OT>
split - The input split to open.
IOException - Thrown, if the split could not be opened due to an I/O problem.
See also: FileInputFormat.open(org.apache.flink.core.fs.FileInputSplit)
public boolean reachedEnd()
public OT nextRecord(OT record) throws IOException
InputFormat
When this method is called, the input format is guaranteed to be opened.
record - Object that may be reused.
IOException - Thrown, if an I/O error occurred.
public void close() throws IOException
close
in interface InputFormat<OT,FileInputSplit>
close
in class FileInputFormat<OT>
IOException - Thrown, if the closing of the file stream causes an I/O error.
protected final boolean readLine() throws IOException
IOException
Copyright © 2014–2017 The Apache Software Foundation. All rights reserved.