public abstract class BaseHybridHashTable extends Object implements MemorySegmentPool
Base table for LongHybridHashTable and BinaryHashTable.
Modifier and Type | Field and Description |
---|---|
ArrayList<MemorySegment> |
availableMemory
The free memory segments currently available to the hash join.
|
protected long |
buildRowCount |
protected int |
buildSpillRetBufferNumbers
The number of buffers in the build spill return buffer queue that are not actually write-behind buffers,
but regular buffers that simply have not been returned yet.
|
protected LinkedBlockingQueue<MemorySegment> |
buildSpillReturnBuffers
The queue of buffers that can be used for write-behind.
|
protected AtomicBoolean |
closed
Flag indicating that the closing logic has been invoked.
|
protected int |
compressionBlockSize |
protected BlockCompressionFactory |
compressionCodecFactory |
protected boolean |
compressionEnable |
protected FileIOChannel.Enumerator |
currentEnumerator
The channel enumerator that is used while processing the current partition to create
channels for the spill partitions it requires.
|
protected int |
currentRecursionDepth
The recursion depth of the partition that is currently processed.
|
protected HeaderlessChannelReaderInputView |
currentSpilledBuildSide
The reader for the spilled-file of the build partition that is currently read.
|
protected AbstractChannelReaderInputView |
currentSpilledProbeSide
The reader for the spilled-file of the probe partition that is currently read.
|
protected int |
initPartitionFanOut |
protected IOManager |
ioManager
The I/O manager used to instantiate writers for the spilled partitions.
|
protected static org.slf4j.Logger |
LOG |
protected static int |
MAX_NUM_PARTITIONS
The maximum number of partitions, which defines the spilling granularity.
|
protected static int |
MAX_RECURSION_DEPTH
The maximum number of recursive partitionings that the join does before giving up.
|
protected long |
numSpillFiles |
protected int |
segmentSize
The size of the segments used by the hash join buckets.
|
int |
segmentSizeBits |
int |
segmentSizeMask |
protected long |
spillInBytes |
protected int |
totalNumBuffers
The total reserved number of memory segments available to the hash join.
|
boolean |
tryDistinctBuildRow
Try to make the buildSide rows distinct.
|
Constructor and Description |
---|
BaseHybridHashTable(Configuration conf,
Object owner,
MemoryManager memManager,
long reservedMemorySize,
IOManager ioManager,
int avgRecordLen,
long buildRowCount,
boolean tryDistinctBuildRow) |
Modifier and Type | Method and Description |
---|---|
protected abstract void |
clearPartitions() |
void |
close()
Closes the hash table.
|
protected HeaderlessChannelReaderInputView |
createInputView(FileIOChannel.ID id,
int blockCount,
int lastSegmentLimit) |
void |
ensureNumBuffersReturned(int minRequiredAvailable)
This method makes sure that at least a certain number of memory segments is in the list of
free segments.
|
void |
free() |
void |
free(MemorySegment segment) |
void |
freeCurrent()
Free the memory not used.
|
List<MemorySegment> |
getFreedMemory() |
MemorySegment |
getNextBuffer()
Gets the next buffer to be used with the hash-table, either for an in-memory partition, or
for the table buckets.
|
MemorySegment[] |
getNextBuffers(int bufferSize)
Bulk memory acquisition.
|
protected MemorySegment |
getNotNullNextBuffer() |
long |
getNumSpillFiles() |
long |
getSpillInBytes() |
long |
getUsedMemoryInBytes() |
static int |
hash(int hashCode,
int level)
The level parameter is needed so that we can have different hash functions when we
recursively apply the partitioning, so that the working set eventually fits into memory.
|
int |
maxInitBufferOfBucketArea(int partitions)
Gives the bucket area up to one-sixth of the memory.
|
protected int |
maxNumPartition()
The bucket area needs at least one and the data needs at least one.
|
MemorySegment |
nextSegment()
This is the method called by the partitions to request memory to serialize records.
|
int |
pageSize()
Get the page size of each page this pool holds.
|
protected List<MemorySegment> |
readAllBuffers(FileIOChannel.ID id,
int blockCount) |
int |
remainBuffers() |
void |
returnAll(List<MemorySegment> memory)
Return all pages back into this pool.
|
protected abstract int |
spillPartition() |
protected static final org.slf4j.Logger LOG
protected static final int MAX_RECURSION_DEPTH
protected static final int MAX_NUM_PARTITIONS
protected final int initPartitionFanOut
protected final long buildRowCount
protected final int totalNumBuffers
public final ArrayList<MemorySegment> availableMemory
protected final IOManager ioManager
protected final int segmentSize
protected final LinkedBlockingQueue<MemorySegment> buildSpillReturnBuffers
public final int segmentSizeBits
public final int segmentSizeMask
protected AtomicBoolean closed
public final boolean tryDistinctBuildRow
protected int currentRecursionDepth
protected int buildSpillRetBufferNumbers
protected HeaderlessChannelReaderInputView currentSpilledBuildSide
protected AbstractChannelReaderInputView currentSpilledProbeSide
protected FileIOChannel.Enumerator currentEnumerator
protected final boolean compressionEnable
protected final BlockCompressionFactory compressionCodecFactory
protected final int compressionBlockSize
protected transient long numSpillFiles
protected transient long spillInBytes
public BaseHybridHashTable(Configuration conf, Object owner, MemoryManager memManager, long reservedMemorySize, IOManager ioManager, int avgRecordLen, long buildRowCount, boolean tryDistinctBuildRow)
protected int maxNumPartition()
public MemorySegment getNextBuffer()
public MemorySegment[] getNextBuffers(int bufferSize)
protected MemorySegment getNotNullNextBuffer()
public MemorySegment nextSegment()
nextSegment
in interface MemorySegmentSource
public int pageSize()
MemorySegmentPool
pageSize
in interface MemorySegmentPool
public void returnAll(List<MemorySegment> memory)
MemorySegmentPool
returnAll
in interface MemorySegmentPool
memory
- the pages to be returned.
protected abstract int spillPartition() throws IOException
IOException
public void ensureNumBuffersReturned(int minRequiredAvailable)
minRequiredAvailable
- The minimum number of buffers that need to be reclaimed.
public void close()
protected abstract void clearPartitions()
public void free()
public void freeCurrent()
@VisibleForTesting public List<MemorySegment> getFreedMemory()
public void free(MemorySegment segment)
public int remainBuffers()
public long getUsedMemoryInBytes()
public long getNumSpillFiles()
public long getSpillInBytes()
public int maxInitBufferOfBucketArea(int partitions)
protected List<MemorySegment> readAllBuffers(FileIOChannel.ID id, int blockCount) throws IOException
IOException
protected HeaderlessChannelReaderInputView createInputView(FileIOChannel.ID id, int blockCount, int lastSegmentLimit) throws IOException
IOException
public static int hash(int hashCode, int level)
Copyright © 2014–2020 The Apache Software Foundation. All rights reserved.