public class CheckpointCoordinator extends Object
Modifier and Type | Method and Description |
---|---|
void |
abortPendingCheckpoints(CheckpointException exception)
Aborts all the pending checkpoints due to an exception.
|
boolean |
addMasterHook(MasterTriggerRestoreHook<?> hook)
Adds the given master hook to the checkpoint coordinator.
|
JobStatusListener |
createActivatorDeactivator() |
void |
failUnacknowledgedPendingCheckpointsFor(ExecutionAttemptID executionAttemptId,
Throwable cause)
Fails all pending checkpoints which have not been acknowledged by the given execution attempt
id.
|
CheckpointStorageCoordinatorView |
getCheckpointStorage() |
CompletedCheckpointStore |
getCheckpointStore() |
long |
getCheckpointTimeout() |
int |
getNumberOfPendingCheckpoints() |
int |
getNumberOfRegisteredMasterHooks()
Gets the number of currently registered master hooks.
|
int |
getNumberOfRetainedSuccessfulCheckpoints() |
Map<Long,PendingCheckpoint> |
getPendingCheckpoints() |
List<CompletedCheckpoint> |
getSuccessfulCheckpoints() |
boolean |
isPeriodicCheckpointingConfigured()
Returns whether periodic checkpointing has been configured.
|
boolean |
isPeriodicCheckpointingStarted() |
boolean |
isShutdown() |
boolean |
isTriggering() |
boolean |
receiveAcknowledgeMessage(AcknowledgeCheckpoint message,
String taskManagerLocationInfo)
Receives an AcknowledgeCheckpoint message and returns whether the message was associated with
a pending checkpoint.
|
void |
receiveDeclineMessage(DeclineCheckpoint message,
String taskManagerLocationInfo)
Receives a
DeclineCheckpoint message for a pending checkpoint. |
void |
reportStats(long id,
ExecutionAttemptID attemptId,
CheckpointMetrics metrics) |
boolean |
restoreInitialCheckpointIfPresent(Set<ExecutionJobVertex> tasks)
Restores the latest checkpointed state at the beginning of the job execution.
|
boolean |
restoreLatestCheckpointedStateToAll(Set<ExecutionJobVertex> tasks,
boolean allowNonRestoredState)
Restores the latest checkpointed state to all tasks and all coordinators.
|
OptionalLong |
restoreLatestCheckpointedStateToSubtasks(Set<ExecutionJobVertex> tasks)
Restores the latest checkpointed state to a set of subtasks.
|
boolean |
restoreSavepoint(SavepointRestoreSettings restoreSettings,
Map<JobVertexID,ExecutionJobVertex> tasks,
ClassLoader userClassLoader)
Restore the state with given savepoint.
|
void |
shutdown()
Shuts down the checkpoint coordinator.
|
void |
startCheckpointScheduler() |
void |
stopCheckpointScheduler() |
CompletableFuture<CompletedCheckpoint> |
triggerCheckpoint(boolean isPeriodic)
Triggers a new standard checkpoint; the isPeriodic flag indicates whether the triggered checkpoint is periodic.
|
CompletableFuture<CompletedCheckpoint> |
triggerCheckpoint(CheckpointType checkpointType)
Triggers one new checkpoint with the given checkpointType.
|
CompletableFuture<CompletedCheckpoint> |
triggerSavepoint(String targetLocation,
SavepointFormatType formatType)
Triggers a savepoint with the given savepoint directory as a target.
|
CompletableFuture<CompletedCheckpoint> |
triggerSynchronousSavepoint(boolean terminate,
String targetLocation,
SavepointFormatType formatType)
Triggers a synchronous savepoint with the given savepoint directory as a target.
|
public CheckpointCoordinator(JobID job, CheckpointCoordinatorConfiguration chkConfig, Collection<OperatorCoordinatorCheckpointContext> coordinatorsToCheckpoint, CheckpointIDCounter checkpointIDCounter, CompletedCheckpointStore completedCheckpointStore, CheckpointStorage checkpointStorage, Executor executor, CheckpointsCleaner checkpointsCleaner, ScheduledExecutor timer, CheckpointFailureManager failureManager, CheckpointPlanCalculator checkpointPlanCalculator, ExecutionAttemptMappingProvider attemptMappingProvider, CheckpointStatsTracker statsTracker)
@VisibleForTesting public CheckpointCoordinator(JobID job, CheckpointCoordinatorConfiguration chkConfig, Collection<OperatorCoordinatorCheckpointContext> coordinatorsToCheckpoint, CheckpointIDCounter checkpointIDCounter, CompletedCheckpointStore completedCheckpointStore, CheckpointStorage checkpointStorage, Executor executor, CheckpointsCleaner checkpointsCleaner, ScheduledExecutor timer, CheckpointFailureManager failureManager, CheckpointPlanCalculator checkpointPlanCalculator, ExecutionAttemptMappingProvider attemptMappingProvider, Clock clock, CheckpointStatsTracker statsTracker, java.util.function.BiFunction<Set<ExecutionJobVertex>,Map<OperatorID,OperatorState>,VertexFinishedStateChecker> vertexFinishedStateCheckerFactory)
public boolean addMasterHook(MasterTriggerRestoreHook<?> hook)
(see MasterTriggerRestoreHook.getIdentifier()).
hook
- The hook to add.
public int getNumberOfRegisteredMasterHooks()
public void shutdown() throws Exception
After this method has been called, the coordinator does not accept any further messages and cannot trigger any further checkpoints.
Exception
public boolean isShutdown()
public CompletableFuture<CompletedCheckpoint> triggerSavepoint(@Nullable String targetLocation, SavepointFormatType formatType)
targetLocation
- Target location for the savepoint, optional. If null, the state
backend's configured default will be used.
IllegalStateException
- If no savepoint directory has been specified and no default
savepoint directory has been configured.
public CompletableFuture<CompletedCheckpoint> triggerSynchronousSavepoint(boolean terminate, @Nullable String targetLocation, SavepointFormatType formatType)
terminate
- flag indicating if the job should terminate or just suspend
targetLocation
- Target location for the savepoint, optional. If null, the state
backend's configured default will be used.
IllegalStateException
- If no savepoint directory has been specified and no default
savepoint directory has been configured.
public CompletableFuture<CompletedCheckpoint> triggerCheckpoint(boolean isPeriodic)
isPeriodic
- Flag indicating whether this triggered checkpoint is periodic.
public CompletableFuture<CompletedCheckpoint> triggerCheckpoint(CheckpointType checkpointType)
checkpointType
- specifies the backup type of the checkpoint to trigger.
public void receiveDeclineMessage(DeclineCheckpoint message, String taskManagerLocationInfo)
Receives a DeclineCheckpoint message for a pending checkpoint.
message
- Checkpoint decline from the task manager
taskManagerLocationInfo
- The location info of the decline checkpoint message's sender
public boolean receiveAcknowledgeMessage(AcknowledgeCheckpoint message, String taskManagerLocationInfo) throws CheckpointException
message
- Checkpoint ack from the task manager
taskManagerLocationInfo
- The location of the acknowledge checkpoint message's sender
CheckpointException
- If the checkpoint cannot be added to the completed checkpoint
store.
public void failUnacknowledgedPendingCheckpointsFor(ExecutionAttemptID executionAttemptId, Throwable cause)
executionAttemptId
- for which to discard unacknowledged pending checkpoints
cause
- of the failure
public OptionalLong restoreLatestCheckpointedStateToSubtasks(Set<ExecutionJobVertex> tasks) throws Exception
tasks
- Set of job vertices to restore. State for these vertices is restored via Execution.setInitialState(JobManagerTaskRestore).
Returns an OptionalLong with the checkpoint ID, if state was restored, an empty OptionalLong otherwise.
IllegalStateException
- If the CheckpointCoordinator is shut down.
IllegalStateException
- If no completed checkpoint is available and the failIfNoCheckpoint flag has been set.
IllegalStateException
- If the checkpoint contains state that cannot be mapped to any job vertex in tasks and the allowNonRestoredState flag has not been set.
IllegalStateException
- If the max parallelism changed for an operator that restores state from this checkpoint.
IllegalStateException
- If the parallelism changed for an operator that restores non-partitioned state from this checkpoint.
Exception
public boolean restoreLatestCheckpointedStateToAll(Set<ExecutionJobVertex> tasks, boolean allowNonRestoredState) throws Exception
tasks
- Set of job vertices to restore. State for these vertices is restored via Execution.setInitialState(JobManagerTaskRestore).
allowNonRestoredState
- Allow checkpoint state that cannot be mapped to any job vertex in tasks.
Returns true if state was restored, false otherwise.
IllegalStateException
- If the CheckpointCoordinator is shut down.
IllegalStateException
- If no completed checkpoint is available and the failIfNoCheckpoint flag has been set.
IllegalStateException
- If the checkpoint contains state that cannot be mapped to any job vertex in tasks and the allowNonRestoredState flag has not been set.
IllegalStateException
- If the max parallelism changed for an operator that restores state from this checkpoint.
IllegalStateException
- If the parallelism changed for an operator that restores non-partitioned state from this checkpoint.
Exception
public boolean restoreInitialCheckpointIfPresent(Set<ExecutionJobVertex> tasks) throws Exception
tasks
- Set of job vertices to restore. State for these vertices is restored via Execution.setInitialState(JobManagerTaskRestore).
Exception
public boolean restoreSavepoint(SavepointRestoreSettings restoreSettings, Map<JobVertexID,ExecutionJobVertex> tasks, ClassLoader userClassLoader) throws Exception
restoreSettings
- Settings for a snapshot to restore from. Includes the path and
parameters for the restore process.
tasks
- Map of job vertices to restore. State for these vertices is restored via Execution.setInitialState(JobManagerTaskRestore).
userClassLoader
- The class loader to resolve serialized classes in legacy savepoint
versions.
Exception
public int getNumberOfPendingCheckpoints()
public int getNumberOfRetainedSuccessfulCheckpoints()
public Map<Long,PendingCheckpoint> getPendingCheckpoints()
public List<CompletedCheckpoint> getSuccessfulCheckpoints() throws Exception
Exception
public CheckpointStorageCoordinatorView getCheckpointStorage()
public CompletedCheckpointStore getCheckpointStore()
public long getCheckpointTimeout()
public boolean isTriggering()
public boolean isPeriodicCheckpointingConfigured()
true if periodic checkpoints have been configured.
public void startCheckpointScheduler()
public void stopCheckpointScheduler()
public boolean isPeriodicCheckpointingStarted()
public void abortPendingCheckpoints(CheckpointException exception)
exception
- The exception.
public JobStatusListener createActivatorDeactivator()
public void reportStats(long id, ExecutionAttemptID attemptId, CheckpointMetrics metrics) throws CheckpointException
CheckpointException
Copyright © 2014–2023 The Apache Software Foundation. All rights reserved.