Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose history size to workflows #3055

Merged
merged 5 commits into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
534 changes: 314 additions & 220 deletions api/persistence/v1/executions.pb.go

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions common/dynamicconfig/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,16 @@ const (
HistorySizeLimitError = "limit.historySize.error"
// HistorySizeLimitWarn is the per workflow execution history size limit for warning
HistorySizeLimitWarn = "limit.historySize.warn"
// HistorySizeSuggestContinueAsNew is the workflow execution history size limit to suggest
// continue-as-new (in workflow task started event)
HistorySizeSuggestContinueAsNew = "limit.historySize.suggestContinueAsNew"
// HistoryCountLimitError is the per workflow execution history event count limit
HistoryCountLimitError = "limit.historyCount.error"
// HistoryCountLimitWarn is the per workflow execution history event count limit for warning
HistoryCountLimitWarn = "limit.historyCount.warn"
// HistoryCountSuggestContinueAsNew is the workflow execution history event count limit to
// suggest continue-as-new (in workflow task started event)
HistoryCountSuggestContinueAsNew = "limit.historyCount.suggestContinueAsNew"
// MaxIDLengthLimit is the length limit for various IDs, including: Namespace, TaskQueue, WorkflowID, ActivityID, TimerID,
// WorkflowType, ActivityType, SignalName, MarkerName, ErrorReason/FailureReason/CancelCause, Identity, RequestID
MaxIDLengthLimit = "limit.maxIDLength"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ message WorkflowExecutionInfo {
google.protobuf.Timestamp workflow_task_original_scheduled_time = 30 [(gogoproto.stdtime) = true];
string workflow_task_request_id = 31;
temporal.server.api.enums.v1.WorkflowTaskType workflow_task_type = 68;
bool workflow_task_suggest_continue_as_new = 69;
int64 workflow_task_history_size_bytes = 70;

bool cancel_requested = 29;
string cancel_request_id = 32;
Expand Down
52 changes: 28 additions & 24 deletions service/history/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,18 +181,20 @@ type Config struct {
DurableArchivalEnabled dynamicconfig.BoolPropertyFn

// Size limit related settings
BlobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter
BlobSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter
MemoSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter
MemoSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter
HistorySizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter
HistorySizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter
HistoryCountLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter
HistoryCountLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter
NumPendingChildExecutionsLimit dynamicconfig.IntPropertyFnWithNamespaceFilter
NumPendingActivitiesLimit dynamicconfig.IntPropertyFnWithNamespaceFilter
NumPendingSignalsLimit dynamicconfig.IntPropertyFnWithNamespaceFilter
NumPendingCancelsRequestLimit dynamicconfig.IntPropertyFnWithNamespaceFilter
BlobSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter
BlobSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter
MemoSizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter
MemoSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter
HistorySizeLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter
HistorySizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter
HistorySizeSuggestContinueAsNew dynamicconfig.IntPropertyFnWithNamespaceFilter
HistoryCountLimitError dynamicconfig.IntPropertyFnWithNamespaceFilter
HistoryCountLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter
HistoryCountSuggestContinueAsNew dynamicconfig.IntPropertyFnWithNamespaceFilter
NumPendingChildExecutionsLimit dynamicconfig.IntPropertyFnWithNamespaceFilter
NumPendingActivitiesLimit dynamicconfig.IntPropertyFnWithNamespaceFilter
NumPendingSignalsLimit dynamicconfig.IntPropertyFnWithNamespaceFilter
NumPendingCancelsRequestLimit dynamicconfig.IntPropertyFnWithNamespaceFilter

// DefaultActivityRetryOptions specifies the out-of-box retry policy if
// none is configured on the Activity by the user.
Expand Down Expand Up @@ -417,18 +419,20 @@ func NewConfig(dc *dynamicconfig.Collection, numberOfShards int32, isAdvancedVis
ArchiveSignalTimeout: dc.GetDurationProperty(dynamicconfig.ArchiveSignalTimeout, 300*time.Millisecond),
DurableArchivalEnabled: dc.GetBoolProperty(dynamicconfig.DurableArchivalEnabled, true),

BlobSizeLimitError: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.BlobSizeLimitError, 2*1024*1024),
BlobSizeLimitWarn: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.BlobSizeLimitWarn, 512*1024),
MemoSizeLimitError: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.MemoSizeLimitError, 2*1024*1024),
MemoSizeLimitWarn: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.MemoSizeLimitWarn, 2*1024),
NumPendingChildExecutionsLimit: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.NumPendingChildExecutionsLimitError, 50000),
NumPendingActivitiesLimit: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.NumPendingActivitiesLimitError, 50000),
NumPendingSignalsLimit: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.NumPendingSignalsLimitError, 50000),
NumPendingCancelsRequestLimit: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.NumPendingCancelRequestsLimitError, 50000),
HistorySizeLimitError: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.HistorySizeLimitError, 50*1024*1024),
HistorySizeLimitWarn: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.HistorySizeLimitWarn, 10*1024*1024),
HistoryCountLimitError: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.HistoryCountLimitError, 50*1024),
HistoryCountLimitWarn: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.HistoryCountLimitWarn, 10*1024),
BlobSizeLimitError: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.BlobSizeLimitError, 2*1024*1024),
BlobSizeLimitWarn: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.BlobSizeLimitWarn, 512*1024),
MemoSizeLimitError: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.MemoSizeLimitError, 2*1024*1024),
MemoSizeLimitWarn: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.MemoSizeLimitWarn, 2*1024),
NumPendingChildExecutionsLimit: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.NumPendingChildExecutionsLimitError, 50000),
NumPendingActivitiesLimit: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.NumPendingActivitiesLimitError, 50000),
NumPendingSignalsLimit: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.NumPendingSignalsLimitError, 50000),
NumPendingCancelsRequestLimit: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.NumPendingCancelRequestsLimitError, 50000),
HistorySizeLimitError: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.HistorySizeLimitError, 50*1024*1024),
HistorySizeLimitWarn: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.HistorySizeLimitWarn, 10*1024*1024),
HistorySizeSuggestContinueAsNew: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.HistorySizeSuggestContinueAsNew, 4*1024*1024),
HistoryCountLimitError: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.HistoryCountLimitError, 50*1024),
HistoryCountLimitWarn: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.HistoryCountLimitWarn, 10*1024),
HistoryCountSuggestContinueAsNew: dc.GetIntPropertyFilteredByNamespace(dynamicconfig.HistoryCountSuggestContinueAsNew, 4*1024),

ThrottledLogRPS: dc.GetIntProperty(dynamicconfig.HistoryThrottledLogRPS, 4),
EnableStickyQuery: dc.GetBoolPropertyFnWithNamespaceFilter(dynamicconfig.EnableStickyQuery, true),
Expand Down
10 changes: 7 additions & 3 deletions service/history/workflow/history_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,17 @@ func (b *HistoryBuilder) AddWorkflowTaskStartedEvent(
requestID string,
identity string,
startTime time.Time,
suggestContinueAsNew bool,
historySizeBytes int64,
) *historypb.HistoryEvent {
event := b.createNewHistoryEvent(enumspb.EVENT_TYPE_WORKFLOW_TASK_STARTED, startTime)
event.Attributes = &historypb.HistoryEvent_WorkflowTaskStartedEventAttributes{
WorkflowTaskStartedEventAttributes: &historypb.WorkflowTaskStartedEventAttributes{
ScheduledEventId: scheduledEventID,
Identity: identity,
RequestId: requestID,
ScheduledEventId: scheduledEventID,
Identity: identity,
RequestId: requestID,
SuggestContinueAsNew: suggestContinueAsNew,
HistorySizeBytes: historySizeBytes,
Comment on lines +230 to +231
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh.. Attributes are already there for half a year.

},
}

Expand Down
10 changes: 7 additions & 3 deletions service/history/workflow/history_builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,8 @@ func (s *historyBuilderSuite) TestWorkflowTaskStarted() {
testRequestID,
testIdentity,
s.now,
false,
123678,
)
s.Equal(event, s.flush())
s.Equal(&historypb.HistoryEvent{
Expand All @@ -649,9 +651,11 @@ func (s *historyBuilderSuite) TestWorkflowTaskStarted() {
Version: s.version,
Attributes: &historypb.HistoryEvent_WorkflowTaskStartedEventAttributes{
WorkflowTaskStartedEventAttributes: &historypb.WorkflowTaskStartedEventAttributes{
ScheduledEventId: scheduledEventID,
Identity: testIdentity,
RequestId: testRequestID,
ScheduledEventId: scheduledEventID,
Identity: testIdentity,
RequestId: testRequestID,
SuggestContinueAsNew: false,
HistorySizeBytes: 123678,
},
},
}, event)
Expand Down
9 changes: 8 additions & 1 deletion service/history/workflow/mutable_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,13 @@ type (

// Indicate type of the current workflow task (normal, transient, or speculative).
Type enumsspb.WorkflowTaskType

// These two fields are sent to workers in the WorkflowTaskStarted event. We need to save a
// copy here to ensure that we send the same values with every transient WorkflowTaskStarted
// event, otherwise a dynamic config change of the suggestion threshold could cause the
// event that the worker used to not match the event we saved in history.
SuggestContinueAsNew bool
HistorySizeBytes int64
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you trying to make history deterministic? I don't think it is necessary. First of all SDKs can ignore these fields in non-determinism detector same way as they do for activity arguments. But even if they don't, SDK will just replay history from the beginning which is ok for workflows with continuously failing WT. I think SDK already does it (may be not).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But you still need these fields just to pass this data around. All WT related fields from executions.proto must be here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as we discussed: they can change across attempts, but we do need to keep them and can't just recompute them because of determinism. updated comment

}

MutableState interface {
Expand Down Expand Up @@ -224,7 +231,7 @@ type (
ReplicateWorkflowTaskCompletedEvent(*historypb.HistoryEvent) error
ReplicateWorkflowTaskFailedEvent() error
ReplicateWorkflowTaskScheduledEvent(int64, int64, *taskqueuepb.TaskQueue, *time.Duration, int32, *time.Time, *time.Time, enumsspb.WorkflowTaskType) (*WorkflowTaskInfo, error)
ReplicateWorkflowTaskStartedEvent(*WorkflowTaskInfo, int64, int64, int64, string, time.Time) (*WorkflowTaskInfo, error)
ReplicateWorkflowTaskStartedEvent(*WorkflowTaskInfo, int64, int64, int64, string, time.Time, bool, int64) (*WorkflowTaskInfo, error)
ReplicateWorkflowTaskTimedOutEvent(enumspb.TimeoutType) error
ReplicateExternalWorkflowExecutionCancelRequested(*historypb.HistoryEvent) error
ReplicateExternalWorkflowExecutionSignaled(*historypb.HistoryEvent) error
Expand Down
9 changes: 7 additions & 2 deletions service/history/workflow/mutable_state_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,9 @@ func (ms *MutableStateImpl) ClearTransientWorkflowTask() error {
TaskQueue: nil,
OriginalScheduledTime: timestamp.UnixOrZeroTimePtr(0),
Type: enumsspb.WORKFLOW_TASK_TYPE_UNSPECIFIED,

SuggestContinueAsNew: false,
HistorySizeBytes: 0,
dnr marked this conversation as resolved.
Show resolved Hide resolved
}
ms.workflowTaskManager.UpdateWorkflowTask(emptyWorkflowTaskInfo)
return nil
Expand Down Expand Up @@ -1792,9 +1795,11 @@ func (ms *MutableStateImpl) ReplicateWorkflowTaskStartedEvent(
startedEventID int64,
requestID string,
timestamp time.Time,
suggestContinueAsNew bool,
historySizeBytes int64,
) (*WorkflowTaskInfo, error) {

return ms.workflowTaskManager.ReplicateWorkflowTaskStartedEvent(workflowTask, version, scheduledEventID, startedEventID, requestID, timestamp)
return ms.workflowTaskManager.ReplicateWorkflowTaskStartedEvent(workflowTask, version, scheduledEventID,
startedEventID, requestID, timestamp, suggestContinueAsNew, historySizeBytes)
}

// TODO (alex-update): Transient needs to be renamed to "TransientOrSpeculative"
Expand Down
4 changes: 4 additions & 0 deletions service/history/workflow/mutable_state_impl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,8 @@ func (s *mutableStateSuite) prepareTransientWorkflowTaskCompletionFirstBatchRepl
workflowTaskStartedEvent.GetEventId(),
workflowTaskStartedEvent.GetWorkflowTaskStartedEventAttributes().GetRequestId(),
timestamp.TimeValue(workflowTaskStartedEvent.GetEventTime()),
false,
123678,
)
s.Nil(err)
s.NotNil(wt)
Expand Down Expand Up @@ -649,6 +651,8 @@ func (s *mutableStateSuite) prepareTransientWorkflowTaskCompletionFirstBatchRepl
newWorkflowTaskStartedEvent.GetEventId(),
newWorkflowTaskStartedEvent.GetWorkflowTaskStartedEventAttributes().GetRequestId(),
timestamp.TimeValue(newWorkflowTaskStartedEvent.GetEventTime()),
false,
123678,
)
s.Nil(err)
s.NotNil(wt)
Expand Down
8 changes: 4 additions & 4 deletions service/history/workflow/mutable_state_mock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions service/history/workflow/mutable_state_rebuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ func (b *MutableStateRebuilderImpl) ApplyEvents(
event.GetEventId(),
attributes.GetRequestId(),
timestamp.TimeValue(event.GetEventTime()),
attributes.GetSuggestContinueAsNew(),
attributes.GetHistorySizeBytes(),
)
if err != nil {
return nil, err
Expand Down
1 change: 1 addition & 0 deletions service/history/workflow/mutable_state_rebuilder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,7 @@ func (s *stateBuilderSuite) TestApplyEvents_EventTypeWorkflowTaskStarted() {
}
s.mockMutableState.EXPECT().ReplicateWorkflowTaskStartedEvent(
(*WorkflowTaskInfo)(nil), event.GetVersion(), scheduledEventID, event.GetEventId(), workflowTaskRequestID, timestamp.TimeValue(event.GetEventTime()),
false, gomock.Any(),
).Return(wt, nil)
s.mockUpdateVersion(event)
s.mockTaskGenerator.EXPECT().GenerateStartWorkflowTaskTasks(
Expand Down
Loading