diff --git a/src/testing/api.ts b/src/testing/api.ts index 5cff8e3..0953e4d 100644 --- a/src/testing/api.ts +++ b/src/testing/api.ts @@ -215,6 +215,7 @@ export async function sendTestCaseResult(args: { testCaseHash: string; testCaseOutput: OutputType; testCaseDurationMs?: number; + datasetItemId?: string; serializeTestCaseForHumanReview?: ( testCase: TestCaseType, ) => HumanReviewField[]; @@ -242,6 +243,7 @@ export async function sendTestCaseResult(args: { testCaseDurationMs: args.testCaseDurationMs, testCaseHumanReviewInputFields: serializedHumanReviewInputFields, testCaseHumanReviewOutputFields: serializedHumanReviewOutputFields, + datasetItemId: args.datasetItemId, }, }); const resultId = resp.data.id; @@ -261,6 +263,7 @@ export async function sendTestCaseResult(args: { body: { testCaseHash: args.testCaseHash, testCaseDurationMs: args.testCaseDurationMs, + datasetItemId: args.datasetItemId, }, }); const resultId = resp.data.id; diff --git a/src/testing/run.ts b/src/testing/run.ts index 7c74c7c..9fcdc99 100644 --- a/src/testing/run.ts +++ b/src/testing/run.ts @@ -141,6 +141,7 @@ async function runTestCaseUnsafe(args: { testCase: TestCaseType; testCaseHash: string; fn: (args: { testCase: TestCaseType }) => OutputType | Promise; + serializeDatasetItemId?: (testCase: TestCaseType) => string; serializeTestCaseForHumanReview?: ( testCase: TestCaseType, ) => HumanReviewField[]; @@ -165,6 +166,7 @@ async function runTestCaseUnsafe(args: { testCaseHash: args.testCaseHash, testCaseOutput: output, testCaseDurationMs: durationMs, + datasetItemId: args.serializeDatasetItemId?.(args.testCase), serializeTestCaseForHumanReview: args.serializeTestCaseForHumanReview, serializeOutputForHumanReview: args.serializeOutputForHumanReview, }); @@ -182,6 +184,7 @@ async function runTestCase(args: { testCaseHash: string; evaluators: BaseTestEvaluator[]; fn: (args: { testCase: TestCaseType }) => OutputType | Promise; + serializeDatasetItemId?: (testCase: TestCaseType) => string; serializeTestCaseForHumanReview?: ( testCase: TestCaseType, ) => HumanReviewField[]; @@ -196,6 +199,7 @@ async function runTestCase(args: { testCase: args.testCase, testCaseHash: args.testCaseHash, fn: args.fn, + serializeDatasetItemId: args.serializeDatasetItemId, serializeTestCaseForHumanReview: args.serializeTestCaseForHumanReview, serializeOutputForHumanReview: args.serializeOutputForHumanReview, }); @@ -247,6 +251,7 @@ async function runTestSuiteForGridCombo(args: { | ((testCase: TestCaseType) => string); evaluators?: BaseTestEvaluator[]; fn: (args: { testCase: TestCaseType }) => OutputType | Promise; + serializeDatasetItemId?: (testCase: TestCaseType) => string; serializeTestCaseForHumanReview?: ( testCase: TestCaseType, ) => HumanReviewField[]; @@ -310,6 +315,7 @@ async function runTestSuiteForGridCombo(args: { testCaseHash, evaluators: args.evaluators || [], fn: args.fn, + serializeDatasetItemId: args.serializeDatasetItemId, serializeTestCaseForHumanReview: args.serializeTestCaseForHumanReview, serializeOutputForHumanReview: @@ -368,6 +374,7 @@ export async function runTestSuite< evaluators?: BaseTestEvaluator[]; // How many test cases to run concurrently maxTestCaseConcurrency?: number; + serializeDatasetItemId?: (testCase: TestCaseType) => string; // Get the dataset item id from the test case serializeTestCaseForHumanReview?: ( testCase: TestCaseType, ) => HumanReviewField[]; @@ -483,6 +490,7 @@ export async function runTestSuite< testCaseHash: args.testCaseHash, evaluators: args.evaluators, fn: args.fn, + serializeDatasetItemId: args.serializeDatasetItemId, serializeTestCaseForHumanReview: args.serializeTestCaseForHumanReview, serializeOutputForHumanReview: args.serializeOutputForHumanReview, }); @@ -527,6 +535,7 @@ export async function runTestSuite< testCaseHash: args.testCaseHash, evaluators: args.evaluators, fn: args.fn, + serializeDatasetItemId: args.serializeDatasetItemId, serializeTestCaseForHumanReview: args.serializeTestCaseForHumanReview, serializeOutputForHumanReview: args.serializeOutputForHumanReview, gridSearchRunGroupId,