Skip to content

Commit

Permalink
Add ability to send dataset item id
Browse files (browse the repository at this point in the history)
  • Loading branch information
adamnolte committed Sep 27, 2024
1 parent 09ece1f commit bc2f886
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/testing/api.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -215,6 +215,7 @@ export async function sendTestCaseResult<TestCaseType, OutputType>(args: {
testCaseHash: string;
testCaseOutput: OutputType;
testCaseDurationMs?: number;
datasetItemId?: string;
serializeTestCaseForHumanReview?: (
testCase: TestCaseType,
) => HumanReviewField[];
Expand Down Expand Up @@ -242,6 +243,7 @@ export async function sendTestCaseResult<TestCaseType, OutputType>(args: {
testCaseDurationMs: args.testCaseDurationMs,
testCaseHumanReviewInputFields: serializedHumanReviewInputFields,
testCaseHumanReviewOutputFields: serializedHumanReviewOutputFields,
datasetItemId: args.datasetItemId,
},
});
const resultId = resp.data.id;
Expand All @@ -261,6 +263,7 @@ export async function sendTestCaseResult<TestCaseType, OutputType>(args: {
body: {
testCaseHash: args.testCaseHash,
testCaseDurationMs: args.testCaseDurationMs,
datasetItemId: args.datasetItemId,
},
});
const resultId = resp.data.id;
Expand Down
9 changes: 9 additions & 0 deletions src/testing/run.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -141,6 +141,7 @@ async function runTestCaseUnsafe<TestCaseType, OutputType>(args: {
testCase: TestCaseType;
testCaseHash: string;
fn: (args: { testCase: TestCaseType }) => OutputType | Promise<OutputType>;
serializeDatasetItemId?: (testCase: TestCaseType) => string;
serializeTestCaseForHumanReview?: (
testCase: TestCaseType,
) => HumanReviewField[];
Expand All @@ -165,6 +166,7 @@ async function runTestCaseUnsafe<TestCaseType, OutputType>(args: {
testCaseHash: args.testCaseHash,
testCaseOutput: output,
testCaseDurationMs: durationMs,
datasetItemId: args.serializeDatasetItemId?.(args.testCase),
serializeTestCaseForHumanReview: args.serializeTestCaseForHumanReview,
serializeOutputForHumanReview: args.serializeOutputForHumanReview,
});
Expand All @@ -182,6 +184,7 @@ async function runTestCase<TestCaseType, OutputType>(args: {
testCaseHash: string;
evaluators: BaseTestEvaluator<TestCaseType, OutputType>[];
fn: (args: { testCase: TestCaseType }) => OutputType | Promise<OutputType>;
serializeDatasetItemId?: (testCase: TestCaseType) => string;
serializeTestCaseForHumanReview?: (
testCase: TestCaseType,
) => HumanReviewField[];
Expand All @@ -196,6 +199,7 @@ async function runTestCase<TestCaseType, OutputType>(args: {
testCase: args.testCase,
testCaseHash: args.testCaseHash,
fn: args.fn,
serializeDatasetItemId: args.serializeDatasetItemId,
serializeTestCaseForHumanReview: args.serializeTestCaseForHumanReview,
serializeOutputForHumanReview: args.serializeOutputForHumanReview,
});
Expand Down Expand Up @@ -247,6 +251,7 @@ async function runTestSuiteForGridCombo<TestCaseType, OutputType>(args: {
| ((testCase: TestCaseType) => string);
evaluators?: BaseTestEvaluator<TestCaseType, OutputType>[];
fn: (args: { testCase: TestCaseType }) => OutputType | Promise<OutputType>;
serializeDatasetItemId?: (testCase: TestCaseType) => string;
serializeTestCaseForHumanReview?: (
testCase: TestCaseType,
) => HumanReviewField[];
Expand Down Expand Up @@ -310,6 +315,7 @@ async function runTestSuiteForGridCombo<TestCaseType, OutputType>(args: {
testCaseHash,
evaluators: args.evaluators || [],
fn: args.fn,
serializeDatasetItemId: args.serializeDatasetItemId,
serializeTestCaseForHumanReview:
args.serializeTestCaseForHumanReview,
serializeOutputForHumanReview:
Expand Down Expand Up @@ -368,6 +374,7 @@ export async function runTestSuite<
evaluators?: BaseTestEvaluator<TestCaseType, OutputType>[];
// How many test cases to run concurrently
maxTestCaseConcurrency?: number;
serializeDatasetItemId?: (testCase: TestCaseType) => string; // Get the dataset item id from the test case
serializeTestCaseForHumanReview?: (
testCase: TestCaseType,
) => HumanReviewField[];
Expand Down Expand Up @@ -483,6 +490,7 @@ export async function runTestSuite<
testCaseHash: args.testCaseHash,
evaluators: args.evaluators,
fn: args.fn,
serializeDatasetItemId: args.serializeDatasetItemId,
serializeTestCaseForHumanReview: args.serializeTestCaseForHumanReview,
serializeOutputForHumanReview: args.serializeOutputForHumanReview,
});
Expand Down Expand Up @@ -527,6 +535,7 @@ export async function runTestSuite<
testCaseHash: args.testCaseHash,
evaluators: args.evaluators,
fn: args.fn,
serializeDatasetItemId: args.serializeDatasetItemId,
serializeTestCaseForHumanReview: args.serializeTestCaseForHumanReview,
serializeOutputForHumanReview: args.serializeOutputForHumanReview,
gridSearchRunGroupId,
Expand Down

0 comments on commit bc2f886

Please sign in to comment.