workspace

This module provides the core workspace functionality for running evaluations
against various AI models, managing system prompts, and analyzing results.

It is intended for internal use within the Evaluator.

Installation

dagger install github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886

Entrypoint

Return Type

Workspace

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call

func (m *MyModule) Example() *dagger.Workspace  {
	return dag.
			Workspace()
}

@function
def example() -> dagger.Workspace:
	return (
		dag.workspace()
	)

@func()
example(): Workspace {
	return dag
		.workspace()
}

Types

Workspace 🔗

systemPrompt() 🔗

The current system prompt.

Return Type

String !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 system-prompt

func (m *MyModule) Example(ctx context.Context) string  {
	return dag.
			Workspace().
			SystemPrompt(ctx)
}

@function
async def example() -> str:
	return await (
		dag.workspace()
		.system_prompt()
	)

@func()
async example(): Promise<string> {
	return dag
		.workspace()
		.systemPrompt()
}

disableDefaultSystemPrompt() 🔗

Whether to disable Dagger’s built-in system prompt.

Return Type

Boolean !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 disable-default-system-prompt

func (m *MyModule) Example(ctx context.Context) bool  {
	return dag.
			Workspace().
			DisableDefaultSystemPrompt(ctx)
}

@function
async def example() -> bool:
	return await (
		dag.workspace()
		.disable_default_system_prompt()
	)

@func()
async example(): Promise<boolean> {
	return dag
		.workspace()
		.disableDefaultSystemPrompt()
}

evals() 🔗

Evaluations to perform.

Return Type

[Interface ! ] !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 evals

func (m *MyModule) Example() []  {
	return dag.
			Workspace().
			Evals()
}

@function
def example() -> List[]:
	return (
		dag.workspace()
		.evals()
	)

@func()
example(): [] {
	return dag
		.workspace()
		.evals()
}

findings() 🔗

Observations made throughout running evaluations.

Return Type

[String ! ] !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 findings

func (m *MyModule) Example(ctx context.Context) []string  {
	return dag.
			Workspace().
			Findings(ctx)
}

@function
async def example() -> List[str]:
	return await (
		dag.workspace()
		.findings()
	)

@func()
async example(): Promise<string[]> {
	return dag
		.workspace()
		.findings()
}

withoutDefaultSystemPrompt() 🔗

Disable Dagger’s built-in system prompt for future evaluations.

Return Type

Workspace !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 without-default-system-prompt

func (m *MyModule) Example() *dagger.Workspace  {
	return dag.
			Workspace().
			WithoutDefaultSystemPrompt()
}

@function
def example() -> dagger.Workspace:
	return (
		dag.workspace()
		.without_default_system_prompt()
	)

@func()
example(): Workspace {
	return dag
		.workspace()
		.withoutDefaultSystemPrompt()
}

withSystemPrompt() 🔗

Set the system prompt for future evaluations.

Return Type

Workspace !

Arguments

Name	Type	Default Value	Description
prompt	String !	-	The system prompt to use for evaluations.

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 with-system-prompt --prompt string

func (m *MyModule) Example(prompt string) *dagger.Workspace  {
	return dag.
			Workspace().
			WithSystemPrompt(prompt)
}

@function
def example(prompt: str) -> dagger.Workspace:
	return (
		dag.workspace()
		.with_system_prompt(prompt)
	)

@func()
example(prompt: string): Workspace {
	return dag
		.workspace()
		.withSystemPrompt(prompt)
}

withSystemPromptFile() 🔗

Set the system prompt for future evaluations from the contents of a file.

Return Type

Workspace !

Arguments

Name	Type	Default Value	Description
file	File !	-	The file containing the system prompt to use.

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 with-system-prompt-file --file file:path

func (m *MyModule) Example(file *dagger.File) *dagger.Workspace  {
	return dag.
			Workspace().
			WithSystemPromptFile(file)
}

@function
def example(file: dagger.File) -> dagger.Workspace:
	return (
		dag.workspace()
		.with_system_prompt_file(file)
	)

@func()
example(file: File): Workspace {
	return dag
		.workspace()
		.withSystemPromptFile(file)
}

backoff() 🔗

Backoff sleeps for the given duration in seconds.

Use this if you’re getting rate limited and have nothing better to do.

Return Type

Workspace !

Arguments

Name	Type	Default Value	Description
seconds	Integer !	-	Number of seconds to sleep.

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 backoff --seconds integer

func (m *MyModule) Example(seconds int) *dagger.Workspace  {
	return dag.
			Workspace().
			Backoff(seconds)
}

@function
def example(seconds: int) -> dagger.Workspace:
	return (
		dag.workspace()
		.backoff(seconds)
	)

@func()
example(seconds: number): Workspace {
	return dag
		.workspace()
		.backoff(seconds)
}

withEval() 🔗

Add an evaluation to the workspace.

Return Type

Workspace !

Arguments

Name	Type	Default Value	Description
eval	Interface !	-	The evaluation to add to the workspace.

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 with-eval

func (m *MyModule) Example(eval ) *dagger.Workspace  {
	return dag.
			Workspace().
			WithEval(eval)
}

@function
def example(eval: ) -> dagger.Workspace:
	return (
		dag.workspace()
		.with_eval(eval)
	)

@func()
example(eval: ): Workspace {
	return dag
		.workspace()
		.withEval(eval)
}

withEvals() 🔗

Add a list of evaluations to the workspace.

Return Type

Workspace !

Arguments

Name	Type	Default Value	Description
evals	[Interface ! ] !	-	The list of evaluations to add to the workspace.

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 with-evals

func (m *MyModule) Example(evals []) *dagger.Workspace  {
	return dag.
			Workspace().
			WithEvals(evals)
}

@function
def example(evals: List[]) -> dagger.Workspace:
	return (
		dag.workspace()
		.with_evals(evals)
	)

@func()
example(evals: []): Workspace {
	return dag
		.workspace()
		.withEvals(evals)
}

evalNames() 🔗

The list of possible evals you can run.

Return Type

[String ! ] !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 eval-names

func (m *MyModule) Example(ctx context.Context) []string  {
	return dag.
			Workspace().
			EvalNames(ctx)
}

@function
async def example() -> List[str]:
	return await (
		dag.workspace()
		.eval_names()
	)

@func()
async example(): Promise<string[]> {
	return dag
		.workspace()
		.evalNames()
}

knownModels() 🔗

The list of models that you can run evaluations against.

Return Type

[String ! ] !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 known-models

func (m *MyModule) Example(ctx context.Context) []string  {
	return dag.
			Workspace().
			KnownModels(ctx)
}

@function
async def example() -> List[str]:
	return await (
		dag.workspace()
		.known_models()
	)

@func()
async example(): Promise<string[]> {
	return dag
		.workspace()
		.knownModels()
}

withFinding() 🔗

Record an interesting finding after performing evaluations.

Return Type

Workspace !

Arguments

Name	Type	Default Value	Description
finding	String !	-	The finding or observation to record.

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 with-finding --finding string

func (m *MyModule) Example(finding string) *dagger.Workspace  {
	return dag.
			Workspace().
			WithFinding(finding)
}

@function
def example(finding: str) -> dagger.Workspace:
	return (
		dag.workspace()
		.with_finding(finding)
	)

@func()
example(finding: string): Workspace {
	return dag
		.workspace()
		.withFinding(finding)
}

evaluate() 🔗

Run an evaluation and return its report.

Return Type

AttemptsReport !

Arguments

Name	Type	Default Value	Description
name	String !	-	The evaluation to run. For a list of possible values, call evalNames.
model	String !	""	The model to evaluate.
attempts	Integer	-	The number of attempts to evaluate across. If omitted, a sensible per-provider default is used.

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 evaluate --name string --model string

func (m *MyModule) Example(name string, model string) *dagger.WorkspaceAttemptsReport  {
	return dag.
			Workspace().
			Evaluate(name, model)
}

@function
def example(name: str, model: str) -> dagger.WorkspaceAttemptsReport:
	return (
		dag.workspace()
		.evaluate(name, model)
	)

@func()
example(name: string, model: string): WorkspaceAttemptsReport {
	return dag
		.workspace()
		.evaluate(name, model)
}

AttemptsReport 🔗

AttemptsReport contains the aggregated results from multiple evaluation attempts.

report() 🔗

Return Type

String !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 evaluate --name string --model string \
 report

func (m *MyModule) Example(ctx context.Context, name string, model string) string  {
	return dag.
			Workspace().
			Evaluate(name, model).
			Report(ctx)
}

@function
async def example(name: str, model: str) -> str:
	return await (
		dag.workspace()
		.evaluate(name, model)
		.report()
	)

@func()
async example(name: string, model: string): Promise<string> {
	return dag
		.workspace()
		.evaluate(name, model)
		.report()
}

successRate() 🔗

Return Type

Float !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 evaluate --name string --model string \
 success-rate

func (m *MyModule) Example(ctx context.Context, name string, model string) float64  {
	return dag.
			Workspace().
			Evaluate(name, model).
			SuccessRate(ctx)
}

@function
async def example(name: str, model: str) -> float:
	return await (
		dag.workspace()
		.evaluate(name, model)
		.success_rate()
	)

@func()
async example(name: string, model: string): Promise<number> {
	return dag
		.workspace()
		.evaluate(name, model)
		.successRate()
}

succeededAttempts() 🔗

Return Type

Integer !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 evaluate --name string --model string \
 succeeded-attempts

func (m *MyModule) Example(ctx context.Context, name string, model string) int  {
	return dag.
			Workspace().
			Evaluate(name, model).
			SucceededAttempts(ctx)
}

@function
async def example(name: str, model: str) -> int:
	return await (
		dag.workspace()
		.evaluate(name, model)
		.succeeded_attempts()
	)

@func()
async example(name: string, model: string): Promise<number> {
	return dag
		.workspace()
		.evaluate(name, model)
		.succeededAttempts()
}

totalAttempts() 🔗

Return Type

Integer !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 evaluate --name string --model string \
 total-attempts

func (m *MyModule) Example(ctx context.Context, name string, model string) int  {
	return dag.
			Workspace().
			Evaluate(name, model).
			TotalAttempts(ctx)
}

@function
async def example(name: str, model: str) -> int:
	return await (
		dag.workspace()
		.evaluate(name, model)
		.total_attempts()
	)

@func()
async example(name: string, model: string): Promise<number> {
	return dag
		.workspace()
		.evaluate(name, model)
		.totalAttempts()
}

inputTokens() 🔗

Return Type

Integer !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 evaluate --name string --model string \
 input-tokens

func (m *MyModule) Example(ctx context.Context, name string, model string) int  {
	return dag.
			Workspace().
			Evaluate(name, model).
			InputTokens(ctx)
}

@function
async def example(name: str, model: str) -> int:
	return await (
		dag.workspace()
		.evaluate(name, model)
		.input_tokens()
	)

@func()
async example(name: string, model: string): Promise<number> {
	return dag
		.workspace()
		.evaluate(name, model)
		.inputTokens()
}

outputTokens() 🔗

Return Type

Integer !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 evaluate --name string --model string \
 output-tokens

func (m *MyModule) Example(ctx context.Context, name string, model string) int  {
	return dag.
			Workspace().
			Evaluate(name, model).
			OutputTokens(ctx)
}

@function
async def example(name: str, model: str) -> int:
	return await (
		dag.workspace()
		.evaluate(name, model)
		.output_tokens()
	)

@func()
async example(name: string, model: string): Promise<number> {
	return dag
		.workspace()
		.evaluate(name, model)
		.outputTokens()
}

cachedTokenReads() 🔗

Return Type

Integer !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 evaluate --name string --model string \
 cached-token-reads

func (m *MyModule) Example(ctx context.Context, name string, model string) int  {
	return dag.
			Workspace().
			Evaluate(name, model).
			CachedTokenReads(ctx)
}

@function
async def example(name: str, model: str) -> int:
	return await (
		dag.workspace()
		.evaluate(name, model)
		.cached_token_reads()
	)

@func()
async example(name: string, model: string): Promise<number> {
	return dag
		.workspace()
		.evaluate(name, model)
		.cachedTokenReads()
}

cachedTokenWrites() 🔗

Return Type

Integer !

Example

dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
 evaluate --name string --model string \
 cached-token-writes

func (m *MyModule) Example(ctx context.Context, name string, model string) int  {
	return dag.
			Workspace().
			Evaluate(name, model).
			CachedTokenWrites(ctx)
}

@function
async def example(name: str, model: str) -> int:
	return await (
		dag.workspace()
		.evaluate(name, model)
		.cached_token_writes()
	)

@func()
async example(name: string, model: string): Promise<number> {
	return dag
		.workspace()
		.evaluate(name, model)
		.cachedTokenWrites()
}