workspace
This module provides the core workspace functionality for running evaluations against various AI models, managing system prompts, and analyzing results.
It is intended for internal use within the Evaluator.
Installation
dagger install github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886
Entrypoint
Return Type
Workspace
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
func (m *MyModule) Example() *dagger.Workspace {
return dag.
Workspace()
}
@function
def example() -> dagger.Workspace:
return (
dag.workspace()
)
@func()
example(): Workspace {
return dag
.workspace()
}
Types
Workspace 🔗
systemPrompt() 🔗
The current system prompt.
Return Type
String !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
system-prompt
func (m *MyModule) Example(ctx context.Context) string {
return dag.
Workspace().
SystemPrompt(ctx)
}
@function
async def example() -> str:
return await (
dag.workspace()
.system_prompt()
)
@func()
async example(): Promise<string> {
return dag
.workspace()
.systemPrompt()
}
disableDefaultSystemPrompt() 🔗
Whether to disable Dagger’s built-in system prompt.
Return Type
Boolean !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
disable-default-system-prompt
func (m *MyModule) Example(ctx context.Context) bool {
return dag.
Workspace().
DisableDefaultSystemPrompt(ctx)
}
@function
async def example() -> bool:
return await (
dag.workspace()
.disable_default_system_prompt()
)
@func()
async example(): Promise<boolean> {
return dag
.workspace()
.disableDefaultSystemPrompt()
}
evals() 🔗
Evaluations to perform.
Return Type
[Interface ! ] !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
evals
func (m *MyModule) Example() [] {
return dag.
Workspace().
Evals()
}
@function
def example() -> List[]:
return (
dag.workspace()
.evals()
)
@func()
example(): [] {
return dag
.workspace()
.evals()
}
findings() 🔗
Observations made throughout running evaluations.
Return Type
[String ! ] !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
findings
func (m *MyModule) Example(ctx context.Context) []string {
return dag.
Workspace().
Findings(ctx)
}
@function
async def example() -> List[str]:
return await (
dag.workspace()
.findings()
)
@func()
async example(): Promise<string[]> {
return dag
.workspace()
.findings()
}
withoutDefaultSystemPrompt() 🔗
Disable Dagger’s built-in system prompt for future evaluations.
Return Type
Workspace !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
without-default-system-prompt
func (m *MyModule) Example() *dagger.Workspace {
return dag.
Workspace().
WithoutDefaultSystemPrompt()
}
@function
def example() -> dagger.Workspace:
return (
dag.workspace()
.without_default_system_prompt()
)
@func()
example(): Workspace {
return dag
.workspace()
.withoutDefaultSystemPrompt()
}
withSystemPrompt() 🔗
Set the system prompt for future evaluations.
Return Type
Workspace !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
prompt | String ! | - | The system prompt to use for evaluations. |
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
with-system-prompt --prompt string
func (m *MyModule) Example(prompt string) *dagger.Workspace {
return dag.
Workspace().
WithSystemPrompt(prompt)
}
@function
def example(prompt: str) -> dagger.Workspace:
return (
dag.workspace()
.with_system_prompt(prompt)
)
@func()
example(prompt: string): Workspace {
return dag
.workspace()
.withSystemPrompt(prompt)
}
withSystemPromptFile() 🔗
Set the system prompt for future evaluations from the contents of a file.
Return Type
Workspace !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
file | File ! | - | The file containing the system prompt to use. |
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
with-system-prompt-file --file file:path
func (m *MyModule) Example(file *dagger.File) *dagger.Workspace {
return dag.
Workspace().
WithSystemPromptFile(file)
}
@function
def example(file: dagger.File) -> dagger.Workspace:
return (
dag.workspace()
.with_system_prompt_file(file)
)
@func()
example(file: File): Workspace {
return dag
.workspace()
.withSystemPromptFile(file)
}
backoff() 🔗
Backoff sleeps for the given duration in seconds.
Use this if you’re getting rate limited and have nothing better to do.
Return Type
Workspace !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
seconds | Integer ! | - | Number of seconds to sleep. |
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
backoff --seconds integer
func (m *MyModule) Example(seconds int) *dagger.Workspace {
return dag.
Workspace().
Backoff(seconds)
}
@function
def example(seconds: int) -> dagger.Workspace:
return (
dag.workspace()
.backoff(seconds)
)
@func()
example(seconds: number): Workspace {
return dag
.workspace()
.backoff(seconds)
}
withEval() 🔗
Register an eval to perform.
Return Type
Workspace !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
eval | Interface ! | - | The evaluation to add to the workspace. |
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
with-eval
func (m *MyModule) Example(eval ) *dagger.Workspace {
return dag.
Workspace().
WithEval(eval)
}
@function
def example(eval: ) -> dagger.Workspace:
return (
dag.workspace()
.with_eval(eval)
)
@func()
example(eval: ): Workspace {
return dag
.workspace()
.withEval(eval)
}
withEvals() 🔗
Register evals to perform.
Return Type
Workspace !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
evals | [Interface ! ] ! | - | The list of evaluations to add to the workspace. |
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
with-evals
func (m *MyModule) Example(evals []) *dagger.Workspace {
return dag.
Workspace().
WithEvals(evals)
}
@function
def example(evals: List[]) -> dagger.Workspace:
return (
dag.workspace()
.with_evals(evals)
)
@func()
example(evals: []): Workspace {
return dag
.workspace()
.withEvals(evals)
}
evalNames() 🔗
The list of possible evals you can run.
Return Type
[String ! ] !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
eval-names
func (m *MyModule) Example(ctx context.Context) []string {
return dag.
Workspace().
EvalNames(ctx)
}
@function
async def example() -> List[str]:
return await (
dag.workspace()
.eval_names()
)
@func()
async example(): Promise<string[]> {
return dag
.workspace()
.evalNames()
}
knownModels() 🔗
The list of models that you can run evaluations against.
Return Type
[String ! ] !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
known-models
func (m *MyModule) Example(ctx context.Context) []string {
return dag.
Workspace().
KnownModels(ctx)
}
@function
async def example() -> List[str]:
return await (
dag.workspace()
.known_models()
)
@func()
async example(): Promise<string[]> {
return dag
.workspace()
.knownModels()
}
withFinding() 🔗
Record an interesting finding after performing evaluations.
Return Type
Workspace !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
finding | String ! | - | The finding or observation to record. |
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
with-finding --finding string
func (m *MyModule) Example(finding string) *dagger.Workspace {
return dag.
Workspace().
WithFinding(finding)
}
@function
def example(finding: str) -> dagger.Workspace:
return (
dag.workspace()
.with_finding(finding)
)
@func()
example(finding: string): Workspace {
return dag
.workspace()
.withFinding(finding)
}
evaluate() 🔗
Run an evaluation and return its report.
Return Type
AttemptsReport !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
name | String ! | - | The evaluation to run. For a list of possible values, call evalNames. |
model | String ! | "" | The model to evaluate. |
attempts | Integer | - | The number of attempts to evaluate across. Has a sane default per-provider. |
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
evaluate --name string --model string
func (m *MyModule) Example(name string, model string) *dagger.WorkspaceAttemptsReport {
return dag.
Workspace().
Evaluate(name, model)
}
@function
def example(name: str, model: str) -> dagger.WorkspaceAttemptsReport:
return (
dag.workspace()
.evaluate(name, model)
)
@func()
example(name: string, model: string): WorkspaceAttemptsReport {
return dag
.workspace()
.evaluate(name, model)
}
AttemptsReport 🔗
AttemptsReport contains the aggregated results from multiple evaluation attempts.
report() 🔗
Return Type
String !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
evaluate --name string --model string \
report
func (m *MyModule) Example(ctx context.Context, name string, model string) string {
return dag.
Workspace().
Evaluate(name, model).
Report(ctx)
}
@function
async def example(name: str, model: str) -> str:
return await (
dag.workspace()
.evaluate(name, model)
.report()
)
@func()
async example(name: string, model: string): Promise<string> {
return dag
.workspace()
.evaluate(name, model)
.report()
}
successRate() 🔗
Return Type
Float !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
evaluate --name string --model string \
success-rate
func (m *MyModule) Example(name string, model string) {
return dag.
Workspace().
Evaluate(name, model).
SuccessRate()
}
@function
def example(name: str, model: str) -> :
return (
dag.workspace()
.evaluate(name, model)
.success_rate()
)
@func()
example(name: string, model: string): {
return dag
.workspace()
.evaluate(name, model)
.successRate()
}
succeededAttempts() 🔗
Return Type
Integer !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
evaluate --name string --model string \
succeeded-attempts
func (m *MyModule) Example(ctx context.Context, name string, model string) int {
return dag.
Workspace().
Evaluate(name, model).
SucceededAttempts(ctx)
}
@function
async def example(name: str, model: str) -> int:
return await (
dag.workspace()
.evaluate(name, model)
.succeeded_attempts()
)
@func()
async example(name: string, model: string): Promise<number> {
return dag
.workspace()
.evaluate(name, model)
.succeededAttempts()
}
totalAttempts() 🔗
Return Type
Integer !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
evaluate --name string --model string \
total-attempts
func (m *MyModule) Example(ctx context.Context, name string, model string) int {
return dag.
Workspace().
Evaluate(name, model).
TotalAttempts(ctx)
}
@function
async def example(name: str, model: str) -> int:
return await (
dag.workspace()
.evaluate(name, model)
.total_attempts()
)
@func()
async example(name: string, model: string): Promise<number> {
return dag
.workspace()
.evaluate(name, model)
.totalAttempts()
}
inputTokens() 🔗
Return Type
Integer !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
evaluate --name string --model string \
input-tokens
func (m *MyModule) Example(ctx context.Context, name string, model string) int {
return dag.
Workspace().
Evaluate(name, model).
InputTokens(ctx)
}
@function
async def example(name: str, model: str) -> int:
return await (
dag.workspace()
.evaluate(name, model)
.input_tokens()
)
@func()
async example(name: string, model: string): Promise<number> {
return dag
.workspace()
.evaluate(name, model)
.inputTokens()
}
outputTokens() 🔗
Return Type
Integer !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
evaluate --name string --model string \
output-tokens
func (m *MyModule) Example(ctx context.Context, name string, model string) int {
return dag.
Workspace().
Evaluate(name, model).
OutputTokens(ctx)
}
@function
async def example(name: str, model: str) -> int:
return await (
dag.workspace()
.evaluate(name, model)
.output_tokens()
)
@func()
async example(name: string, model: string): Promise<number> {
return dag
.workspace()
.evaluate(name, model)
.outputTokens()
}
cachedTokenReads() 🔗
Return Type
Integer !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
evaluate --name string --model string \
cached-token-reads
func (m *MyModule) Example(ctx context.Context, name string, model string) int {
return dag.
Workspace().
Evaluate(name, model).
CachedTokenReads(ctx)
}
@function
async def example(name: str, model: str) -> int:
return await (
dag.workspace()
.evaluate(name, model)
.cached_token_reads()
)
@func()
async example(name: string, model: string): Promise<number> {
return dag
.workspace()
.evaluate(name, model)
.cachedTokenReads()
}
cachedTokenWrites() 🔗
Return Type
Integer !
Example
dagger -m github.com/pythoninthegrass/dagger/modules/evaluator/workspace@2cee53d1d3e19fa5203b7dd91c9468d45ae04886 call \
evaluate --name string --model string \
cached-token-writes
func (m *MyModule) Example(ctx context.Context, name string, model string) int {
return dag.
Workspace().
Evaluate(name, model).
CachedTokenWrites(ctx)
}
@function
async def example(name: str, model: str) -> int:
return await (
dag.workspace()
.evaluate(name, model)
.cached_token_writes()
)
@func()
async example(name: string, model: string): Promise<number> {
return dag
.workspace()
.evaluate(name, model)
.cachedTokenWrites()
}