evals
No long description provided.
Installation
dagger install github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3
Entrypoint
Return Type
Evals ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
func (m *MyModule) Example() *dagger.Evals {
return dag.
Evals()
}@function
def example() -> dagger.Evals:
return (
dag.evals()
)@func()
example(): Evals {
return dag
.evals()
}
Types
Evals 🔗
model() 🔗
Return Type
String ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
model
func (m *MyModule) Example(ctx context.Context) string {
return dag.
Evals().
Model(ctx)
}@function
async def example() -> str:
return await (
dag.evals()
.model()
)@func()
async example(): Promise<string> {
return dag
.evals()
.model()
}attempt() 🔗
Return Type
Integer ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
attempt
func (m *MyModule) Example(ctx context.Context) int {
return dag.
Evals().
Attempt(ctx)
}@function
async def example() -> int:
return await (
dag.evals()
.attempt()
)@func()
async example(): Promise<number> {
return dag
.evals()
.attempt()
}systemPrompt() 🔗
Return Type
String ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
system-prompt
func (m *MyModule) Example(ctx context.Context) string {
return dag.
Evals().
SystemPrompt(ctx)
}@function
async def example() -> str:
return await (
dag.evals()
.system_prompt()
)@func()
async example(): Promise<string> {
return dag
.evals()
.systemPrompt()
}withAttempt() 🔗
Return Type
Evals !Arguments
| Name | Type | Default Value | Description |
|---|---|---|---|
| attempt | Integer ! | - | No description provided |
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
with-attempt --attempt integer
func (m *MyModule) Example(attempt int) *dagger.Evals {
return dag.
Evals().
WithAttempt(attempt)
}@function
def example(attempt: int) -> dagger.Evals:
return (
dag.evals()
.with_attempt(attempt)
)@func()
example(attempt: number): Evals {
return dag
.evals()
.withAttempt(attempt)
}withModel() 🔗
Return Type
Evals !Arguments
| Name | Type | Default Value | Description |
|---|---|---|---|
| model | String ! | - | No description provided |
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
with-model --model string
func (m *MyModule) Example(model string) *dagger.Evals {
return dag.
Evals().
WithModel(model)
}@function
def example(model: str) -> dagger.Evals:
return (
dag.evals()
.with_model(model)
)@func()
example(model: string): Evals {
return dag
.evals()
.withModel(model)
}withSystemPrompt() 🔗
Return Type
Evals !Arguments
| Name | Type | Default Value | Description |
|---|---|---|---|
| prompt | String ! | - | No description provided |
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
with-system-prompt --prompt string
func (m *MyModule) Example(prompt string) *dagger.Evals {
return dag.
Evals().
WithSystemPrompt(prompt)
}@function
def example(prompt: str) -> dagger.Evals:
return (
dag.evals()
.with_system_prompt(prompt)
)@func()
example(prompt: string): Evals {
return dag
.evals()
.withSystemPrompt(prompt)
}singleState() 🔗
Test that the model is conscious of a “current state” without needing explicit prompting.
Return Type
Report ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
single-state
func (m *MyModule) Example() *dagger.EvalsReport {
return dag.
Evals().
SingleState()
}@function
def example() -> dagger.EvalsReport:
return (
dag.evals()
.single_state()
)@func()
example(): EvalsReport {
return dag
.evals()
.singleState()
}singleStateTransition() 🔗
Test that the model is able to transition back to its initial state, even when it is not explicitly told that state’s ID.
This tests that the state transition mechanic includes the previous state:
{"current":"Container#1","previous":"Hello#1"}
Return Type
Report ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
single-state-transition
func (m *MyModule) Example() *dagger.EvalsReport {
return dag.
Evals().
SingleStateTransition()
}@function
def example() -> dagger.EvalsReport:
return (
dag.evals()
.single_state_transition()
)@func()
example(): EvalsReport {
return dag
.evals()
.singleStateTransition()
}undoSingle() 🔗
Test the model’s eagerness to switch to prior states instead of mutating the current state to undo past actions.
Return Type
Report ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
undo-single
func (m *MyModule) Example() *dagger.EvalsReport {
return dag.
Evals().
UndoSingle()
}@function
def example() -> dagger.EvalsReport:
return (
dag.evals()
.undo_single()
)@func()
example(): EvalsReport {
return dag
.evals()
.undoSingle()
}buildMulti() 🔗
Test the model’s ability to pass objects around to one another and execute a series of operations given at once.
Return Type
Report ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
build-multi
func (m *MyModule) Example() *dagger.EvalsReport {
return dag.
Evals().
BuildMulti()
}@function
def example() -> dagger.EvalsReport:
return (
dag.evals()
.build_multi()
)@func()
example(): EvalsReport {
return dag
.evals()
.buildMulti()
}buildMultiNoVar() 🔗
BuildMultiNoVar is like BuildMulti but without explicitly referencing the relevant objects, leaving the LLM to figure it out.
Return Type
Report ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
build-multi-no-var
func (m *MyModule) Example() *dagger.EvalsReport {
return dag.
Evals().
BuildMultiNoVar()
}@function
def example() -> dagger.EvalsReport:
return (
dag.evals()
.build_multi_no_var()
)@func()
example(): EvalsReport {
return dag
.evals()
.buildMultiNoVar()
}readImplicitVars() 🔗
Test that the LLM is able to access the content of variables without the user having to expand them in the prompt.
SUCCESS RATE (ballpark):
- claude-3-7-sonnet-latest: 100%
- gpt-4o: 100%
- gemini-2.0-flash: 0%
Return Type
Report ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
read-implicit-vars
func (m *MyModule) Example() *dagger.EvalsReport {
return dag.
Evals().
ReadImplicitVars()
}@function
def example() -> dagger.EvalsReport:
return (
dag.evals()
.read_implicit_vars()
)@func()
example(): EvalsReport {
return dag
.evals()
.readImplicitVars()
}lLm() 🔗
Return Type
LLM ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
l-l-m
func (m *MyModule) Example() *dagger.LLM {
return dag.
Evals().
LLM()
}@function
def example() -> dagger.LLM:
return (
dag.evals()
.l_l_m()
)@func()
example(): LLM {
return dag
.evals()
.lLM()
}
Report 🔗
succeeded() 🔗
Return Type
Boolean ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
read-implicit-vars \
succeeded
func (m *MyModule) Example(ctx context.Context) bool {
return dag.
Evals().
ReadImplicitVars().
Succeeded(ctx)
}@function
async def example() -> bool:
return await (
dag.evals()
.read_implicit_vars()
.succeeded()
)@func()
async example(): Promise<boolean> {
return dag
.evals()
.readImplicitVars()
.succeeded()
}report() 🔗
Return Type
String ! Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
read-implicit-vars \
report
func (m *MyModule) Example(ctx context.Context) string {
return dag.
Evals().
ReadImplicitVars().
Report(ctx)
}@function
async def example() -> str:
return await (
dag.evals()
.read_implicit_vars()
.report()
)@func()
async example(): Promise<string> {
return dag
.evals()
.readImplicitVars()
.report()
}