evals
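Evals that test how LLMs handle Dagger object state: awareness of the current state, transitioning and undoing between states, passing objects between operations, and reading variables implicitly.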
Installation
dagger install github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3
Entrypoint
Return Type
Evals !
Example
# Call the module's entrypoint. No sub-function follows, so the command must
# not end with a line-continuation backslash.
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call
func (m *myModule) example() *Evals {
return dag.
Evals()
}
@function
def example() -> dag.Evals:
return (
dag.evals()
)
@func()
example(): Evals {
return dag
.evals()
}
Types
Evals 🔗
model() 🔗
Return Type
String !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
model
// example queries the model value of the Evals module.
//
// Model takes a context and performs an API query, so it yields both the
// value and an error; the error is returned to the caller rather than dropped.
func (m *myModule) example(ctx context.Context) (string, error) {
	return dag.
		Evals().
		Model(ctx)
}
@function
async def example() -> str:
return await (
dag.evals()
.model()
)
@func()
async example(): Promise<string> {
return dag
.evals()
.model()
}
attempt() 🔗
Return Type
Integer !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
attempt
// example queries the attempt value of the Evals module.
//
// Attempt takes a context and performs an API query, so it yields both the
// value and an error; the error is returned to the caller rather than dropped.
func (m *myModule) example(ctx context.Context) (int, error) {
	return dag.
		Evals().
		Attempt(ctx)
}
@function
async def example() -> int:
return await (
dag.evals()
.attempt()
)
@func()
async example(): Promise<number> {
return dag
.evals()
.attempt()
}
systemPrompt() 🔗
Return Type
String !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
system-prompt
// example queries the system prompt value of the Evals module.
//
// SystemPrompt takes a context and performs an API query, so it yields both
// the value and an error; the error is returned to the caller rather than
// dropped.
func (m *myModule) example(ctx context.Context) (string, error) {
	return dag.
		Evals().
		SystemPrompt(ctx)
}
@function
async def example() -> str:
return await (
dag.evals()
.system_prompt()
)
@func()
async example(): Promise<string> {
return dag
.evals()
.systemPrompt()
}
withAttempt() 🔗
Return Type
Evals !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
attempt | Integer ! | - | No description provided |
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
with-attempt --attempt integer
func (m *myModule) example(attempt int) *Evals {
return dag.
Evals().
WithAttempt(attempt)
}
@function
def example(attempt: int) -> dag.Evals:
return (
dag.evals()
.with_attempt(attempt)
)
@func()
example(attempt: number): Evals {
return dag
.evals()
.withAttempt(attempt)
}
withModel() 🔗
Return Type
Evals !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
model | String ! | - | No description provided |
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
with-model --model string
func (m *myModule) example(model string) *Evals {
return dag.
Evals().
WithModel(model)
}
@function
def example(model: str) -> dag.Evals:
return (
dag.evals()
.with_model(model)
)
@func()
example(model: string): Evals {
return dag
.evals()
.withModel(model)
}
withSystemPrompt() 🔗
Return Type
Evals !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
prompt | String ! | - | No description provided |
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
with-system-prompt --prompt string
func (m *myModule) example(prompt string) *Evals {
return dag.
Evals().
WithSystemPrompt(prompt)
}
@function
def example(prompt: str) -> dag.Evals:
return (
dag.evals()
.with_system_prompt(prompt)
)
@func()
example(prompt: string): Evals {
return dag
.evals()
.withSystemPrompt(prompt)
}
singleState() 🔗
Test that the model is conscious of a “current state” without needing explicit prompting.
Return Type
Report !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
single-state
func (m *myModule) example() *EvalsReport {
return dag.
Evals().
SingleState()
}
@function
def example() -> dag.EvalsReport:
return (
dag.evals()
.single_state()
)
@func()
example(): EvalsReport {
return dag
.evals()
.singleState()
}
singleStateTransition() 🔗
Test that the model is able to transition back to the initial state, even when it’s not explicitly told that state’s ID.
This tests that the state transition mechanic includes the previous state:
{"current":"Container#1","previous":"Hello#1"}
Return Type
Report !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
single-state-transition
func (m *myModule) example() *EvalsReport {
return dag.
Evals().
SingleStateTransition()
}
@function
def example() -> dag.EvalsReport:
return (
dag.evals()
.single_state_transition()
)
@func()
example(): EvalsReport {
return dag
.evals()
.singleStateTransition()
}
undoSingle() 🔗
Test the model’s eagerness to switch to prior states instead of mutating the current state to undo past actions.
Return Type
Report !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
undo-single
func (m *myModule) example() *EvalsReport {
return dag.
Evals().
UndoSingle()
}
@function
def example() -> dag.EvalsReport:
return (
dag.evals()
.undo_single()
)
@func()
example(): EvalsReport {
return dag
.evals()
.undoSingle()
}
buildMulti() 🔗
Test the model’s ability to pass objects around to one another and execute a series of operations given at once.
Return Type
Report !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
build-multi
func (m *myModule) example() *EvalsReport {
return dag.
Evals().
BuildMulti()
}
@function
def example() -> dag.EvalsReport:
return (
dag.evals()
.build_multi()
)
@func()
example(): EvalsReport {
return dag
.evals()
.buildMulti()
}
buildMultiNoVar() 🔗
BuildMultiNoVar is like BuildMulti but without explicitly referencing the relevant objects, leaving the LLM to figure it out.
Return Type
Report !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
build-multi-no-var
func (m *myModule) example() *EvalsReport {
return dag.
Evals().
BuildMultiNoVar()
}
@function
def example() -> dag.EvalsReport:
return (
dag.evals()
.build_multi_no_var()
)
@func()
example(): EvalsReport {
return dag
.evals()
.buildMultiNoVar()
}
readImplicitVars() 🔗
Test that the LLM is able to access the content of variables without the user having to expand them in the prompt.
SUCCESS RATE (ballpark):
- claude-3-7-sonnet-latest: 100%
- gpt-4o: 100%
- gemini-2.0-flash: 0%
Return Type
Report !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
read-implicit-vars
func (m *myModule) example() *EvalsReport {
return dag.
Evals().
ReadImplicitVars()
}
@function
def example() -> dag.EvalsReport:
return (
dag.evals()
.read_implicit_vars()
)
@func()
example(): EvalsReport {
return dag
.evals()
.readImplicitVars()
}
lLm() 🔗
Return Type
LLM !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
l-l-m
func (m *myModule) example() *LLM {
return dag.
Evals().
LLM()
}
@function
def example() -> dag.LLM:
return (
dag.evals()
.l_l_m()
)
@func()
example(): LLM {
return dag
.evals()
.lLM()
}
Report 🔗
succeeded() 🔗
Return Type
Boolean !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
read-implicit-vars \
succeeded
// example runs the ReadImplicitVars eval and queries whether its report
// succeeded.
//
// Succeeded takes a context and performs an API query, so it yields both the
// value and an error; the error is returned to the caller rather than dropped.
func (m *myModule) example(ctx context.Context) (bool, error) {
	return dag.
		Evals().
		ReadImplicitVars().
		Succeeded(ctx)
}
@function
async def example() -> bool:
return await (
dag.evals()
.read_implicit_vars()
.succeeded()
)
@func()
async example(): Promise<boolean> {
return dag
.evals()
.readImplicitVars()
.succeeded()
}
report() 🔗
Return Type
String !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
read-implicit-vars \
report
// example runs the ReadImplicitVars eval and queries the text of its report.
//
// Report takes a context and performs an API query, so it yields both the
// value and an error; the error is returned to the caller rather than dropped.
func (m *myModule) example(ctx context.Context) (string, error) {
	return dag.
		Evals().
		ReadImplicitVars().
		Report(ctx)
}
@function
async def example() -> str:
return await (
dag.evals()
.read_implicit_vars()
.report()
)
@func()
async example(): Promise<string> {
return dag
.evals()
.readImplicitVars()
.report()
}