Dagger
Search

evals

No long description provided.

Installation

dagger install github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3

Entrypoint

Return Type
Evals !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call
func (m *myModule) example() *Evals  {
	return dag.
			Evals()
}
@function
def example() -> dag.Evals:
	return (
		dag.evals()
	)
@func()
example(): Evals {
	return dag
		.evals()
}

Types

Evals 🔗

model() 🔗

Return Type
String !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 model
func (m *myModule) example(ctx context.Context) string  {
	return dag.
			Evals().
			Model(ctx)
}
@function
async def example() -> str:
	return await (
		dag.evals()
		.model()
	)
@func()
async example(): Promise<string> {
	return dag
		.evals()
		.model()
}

attempt() 🔗

Return Type
Integer !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 attempt
func (m *myModule) example(ctx context.Context) int  {
	return dag.
			Evals().
			Attempt(ctx)
}
@function
async def example() -> int:
	return await (
		dag.evals()
		.attempt()
	)
@func()
async example(): Promise<number> {
	return dag
		.evals()
		.attempt()
}

systemPrompt() 🔗

Return Type
String !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 system-prompt
func (m *myModule) example(ctx context.Context) string  {
	return dag.
			Evals().
			SystemPrompt(ctx)
}
@function
async def example() -> str:
	return await (
		dag.evals()
		.system_prompt()
	)
@func()
async example(): Promise<string> {
	return dag
		.evals()
		.systemPrompt()
}

withAttempt() 🔗

Return Type
Evals !
Arguments
Name | Type | Default Value | Description
attempt | Integer ! | - | No description provided
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 with-attempt --attempt integer
func (m *myModule) example(attempt int) *Evals  {
	return dag.
			Evals().
			WithAttempt(attempt)
}
@function
def example(attempt: int) -> dag.Evals:
	return (
		dag.evals()
		.with_attempt(attempt)
	)
@func()
example(attempt: number): Evals {
	return dag
		.evals()
		.withAttempt(attempt)
}

withModel() 🔗

Return Type
Evals !
Arguments
Name | Type | Default Value | Description
model | String ! | - | No description provided
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 with-model --model string
func (m *myModule) example(model string) *Evals  {
	return dag.
			Evals().
			WithModel(model)
}
@function
def example(model: str) -> dag.Evals:
	return (
		dag.evals()
		.with_model(model)
	)
@func()
example(model: string): Evals {
	return dag
		.evals()
		.withModel(model)
}

withSystemPrompt() 🔗

Return Type
Evals !
Arguments
Name | Type | Default Value | Description
prompt | String ! | - | No description provided
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 with-system-prompt --prompt string
func (m *myModule) example(prompt string) *Evals  {
	return dag.
			Evals().
			WithSystemPrompt(prompt)
}
@function
def example(prompt: str) -> dag.Evals:
	return (
		dag.evals()
		.with_system_prompt(prompt)
	)
@func()
example(prompt: string): Evals {
	return dag
		.evals()
		.withSystemPrompt(prompt)
}

singleState() 🔗

Test that the model is conscious of a “current state” without needing explicit prompting.

Return Type
Report !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 single-state
func (m *myModule) example() *EvalsReport  {
	return dag.
			Evals().
			SingleState()
}
@function
def example() -> dag.EvalsReport:
	return (
		dag.evals()
		.single_state()
	)
@func()
example(): EvalsReport {
	return dag
		.evals()
		.singleState()
}

singleStateTransition() 🔗

Test that the model is able to transition back to its initial state, even when it is not explicitly told that state’s ID.

This tests that the state transition mechanic includes the previous state:

{"current":"Container#1","previous":"Hello#1"}
Return Type
Report !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 single-state-transition
func (m *myModule) example() *EvalsReport  {
	return dag.
			Evals().
			SingleStateTransition()
}
@function
def example() -> dag.EvalsReport:
	return (
		dag.evals()
		.single_state_transition()
	)
@func()
example(): EvalsReport {
	return dag
		.evals()
		.singleStateTransition()
}

undoSingle() 🔗

Test the model’s eagerness to switch to prior states instead of mutating the current state to undo past actions.

Return Type
Report !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 undo-single
func (m *myModule) example() *EvalsReport  {
	return dag.
			Evals().
			UndoSingle()
}
@function
def example() -> dag.EvalsReport:
	return (
		dag.evals()
		.undo_single()
	)
@func()
example(): EvalsReport {
	return dag
		.evals()
		.undoSingle()
}

buildMulti() 🔗

Test the model’s ability to pass objects around to one another and execute a series of operations given at once.

Return Type
Report !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 build-multi
func (m *myModule) example() *EvalsReport  {
	return dag.
			Evals().
			BuildMulti()
}
@function
def example() -> dag.EvalsReport:
	return (
		dag.evals()
		.build_multi()
	)
@func()
example(): EvalsReport {
	return dag
		.evals()
		.buildMulti()
}

buildMultiNoVar() 🔗

BuildMultiNoVar is like BuildMulti but without explicitly referencing the relevant objects, leaving the LLM to figure it out.

Return Type
Report !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 build-multi-no-var
func (m *myModule) example() *EvalsReport  {
	return dag.
			Evals().
			BuildMultiNoVar()
}
@function
def example() -> dag.EvalsReport:
	return (
		dag.evals()
		.build_multi_no_var()
	)
@func()
example(): EvalsReport {
	return dag
		.evals()
		.buildMultiNoVar()
}

readImplicitVars() 🔗

Test that the LLM is able to access the content of variables without the user having to expand them in the prompt.

SUCCESS RATE (ballpark):
- claude-3-7-sonnet-latest: 100%
- gpt-4o: 100%
- gemini-2.0-flash: 0%

Return Type
Report !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 read-implicit-vars
func (m *myModule) example() *EvalsReport  {
	return dag.
			Evals().
			ReadImplicitVars()
}
@function
def example() -> dag.EvalsReport:
	return (
		dag.evals()
		.read_implicit_vars()
	)
@func()
example(): EvalsReport {
	return dag
		.evals()
		.readImplicitVars()
}

lLm() 🔗

Return Type
LLM !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 l-l-m
func (m *myModule) example() *LLM  {
	return dag.
			Evals().
			LLM()
}
@function
def example() -> dag.LLM:
	return (
		dag.evals()
		.l_l_m()
	)
@func()
example(): LLM {
	return dag
		.evals()
		.lLM()
}

Report 🔗

succeeded() 🔗

Return Type
Boolean !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 read-implicit-vars \
 succeeded
func (m *myModule) example(ctx context.Context) bool  {
	return dag.
			Evals().
			ReadImplicitVars().
			Succeeded(ctx)
}
@function
async def example() -> bool:
	return await (
		dag.evals()
		.read_implicit_vars()
		.succeeded()
	)
@func()
async example(): Promise<boolean> {
	return dag
		.evals()
		.readImplicitVars()
		.succeeded()
}

report() 🔗

Return Type
String !
Example
dagger -m github.com/vito/daggerverse/botsbuildingbots/evals@ac90240ed0651b92328d78dea13ae96d8791e1c3 call \
 read-implicit-vars \
 report
func (m *myModule) example(ctx context.Context) string  {
	return dag.
			Evals().
			ReadImplicitVars().
			Report(ctx)
}
@function
async def example() -> str:
	return await (
		dag.evals()
		.read_implicit_vars()
		.report()
	)
@func()
async example(): Promise<string> {
	return dag
		.evals()
		.readImplicitVars()
		.report()
}