evaluator

A module for running evals across models and for iterating on the system prompt that is applied to those evals.

Installation

dagger install github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1

Entrypoint

Return Type

Evaluator !

Arguments

Name	Type	Default Value	Description
model	String	-	Model to use for the evaluator agent.

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call

func (m *MyModule) Example() *dagger.Evaluator  {
	return dag.
			Evaluator()
}

@function
def example() -> dagger.Evaluator:
	return (
		dag.evaluator()
	)

@func()
example(): Evaluator {
	return dag
		.evaluator()
}

Types

Evaluator 🔗

docs() 🔗

The documentation for the tool-calling scheme that a prompt is to be generated for.

Return Type

File !

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 docs

func (m *MyModule) Example() *dagger.File  {
	return dag.
			Evaluator().
			Docs()
}

@function
def example() -> dagger.File:
	return (
		dag.evaluator()
		.docs()
	)

@func()
example(): File {
	return dag
		.evaluator()
		.docs()
}

systemPrompt() 🔗

A system prompt to apply to all evals.

Return Type

File !

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 system-prompt

func (m *MyModule) Example() *dagger.File  {
	return dag.
			Evaluator().
			SystemPrompt()
}

@function
def example() -> dagger.File:
	return (
		dag.evaluator()
		.system_prompt()
	)

@func()
example(): File {
	return dag
		.evaluator()
		.systemPrompt()
}

disableDefaultSystemPrompt() 🔗

Whether to disable the default system prompt.

Return Type

Boolean !

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 disable-default-system-prompt

func (m *MyModule) Example(ctx context.Context) bool  {
	return dag.
			Evaluator().
			DisableDefaultSystemPrompt(ctx)
}

@function
async def example() -> bool:
	return await (
		dag.evaluator()
		.disable_default_system_prompt()
	)

@func()
async example(): Promise<boolean> {
	return dag
		.evaluator()
		.disableDefaultSystemPrompt()
}

evaluatorModel() 🔗

Return Type

String !

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 evaluator-model

func (m *MyModule) Example(ctx context.Context) string  {
	return dag.
			Evaluator().
			EvaluatorModel(ctx)
}

@function
async def example() -> str:
	return await (
		dag.evaluator()
		.evaluator_model()
	)

@func()
async example(): Promise<string> {
	return dag
		.evaluator()
		.evaluatorModel()
}

compare() 🔗

Return Type

String !

Arguments

Name	Type	Default Value	Description
before	File !	-	No description provided
after	File !	-	No description provided

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 compare --before file:path --after file:path

func (m *MyModule) Example(ctx context.Context, before *dagger.File, after *dagger.File) string  {
	return dag.
			Evaluator().
			Compare(ctx, before, after)
}

@function
async def example(before: dagger.File, after: dagger.File) -> str:
	return await (
		dag.evaluator()
		.compare(before, after)
	)

@func()
async example(before: File, after: File): Promise<string> {
	return dag
		.evaluator()
		.compare(before, after)
}

withSystemPrompt() 🔗

Set a system prompt, given as a string, to be provided to the evals.

Return Type

Evaluator !

Arguments

Name	Type	Default Value	Description
prompt	String !	-	No description provided

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 with-system-prompt --prompt string

func (m *MyModule) Example(prompt string) *dagger.Evaluator  {
	return dag.
			Evaluator().
			WithSystemPrompt(prompt)
}

@function
def example(prompt: str) -> dagger.Evaluator:
	return (
		dag.evaluator()
		.with_system_prompt(prompt)
	)

@func()
example(prompt: string): Evaluator {
	return dag
		.evaluator()
		.withSystemPrompt(prompt)
}

withSystemPromptFile() 🔗

Set a system prompt, read from a file, to be provided to the evals.

Return Type

Evaluator !

Arguments

Name	Type	Default Value	Description
file	File !	-	No description provided

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 with-system-prompt-file --file file:path

func (m *MyModule) Example(file *dagger.File) *dagger.Evaluator  {
	return dag.
			Evaluator().
			WithSystemPromptFile(file)
}

@function
def example(file: dagger.File) -> dagger.Evaluator:
	return (
		dag.evaluator()
		.with_system_prompt_file(file)
	)

@func()
example(file: File): Evaluator {
	return dag
		.evaluator()
		.withSystemPromptFile(file)
}

withoutDefaultSystemPrompt() 🔗

Disable Dagger’s built-in system prompt.

You probably don’t need to use this: Dagger’s system prompt provides the fundamentals for how the agent interacts with Dagger objects. It is exposed primarily so that we (Dagger) can iteratively test the default system prompt itself.

Return Type

Evaluator !

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 without-default-system-prompt

func (m *MyModule) Example() *dagger.Evaluator  {
	return dag.
			Evaluator().
			WithoutDefaultSystemPrompt()
}

@function
def example() -> dagger.Evaluator:
	return (
		dag.evaluator()
		.without_default_system_prompt()
	)

@func()
example(): Evaluator {
	return dag
		.evaluator()
		.withoutDefaultSystemPrompt()
}

withDocs() 🔗

Set, as a string, the full documentation that the system prompt is intended to put into effect.

Return Type

Evaluator !

Arguments

Name	Type	Default Value	Description
prompt	String !	-	No description provided

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 with-docs --prompt string

func (m *MyModule) Example(prompt string) *dagger.Evaluator  {
	return dag.
			Evaluator().
			WithDocs(prompt)
}

@function
def example(prompt: str) -> dagger.Evaluator:
	return (
		dag.evaluator()
		.with_docs(prompt)
	)

@func()
example(prompt: string): Evaluator {
	return dag
		.evaluator()
		.withDocs(prompt)
}

withDocsFile() 🔗

Set, from a file, the full documentation that the system prompt is intended to put into effect.

Return Type

Evaluator !

Arguments

Name	Type	Default Value	Description
file	File !	-	No description provided

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 with-docs-file --file file:path

func (m *MyModule) Example(file *dagger.File) *dagger.Evaluator  {
	return dag.
			Evaluator().
			WithDocsFile(file)
}

@function
def example(file: dagger.File) -> dagger.Evaluator:
	return (
		dag.evaluator()
		.with_docs_file(file)
	)

@func()
example(file: File): Evaluator {
	return dag
		.evaluator()
		.withDocsFile(file)
}

withEval() 🔗

Return Type

Evaluator !

Arguments

Name	Type	Default Value	Description
eval	Interface !	-	No description provided

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 with-eval

func (m *MyModule) Example(eval ) *dagger.Evaluator  {
	return dag.
			Evaluator().
			WithEval(eval)
}

@function
def example(eval: ) -> dagger.Evaluator:
	return (
		dag.evaluator()
		.with_eval(eval)
	)

@func()
example(eval: ): Evaluator {
	return dag
		.evaluator()
		.withEval(eval)
}

withEvals() 🔗

Return Type

Evaluator !

Arguments

Name	Type	Default Value	Description
evals	[Interface ! ] !	-	No description provided

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 with-evals

func (m *MyModule) Example(evals []) *dagger.Evaluator  {
	return dag.
			Evaluator().
			WithEvals(evals)
}

@function
def example(evals: List[]) -> dagger.Evaluator:
	return (
		dag.evaluator()
		.with_evals(evals)
	)

@func()
example(evals: []): Evaluator {
	return dag
		.evaluator()
		.withEvals(evals)
}

evalsAcrossModels() 🔗

Run evals across models.

Models run in parallel. For each model, evals run in series, and all attempts of a given eval run in parallel.

Return Type

EvalsAcrossModels !

Arguments

Name	Type	Default Value	Description
evals	[String ! ]	-	Evals to run. Defaults to all.
models	[String ! ]	-	Models to run evals across. Defaults to all.
attempts	Integer	-	Attempts to run each eval. Defaults to a per-provider value.

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 evals-across-models

func (m *MyModule) Example() *dagger.EvaluatorEvalsAcrossModels  {
	return dag.
			Evaluator().
			EvalsAcrossModels()
}

@function
def example() -> dagger.EvaluatorEvalsAcrossModels:
	return (
		dag.evaluator()
		.evals_across_models()
	)

@func()
example(): EvaluatorEvalsAcrossModels {
	return dag
		.evaluator()
		.evalsAcrossModels()
}

explore() 🔗

Return Type

[String ! ] !

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 explore

func (m *MyModule) Example(ctx context.Context) []string  {
	return dag.
			Evaluator().
			Explore(ctx)
}

@function
async def example() -> List[str]:
	return await (
		dag.evaluator()
		.explore()
	)

@func()
async example(): Promise<string[]> {
	return dag
		.evaluator()
		.explore()
}

generateSystemPrompt() 🔗

Return Type

String !

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 generate-system-prompt

func (m *MyModule) Example(ctx context.Context) string  {
	return dag.
			Evaluator().
			GenerateSystemPrompt(ctx)
}

@function
async def example() -> str:
	return await (
		dag.evaluator()
		.generate_system_prompt()
	)

@func()
async example(): Promise<string> {
	return dag
		.evaluator()
		.generateSystemPrompt()
}

iterate() 🔗

Iterate runs all evals across all models in a loop until all of the evals succeed, analyzing the failures and generating a new system prompt to course-correct.

Return Type

String !

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 iterate

func (m *MyModule) Example(ctx context.Context) string  {
	return dag.
			Evaluator().
			Iterate(ctx)
}

@function
async def example() -> str:
	return await (
		dag.evaluator()
		.iterate()
	)

@func()
async example(): Promise<string> {
	return dag
		.evaluator()
		.iterate()
}

EvalsAcrossModels 🔗

traceId() 🔗

Return Type

String !

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 evals-across-models \
 trace-id

func (m *MyModule) Example(ctx context.Context) string  {
	return dag.
			Evaluator().
			EvalsAcrossModels().
			TraceId(ctx)
}

@function
async def example() -> str:
	return await (
		dag.evaluator()
		.evals_across_models()
		.trace_id()
	)

@func()
async example(): Promise<string> {
	return dag
		.evaluator()
		.evalsAcrossModels()
		.traceId()
}

modelResults() 🔗

Return Type

[ModelResult ! ] !

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 evals-across-models \
 model-results

func (m *MyModule) Example() []*dagger.EvaluatorModelResult  {
	return dag.
			Evaluator().
			EvalsAcrossModels().
			ModelResults()
}

@function
def example() -> List[dagger.EvaluatorModelResult]:
	return (
		dag.evaluator()
		.evals_across_models()
		.model_results()
	)

@func()
example(): EvaluatorModelResult[] {
	return dag
		.evaluator()
		.evalsAcrossModels()
		.modelResults()
}

csv() 🔗

Return Type

String !

Arguments

Name	Type	Default Value	Description
noHeader	Boolean !	false	Don't include a header.

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 evals-across-models \
 csv --no-header boolean

func (m *MyModule) Example(ctx context.Context, noHeader bool) string  {
	return dag.
			Evaluator().
			EvalsAcrossModels().
			Csv(ctx, noHeader)
}

@function
async def example(no_header: bool) -> str:
	return await (
		dag.evaluator()
		.evals_across_models()
		.csv(no_header)
	)

@func()
async example(noHeader: boolean): Promise<string> {
	return dag
		.evaluator()
		.evalsAcrossModels()
		.csv(noHeader)
}

check() 🔗

Return Type

Void !

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 evals-across-models \
 check

func (m *MyModule) Example(ctx context.Context)   {
	return dag.
			Evaluator().
			EvalsAcrossModels().
			Check(ctx)
}

@function
async def example() -> None:
	return await (
		dag.evaluator()
		.evals_across_models()
		.check()
	)

@func()
async example(): Promise<void> {
	return dag
		.evaluator()
		.evalsAcrossModels()
		.check()
}

analyzeAndGenerateSystemPrompt() 🔗

Return Type

String !

Example

dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
 evals-across-models \
 analyze-and-generate-system-prompt

func (m *MyModule) Example(ctx context.Context) string  {
	return dag.
			Evaluator().
			EvalsAcrossModels().
			AnalyzeAndGenerateSystemPrompt(ctx)
}

@function
async def example() -> str:
	return await (
		dag.evaluator()
		.evals_across_models()
		.analyze_and_generate_system_prompt()
	)

@func()
async example(): Promise<string> {
	return dag
		.evaluator()
		.evalsAcrossModels()
		.analyzeAndGenerateSystemPrompt()
}

ModelResult 🔗

modelName() 🔗

Return Type

String !

Example

Function EvaluatorModelResult.modelName is not accessible from the evaluator module

Function EvaluatorModelResult.modelName is not accessible from the evaluator module

Function EvaluatorModelResult.modelName is not accessible from the evaluator module

Function EvaluatorModelResult.modelName is not accessible from the evaluator module

spanId() 🔗

Return Type

String !

Example

Function EvaluatorModelResult.spanId is not accessible from the evaluator module

Function EvaluatorModelResult.spanId is not accessible from the evaluator module

Function EvaluatorModelResult.spanId is not accessible from the evaluator module

Function EvaluatorModelResult.spanId is not accessible from the evaluator module

evalReports() 🔗

Return Type

[EvalResult ! ] !

Example

Function EvaluatorModelResult.evalReports is not accessible from the evaluator module

Function EvaluatorModelResult.evalReports is not accessible from the evaluator module

Function EvaluatorModelResult.evalReports is not accessible from the evaluator module

Function EvaluatorModelResult.evalReports is not accessible from the evaluator module

check() 🔗

Return Type

Void !

Example

Function EvaluatorModelResult.check is not accessible from the evaluator module

Function EvaluatorModelResult.check is not accessible from the evaluator module

Function EvaluatorModelResult.check is not accessible from the evaluator module

Function EvaluatorModelResult.check is not accessible from the evaluator module

EvalResult 🔗

name() 🔗

Return Type

String !

Example

Function EvaluatorEvalResult.name is not accessible from the evaluator module

Function EvaluatorEvalResult.name is not accessible from the evaluator module

Function EvaluatorEvalResult.name is not accessible from the evaluator module

Function EvaluatorEvalResult.name is not accessible from the evaluator module

spanId() 🔗

Return Type

String !

Example

Function EvaluatorEvalResult.spanId is not accessible from the evaluator module

Function EvaluatorEvalResult.spanId is not accessible from the evaluator module

Function EvaluatorEvalResult.spanId is not accessible from the evaluator module

Function EvaluatorEvalResult.spanId is not accessible from the evaluator module

error() 🔗

Return Type

String !

Example

Function EvaluatorEvalResult.error is not accessible from the evaluator module

Function EvaluatorEvalResult.error is not accessible from the evaluator module

Function EvaluatorEvalResult.error is not accessible from the evaluator module

Function EvaluatorEvalResult.error is not accessible from the evaluator module

report() 🔗

Return Type

String !

Example

Function EvaluatorEvalResult.report is not accessible from the evaluator module

Function EvaluatorEvalResult.report is not accessible from the evaluator module

Function EvaluatorEvalResult.report is not accessible from the evaluator module

Function EvaluatorEvalResult.report is not accessible from the evaluator module

successRate() 🔗

Return Type

Float !

Example

Function EvaluatorEvalResult.successRate is not accessible from the evaluator module

Function EvaluatorEvalResult.successRate is not accessible from the evaluator module

Function EvaluatorEvalResult.successRate is not accessible from the evaluator module

Function EvaluatorEvalResult.successRate is not accessible from the evaluator module

totalAttempts() 🔗

Return Type

Integer !

Example

Function EvaluatorEvalResult.totalAttempts is not accessible from the evaluator module

Function EvaluatorEvalResult.totalAttempts is not accessible from the evaluator module

Function EvaluatorEvalResult.totalAttempts is not accessible from the evaluator module

Function EvaluatorEvalResult.totalAttempts is not accessible from the evaluator module

inputTokens() 🔗

Return Type

Integer !

Example

Function EvaluatorEvalResult.inputTokens is not accessible from the evaluator module

Function EvaluatorEvalResult.inputTokens is not accessible from the evaluator module

Function EvaluatorEvalResult.inputTokens is not accessible from the evaluator module

Function EvaluatorEvalResult.inputTokens is not accessible from the evaluator module

outputTokens() 🔗

Return Type

Integer !

Example

Function EvaluatorEvalResult.outputTokens is not accessible from the evaluator module

Function EvaluatorEvalResult.outputTokens is not accessible from the evaluator module

Function EvaluatorEvalResult.outputTokens is not accessible from the evaluator module

Function EvaluatorEvalResult.outputTokens is not accessible from the evaluator module

check() 🔗

Return Type

Void !

Example

Function EvaluatorEvalResult.check is not accessible from the evaluator module

Function EvaluatorEvalResult.check is not accessible from the evaluator module

Function EvaluatorEvalResult.check is not accessible from the evaluator module

Function EvaluatorEvalResult.check is not accessible from the evaluator module