evaluator
No long description provided.
Installation
dagger install github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1
Entrypoint
Return Type
Evaluator !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
model | String | - | Model to use for the evaluator agent. |
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call
func (m *MyModule) Example() *dagger.Evaluator {
return dag.
Evaluator()
}
@function
def example() -> dagger.Evaluator:
return (
dag.evaluator()
)
@func()
example(): Evaluator {
return dag
.evaluator()
}
Types
Evaluator 🔗
docs() 🔗
The documentation of the tool-calling scheme for which a prompt is to be generated.
Return Type
File !
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
docs
func (m *MyModule) Example() *dagger.File {
return dag.
Evaluator().
Docs()
}
@function
def example() -> dagger.File:
return (
dag.evaluator()
.docs()
)
@func()
example(): File {
return dag
.evaluator()
.docs()
}
systemPrompt() 🔗
A system prompt to apply to all evals.
Return Type
File !
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
system-prompt
func (m *MyModule) Example() *dagger.File {
return dag.
Evaluator().
SystemPrompt()
}
@function
def example() -> dagger.File:
return (
dag.evaluator()
.system_prompt()
)
@func()
example(): File {
return dag
.evaluator()
.systemPrompt()
}
disableDefaultSystemPrompt() 🔗
Whether to disable the default system prompt.
Return Type
Boolean !
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
disable-default-system-prompt
func (m *MyModule) Example(ctx context.Context) bool {
return dag.
Evaluator().
DisableDefaultSystemPrompt(ctx)
}
@function
async def example() -> bool:
return await (
dag.evaluator()
.disable_default_system_prompt()
)
@func()
async example(): Promise<boolean> {
return dag
.evaluator()
.disableDefaultSystemPrompt()
}
evaluatorModel() 🔗
Return Type
String !
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
evaluator-model
func (m *MyModule) Example(ctx context.Context) string {
return dag.
Evaluator().
EvaluatorModel(ctx)
}
@function
async def example() -> str:
return await (
dag.evaluator()
.evaluator_model()
)
@func()
async example(): Promise<string> {
return dag
.evaluator()
.evaluatorModel()
}
compare() 🔗
Return Type
String !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
before | File ! | - | No description provided |
after | File ! | - | No description provided |
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
compare --before file:path --after file:path
func (m *MyModule) Example(ctx context.Context, before *dagger.File, after *dagger.File) string {
return dag.
Evaluator().
Compare(ctx, before, after)
}
@function
async def example(before: dagger.File, after: dagger.File) -> str:
return await (
dag.evaluator()
.compare(before, after)
)
@func()
async example(before: File, after: File): Promise<string> {
return dag
.evaluator()
.compare(before, after)
}
withSystemPrompt() 🔗
Set a system prompt to be provided to the evals.
Return Type
Evaluator !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
prompt | String ! | - | No description provided |
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
with-system-prompt --prompt string
func (m *MyModule) Example(prompt string) *dagger.Evaluator {
return dag.
Evaluator().
WithSystemPrompt(prompt)
}
@function
def example(prompt: str) -> dagger.Evaluator:
return (
dag.evaluator()
.with_system_prompt(prompt)
)
@func()
example(prompt: string): Evaluator {
return dag
.evaluator()
.withSystemPrompt(prompt)
}
withSystemPromptFile() 🔗
Set a system prompt, supplied as a file, to be provided to the evals.
Return Type
Evaluator !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
file | File ! | - | No description provided |
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
with-system-prompt-file --file file:path
func (m *MyModule) Example(file *dagger.File) *dagger.Evaluator {
return dag.
Evaluator().
WithSystemPromptFile(file)
}
@function
def example(file: dagger.File) -> dagger.Evaluator:
return (
dag.evaluator()
.with_system_prompt_file(file)
)
@func()
example(file: File): Evaluator {
return dag
.evaluator()
.withSystemPromptFile(file)
}
withoutDefaultSystemPrompt() 🔗
Disable Dagger’s built-in system prompt.
You probably don’t need to use this — Dagger’s system prompt provides the fundamentals for how the agent interacts with Dagger objects. This is primarily exposed so that we (Dagger) can iteratively test the default system prompt itself.
Return Type
Evaluator !
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
without-default-system-prompt
func (m *MyModule) Example() *dagger.Evaluator {
return dag.
Evaluator().
WithoutDefaultSystemPrompt()
}
@function
def example() -> dagger.Evaluator:
return (
dag.evaluator()
.without_default_system_prompt()
)
@func()
example(): Evaluator {
return dag
.evaluator()
.withoutDefaultSystemPrompt()
}
withDocs() 🔗
Set the full documentation the system prompt intends to effectuate.
Return Type
Evaluator !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
prompt | String ! | - | No description provided |
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
with-docs --prompt string
func (m *MyModule) Example(prompt string) *dagger.Evaluator {
return dag.
Evaluator().
WithDocs(prompt)
}
@function
def example(prompt: str) -> dagger.Evaluator:
return (
dag.evaluator()
.with_docs(prompt)
)
@func()
example(prompt: string): Evaluator {
return dag
.evaluator()
.withDocs(prompt)
}
withDocsFile() 🔗
Set the full documentation the system prompt intends to effectuate, supplied as a file.
Return Type
Evaluator !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
file | File ! | - | No description provided |
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
with-docs-file --file file:path
func (m *MyModule) Example(file *dagger.File) *dagger.Evaluator {
return dag.
Evaluator().
WithDocsFile(file)
}
@function
def example(file: dagger.File) -> dagger.Evaluator:
return (
dag.evaluator()
.with_docs_file(file)
)
@func()
example(file: File): Evaluator {
return dag
.evaluator()
.withDocsFile(file)
}
withEval() 🔗
Return Type
Evaluator !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
eval | Interface ! | - | No description provided |
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
with-eval
func (m *MyModule) Example(eval ) *dagger.Evaluator {
return dag.
Evaluator().
WithEval(eval)
}
@function
def example(eval: ) -> dagger.Evaluator:
return (
dag.evaluator()
.with_eval(eval)
)
@func()
example(eval: ): Evaluator {
return dag
.evaluator()
.withEval(eval)
}
withEvals() 🔗
Return Type
Evaluator !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
evals | [Interface ! ] ! | - | No description provided |
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
with-evals
func (m *MyModule) Example(evals []) *dagger.Evaluator {
return dag.
Evaluator().
WithEvals(evals)
}
@function
def example(evals: List[]) -> dagger.Evaluator:
return (
dag.evaluator()
.with_evals(evals)
)
@func()
example(evals: []): Evaluator {
return dag
.evaluator()
.withEvals(evals)
}
evalsAcrossModels() 🔗
Run evals across models.
Models run in parallel, and evals run in series, with all attempts in parallel.
Return Type
EvalsAcrossModels !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
evals | [String ! ] | - | Evals to run. Defaults to all. |
models | [String ! ] | - | Models to run evals across. Defaults to all. |
attempts | Integer | - | Attempts to run each eval. Defaults to a per-provider value. |
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
evals-across-models
func (m *MyModule) Example() *dagger.EvaluatorEvalsAcrossModels {
return dag.
Evaluator().
EvalsAcrossModels()
}
@function
def example() -> dagger.EvaluatorEvalsAcrossModels:
return (
dag.evaluator()
.evals_across_models()
)
@func()
example(): EvaluatorEvalsAcrossModels {
return dag
.evaluator()
.evalsAcrossModels()
}
explore() 🔗
Return Type
[String ! ] !
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
explore
func (m *MyModule) Example(ctx context.Context) []string {
return dag.
Evaluator().
Explore(ctx)
}
@function
async def example() -> List[str]:
return await (
dag.evaluator()
.explore()
)
@func()
async example(): Promise<string[]> {
return dag
.evaluator()
.explore()
}
generateSystemPrompt() 🔗
Return Type
String !
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
generate-system-prompt
func (m *MyModule) Example(ctx context.Context) string {
return dag.
Evaluator().
GenerateSystemPrompt(ctx)
}
@function
async def example() -> str:
return await (
dag.evaluator()
.generate_system_prompt()
)
@func()
async example(): Promise<string> {
return dag
.evaluator()
.generateSystemPrompt()
}
iterate() 🔗
Iterate runs all evals across all models in a loop until all of the evals succeed, analyzing the failures and generating a new system prompt to course-correct.
Return Type
String !
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
iterate
func (m *MyModule) Example(ctx context.Context) string {
return dag.
Evaluator().
Iterate(ctx)
}
@function
async def example() -> str:
return await (
dag.evaluator()
.iterate()
)
@func()
async example(): Promise<string> {
return dag
.evaluator()
.iterate()
}
EvalsAcrossModels 🔗
traceId() 🔗
Return Type
String !
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
evals-across-models \
trace-id
func (m *MyModule) Example(ctx context.Context) string {
return dag.
Evaluator().
EvalsAcrossModels().
TraceId(ctx)
}
@function
async def example() -> str:
return await (
dag.evaluator()
.evals_across_models()
.trace_id()
)
@func()
async example(): Promise<string> {
return dag
.evaluator()
.evalsAcrossModels()
.traceId()
}
modelResults() 🔗
Return Type
[ModelResult ! ] !
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
evals-across-models \
model-results
func (m *MyModule) Example() []*dagger.EvaluatorModelResult {
return dag.
Evaluator().
EvalsAcrossModels().
ModelResults()
}
@function
def example() -> List[dagger.EvaluatorModelResult]:
return (
dag.evaluator()
.evals_across_models()
.model_results()
)
@func()
example(): EvaluatorModelResult[] {
return dag
.evaluator()
.evalsAcrossModels()
.modelResults()
}
csv() 🔗
Return Type
String !
Arguments
Name | Type | Default Value | Description |
---|---|---|---|
noHeader | Boolean ! | false | Don't include a header. |
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
evals-across-models \
csv --no-header boolean
func (m *MyModule) Example(ctx context.Context, noHeader bool) string {
return dag.
Evaluator().
EvalsAcrossModels().
Csv(ctx, noHeader)
}
@function
async def example(no_header: bool) -> str:
return await (
dag.evaluator()
.evals_across_models()
.csv(no_header)
)
@func()
async example(noHeader: boolean): Promise<string> {
return dag
.evaluator()
.evalsAcrossModels()
.csv(noHeader)
}
check() 🔗
Return Type
Void !
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
evals-across-models \
check
func (m *MyModule) Example(ctx context.Context) {
return dag.
Evaluator().
EvalsAcrossModels().
Check(ctx)
}
@function
async def example() -> None:
return await (
dag.evaluator()
.evals_across_models()
.check()
)
@func()
async example(): Promise<void> {
return dag
.evaluator()
.evalsAcrossModels()
.check()
}
analyzeAndGenerateSystemPrompt() 🔗
Return Type
String !
Example
dagger -m github.com/dagger/dagger/modules/evaluator@fd3753a7ba07e713ef353bf0dc93de2b72f807a1 call \
evals-across-models \
analyze-and-generate-system-prompt
func (m *MyModule) Example(ctx context.Context) string {
return dag.
Evaluator().
EvalsAcrossModels().
AnalyzeAndGenerateSystemPrompt(ctx)
}
@function
async def example() -> str:
return await (
dag.evaluator()
.evals_across_models()
.analyze_and_generate_system_prompt()
)
@func()
async example(): Promise<string> {
return dag
.evaluator()
.evalsAcrossModels()
.analyzeAndGenerateSystemPrompt()
}
ModelResult 🔗
modelName() 🔗
Return Type
String !
Example
Function EvaluatorModelResult.modelName is not accessible from the evaluator module
Function EvaluatorModelResult.modelName is not accessible from the evaluator module
Function EvaluatorModelResult.modelName is not accessible from the evaluator module
Function EvaluatorModelResult.modelName is not accessible from the evaluator module
spanId() 🔗
Return Type
String !
Example
Function EvaluatorModelResult.spanId is not accessible from the evaluator module
Function EvaluatorModelResult.spanId is not accessible from the evaluator module
Function EvaluatorModelResult.spanId is not accessible from the evaluator module
Function EvaluatorModelResult.spanId is not accessible from the evaluator module
evalReports() 🔗
Return Type
[EvalResult ! ] !
Example
Function EvaluatorModelResult.evalReports is not accessible from the evaluator module
Function EvaluatorModelResult.evalReports is not accessible from the evaluator module
Function EvaluatorModelResult.evalReports is not accessible from the evaluator module
Function EvaluatorModelResult.evalReports is not accessible from the evaluator module
check() 🔗
Return Type
Void !
Example
Function EvaluatorModelResult.check is not accessible from the evaluator module
Function EvaluatorModelResult.check is not accessible from the evaluator module
Function EvaluatorModelResult.check is not accessible from the evaluator module
Function EvaluatorModelResult.check is not accessible from the evaluator module
EvalResult 🔗
name() 🔗
Return Type
String !
Example
Function EvaluatorEvalResult.name is not accessible from the evaluator module
Function EvaluatorEvalResult.name is not accessible from the evaluator module
Function EvaluatorEvalResult.name is not accessible from the evaluator module
Function EvaluatorEvalResult.name is not accessible from the evaluator module
spanId() 🔗
Return Type
String !
Example
Function EvaluatorEvalResult.spanId is not accessible from the evaluator module
Function EvaluatorEvalResult.spanId is not accessible from the evaluator module
Function EvaluatorEvalResult.spanId is not accessible from the evaluator module
Function EvaluatorEvalResult.spanId is not accessible from the evaluator module
error() 🔗
Return Type
String !
Example
Function EvaluatorEvalResult.error is not accessible from the evaluator module
Function EvaluatorEvalResult.error is not accessible from the evaluator module
Function EvaluatorEvalResult.error is not accessible from the evaluator module
Function EvaluatorEvalResult.error is not accessible from the evaluator module
report() 🔗
Return Type
String !
Example
Function EvaluatorEvalResult.report is not accessible from the evaluator module
Function EvaluatorEvalResult.report is not accessible from the evaluator module
Function EvaluatorEvalResult.report is not accessible from the evaluator module
Function EvaluatorEvalResult.report is not accessible from the evaluator module
successRate() 🔗
Return Type
Float !
Example
Function EvaluatorEvalResult.successRate is not accessible from the evaluator module
Function EvaluatorEvalResult.successRate is not accessible from the evaluator module
Function EvaluatorEvalResult.successRate is not accessible from the evaluator module
Function EvaluatorEvalResult.successRate is not accessible from the evaluator module
totalAttempts() 🔗
Return Type
Integer !
Example
Function EvaluatorEvalResult.totalAttempts is not accessible from the evaluator module
Function EvaluatorEvalResult.totalAttempts is not accessible from the evaluator module
Function EvaluatorEvalResult.totalAttempts is not accessible from the evaluator module
Function EvaluatorEvalResult.totalAttempts is not accessible from the evaluator module
inputTokens() 🔗
Return Type
Integer !
Example
Function EvaluatorEvalResult.inputTokens is not accessible from the evaluator module
Function EvaluatorEvalResult.inputTokens is not accessible from the evaluator module
Function EvaluatorEvalResult.inputTokens is not accessible from the evaluator module
Function EvaluatorEvalResult.inputTokens is not accessible from the evaluator module
outputTokens() 🔗
Return Type
Integer !
Example
Function EvaluatorEvalResult.outputTokens is not accessible from the evaluator module
Function EvaluatorEvalResult.outputTokens is not accessible from the evaluator module
Function EvaluatorEvalResult.outputTokens is not accessible from the evaluator module
Function EvaluatorEvalResult.outputTokens is not accessible from the evaluator module
check() 🔗
Return Type
Void !
Example
Function EvaluatorEvalResult.check is not accessible from the evaluator module
Function EvaluatorEvalResult.check is not accessible from the evaluator module
Function EvaluatorEvalResult.check is not accessible from the evaluator module
Function EvaluatorEvalResult.check is not accessible from the evaluator module