from langsmith import Client


# 'inputs' will come from your dataset.
def dummy_target(inputs: dict) -> dict:
    """Placeholder target function: returns a fixed output regardless of inputs."""
    return {"foo": 1, "bar": "two"}


# 'inputs' will come from your dataset.
# 'outputs' will come from your target function.
def evaluator_one(inputs: dict, outputs: dict) -> bool:
    """Evaluate the target's 'foo' output value."""
    return outputs["foo"] == 2


def evaluator_two(inputs: dict, outputs: dict) -> bool:
    """Evaluate that the target's 'bar' output is under 3 characters."""
    return len(outputs["bar"]) < 3


client = Client()
results = client.evaluate(
    dummy_target,  # <-- target function
    data="your-dataset-name",
    evaluators=[evaluator_one, evaluator_two],
    # NOTE: the original snippet had a bare `...` here — a positional argument
    # after keyword arguments, which is a SyntaxError. Add any further
    # evaluate() options (e.g. experiment_prefix, max_concurrency) as
    # keyword arguments here instead.
)
from langsmith import wrappers
from openai import OpenAI

# Optionally wrap the OpenAI client to automatically
# trace all model calls.
oai_client = wrappers.wrap_openai(OpenAI())


def target(inputs: dict) -> dict:
    """Call the chat model on the dataset example and return its answer.

    Assumes the dataset example's inputs carry a 'messages' key; adjust
    the lookup below to match your own dataset schema.
    """
    chat_messages = inputs["messages"]
    completion = oai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
    )
    first_choice = completion.choices[0]
    return {"answer": first_choice.message.content}
from my_agent import agent


# This is the function you will evaluate.
def target(inputs: dict) -> dict:
    """Run the agent on one dataset example and return its raw response.

    Assumes the dataset example's inputs carry a 'messages' key, and that
    the agent's output is already in the shape your evaluators expect.
    """
    # Replace `invoke` with whatever you use to call your agent.
    return agent.invoke({"messages": inputs["messages"]})