# Slide heading recovered from the extracted text:
#   "メトリクス ◦ LLM-as-a-Judge" (Metrics ◦ LLM-as-a-Judge), slide 76.
# The heading was fused onto the code line; it is kept here as a comment so
# the snippet below parses as Python.
from ddtrace.llmobs import LLMObs

# Look up the dataset by project name and dataset name.
dataset = LLMObs.pull_dataset(
    project_name="project_name",
    dataset_name="dataset_name",
)

# NOTE(review): `task` and `evaluator` are not defined in this excerpt; they
# are presumably declared earlier in the material — confirm before running.
experiment = LLMObs.experiment(
    name="experiment_name",
    description="description",
    task=task,
    dataset=dataset,
    evaluators=[evaluator],
    config={"version": "0.1.0"},
)

# Execute the experiment and keep whatever `run()` returns for inspection.
results = experiment.run()