Skip to content
This repository was archived by the owner on Apr 14, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.DS_Store
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,18 @@
"outputs": [],
"source": [
"# Dependencies needed to build the Llama Index RAG application\n",
"!pip install -qq gcsfs llama-index-llms-openai llama-index-embeddings-openai\n",
"%pip install -qq gcsfs llama-index-llms-openai llama-index-embeddings-openai\n",
"\n",
"# Dependencies needed to export spans and send them to our collector: Phoenix\n",
"!pip install -qq \"llama-index-callbacks-arize-phoenix>=0.1.2\"\n",
"%pip install -qq \"llama-index-callbacks-arize-phoenix>=0.1.2\"\n",
"\n",
"# Install Phoenix to generate evaluations\n",
"!pip install -qq \"arize-phoenix[evals]\"\n",
"%pip install -qq arize-phoenix-client arize-phoenix-evals\n",
"\n",
"# Install Arize SDK with `Tracing` extra dependencies to export Phoenix data to Arize\n",
"!pip install -qq \"arize[Tracing]>=7.12.0\""
"%pip install -qq \"arize[Tracing]>=7.12.0\"\n",
"\n",
"%pip install -qq \"ipywidgets\""
]
},
{
Expand All @@ -70,9 +72,14 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import phoenix as px\n",
"\n",
"session = px.launch_app()"
"session = px.launch_app()\n",
"\n",
"# Update the environment variable to point to the Phoenix instance that we just launched\n",
"os.environ[\"PHOENIX_COLLECTOR_ENDPOINT\"] = session.url"
]
},
{
Expand Down Expand Up @@ -168,7 +175,7 @@
"from llama_index.embeddings.openai import OpenAIEmbedding\n",
"\n",
"\n",
"Settings.llm = OpenAI(model=\"gpt-4-turbo-preview\")\n",
"Settings.llm = OpenAI(model=\"gpt-5-nano\")\n",
"Settings.embed_model = OpenAIEmbedding(model=\"text-embedding-ada-002\")\n",
"index = load_index_from_storage(\n",
" storage_context,\n",
Expand Down Expand Up @@ -275,12 +282,12 @@
"metadata": {},
"outputs": [],
"source": [
"from phoenix.session.evaluation import get_qa_with_reference\n",
"from phoenix.client import Client\n",
"\n",
"px_client = px.Client() # Define phoenix client\n",
"queries_df = get_qa_with_reference(\n",
" px_client\n",
") # Get question, answer and reference data from phoenix"
"px_client = Client() # Define phoenix client\n",
"queries_df = px_client.spans.get_spans_dataframe(root_spans_only=True) # Get root spans (queries) as a dataframe\n",
"\n",
"queries_df.head()"
]
},
{
Expand Down Expand Up @@ -314,23 +321,29 @@
"metadata": {},
"outputs": [],
"source": [
"from phoenix.evals import (\n",
" HallucinationEvaluator,\n",
" OpenAIModel,\n",
" QAEvaluator,\n",
" run_evals,\n",
"from phoenix.evals.metrics.correctness import CorrectnessEvaluator\n",
"from phoenix.evals import LLM, async_evaluate_dataframe\n",
"\n",
"eval_model = LLM(\n",
" provider=\"openai\",\n",
" model=\"gpt-5-nano\",\n",
")\n",
"\n",
"eval_model = OpenAIModel(\n",
" model=\"gpt-4-turbo-preview\",\n",
"correctness_evaluator = CorrectnessEvaluator(eval_model)\n",
"\n",
"eval_df = queries_df[[\"context.span_id\", \"attributes.input.value\", \"attributes.output.value\"]].copy()\n",
"eval_df.rename(\n",
" columns={\n",
" \"attributes.input.value\": \"input\",\n",
" \"attributes.output.value\": \"output\",\n",
" },\n",
" inplace=True,\n",
")\n",
"hallucination_evaluator = HallucinationEvaluator(eval_model)\n",
"qa_correctness_evaluator = QAEvaluator(eval_model)\n",
"eval_df.set_index(\"context.span_id\", inplace=True)\n",
"\n",
"hallucination_eval_df, qa_correctness_eval_df = run_evals(\n",
" dataframe=queries_df,\n",
" evaluators=[hallucination_evaluator, qa_correctness_evaluator],\n",
" provide_explanation=True,\n",
"correctness_eval_df = await async_evaluate_dataframe(\n",
" dataframe=eval_df,\n",
" evaluators=[correctness_evaluator]\n",
")"
]
},
Expand All @@ -347,14 +360,10 @@
"metadata": {},
"outputs": [],
"source": [
"from phoenix.trace import SpanEvaluations\n",
"from phoenix.evals.utils import to_annotation_dataframe\n",
"\n",
"px_client.log_evaluations(\n",
" SpanEvaluations(eval_name=\"Hallucination\", dataframe=hallucination_eval_df),\n",
" SpanEvaluations(\n",
" eval_name=\"QA_Correctness\", dataframe=qa_correctness_eval_df\n",
" ),\n",
")"
"annotation_df = to_annotation_dataframe(dataframe=correctness_eval_df)\n",
"Client().spans.log_span_annotations_dataframe(dataframe=annotation_df)"
]
},
{
Expand All @@ -379,8 +388,7 @@
"metadata": {},
"outputs": [],
"source": [
"tds = px_client.get_trace_dataset()\n",
"spans_df = tds.get_spans_dataframe(include_evaluations=False)\n",
"spans_df = px_client.spans.get_spans_dataframe()\n",
"spans_df.head()"
]
},
Expand All @@ -390,7 +398,26 @@
"metadata": {},
"outputs": [],
"source": [
"evals_df = tds.get_evals_dataframe()\n",
"evals_df = (\n",
" px_client.spans.get_span_annotations_dataframe(spans_dataframe=spans_df)\n",
" .reset_index()\n",
" .rename(\n",
" columns={\n",
" \"span_id\": \"context.span_id\",\n",
" \"result.label\": \"eval.Correctness.label\",\n",
" \"result.score\": \"eval.Correctness.score\",\n",
" \"result.explanation\": \"eval.Correctness.explanation\",\n",
" }\n",
" )\n",
" .set_index(\"context.span_id\")[\n",
" [\n",
" \"eval.Correctness.label\",\n",
" \"eval.Correctness.score\",\n",
" \"eval.Correctness.explanation\",\n",
" ]\n",
" ]\n",
")\n",
"\n",
Comment on lines +403 to +420
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Entirely out of curiosity, why was all this verbosity not previously required?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤷 Not sure whether it is a v7 vs. v8 SDK thing or a server-side thing.

"evals_df.head()"
]
},
Expand All @@ -407,7 +434,7 @@
"metadata": {},
"outputs": [],
"source": [
"from arize.pandas.logger import Client"
"from arize import ArizeClient"
]
},
{
Expand Down Expand Up @@ -435,8 +462,7 @@
" \"✅ Import and Setup Arize Client Done! Now we can start using Arize!\"\n",
" )\n",
"\n",
"arize_client = Client(\n",
" space_id=SPACE_ID,\n",
"arize_client = ArizeClient(\n",
" api_key=API_KEY,\n",
")\n",
"model_id = \"tutorial-tracing-llama-index-rag-export-from-phoenix\"\n",
Expand All @@ -456,11 +482,11 @@
"metadata": {},
"outputs": [],
"source": [
"response = arize_client.log_spans(\n",
"response = arize_client.spans.log(\n",
" space_id=SPACE_ID,\n",
" project_name=\"tracing-tutorial\",\n",
" dataframe=spans_df,\n",
" evals_dataframe=evals_df,\n",
" model_id=model_id,\n",
" model_version=model_version,\n",
")\n",
"\n",
"# If successful, the server will return a status_code of 200\n",
Expand All @@ -469,13 +495,27 @@
" f\"❌ logging failed with response code {response.status_code}, {response.text}\"\n",
" )\n",
"else:\n",
" print(\"✅ You have successfully logged traces set to Arize\")"
" print(\"✅ You have successfully logged traces and evaluations to Arize\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "3.12.13",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.13"
}
},
"nbformat": 4,
Expand Down