diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e43b0f9 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.DS_Store diff --git a/Arize_Tutorials/Tracing/Arize_Tutorial_Export_Phoenix_Traces_and_Evals_to_Arize.ipynb b/Arize_Tutorials/Tracing/Arize_Tutorial_Export_Phoenix_Traces_and_Evals_to_Arize.ipynb index d7a635c..a7ad950 100644 --- a/Arize_Tutorials/Tracing/Arize_Tutorial_Export_Phoenix_Traces_and_Evals_to_Arize.ipynb +++ b/Arize_Tutorials/Tracing/Arize_Tutorial_Export_Phoenix_Traces_and_Evals_to_Arize.ipynb @@ -43,16 +43,18 @@ "outputs": [], "source": [ "# Dependencies needed to build the Llama Index RAG application\n", - "!pip install -qq gcsfs llama-index-llms-openai llama-index-embeddings-openai\n", + "%pip install -qq gcsfs llama-index-llms-openai llama-index-embeddings-openai\n", "\n", "# Dependencies needed to export spans and send them to our collector: Phoenix\n", - "!pip install -qq \"llama-index-callbacks-arize-phoenix>=0.1.2\"\n", + "%pip install -qq \"llama-index-callbacks-arize-phoenix>=0.1.2\"\n", "\n", "# Install Phoenix to generate evaluations\n", - "!pip install -qq \"arize-phoenix[evals]\"\n", + "%pip install -qq arize-phoenix-client arize-phoenix-evals\n", "\n", "# Install Arize SDK with `Tracing` extra dependencies to export Phoenix data to Arize\n", - "!pip install -qq \"arize[Tracing]>=7.12.0\"" + "%pip install -qq \"arize[Tracing]>=7.12.0\"\n", + "\n", + "%pip install -qq \"ipywidgets\"" ] }, { @@ -70,9 +72,14 @@ "metadata": {}, "outputs": [], "source": [ + "import os\n", + "\n", "import phoenix as px\n", "\n", - "session = px.launch_app()" + "session = px.launch_app()\n", + "\n", + "# Update the environment variable to point to the Phoenix instance that we just launched\n", + "os.environ[\"PHOENIX_COLLECTOR_ENDPOINT\"] = session.url" ] }, { @@ -168,7 +175,7 @@ "from llama_index.embeddings.openai import OpenAIEmbedding\n", "\n", "\n", - "Settings.llm = OpenAI(model=\"gpt-4-turbo-preview\")\n", + "Settings.llm = OpenAI(model=\"gpt-5-nano\")\n", "Settings.embed_model = OpenAIEmbedding(model=\"text-embedding-ada-002\")\n", "index = load_index_from_storage(\n", " storage_context,\n", @@ -275,12 +282,12 @@ "metadata": {}, "outputs": [], "source": [ - "from phoenix.session.evaluation import get_qa_with_reference\n", + "from phoenix.client import Client\n", "\n", - "px_client = px.Client() # Define phoenix client\n", - "queries_df = get_qa_with_reference(\n", - " px_client\n", - ") # Get question, answer and reference data from phoenix" + "px_client = Client() # Define phoenix client\n", + "queries_df = px_client.spans.get_spans_dataframe(root_spans_only=True) # Get root spans (queries) as a dataframe\n", + "\n", + "queries_df.head()" ] }, { @@ -314,23 +321,29 @@ "metadata": {}, "outputs": [], "source": [ - "from phoenix.evals import (\n", - " HallucinationEvaluator,\n", - " OpenAIModel,\n", - " QAEvaluator,\n", - " run_evals,\n", + "from phoenix.evals.metrics.correctness import CorrectnessEvaluator\n", + "from phoenix.evals import LLM, async_evaluate_dataframe\n", + "\n", + "eval_model = LLM(\n", + " provider=\"openai\",\n", + " model=\"gpt-5-nano\",\n", ")\n", "\n", - "eval_model = OpenAIModel(\n", - " model=\"gpt-4-turbo-preview\",\n", + "correctness_evaluator = CorrectnessEvaluator(eval_model)\n", + "\n", + "eval_df = queries_df[[\"context.span_id\", \"attributes.input.value\", \"attributes.output.value\"]].copy()\n", + "eval_df.rename(\n", + " columns={\n", + " \"attributes.input.value\": \"input\",\n", + " \"attributes.output.value\": \"output\",\n", + " },\n", + " inplace=True,\n", ")\n", - "hallucination_evaluator = HallucinationEvaluator(eval_model)\n", - "qa_correctness_evaluator = QAEvaluator(eval_model)\n", + "eval_df.set_index(\"context.span_id\", inplace=True)\n", "\n", - "hallucination_eval_df, qa_correctness_eval_df = run_evals(\n", - " dataframe=queries_df,\n", - " evaluators=[hallucination_evaluator, qa_correctness_evaluator],\n", - " provide_explanation=True,\n", + "correctness_eval_df = await async_evaluate_dataframe(\n", + " dataframe=eval_df,\n", + " evaluators=[correctness_evaluator]\n", ")" ] }, @@ -347,14 +360,10 @@ "metadata": {}, "outputs": [], "source": [ - "from phoenix.trace import SpanEvaluations\n", + "from phoenix.evals.utils import to_annotation_dataframe\n", "\n", - "px_client.log_evaluations(\n", - " SpanEvaluations(eval_name=\"Hallucination\", dataframe=hallucination_eval_df),\n", - " SpanEvaluations(\n", - " eval_name=\"QA_Correctness\", dataframe=qa_correctness_eval_df\n", - " ),\n", - ")" + "annotation_df = to_annotation_dataframe(dataframe=correctness_eval_df)\n", + "Client().spans.log_span_annotations_dataframe(dataframe=annotation_df)" ] }, { @@ -379,8 +388,7 @@ "metadata": {}, "outputs": [], "source": [ - "tds = px_client.get_trace_dataset()\n", - "spans_df = tds.get_spans_dataframe(include_evaluations=False)\n", + "spans_df = px_client.spans.get_spans_dataframe()\n", "spans_df.head()" ] }, @@ -390,7 +398,26 @@ "metadata": {}, "outputs": [], "source": [ - "evals_df = tds.get_evals_dataframe()\n", + "evals_df = (\n", + " px_client.spans.get_span_annotations_dataframe(spans_dataframe=spans_df)\n", + " .reset_index()\n", + " .rename(\n", + " columns={\n", + " \"span_id\": \"context.span_id\",\n", + " \"result.label\": \"eval.Correctness.label\",\n", + " \"result.score\": \"eval.Correctness.score\",\n", + " \"result.explanation\": \"eval.Correctness.explanation\",\n", + " }\n", + " )\n", + " .set_index(\"context.span_id\")[\n", + " [\n", + " \"eval.Correctness.label\",\n", + " \"eval.Correctness.score\",\n", + " \"eval.Correctness.explanation\",\n", + " ]\n", + " ]\n", + ")\n", + "\n", "evals_df.head()" ] }, @@ -407,7 +434,7 @@ "metadata": {}, "outputs": [], "source": [ - "from arize.pandas.logger import Client" + "from arize import ArizeClient" ] }, { @@ -435,8 +462,7 @@ " \"✅ Import and Setup Arize Client Done! Now we can start using Arize!\"\n", " )\n", "\n", - "arize_client = Client(\n", - " space_id=SPACE_ID,\n", + "arize_client = ArizeClient(\n", " api_key=API_KEY,\n", ")\n", "model_id = \"tutorial-tracing-llama-index-rag-export-from-phoenix\"\n", @@ -456,11 +482,11 @@ "metadata": {}, "outputs": [], "source": [ - "response = arize_client.log_spans(\n", + "response = arize_client.spans.log(\n", + " space_id=SPACE_ID,\n", + " project_name=\"tracing-tutorial\",\n", " dataframe=spans_df,\n", " evals_dataframe=evals_df,\n", - " model_id=model_id,\n", - " model_version=model_version,\n", ")\n", "\n", "# If successful, the server will return a status_code of 200\n", @@ -469,13 +495,27 @@ " f\"❌ logging failed with response code {response.status_code}, {response.text}\"\n", " )\n", "else:\n", - " print(\"✅ You have successfully logged traces set to Arize\")" + " print(\"✅ You have successfully logged traces and evaluations to Arize\")" ] } ], "metadata": { + "kernelspec": { + "display_name": "3.12.13", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.13" } }, "nbformat": 4,