arm · yejseo01 · Jun 25, 2026 · Jun 24, 2026 · Jun 25, 2026 · Jun 25, 2026
@@ -42,18 +42,10 @@
       "ref": "c2b2e4a672cda67832372f77aeb1d1f71beee9a7"
     },
     {
-      "name": "Topo CPU AI Chat",
-      "description": "Complete LLM chat application optimized for Arm CPU inference.\n\nThis project demonstrates running large language models on CPU\nusing llama.cpp compiled with Arm baseline optimizations and\naccelerated using NEON SIMD and SVE (when supported and enabled).\n\nThe stack includes:\n- llama.cpp server with Arm NEON optimizations (SVE optional)\n- Quantized SmolLM2-135M-Instruct model bundled in the image\n- Simple web-based chat interface\n- No GPU required - pure CPU inference\n\nPerfect for demos and testing! The bundled SmolLM2-135M-Instruct model\nallows the project to run immediately without downloading additional models.\n\nIdeal for testing LLM workloads on Arm hardware without GPU dependencies,\nshowcasing how far you can push NEON acceleration. Rebuild with SVE enabled\nwhen wider vectors are available.\n",
-      "features": [
-        "SVE",
-        "NEON"
-      ],
+      "name": "Topo llama.cpp WebUI Chat",
+      "description": "LLM chat application with Arm CPU inference provided by llama.cpp.\n\nThis project demonstrates running large language models on CPU\nwith inference provided by the llama.cpp server.\n\nThe upstream Linux Arm64 image includes architecture-specific CPU\nbackend variants for Armv8.0 baseline, Armv8.2 dot product/FP16/SVE,\nArmv8.6 int8 matrix multiply/SVE2, and Armv9.2 SME-capable CPUs.\n\nThe stack includes:\n- llama.cpp\n- Quantized SmolLM2-135M-Instruct default model loaded through llama.cpp\n- Built-in web chat interface\n- No GPU required - pure CPU inference\n\nPerfect for demos and testing! The default SmolLM2-135M-Instruct model\nreference gives the project a small, ready-to-use model.\n\nIdeal for testing LLM workloads on Arm hardware without GPU dependencies\nwhile avoiding a source build during Template deployment.\n",
+      "features": null,
       "args": {
-        "ENABLE_SVE": {
-          "description": "Enables building with SVE instructions (OFF/ON)",
-          "default": "OFF",
-          "example": "ON"
-        },
         "MODEL": {
           "description": "Model artifact reference. Use a Hugging Face GGUF repo ID, repo ID plus filename separated by ':', or a direct .gguf URL.",
           "default": "unsloth/SmolLM2-135M-Instruct-GGUF",
@@ -64,8 +56,8 @@
           }
         }
       },
-      "url": "https://github.com/Arm-Examples/topo-cpu-ai-chat.git",
-      "ref": "4eeaca0d65d952d4555cf6ac9cbecf75884a3812"
+      "url": "https://github.com/Arm-Examples/topo-llama-web-ui.git",
+      "ref": "99561df2838081fc7f7af9229b8e677b898bbd88"
     },
     {
       "name": "SIMD Visual Benchmark",

@@ -1,6 +1,6 @@
 [
 	{"repo": "Arm-Examples/topo-welcome", "sha": "8303e66db59a7a11e64877121f3db1b688d2011f"},
 	{"repo": "Arm-Examples/topo-lightbulb-moment", "sha": "c2b2e4a672cda67832372f77aeb1d1f71beee9a7"},
-	{"repo": "Arm-Examples/topo-cpu-ai-chat", "sha": "4eeaca0d65d952d4555cf6ac9cbecf75884a3812"},
+	{"repo": "Arm-Examples/topo-llama-web-ui", "sha": "99561df2838081fc7f7af9229b8e677b898bbd88"},
 	{"repo": "Arm-Examples/topo-simd-visual-benchmark", "sha": "f0cd31621ce79b4643df7e9bdd8eff26c20b338c"}
 ]