From f871d70f7f0f31e95130a76df9c3ea30ddef4f8b Mon Sep 17 00:00:00 2001 From: "pedro.cordeiro" Date: Thu, 25 Jun 2026 11:45:06 -0400 Subject: [PATCH 01/12] Implement Restart Agent from GUI --- comp/core/gui/impl/platform_darwin.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/comp/core/gui/impl/platform_darwin.go b/comp/core/gui/impl/platform_darwin.go index ef6f88be834e..b4a4e92d71db 100644 --- a/comp/core/gui/impl/platform_darwin.go +++ b/comp/core/gui/impl/platform_darwin.go @@ -7,6 +7,7 @@ package guiimpl import ( "errors" + "os/exec" template "github.com/DataDog/datadog-agent/pkg/template/html" ) @@ -26,9 +27,9 @@ const instructionTemplate = `{{define "loginInstruction" }} {{end}}` func restartEnabled() bool { - return false + return true } func restart() error { - return errors.New("restarting the agent is not implemented on non-windows platforms") + return exec.Command("/bin/launchctl", "kickstart", "-k", "system/com.datadoghq.agent").Start() } From d67e3639089d5411a3b2ad632e432aa1b851fc38 Mon Sep 17 00:00:00 2001 From: "pedro.cordeiro" Date: Thu, 25 Jun 2026 14:31:53 -0400 Subject: [PATCH 02/12] Remove unused import --- comp/core/gui/impl/platform_darwin.go | 1 - 1 file changed, 1 deletion(-) diff --git a/comp/core/gui/impl/platform_darwin.go b/comp/core/gui/impl/platform_darwin.go index b4a4e92d71db..b71fef8e0e48 100644 --- a/comp/core/gui/impl/platform_darwin.go +++ b/comp/core/gui/impl/platform_darwin.go @@ -6,7 +6,6 @@ package guiimpl import ( - "errors" "os/exec" template "github.com/DataDog/datadog-agent/pkg/template/html" From 5710f3e142a7c6a8af0e3ebff6757583101c6c22 Mon Sep 17 00:00:00 2001 From: "pedro.cordeiro" Date: Thu, 25 Jun 2026 15:16:53 -0400 Subject: [PATCH 03/12] Implement code review --- comp/core/gui/impl/platform_darwin.go | 2 +- comp/core/gui/impl/platform_darwin_test.go | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/comp/core/gui/impl/platform_darwin.go 
b/comp/core/gui/impl/platform_darwin.go index b71fef8e0e48..b924c1a4a3dc 100644 --- a/comp/core/gui/impl/platform_darwin.go +++ b/comp/core/gui/impl/platform_darwin.go @@ -30,5 +30,5 @@ func restartEnabled() bool { } func restart() error { - return exec.Command("/bin/launchctl", "kickstart", "-k", "system/com.datadoghq.agent").Start() + return exec.Command("/bin/launchctl", "kickstart", "-k", "system/com.datadoghq.agent").Run() } diff --git a/comp/core/gui/impl/platform_darwin_test.go b/comp/core/gui/impl/platform_darwin_test.go index 83b170ffca96..33ec30b2f5fd 100644 --- a/comp/core/gui/impl/platform_darwin_test.go +++ b/comp/core/gui/impl/platform_darwin_test.go @@ -65,6 +65,11 @@ const expectedBody = `   Flare + +
From 310864ea6bab69a3bd9c2cb6a94c7edab8517f99 Mon Sep 17 00:00:00 2001 From: "pedro.cordeiro" Date: Fri, 26 Jun 2026 10:05:26 -0400 Subject: [PATCH 04/12] Restart sysprobe and agent through sysprobe call --- cmd/system-probe/api/agentrestart_darwin.go | 30 +++++++++++++++++++++ cmd/system-probe/api/agentrestart_others.go | 14 ++++++++++ cmd/system-probe/api/server.go | 4 +++ comp/core/gui/impl/platform_darwin.go | 22 +++++++++++++-- 4 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 cmd/system-probe/api/agentrestart_darwin.go create mode 100644 cmd/system-probe/api/agentrestart_others.go diff --git a/cmd/system-probe/api/agentrestart_darwin.go b/cmd/system-probe/api/agentrestart_darwin.go new file mode 100644 index 000000000000..420e460eeaef --- /dev/null +++ b/cmd/system-probe/api/agentrestart_darwin.go @@ -0,0 +1,30 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build darwin + +package api + +import ( + "net/http" + "os/exec" +) + +var agentServices = []string{ + "system/com.datadoghq.agent", + "system/com.datadoghq.sysprobe", +} + +func handleAgentRestart(w http.ResponseWriter, r *http.Request) { + for _, service := range agentServices { + cmd := exec.Command("/bin/launchctl", "kickstart", "-k", service) + out, err := cmd.CombinedOutput() + if err != nil { + http.Error(w, string(out), http.StatusInternalServerError) + return + } + } + w.WriteHeader(http.StatusOK) +} diff --git a/cmd/system-probe/api/agentrestart_others.go b/cmd/system-probe/api/agentrestart_others.go new file mode 100644 index 000000000000..0e213f545563 --- /dev/null +++ b/cmd/system-probe/api/agentrestart_others.go @@ -0,0 +1,14 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build !darwin + +package api + +import "net/http" + +func handleAgentRestart(w http.ResponseWriter, _ *http.Request) { + http.Error(w, "not supported on this platform", http.StatusNotImplemented) +} diff --git a/cmd/system-probe/api/server.go b/cmd/system-probe/api/server.go index f9763dbb4f40..dfb1d0610135 100644 --- a/cmd/system-probe/api/server.go +++ b/cmd/system-probe/api/server.go @@ -64,6 +64,10 @@ func StartServer(cfg *sysconfigtypes.Config, settings settings.Component, rcclie mux.HandleFunc("/debug/selinux_semodule_list", debug.HandleSelinuxSemoduleList) } + if runtime.GOOS == "darwin" { + mux.HandleFunc("POST /agent-restart", handleAgentRestart) + } + // Register /coverage endpoint for computing code coverage (e2ecoverage build only). coverage.SetupCoverageHandler(mux) diff --git a/comp/core/gui/impl/platform_darwin.go b/comp/core/gui/impl/platform_darwin.go index b924c1a4a3dc..7de938698eb8 100644 --- a/comp/core/gui/impl/platform_darwin.go +++ b/comp/core/gui/impl/platform_darwin.go @@ -6,8 +6,12 @@ package guiimpl import ( - "os/exec" + "fmt" + "io" + "net/http" + sysprobeclient "github.com/DataDog/datadog-agent/pkg/system-probe/api/client" + "github.com/DataDog/datadog-agent/pkg/util/defaultpaths" template "github.com/DataDog/datadog-agent/pkg/template/html" ) @@ -30,5 +34,19 @@ func restartEnabled() bool { } func restart() error { - return exec.Command("/bin/launchctl", "kickstart", "-k", "system/com.datadoghq.agent").Run() + socketPath := defaultpaths.GetDefaultSystemProbeAddress() + client := sysprobeclient.Get(socketPath) + + url := sysprobeclient.URL("/agent-restart") + resp, err := client.Post(url, "", nil) + if err != nil { + return fmt.Errorf("could not reach system-probe: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return 
fmt.Errorf("system-probe agent restart failed: %s", string(body)) + } + return nil } From b4cbda8a29496730d3af73eafccdfefdc4479ba5 Mon Sep 17 00:00:00 2001 From: "pedro.cordeiro" Date: Fri, 26 Jun 2026 11:27:52 -0400 Subject: [PATCH 05/12] Apply code review --- cmd/system-probe/api/agentrestart_darwin.go | 32 ++++++++++++++------- cmd/system-probe/api/server.go | 4 +-- comp/core/gui/impl/gui.go | 8 ++++++ comp/core/gui/impl/platform_darwin.go | 24 +++++++++++++--- 4 files changed, 50 insertions(+), 18 deletions(-) diff --git a/cmd/system-probe/api/agentrestart_darwin.go b/cmd/system-probe/api/agentrestart_darwin.go index 420e460eeaef..db62a33f9074 100644 --- a/cmd/system-probe/api/agentrestart_darwin.go +++ b/cmd/system-probe/api/agentrestart_darwin.go @@ -8,23 +8,33 @@ package api import ( + "fmt" "net/http" "os/exec" + "time" ) -var agentServices = []string{ - "system/com.datadoghq.agent", - "system/com.datadoghq.sysprobe", +func kickstart(service string) error { + cmd := exec.Command("/bin/launchctl", "kickstart", "-k", service) + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("%s", string(out)) + } + return nil } func handleAgentRestart(w http.ResponseWriter, r *http.Request) { - for _, service := range agentServices { - cmd := exec.Command("/bin/launchctl", "kickstart", "-k", service) - out, err := cmd.CombinedOutput() - if err != nil { - http.Error(w, string(out), http.StatusInternalServerError) - return - } - } + // Reply 200 immediately so the client receives the response before launchd + // tears down this process when sysprobe is restarted. w.WriteHeader(http.StatusOK) + if f, ok := w.(http.Flusher); ok { + f.Flush() + } + + // Restart both services after a short delay so the HTTP response has time + // to be delivered before launchd sends SIGTERM to this process. 
+ time.AfterFunc(100*time.Millisecond, func() { + _ = kickstart("system/com.datadoghq.agent") + _ = kickstart("system/com.datadoghq.sysprobe") + }) } diff --git a/cmd/system-probe/api/server.go b/cmd/system-probe/api/server.go index dfb1d0610135..ec0eca2026d8 100644 --- a/cmd/system-probe/api/server.go +++ b/cmd/system-probe/api/server.go @@ -64,9 +64,7 @@ func StartServer(cfg *sysconfigtypes.Config, settings settings.Component, rcclie mux.HandleFunc("/debug/selinux_semodule_list", debug.HandleSelinuxSemoduleList) } - if runtime.GOOS == "darwin" { - mux.HandleFunc("POST /agent-restart", handleAgentRestart) - } + mux.Handle("POST /agent-restart", deps.Ipc.HTTPMiddleware(http.HandlerFunc(handleAgentRestart))) // Register /coverage endpoint for computing code coverage (e2ecoverage build only). coverage.SetupCoverageHandler(mux) diff --git a/comp/core/gui/impl/gui.go b/comp/core/gui/impl/gui.go index 27ca5ece63cc..5534f787fffc 100644 --- a/comp/core/gui/impl/gui.go +++ b/comp/core/gui/impl/gui.go @@ -29,6 +29,7 @@ import ( "github.com/DataDog/datadog-agent/comp/core/flare" guidef "github.com/DataDog/datadog-agent/comp/core/gui/def" "github.com/DataDog/datadog-agent/comp/core/hostname/hostnameinterface/def" + ipc "github.com/DataDog/datadog-agent/comp/core/ipc/def" log "github.com/DataDog/datadog-agent/comp/core/log/def" "github.com/DataDog/datadog-agent/comp/core/status" compdef "github.com/DataDog/datadog-agent/comp/def" @@ -74,6 +75,7 @@ type Requires struct { Status status.Component Lc compdef.Lifecycle Hostname hostnameinterface.Component + Ipc ipc.Component } // Provides defines the output of the gui component. 
@@ -120,6 +122,12 @@ func NewComponent(deps Requires) Provides { sessionExpiration := deps.Config.GetDuration("GUI_session_expiration") g.auth = newAuthenticator(authToken, sessionExpiration) + setRestartAuthToken(deps.Ipc.GetAuthToken()) + socketPath := deps.Config.GetString("system_probe_config.sysprobe_socket") + if socketPath == "" { + socketPath = defaultpaths.GetDefaultSystemProbeAddress() + } + setSysprobeSocketPath(socketPath) // register the public routes publicRouter.HandleFunc("GET /{$}", renderIndexPage) diff --git a/comp/core/gui/impl/platform_darwin.go b/comp/core/gui/impl/platform_darwin.go index 7de938698eb8..92a38e8579da 100644 --- a/comp/core/gui/impl/platform_darwin.go +++ b/comp/core/gui/impl/platform_darwin.go @@ -11,7 +11,6 @@ import ( "net/http" sysprobeclient "github.com/DataDog/datadog-agent/pkg/system-probe/api/client" - "github.com/DataDog/datadog-agent/pkg/util/defaultpaths" template "github.com/DataDog/datadog-agent/pkg/template/html" ) @@ -29,16 +28,33 @@ const instructionTemplate = `{{define "loginInstruction" }}

Note: If you would like to adjust the GUI session timeout, you can modify the GUI_session_expiration parameter in datadog.yaml {{end}}` +// restartAuthToken and sysprobeSocketPath are set once at GUI component startup. +var restartAuthToken string +var sysprobeSocketPath string + +func setRestartAuthToken(token string) { + restartAuthToken = token +} + +func setSysprobeSocketPath(path string) { + sysprobeSocketPath = path +} + func restartEnabled() bool { return true } func restart() error { - socketPath := defaultpaths.GetDefaultSystemProbeAddress() - client := sysprobeclient.Get(socketPath) + client := sysprobeclient.Get(sysprobeSocketPath) url := sysprobeclient.URL("/agent-restart") - resp, err := client.Post(url, "", nil) + req, err := http.NewRequest(http.MethodPost, url, nil) + if err != nil { + return fmt.Errorf("could not build restart request: %w", err) + } + req.Header.Set("Authorization", "Bearer "+restartAuthToken) + + resp, err := client.Do(req) if err != nil { return fmt.Errorf("could not reach system-probe: %w", err) } From b11bcff94a45761acef4dc7f371584c3b619b5ed Mon Sep 17 00:00:00 2001 From: "pedro.cordeiro" Date: Fri, 26 Jun 2026 13:51:31 -0400 Subject: [PATCH 06/12] Add Tests --- cmd/system-probe/api/agentrestart_darwin.go | 2 +- .../api/agentrestart_darwin_test.go | 89 ++++++++++++++ .../api/agentrestart_others_test.go | 26 +++++ comp/core/gui/impl/platform_darwin_test.go | 109 ++++++++++++++++++ 4 files changed, 225 insertions(+), 1 deletion(-) create mode 100644 cmd/system-probe/api/agentrestart_darwin_test.go create mode 100644 cmd/system-probe/api/agentrestart_others_test.go diff --git a/cmd/system-probe/api/agentrestart_darwin.go b/cmd/system-probe/api/agentrestart_darwin.go index db62a33f9074..d7ae0cee37e7 100644 --- a/cmd/system-probe/api/agentrestart_darwin.go +++ b/cmd/system-probe/api/agentrestart_darwin.go @@ -14,7 +14,7 @@ import ( "time" ) -func kickstart(service string) error { +var kickstart = func(service string) error { cmd := 
exec.Command("/bin/launchctl", "kickstart", "-k", service) out, err := cmd.CombinedOutput() if err != nil { diff --git a/cmd/system-probe/api/agentrestart_darwin_test.go b/cmd/system-probe/api/agentrestart_darwin_test.go new file mode 100644 index 000000000000..ef3b083e7054 --- /dev/null +++ b/cmd/system-probe/api/agentrestart_darwin_test.go @@ -0,0 +1,89 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build darwin + +package api + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func withMockKickstart(t *testing.T, mock func(string) error) { + t.Helper() + orig := kickstart + kickstart = mock + t.Cleanup(func() { kickstart = orig }) +} + +func TestHandleAgentRestart_Returns200Immediately(t *testing.T) { + withMockKickstart(t, func(string) error { return nil }) + + req := httptest.NewRequest(http.MethodPost, "/agent-restart", nil) + rr := httptest.NewRecorder() + + handleAgentRestart(rr, req) + + assert.Equal(t, http.StatusOK, rr.Code) +} + +func TestHandleAgentRestart_KickstartsServicesAsynchronously(t *testing.T) { + var called []string + done := make(chan struct{}) + + withMockKickstart(t, func(svc string) error { + called = append(called, svc) + if len(called) == 2 { + close(done) + } + return nil + }) + + req := httptest.NewRequest(http.MethodPost, "/agent-restart", nil) + rr := httptest.NewRecorder() + + handleAgentRestart(rr, req) + + // Response must be 200 before the goroutine fires. 
+ assert.Equal(t, http.StatusOK, rr.Code) + + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("kickstart was not called within timeout") + } + + assert.Equal(t, []string{"system/com.datadoghq.agent", "system/com.datadoghq.sysprobe"}, called) +} + +func TestHandleAgentRestart_KickstartsAgentBeforeSysprobe(t *testing.T) { + var order []string + done := make(chan struct{}) + + withMockKickstart(t, func(svc string) error { + order = append(order, svc) + if len(order) == 2 { + close(done) + } + return nil + }) + + req := httptest.NewRequest(http.MethodPost, "/agent-restart", nil) + handleAgentRestart(httptest.NewRecorder(), req) + + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("kickstart was not called within timeout") + } + + assert.Equal(t, "system/com.datadoghq.agent", order[0], "agent should be restarted before sysprobe") + assert.Equal(t, "system/com.datadoghq.sysprobe", order[1]) +} diff --git a/cmd/system-probe/api/agentrestart_others_test.go b/cmd/system-probe/api/agentrestart_others_test.go new file mode 100644 index 000000000000..e691dfb48c89 --- /dev/null +++ b/cmd/system-probe/api/agentrestart_others_test.go @@ -0,0 +1,26 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +//go:build !darwin + +package api + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestHandleAgentRestart_NotSupportedOnNonDarwin(t *testing.T) { + req := httptest.NewRequest(http.MethodPost, "/agent-restart", nil) + rr := httptest.NewRecorder() + + handleAgentRestart(rr, req) + + assert.Equal(t, http.StatusNotImplemented, rr.Code) + assert.Contains(t, rr.Body.String(), "not supported on this platform") +} diff --git a/comp/core/gui/impl/platform_darwin_test.go b/comp/core/gui/impl/platform_darwin_test.go index 33ec30b2f5fd..00ae195c8046 100644 --- a/comp/core/gui/impl/platform_darwin_test.go +++ b/comp/core/gui/impl/platform_darwin_test.go @@ -6,12 +6,16 @@ package guiimpl import ( + "fmt" "io" + "net" "net/http" "net/http/httptest" + "os" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) const expectedBody = ` @@ -133,6 +137,111 @@ const expectedBody = ` ` +// startUnixServer starts an HTTP server on a temp Unix socket and returns its path. +// Uses os.CreateTemp under /tmp to stay within the 108-char Unix socket path limit on macOS. 
+func startUnixServer(t *testing.T, handler http.Handler) string { + t.Helper() + f, err := os.CreateTemp("", "gui-test-*.sock") + require.NoError(t, err) + socketPath := f.Name() + f.Close() + os.Remove(socketPath) + t.Cleanup(func() { os.Remove(socketPath) }) + + l, err := net.Listen("unix", socketPath) + require.NoError(t, err) + srv := &http.Server{Handler: handler} + go srv.Serve(l) //nolint:errcheck + t.Cleanup(func() { srv.Close() }) + return socketPath +} + +func TestRestartEnabled(t *testing.T) { + assert.True(t, restartEnabled()) +} + +func TestSetRestartAuthToken(t *testing.T) { + orig := restartAuthToken + t.Cleanup(func() { restartAuthToken = orig }) + + setRestartAuthToken("test-token") + assert.Equal(t, "test-token", restartAuthToken) +} + +func TestSetSysprobeSocketPath(t *testing.T) { + orig := sysprobeSocketPath + t.Cleanup(func() { sysprobeSocketPath = orig }) + + setSysprobeSocketPath("/tmp/test.sock") + assert.Equal(t, "/tmp/test.sock", sysprobeSocketPath) +} + +func TestRestart_Success(t *testing.T) { + socketPath := startUnixServer(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, http.MethodPost, r.Method) + assert.Equal(t, "/agent-restart", r.URL.Path) + assert.Equal(t, "Bearer test-token", r.Header.Get("Authorization")) + w.WriteHeader(http.StatusOK) + })) + + origSocket := sysprobeSocketPath + origToken := restartAuthToken + t.Cleanup(func() { + sysprobeSocketPath = origSocket + restartAuthToken = origToken + }) + setSysprobeSocketPath(socketPath) + setRestartAuthToken("test-token") + + err := restart() + assert.NoError(t, err) +} + +func TestRestart_SysprobeUnreachable(t *testing.T) { + orig := sysprobeSocketPath + t.Cleanup(func() { sysprobeSocketPath = orig }) + setSysprobeSocketPath("/tmp/gui-test-nonexistent.sock") + + err := restart() + require.Error(t, err) + assert.Contains(t, err.Error(), "could not reach system-probe") +} + +func TestRestart_SysprobeReturnsError(t *testing.T) { + socketPath := 
startUnixServer(t, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + http.Error(w, "launchctl failed", http.StatusInternalServerError) + })) + + orig := sysprobeSocketPath + t.Cleanup(func() { sysprobeSocketPath = orig }) + setSysprobeSocketPath(socketPath) + + err := restart() + require.Error(t, err) + assert.Contains(t, err.Error(), "system-probe agent restart failed") + assert.Contains(t, err.Error(), "launchctl failed") +} + +func TestRestart_SendsAuthorizationHeader(t *testing.T) { + var receivedAuth string + socketPath := startUnixServer(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedAuth = r.Header.Get("Authorization") + w.WriteHeader(http.StatusOK) + })) + + origSocket := sysprobeSocketPath + origToken := restartAuthToken + t.Cleanup(func() { + sysprobeSocketPath = origSocket + restartAuthToken = origToken + }) + setSysprobeSocketPath(socketPath) + setRestartAuthToken("secret-ipc-token") + + require.NoError(t, restart()) + assert.Equal(t, fmt.Sprintf("Bearer %s", "secret-ipc-token"), receivedAuth) +} + func TestRenderIndexPage(t *testing.T) { req, err := http.NewRequest("GET", "/", nil) if err != nil { From 3a312eeac8a1632ab7c0c99df59c8849671f13db Mon Sep 17 00:00:00 2001 From: "pedro.cordeiro" Date: Fri, 26 Jun 2026 14:31:59 -0400 Subject: [PATCH 07/12] Add Tests and Fix code review --- .../api/agentrestart_darwin_test.go | 38 +++++-------------- comp/core/gui/impl/platform_nix.go | 3 ++ comp/core/gui/impl/platform_windows.go | 3 ++ 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/cmd/system-probe/api/agentrestart_darwin_test.go b/cmd/system-probe/api/agentrestart_darwin_test.go index ef3b083e7054..43856916a006 100644 --- a/cmd/system-probe/api/agentrestart_darwin_test.go +++ b/cmd/system-probe/api/agentrestart_darwin_test.go @@ -34,13 +34,20 @@ func TestHandleAgentRestart_Returns200Immediately(t *testing.T) { assert.Equal(t, http.StatusOK, rr.Code) } -func 
TestHandleAgentRestart_KickstartsServicesAsynchronously(t *testing.T) { +func TestHandleAgentRestart_ServiceRestartSequence(t *testing.T) { + // expectedServices defines the exact order in which launchd services must be restarted. + // Agent must come before sysprobe because restarting sysprobe sends SIGTERM to this process. + expectedServices := []string{ + "system/com.datadoghq.agent", + "system/com.datadoghq.sysprobe", + } + var called []string done := make(chan struct{}) withMockKickstart(t, func(svc string) error { called = append(called, svc) - if len(called) == 2 { + if len(called) == len(expectedServices) { close(done) } return nil @@ -60,30 +67,5 @@ func TestHandleAgentRestart_KickstartsServicesAsynchronously(t *testing.T) { t.Fatal("kickstart was not called within timeout") } - assert.Equal(t, []string{"system/com.datadoghq.agent", "system/com.datadoghq.sysprobe"}, called) -} - -func TestHandleAgentRestart_KickstartsAgentBeforeSysprobe(t *testing.T) { - var order []string - done := make(chan struct{}) - - withMockKickstart(t, func(svc string) error { - order = append(order, svc) - if len(order) == 2 { - close(done) - } - return nil - }) - - req := httptest.NewRequest(http.MethodPost, "/agent-restart", nil) - handleAgentRestart(httptest.NewRecorder(), req) - - select { - case <-done: - case <-time.After(2 * time.Second): - t.Fatal("kickstart was not called within timeout") - } - - assert.Equal(t, "system/com.datadoghq.agent", order[0], "agent should be restarted before sysprobe") - assert.Equal(t, "system/com.datadoghq.sysprobe", order[1]) + assert.Equal(t, expectedServices, called) } diff --git a/comp/core/gui/impl/platform_nix.go b/comp/core/gui/impl/platform_nix.go index 0f93fa02b077..51d08296a9e0 100644 --- a/comp/core/gui/impl/platform_nix.go +++ b/comp/core/gui/impl/platform_nix.go @@ -22,6 +22,9 @@ const instructionTemplate = `{{define "loginInstruction" }}

Note: If you would like to adjust the GUI session timeout, you can modify the GUI_session_expiration parameter in datadog.yaml {{end}}` +func setRestartAuthToken(_ string) {} +func setSysprobeSocketPath(_ string) {} + func restartEnabled() bool { return false } diff --git a/comp/core/gui/impl/platform_windows.go b/comp/core/gui/impl/platform_windows.go index 4c4165c952d5..be1d7040e74e 100644 --- a/comp/core/gui/impl/platform_windows.go +++ b/comp/core/gui/impl/platform_windows.go @@ -33,6 +33,9 @@ const instructionTemplate = `{{define "loginInstruction" }}

Note: If you would like to adjust the GUI session timeout, you can modify the GUI_session_expiration parameter in datadog.yaml {{end}}` +func setRestartAuthToken(_ string) {} +func setSysprobeSocketPath(_ string) {} + func restartEnabled() bool { return true } From f5a405974d8379a251859ab8d55fcaa61d60468f Mon Sep 17 00:00:00 2001 From: "pedro.cordeiro" Date: Fri, 26 Jun 2026 15:20:40 -0400 Subject: [PATCH 08/12] Print log when the service fail to restart --- cmd/system-probe/api/agentrestart_darwin.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cmd/system-probe/api/agentrestart_darwin.go b/cmd/system-probe/api/agentrestart_darwin.go index d7ae0cee37e7..8b9dcb98796a 100644 --- a/cmd/system-probe/api/agentrestart_darwin.go +++ b/cmd/system-probe/api/agentrestart_darwin.go @@ -12,6 +12,8 @@ import ( "net/http" "os/exec" "time" + + "github.com/DataDog/datadog-agent/pkg/util/log" ) var kickstart = func(service string) error { @@ -34,7 +36,11 @@ func handleAgentRestart(w http.ResponseWriter, r *http.Request) { // Restart both services after a short delay so the HTTP response has time // to be delivered before launchd sends SIGTERM to this process. 
time.AfterFunc(100*time.Millisecond, func() { - _ = kickstart("system/com.datadoghq.agent") - _ = kickstart("system/com.datadoghq.sysprobe") + if err := kickstart("system/com.datadoghq.agent"); err != nil { + log.Errorf("agent-restart: failed to restart com.datadoghq.agent: %v", err) + } + if err := kickstart("system/com.datadoghq.sysprobe"); err != nil { + log.Errorf("agent-restart: failed to restart com.datadoghq.sysprobe: %v", err) + } }) } From 5f8690060f1ac1fb5e9e83f68d02e0d024080718 Mon Sep 17 00:00:00 2001 From: "pedro.cordeiro" Date: Fri, 26 Jun 2026 15:27:46 -0400 Subject: [PATCH 09/12] Fix code review --- cmd/system-probe/api/agentrestart_darwin.go | 4 ++- .../api/agentrestart_darwin_test.go | 26 +++++++++---------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/cmd/system-probe/api/agentrestart_darwin.go b/cmd/system-probe/api/agentrestart_darwin.go index 8b9dcb98796a..deace920499b 100644 --- a/cmd/system-probe/api/agentrestart_darwin.go +++ b/cmd/system-probe/api/agentrestart_darwin.go @@ -16,6 +16,8 @@ import ( "github.com/DataDog/datadog-agent/pkg/util/log" ) +var afterFunc = time.AfterFunc + var kickstart = func(service string) error { cmd := exec.Command("/bin/launchctl", "kickstart", "-k", service) out, err := cmd.CombinedOutput() @@ -35,7 +37,7 @@ func handleAgentRestart(w http.ResponseWriter, r *http.Request) { // Restart both services after a short delay so the HTTP response has time // to be delivered before launchd sends SIGTERM to this process. 
- time.AfterFunc(100*time.Millisecond, func() { + afterFunc(100*time.Millisecond, func() { if err := kickstart("system/com.datadoghq.agent"); err != nil { log.Errorf("agent-restart: failed to restart com.datadoghq.agent: %v", err) } diff --git a/cmd/system-probe/api/agentrestart_darwin_test.go b/cmd/system-probe/api/agentrestart_darwin_test.go index 43856916a006..5d8f5c84a0bc 100644 --- a/cmd/system-probe/api/agentrestart_darwin_test.go +++ b/cmd/system-probe/api/agentrestart_darwin_test.go @@ -23,7 +23,18 @@ func withMockKickstart(t *testing.T, mock func(string) error) { t.Cleanup(func() { kickstart = orig }) } +// withSyncAfterFunc replaces the timer so the callback runs synchronously inside +// handleAgentRestart, before the function returns. This prevents the real kickstart +// from being restored by t.Cleanup before the timer fires. +func withSyncAfterFunc(t *testing.T) { + t.Helper() + orig := afterFunc + afterFunc = func(_ time.Duration, f func()) *time.Timer { f(); return nil } + t.Cleanup(func() { afterFunc = orig }) +} + func TestHandleAgentRestart_Returns200Immediately(t *testing.T) { + withSyncAfterFunc(t) withMockKickstart(t, func(string) error { return nil }) req := httptest.NewRequest(http.MethodPost, "/agent-restart", nil) @@ -42,14 +53,11 @@ func TestHandleAgentRestart_ServiceRestartSequence(t *testing.T) { "system/com.datadoghq.sysprobe", } - var called []string - done := make(chan struct{}) + withSyncAfterFunc(t) + var called []string withMockKickstart(t, func(svc string) error { called = append(called, svc) - if len(called) == len(expectedServices) { - close(done) - } return nil }) @@ -58,14 +66,6 @@ func TestHandleAgentRestart_ServiceRestartSequence(t *testing.T) { handleAgentRestart(rr, req) - // Response must be 200 before the goroutine fires. 
assert.Equal(t, http.StatusOK, rr.Code) - - select { - case <-done: - case <-time.After(2 * time.Second): - t.Fatal("kickstart was not called within timeout") - } - assert.Equal(t, expectedServices, called) } From 405e19fb860ea8fc6b568541691f27be7da77c84 Mon Sep 17 00:00:00 2001 From: "pedro.cordeiro" Date: Fri, 26 Jun 2026 15:55:41 -0400 Subject: [PATCH 10/12] Fix CSRF Vulnerability --- comp/core/gui/impl/gui.go | 12 ++++ comp/core/gui/impl/gui_csrf_test.go | 102 ++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 comp/core/gui/impl/gui_csrf_test.go diff --git a/comp/core/gui/impl/gui.go b/comp/core/gui/impl/gui.go index 5534f787fffc..b8eebacaa43e 100644 --- a/comp/core/gui/impl/gui.go +++ b/comp/core/gui/impl/gui.go @@ -289,6 +289,7 @@ func (g *gui) getAccessToken(w http.ResponseWriter, r *http.Request) { Value: accessToken, Path: "/", HttpOnly: true, + SameSite: http.SameSiteStrictMode, MaxAge: 31536000, // 1 year }) http.Redirect(w, r, "/", http.StatusFound) @@ -300,6 +301,17 @@ func (g *gui) authMiddleware(next http.Handler) http.Handler { // Disable caching w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate") + // For state-changing requests, reject any cross-origin Origin header to prevent CSRF. + // Same-origin requests from the GUI itself either omit Origin or match the server address.
+ if r.Method != http.MethodGet && r.Method != http.MethodHead { + if origin := r.Header.Get("Origin"); origin != "" { + if origin != "http://"+g.address { + http.Error(w, "invalid origin", http.StatusForbidden) + return + } + } + } + cookie, _ := r.Cookie("accessToken") if cookie == nil { http.Error(w, "missing accessToken", http.StatusUnauthorized) diff --git a/comp/core/gui/impl/gui_csrf_test.go b/comp/core/gui/impl/gui_csrf_test.go new file mode 100644 index 000000000000..d79de5668824 --- /dev/null +++ b/comp/core/gui/impl/gui_csrf_test.go @@ -0,0 +1,102 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package guiimpl + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func newTestGUI(t *testing.T) *gui { + t.Helper() + return &gui{ + address: "localhost:5002", + auth: newAuthenticator("test-secret", 5*time.Minute), + intentTokens: make(map[string]bool), + } +} + +func TestGetAccessToken_CookieHasSameSiteStrict(t *testing.T) { + g := newTestGUI(t) + g.intentTokens["test-intent"] = true + + req := httptest.NewRequest(http.MethodGet, "/auth?intent=test-intent", nil) + rr := httptest.NewRecorder() + + g.getAccessToken(rr, req) + + var accessCookie *http.Cookie + for _, c := range rr.Result().Cookies() { + if c.Name == "accessToken" { + accessCookie = c + break + } + } + require.NotNil(t, accessCookie, "accessToken cookie must be set") + assert.Equal(t, http.SameSiteStrictMode, accessCookie.SameSite) + assert.True(t, accessCookie.HttpOnly) +} + +func TestAuthMiddleware_OriginCheck(t *testing.T) { + g := newTestGUI(t) + token := g.auth.GenerateAccessToken() + + okHandler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + 
w.WriteHeader(http.StatusOK) + }) + + tests := []struct { + name string + method string + origin string + expectedStatus int + }{ + { + name: "POST without Origin is allowed (same-origin browser request)", + method: http.MethodPost, + origin: "", + expectedStatus: http.StatusOK, + }, + { + name: "POST with matching Origin is allowed", + method: http.MethodPost, + origin: "http://localhost:5002", + expectedStatus: http.StatusOK, + }, + { + name: "POST with cross-origin Origin is rejected", + method: http.MethodPost, + origin: "http://evil.com", + expectedStatus: http.StatusForbidden, + }, + { + name: "GET with cross-origin Origin is allowed (safe method)", + method: http.MethodGet, + origin: "http://evil.com", + expectedStatus: http.StatusOK, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req := httptest.NewRequest(tt.method, "/agent/restart", nil) + req.AddCookie(&http.Cookie{Name: "accessToken", Value: token}) + if tt.origin != "" { + req.Header.Set("Origin", tt.origin) + } + + rr := httptest.NewRecorder() + g.authMiddleware(okHandler).ServeHTTP(rr, req) + + assert.Equal(t, tt.expectedStatus, rr.Code) + }) + } +} From 910ee19858cd72f13f9990f1b88e1ec6268da837 Mon Sep 17 00:00:00 2001 From: "pedro.cordeiro" Date: Fri, 26 Jun 2026 16:00:27 -0400 Subject: [PATCH 11/12] Fix Bearer Token --- comp/core/gui/impl/gui.go | 2 +- comp/core/gui/impl/platform_darwin.go | 12 ++++--- comp/core/gui/impl/platform_darwin_test.go | 40 ++++++++++++++-------- comp/core/gui/impl/platform_nix.go | 4 +-- comp/core/gui/impl/platform_windows.go | 4 +-- 5 files changed, 37 insertions(+), 25 deletions(-) diff --git a/comp/core/gui/impl/gui.go b/comp/core/gui/impl/gui.go index b8eebacaa43e..d0b405e9d0fd 100644 --- a/comp/core/gui/impl/gui.go +++ b/comp/core/gui/impl/gui.go @@ -122,7 +122,7 @@ func NewComponent(deps Requires) Provides { sessionExpiration := deps.Config.GetDuration("GUI_session_expiration") g.auth = newAuthenticator(authToken, sessionExpiration) -
setRestartAuthToken(deps.Ipc.GetAuthToken()) + setGetAuthToken(deps.Ipc.GetAuthToken) socketPath := deps.Config.GetString("system_probe_config.sysprobe_socket") if socketPath == "" { socketPath = defaultpaths.GetDefaultSystemProbeAddress() diff --git a/comp/core/gui/impl/platform_darwin.go b/comp/core/gui/impl/platform_darwin.go index 92a38e8579da..b43df36d2e1f 100644 --- a/comp/core/gui/impl/platform_darwin.go +++ b/comp/core/gui/impl/platform_darwin.go @@ -28,12 +28,14 @@ const instructionTemplate = `{{define "loginInstruction" }}

Note: If you would like to adjust the GUI session timeout, you can modify the GUI_session_expiration parameter in datadog.yaml {{end}}` -// restartAuthToken and sysprobeSocketPath are set once at GUI component startup. -var restartAuthToken string +// getAuthToken is a function that fetches the IPC auth token on each call, +// avoiding storage of the credential as a long-lived global. +// sysprobeSocketPath holds the Unix socket path, set once at startup. +var getAuthToken func() string var sysprobeSocketPath string -func setRestartAuthToken(token string) { - restartAuthToken = token +func setGetAuthToken(f func() string) { + getAuthToken = f } func setSysprobeSocketPath(path string) { @@ -52,7 +54,7 @@ func restart() error { if err != nil { return fmt.Errorf("could not build restart request: %w", err) } - req.Header.Set("Authorization", "Bearer "+restartAuthToken) + req.Header.Set("Authorization", "Bearer "+getAuthToken()) resp, err := client.Do(req) if err != nil { diff --git a/comp/core/gui/impl/platform_darwin_test.go b/comp/core/gui/impl/platform_darwin_test.go index 00ae195c8046..87792424f141 100644 --- a/comp/core/gui/impl/platform_darwin_test.go +++ b/comp/core/gui/impl/platform_darwin_test.go @@ -160,12 +160,12 @@ func TestRestartEnabled(t *testing.T) { assert.True(t, restartEnabled()) } -func TestSetRestartAuthToken(t *testing.T) { - orig := restartAuthToken - t.Cleanup(func() { restartAuthToken = orig }) +func TestSetGetAuthToken(t *testing.T) { + orig := getAuthToken + t.Cleanup(func() { getAuthToken = orig }) - setRestartAuthToken("test-token") - assert.Equal(t, "test-token", restartAuthToken) + setGetAuthToken(func() string { return "test-token" }) + assert.Equal(t, "test-token", getAuthToken()) } func TestSetSysprobeSocketPath(t *testing.T) { @@ -185,22 +185,27 @@ func TestRestart_Success(t *testing.T) { })) origSocket := sysprobeSocketPath - origToken := restartAuthToken + origToken := getAuthToken t.Cleanup(func() { sysprobeSocketPath = origSocket 
- restartAuthToken = origToken + getAuthToken = origToken }) setSysprobeSocketPath(socketPath) - setRestartAuthToken("test-token") + setGetAuthToken(func() string { return "test-token" }) err := restart() assert.NoError(t, err) } func TestRestart_SysprobeUnreachable(t *testing.T) { - orig := sysprobeSocketPath - t.Cleanup(func() { sysprobeSocketPath = orig }) + origSocket := sysprobeSocketPath + origToken := getAuthToken + t.Cleanup(func() { + sysprobeSocketPath = origSocket + getAuthToken = origToken + }) setSysprobeSocketPath("/tmp/gui-test-nonexistent.sock") + setGetAuthToken(func() string { return "token" }) err := restart() require.Error(t, err) @@ -212,9 +217,14 @@ func TestRestart_SysprobeReturnsError(t *testing.T) { http.Error(w, "launchctl failed", http.StatusInternalServerError) })) - orig := sysprobeSocketPath - t.Cleanup(func() { sysprobeSocketPath = orig }) + origSocket := sysprobeSocketPath + origToken := getAuthToken + t.Cleanup(func() { + sysprobeSocketPath = origSocket + getAuthToken = origToken + }) setSysprobeSocketPath(socketPath) + setGetAuthToken(func() string { return "token" }) err := restart() require.Error(t, err) @@ -230,13 +240,13 @@ func TestRestart_SendsAuthorizationHeader(t *testing.T) { })) origSocket := sysprobeSocketPath - origToken := restartAuthToken + origToken := getAuthToken t.Cleanup(func() { sysprobeSocketPath = origSocket - restartAuthToken = origToken + getAuthToken = origToken }) setSysprobeSocketPath(socketPath) - setRestartAuthToken("secret-ipc-token") + setGetAuthToken(func() string { return "secret-ipc-token" }) require.NoError(t, restart()) assert.Equal(t, fmt.Sprintf("Bearer %s", "secret-ipc-token"), receivedAuth) diff --git a/comp/core/gui/impl/platform_nix.go b/comp/core/gui/impl/platform_nix.go index 51d08296a9e0..c499cf9604d4 100644 --- a/comp/core/gui/impl/platform_nix.go +++ b/comp/core/gui/impl/platform_nix.go @@ -22,8 +22,8 @@ const instructionTemplate = `{{define "loginInstruction" }}

Note: If you would like to adjust the GUI session timeout, you can modify the GUI_session_expiration parameter in datadog.yaml {{end}}` -func setRestartAuthToken(_ string) {} -func setSysprobeSocketPath(_ string) {} +func setGetAuthToken(_ func() string) {} +func setSysprobeSocketPath(_ string) {} func restartEnabled() bool { return false diff --git a/comp/core/gui/impl/platform_windows.go b/comp/core/gui/impl/platform_windows.go index be1d7040e74e..8c1b44d6f1f2 100644 --- a/comp/core/gui/impl/platform_windows.go +++ b/comp/core/gui/impl/platform_windows.go @@ -33,8 +33,8 @@ const instructionTemplate = `{{define "loginInstruction" }}

Note: If you would like to adjust the GUI session timeout, you can modify the GUI_session_expiration parameter in datadog.yaml {{end}}` -func setRestartAuthToken(_ string) {} -func setSysprobeSocketPath(_ string) {} +func setGetAuthToken(_ func() string) {} +func setSysprobeSocketPath(_ string) {} func restartEnabled() bool { return true From be3f3085787e1e98ff1c93736f47ff62c7ec8d13 Mon Sep 17 00:00:00 2001 From: "pedro.cordeiro" Date: Fri, 26 Jun 2026 16:02:35 -0400 Subject: [PATCH 12/12] Fix response body --- comp/core/gui/impl/platform_darwin.go | 4 +--- comp/core/gui/impl/platform_darwin_test.go | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/comp/core/gui/impl/platform_darwin.go b/comp/core/gui/impl/platform_darwin.go index b43df36d2e1f..d381a920eefc 100644 --- a/comp/core/gui/impl/platform_darwin.go +++ b/comp/core/gui/impl/platform_darwin.go @@ -7,7 +7,6 @@ package guiimpl import ( "fmt" - "io" "net/http" sysprobeclient "github.com/DataDog/datadog-agent/pkg/system-probe/api/client" @@ -63,8 +62,7 @@ func restart() error { defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - body, _ := io.ReadAll(resp.Body) - return fmt.Errorf("system-probe agent restart failed: %s", string(body)) + return fmt.Errorf("system-probe agent restart failed with status %d; see system-probe logs for details", resp.StatusCode) } return nil } diff --git a/comp/core/gui/impl/platform_darwin_test.go b/comp/core/gui/impl/platform_darwin_test.go index 87792424f141..da57e487daaf 100644 --- a/comp/core/gui/impl/platform_darwin_test.go +++ b/comp/core/gui/impl/platform_darwin_test.go @@ -228,8 +228,7 @@ func TestRestart_SysprobeReturnsError(t *testing.T) { err := restart() require.Error(t, err) - assert.Contains(t, err.Error(), "system-probe agent restart failed") - assert.Contains(t, err.Error(), "launchctl failed") + assert.Contains(t, err.Error(), "system-probe agent restart failed with status 500") } func TestRestart_SendsAuthorizationHeader(t *testing.T) 
{