Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions cmd/system-probe/api/agentrestart_darwin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

//go:build darwin

package api

import (
"fmt"
"net/http"
"os/exec"
"time"

"github.com/DataDog/datadog-agent/pkg/util/log"
)

var afterFunc = time.AfterFunc

var kickstart = func(service string) error {
cmd := exec.Command("/bin/launchctl", "kickstart", "-k", service)
out, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("%s", string(out))
}
return nil
}

func handleAgentRestart(w http.ResponseWriter, r *http.Request) {
// Reply 200 immediately so the client receives the response before launchd
// tears down this process when sysprobe is restarted.
w.WriteHeader(http.StatusOK)
if f, ok := w.(http.Flusher); ok {
f.Flush()
Comment on lines +33 to +35

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Report launchctl failures before returning success

When launchctl kickstart fails, for example because the service label is not bootstrapped, this sends and flushes a 200 before either restart is attempted; the later failures are only logged. The GUI treats the 200 response from /agent-restart as Success, so users can be told the restart worked even though neither service was restarted. Please perform the agent restart before the OK response, and only defer the self-terminating sysprobe restart, or otherwise surface failures before returning success.

Useful? React with 👍 / 👎.

}

// Restart both services after a short delay so the HTTP response has time
// to be delivered before launchd sends SIGTERM to this process.
afterFunc(100*time.Millisecond, func() {
if err := kickstart("system/com.datadoghq.agent"); err != nil {
log.Errorf("agent-restart: failed to restart com.datadoghq.agent: %v", err)
}
if err := kickstart("system/com.datadoghq.sysprobe"); err != nil {
log.Errorf("agent-restart: failed to restart com.datadoghq.sysprobe: %v", err)
}
})
}
71 changes: 71 additions & 0 deletions cmd/system-probe/api/agentrestart_darwin_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

//go:build darwin

package api

import (
"net/http"
"net/http/httptest"
"testing"
"time"

"github.com/stretchr/testify/assert"
)

func withMockKickstart(t *testing.T, mock func(string) error) {
t.Helper()
orig := kickstart
kickstart = mock
t.Cleanup(func() { kickstart = orig })
}

// withSyncAfterFunc replaces the timer so the callback runs synchronously inside
// handleAgentRestart, before the function returns. This prevents the real kickstart
// from being restored by t.Cleanup before the timer fires.
func withSyncAfterFunc(t *testing.T) {
t.Helper()
orig := afterFunc
afterFunc = func(_ time.Duration, f func()) *time.Timer { f(); return nil }
t.Cleanup(func() { afterFunc = orig })
}

func TestHandleAgentRestart_Returns200Immediately(t *testing.T) {
withSyncAfterFunc(t)
withMockKickstart(t, func(string) error { return nil })

req := httptest.NewRequest(http.MethodPost, "/agent-restart", nil)
rr := httptest.NewRecorder()

handleAgentRestart(rr, req)
Comment thread
PedroCordeiroDataDog marked this conversation as resolved.

assert.Equal(t, http.StatusOK, rr.Code)
}

func TestHandleAgentRestart_ServiceRestartSequence(t *testing.T) {
// expectedServices defines the exact order in which launchd services must be restarted.
// Agent must come before sysprobe because restarting sysprobe sends SIGTERM to this process.
expectedServices := []string{
"system/com.datadoghq.agent",
"system/com.datadoghq.sysprobe",
}

withSyncAfterFunc(t)

var called []string
withMockKickstart(t, func(svc string) error {
called = append(called, svc)
return nil
})

req := httptest.NewRequest(http.MethodPost, "/agent-restart", nil)
rr := httptest.NewRecorder()

handleAgentRestart(rr, req)

assert.Equal(t, http.StatusOK, rr.Code)
assert.Equal(t, expectedServices, called)
}
14 changes: 14 additions & 0 deletions cmd/system-probe/api/agentrestart_others.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

//go:build !darwin

package api

import "net/http"

func handleAgentRestart(w http.ResponseWriter, _ *http.Request) {
http.Error(w, "not supported on this platform", http.StatusNotImplemented)
}
26 changes: 26 additions & 0 deletions cmd/system-probe/api/agentrestart_others_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

//go:build !darwin

package api

import (
"net/http"
"net/http/httptest"
"testing"

"github.com/stretchr/testify/assert"
)

func TestHandleAgentRestart_NotSupportedOnNonDarwin(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/agent-restart", nil)
rr := httptest.NewRecorder()

handleAgentRestart(rr, req)

assert.Equal(t, http.StatusNotImplemented, rr.Code)
assert.Contains(t, rr.Body.String(), "not supported on this platform")
}
2 changes: 2 additions & 0 deletions cmd/system-probe/api/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ func StartServer(cfg *sysconfigtypes.Config, settings settings.Component, rcclie
mux.HandleFunc("/debug/selinux_semodule_list", debug.HandleSelinuxSemoduleList)
}

mux.Handle("POST /agent-restart", deps.Ipc.HTTPMiddleware(http.HandlerFunc(handleAgentRestart)))

// Register /coverage endpoint for computing code coverage (e2ecoverage build only).
coverage.SetupCoverageHandler(mux)

Expand Down
20 changes: 20 additions & 0 deletions comp/core/gui/impl/gui.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/DataDog/datadog-agent/comp/core/flare"
guidef "github.com/DataDog/datadog-agent/comp/core/gui/def"
"github.com/DataDog/datadog-agent/comp/core/hostname/hostnameinterface/def"
ipc "github.com/DataDog/datadog-agent/comp/core/ipc/def"
log "github.com/DataDog/datadog-agent/comp/core/log/def"
"github.com/DataDog/datadog-agent/comp/core/status"
compdef "github.com/DataDog/datadog-agent/comp/def"
Expand Down Expand Up @@ -74,6 +75,7 @@ type Requires struct {
Status status.Component
Lc compdef.Lifecycle
Hostname hostnameinterface.Component
Ipc ipc.Component
}

// Provides defines the output of the gui component.
Expand Down Expand Up @@ -120,6 +122,12 @@ func NewComponent(deps Requires) Provides {

sessionExpiration := deps.Config.GetDuration("GUI_session_expiration")
g.auth = newAuthenticator(authToken, sessionExpiration)
setGetAuthToken(deps.Ipc.GetAuthToken)
socketPath := deps.Config.GetString("system_probe_config.sysprobe_socket")

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Use the system-probe config for the socket path

In macOS deployments where system-probe.yaml or DD_SYSPROBE_SOCKET overrides the system-probe socket, this reads the core Agent config instead of the system-probe config component. That setting is bound on the system-probe config side, so the GUI can fall back to the default path and restart() posts to the wrong Unix socket, making restart fail even though system-probe is running on the configured socket. Please source this from the sysprobe config component.

Useful? React with 👍 / 👎.

if socketPath == "" {
socketPath = defaultpaths.GetDefaultSystemProbeAddress()
}
setSysprobeSocketPath(socketPath)

// register the public routes
publicRouter.HandleFunc("GET /{$}", renderIndexPage)
Expand Down Expand Up @@ -281,6 +289,7 @@ func (g *gui) getAccessToken(w http.ResponseWriter, r *http.Request) {
Value: accessToken,
Path: "/",
HttpOnly: true,
SameSite: http.SameSiteStrictMode,
MaxAge: 31536000, // 1 year
})
http.Redirect(w, r, "/", http.StatusFound)
Expand All @@ -292,6 +301,17 @@ func (g *gui) authMiddleware(next http.Handler) http.Handler {
// Disable caching
w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate")

// For state-changing requests, reject any cross-origin Origin header to prevent CSRF.
// Same-origin requests from the GUI itself either omit Origin or match the server address.
if r.Method != http.MethodGet && r.Method != http.MethodHead {
if origin := r.Header.Get("Origin"); origin != "" {
if origin != "http://"+g.address {
http.Error(w, "invalid origin", http.StatusForbidden)
return
}
}
}

cookie, _ := r.Cookie("accessToken")
if cookie == nil {
http.Error(w, "missing accessToken", http.StatusUnauthorized)
Expand Down
102 changes: 102 additions & 0 deletions comp/core/gui/impl/gui_csrf_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

package guiimpl

import (
"net/http"
"net/http/httptest"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func newTestGUI(t *testing.T) *gui {
t.Helper()
return &gui{
address: "localhost:5002",
auth: newAuthenticator("test-secret", 5*time.Minute),
intentTokens: make(map[string]bool),
}
}

func TestGetAccessToken_CookieHasSameSiteStrict(t *testing.T) {
g := newTestGUI(t)
g.intentTokens["test-intent"] = true

req := httptest.NewRequest(http.MethodGet, "/auth?intent=test-intent", nil)
rr := httptest.NewRecorder()

g.getAccessToken(rr, req)

var accessCookie *http.Cookie
for _, c := range rr.Result().Cookies() {
if c.Name == "accessToken" {
accessCookie = c
break
}
}
require.NotNil(t, accessCookie, "accessToken cookie must be set")
assert.Equal(t, http.SameSiteStrictMode, accessCookie.SameSite)
assert.True(t, accessCookie.HttpOnly)
}

func TestAuthMiddleware_OriginCheck(t *testing.T) {
g := newTestGUI(t)
token := g.auth.GenerateAccessToken()

okHandler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
})

tests := []struct {
name string
method string
origin string
expectedStatus int
}{
{
name: "POST without Origin is allowed (same-origin browser request)",
method: http.MethodPost,
origin: "",
expectedStatus: http.StatusOK,
},
{
name: "POST with matching Origin is allowed",
method: http.MethodPost,
origin: "http://localhost:5002",
expectedStatus: http.StatusOK,
},
{
name: "POST with cross-origin Origin is rejected",
method: http.MethodPost,
origin: "http://evil.com",
expectedStatus: http.StatusForbidden,
},
{
name: "GET with cross-origin Origin is allowed (safe method)",
method: http.MethodGet,
origin: "http://evil.com",
expectedStatus: http.StatusOK,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
req := httptest.NewRequest(tt.method, "/agent/restart", nil)
req.AddCookie(&http.Cookie{Name: "accessToken", Value: token})
if tt.origin != "" {
req.Header.Set("Origin", tt.origin)
}

rr := httptest.NewRecorder()
g.authMiddleware(okHandler).ServeHTTP(rr, req)

assert.Equal(t, tt.expectedStatus, rr.Code)
})
}
}
40 changes: 37 additions & 3 deletions comp/core/gui/impl/platform_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
package guiimpl

import (
"errors"
"fmt"
"net/http"

sysprobeclient "github.com/DataDog/datadog-agent/pkg/system-probe/api/client"
template "github.com/DataDog/datadog-agent/pkg/template/html"
)

Expand All @@ -25,10 +27,42 @@ const instructionTemplate = `{{define "loginInstruction" }}
<p>Note: If you would like to adjust the GUI session timeout, you can modify the <code>GUI_session_expiration</code> parameter in <code>datadog.yaml</code>
{{end}}`

// getAuthToken is a function that fetches the IPC auth token on each call,
// avoiding storage of the credential as a long-lived global.
// sysprobeSocketPath holds the Unix socket path, set once at startup.
var getAuthToken func() string
var sysprobeSocketPath string

func setGetAuthToken(f func() string) {
getAuthToken = f
}

func setSysprobeSocketPath(path string) {
sysprobeSocketPath = path
}

func restartEnabled() bool {
return false
return true
Comment thread
PedroCordeiroDataDog marked this conversation as resolved.
}
Comment thread
PedroCordeiroDataDog marked this conversation as resolved.

func restart() error {
return errors.New("restarting the agent is not implemented on non-windows platforms")
client := sysprobeclient.Get(sysprobeSocketPath)

url := sysprobeclient.URL("/agent-restart")
req, err := http.NewRequest(http.MethodPost, url, nil)
if err != nil {
return fmt.Errorf("could not build restart request: %w", err)
}
req.Header.Set("Authorization", "Bearer "+getAuthToken())

resp, err := client.Do(req)
if err != nil {
return fmt.Errorf("could not reach system-probe: %w", err)
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
return fmt.Errorf("system-probe agent restart failed with status %d; see system-probe logs for details", resp.StatusCode)
}
return nil
}
Loading
Loading