-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Pedro.cordeiro/macos restart agent #52813
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
f871d70
d67e363
24eebaa
5710f3e
310864e
b4cbda8
b11bcff
3a312ee
fc0a14c
f5a4059
e876a8a
5f86900
405e19f
910ee19
be3f308
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| // Unless explicitly stated otherwise all files in this repository are licensed | ||
| // under the Apache License Version 2.0. | ||
| // This product includes software developed at Datadog (https://www.datadoghq.com/). | ||
| // Copyright 2016-present Datadog, Inc. | ||
|
|
||
| //go:build darwin | ||
|
|
||
| package api | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "net/http" | ||
| "os/exec" | ||
| "time" | ||
|
|
||
| "github.com/DataDog/datadog-agent/pkg/util/log" | ||
| ) | ||
|
|
||
| var afterFunc = time.AfterFunc | ||
|
|
||
| var kickstart = func(service string) error { | ||
| cmd := exec.Command("/bin/launchctl", "kickstart", "-k", service) | ||
| out, err := cmd.CombinedOutput() | ||
| if err != nil { | ||
| return fmt.Errorf("%s", string(out)) | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| func handleAgentRestart(w http.ResponseWriter, r *http.Request) { | ||
| // Reply 200 immediately so the client receives the response before launchd | ||
| // tears down this process when sysprobe is restarted. | ||
| w.WriteHeader(http.StatusOK) | ||
| if f, ok := w.(http.Flusher); ok { | ||
| f.Flush() | ||
| } | ||
|
|
||
| // Restart both services after a short delay so the HTTP response has time | ||
| // to be delivered before launchd sends SIGTERM to this process. | ||
| afterFunc(100*time.Millisecond, func() { | ||
| if err := kickstart("system/com.datadoghq.agent"); err != nil { | ||
| log.Errorf("agent-restart: failed to restart com.datadoghq.agent: %v", err) | ||
| } | ||
| if err := kickstart("system/com.datadoghq.sysprobe"); err != nil { | ||
| log.Errorf("agent-restart: failed to restart com.datadoghq.sysprobe: %v", err) | ||
| } | ||
| }) | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| // Unless explicitly stated otherwise all files in this repository are licensed | ||
| // under the Apache License Version 2.0. | ||
| // This product includes software developed at Datadog (https://www.datadoghq.com/). | ||
| // Copyright 2016-present Datadog, Inc. | ||
|
|
||
| //go:build darwin | ||
|
|
||
| package api | ||
|
|
||
| import ( | ||
| "net/http" | ||
| "net/http/httptest" | ||
| "testing" | ||
| "time" | ||
|
|
||
| "github.com/stretchr/testify/assert" | ||
| ) | ||
|
|
||
| func withMockKickstart(t *testing.T, mock func(string) error) { | ||
| t.Helper() | ||
| orig := kickstart | ||
| kickstart = mock | ||
| t.Cleanup(func() { kickstart = orig }) | ||
| } | ||
|
|
||
| // withSyncAfterFunc replaces the timer so the callback runs synchronously inside | ||
| // handleAgentRestart, before the function returns. This prevents the real kickstart | ||
| // from being restored by t.Cleanup before the timer fires. | ||
| func withSyncAfterFunc(t *testing.T) { | ||
| t.Helper() | ||
| orig := afterFunc | ||
| afterFunc = func(_ time.Duration, f func()) *time.Timer { f(); return nil } | ||
| t.Cleanup(func() { afterFunc = orig }) | ||
| } | ||
|
|
||
| func TestHandleAgentRestart_Returns200Immediately(t *testing.T) { | ||
| withSyncAfterFunc(t) | ||
| withMockKickstart(t, func(string) error { return nil }) | ||
|
|
||
| req := httptest.NewRequest(http.MethodPost, "/agent-restart", nil) | ||
| rr := httptest.NewRecorder() | ||
|
|
||
| handleAgentRestart(rr, req) | ||
|
PedroCordeiroDataDog marked this conversation as resolved.
|
||
|
|
||
| assert.Equal(t, http.StatusOK, rr.Code) | ||
| } | ||
|
|
||
| func TestHandleAgentRestart_ServiceRestartSequence(t *testing.T) { | ||
| // expectedServices defines the exact order in which launchd services must be restarted. | ||
| // Agent must come before sysprobe because restarting sysprobe sends SIGTERM to this process. | ||
| expectedServices := []string{ | ||
| "system/com.datadoghq.agent", | ||
| "system/com.datadoghq.sysprobe", | ||
| } | ||
|
|
||
| withSyncAfterFunc(t) | ||
|
|
||
| var called []string | ||
| withMockKickstart(t, func(svc string) error { | ||
| called = append(called, svc) | ||
| return nil | ||
| }) | ||
|
|
||
| req := httptest.NewRequest(http.MethodPost, "/agent-restart", nil) | ||
| rr := httptest.NewRecorder() | ||
|
|
||
| handleAgentRestart(rr, req) | ||
|
|
||
| assert.Equal(t, http.StatusOK, rr.Code) | ||
| assert.Equal(t, expectedServices, called) | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| // Unless explicitly stated otherwise all files in this repository are licensed | ||
| // under the Apache License Version 2.0. | ||
| // This product includes software developed at Datadog (https://www.datadoghq.com/). | ||
| // Copyright 2016-present Datadog, Inc. | ||
|
|
||
| //go:build !darwin | ||
|
|
||
| package api | ||
|
|
||
| import "net/http" | ||
|
|
||
| func handleAgentRestart(w http.ResponseWriter, _ *http.Request) { | ||
| http.Error(w, "not supported on this platform", http.StatusNotImplemented) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| // Unless explicitly stated otherwise all files in this repository are licensed | ||
| // under the Apache License Version 2.0. | ||
| // This product includes software developed at Datadog (https://www.datadoghq.com/). | ||
| // Copyright 2016-present Datadog, Inc. | ||
|
|
||
| //go:build !darwin | ||
|
|
||
| package api | ||
|
|
||
| import ( | ||
| "net/http" | ||
| "net/http/httptest" | ||
| "testing" | ||
|
|
||
| "github.com/stretchr/testify/assert" | ||
| ) | ||
|
|
||
| func TestHandleAgentRestart_NotSupportedOnNonDarwin(t *testing.T) { | ||
| req := httptest.NewRequest(http.MethodPost, "/agent-restart", nil) | ||
| rr := httptest.NewRecorder() | ||
|
|
||
| handleAgentRestart(rr, req) | ||
|
|
||
| assert.Equal(t, http.StatusNotImplemented, rr.Code) | ||
| assert.Contains(t, rr.Body.String(), "not supported on this platform") | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,7 @@ import ( | |
| "github.com/DataDog/datadog-agent/comp/core/flare" | ||
| guidef "github.com/DataDog/datadog-agent/comp/core/gui/def" | ||
| "github.com/DataDog/datadog-agent/comp/core/hostname/hostnameinterface/def" | ||
| ipc "github.com/DataDog/datadog-agent/comp/core/ipc/def" | ||
| log "github.com/DataDog/datadog-agent/comp/core/log/def" | ||
| "github.com/DataDog/datadog-agent/comp/core/status" | ||
| compdef "github.com/DataDog/datadog-agent/comp/def" | ||
|
|
@@ -74,6 +75,7 @@ type Requires struct { | |
| Status status.Component | ||
| Lc compdef.Lifecycle | ||
| Hostname hostnameinterface.Component | ||
| Ipc ipc.Component | ||
| } | ||
|
|
||
| // Provides defines the output of the gui component. | ||
|
|
@@ -120,6 +122,12 @@ func NewComponent(deps Requires) Provides { | |
|
|
||
| sessionExpiration := deps.Config.GetDuration("GUI_session_expiration") | ||
| g.auth = newAuthenticator(authToken, sessionExpiration) | ||
| setGetAuthToken(deps.Ipc.GetAuthToken) | ||
| socketPath := deps.Config.GetString("system_probe_config.sysprobe_socket") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In macOS deployments where Useful? React with 👍 / 👎. |
||
| if socketPath == "" { | ||
| socketPath = defaultpaths.GetDefaultSystemProbeAddress() | ||
| } | ||
| setSysprobeSocketPath(socketPath) | ||
|
|
||
| // register the public routes | ||
| publicRouter.HandleFunc("GET /{$}", renderIndexPage) | ||
|
|
@@ -281,6 +289,7 @@ func (g *gui) getAccessToken(w http.ResponseWriter, r *http.Request) { | |
| Value: accessToken, | ||
| Path: "/", | ||
| HttpOnly: true, | ||
| SameSite: http.SameSiteStrictMode, | ||
| MaxAge: 31536000, // 1 year | ||
| }) | ||
| http.Redirect(w, r, "/", http.StatusFound) | ||
|
|
@@ -292,6 +301,17 @@ func (g *gui) authMiddleware(next http.Handler) http.Handler { | |
| // Disable caching | ||
| w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate") | ||
|
|
||
| // For state-changing requests, reject any cross-origin Origin header to prevent CSRF. | ||
| // Same-origin requests from the GUI itself either omit Origin or match the server address. | ||
| if r.Method != http.MethodGet && r.Method != http.MethodHead { | ||
| if origin := r.Header.Get("Origin"); origin != "" { | ||
| if origin != "http://"+g.address { | ||
| http.Error(w, "invalid origin", http.StatusForbidden) | ||
| return | ||
| } | ||
| } | ||
| } | ||
|
|
||
| cookie, _ := r.Cookie("accessToken") | ||
| if cookie == nil { | ||
| http.Error(w, "missing accessToken", http.StatusUnauthorized) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,102 @@ | ||
| // Unless explicitly stated otherwise all files in this repository are licensed | ||
| // under the Apache License Version 2.0. | ||
| // This product includes software developed at Datadog (https://www.datadoghq.com/). | ||
| // Copyright 2016-present Datadog, Inc. | ||
|
|
||
| package guiimpl | ||
|
|
||
| import ( | ||
| "net/http" | ||
| "net/http/httptest" | ||
| "testing" | ||
| "time" | ||
|
|
||
| "github.com/stretchr/testify/assert" | ||
| "github.com/stretchr/testify/require" | ||
| ) | ||
|
|
||
| func newTestGUI(t *testing.T) *gui { | ||
| t.Helper() | ||
| return &gui{ | ||
| address: "localhost:5002", | ||
| auth: newAuthenticator("test-secret", 5*time.Minute), | ||
| intentTokens: make(map[string]bool), | ||
| } | ||
| } | ||
|
|
||
| func TestGetAccessToken_CookieHasSameSiteStrict(t *testing.T) { | ||
| g := newTestGUI(t) | ||
| g.intentTokens["test-intent"] = true | ||
|
|
||
| req := httptest.NewRequest(http.MethodGet, "/auth?intent=test-intent", nil) | ||
| rr := httptest.NewRecorder() | ||
|
|
||
| g.getAccessToken(rr, req) | ||
|
|
||
| var accessCookie *http.Cookie | ||
| for _, c := range rr.Result().Cookies() { | ||
| if c.Name == "accessToken" { | ||
| accessCookie = c | ||
| break | ||
| } | ||
| } | ||
| require.NotNil(t, accessCookie, "accessToken cookie must be set") | ||
| assert.Equal(t, http.SameSiteStrictMode, accessCookie.SameSite) | ||
| assert.True(t, accessCookie.HttpOnly) | ||
| } | ||
|
|
||
| func TestAuthMiddleware_OriginCheck(t *testing.T) { | ||
| g := newTestGUI(t) | ||
| token := g.auth.GenerateAccessToken() | ||
|
|
||
| okHandler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { | ||
| w.WriteHeader(http.StatusOK) | ||
| }) | ||
|
|
||
| tests := []struct { | ||
| name string | ||
| method string | ||
| origin string | ||
| expectedStatus int | ||
| }{ | ||
| { | ||
| name: "POST without Origin is allowed (same-origin browser request)", | ||
| method: http.MethodPost, | ||
| origin: "", | ||
| expectedStatus: http.StatusOK, | ||
| }, | ||
| { | ||
| name: "POST with matching Origin is allowed", | ||
| method: http.MethodPost, | ||
| origin: "http://localhost:5002", | ||
| expectedStatus: http.StatusOK, | ||
| }, | ||
| { | ||
| name: "POST with cross-origin Origin is rejected", | ||
| method: http.MethodPost, | ||
| origin: "http://evil.com", | ||
| expectedStatus: http.StatusForbidden, | ||
| }, | ||
| { | ||
| name: "GET with cross-origin Origin is allowed (safe method)", | ||
| method: http.MethodGet, | ||
| origin: "http://evil.com", | ||
| expectedStatus: http.StatusOK, | ||
| }, | ||
| } | ||
|
|
||
| for _, tt := range tests { | ||
| t.Run(tt.name, func(t *testing.T) { | ||
| req := httptest.NewRequest(tt.method, "/agent/restart", nil) | ||
| req.AddCookie(&http.Cookie{Name: "accessToken", Value: token}) | ||
| if tt.origin != "" { | ||
| req.Header.Set("Origin", tt.origin) | ||
| } | ||
|
|
||
| rr := httptest.NewRecorder() | ||
| g.authMiddleware(okHandler).ServeHTTP(rr, req) | ||
|
|
||
| assert.Equal(t, tt.expectedStatus, rr.Code) | ||
| }) | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When
launchctl kickstartfails, for example because the service label is not bootstrapped, this sends and flushes a 200 before either restart is attempted; the later failures are only logged. The GUI treats the 200 response from/agent-restartasSuccess, so users can be told the restart worked even though neither service was restarted. Please perform the agent restart before the OK response, and only defer the self-terminating sysprobe restart, or otherwise surface failures before returning success.Useful? React with 👍 / 👎.