Files
edubox/agent/tailscale.go
T
EduBox Dev a414f03a59 feat(agent): v0.3.5 Windows inbound forwarding, UI actions, lifecycle
- Configure tailscale serve automatically for each instance on Windows userspace networking.
- Add local UI buttons: start/stop/reset/delete instances (stop/start preserve volumes).
- Clean shutdown: stop tailscaled and instances, notify server with instance_stopped.
- Restart tailscaled on agent boot using persisted state when pre-auth key is absent.
- Sync instance stopped/deleted status to dashboard (server/lib/websocket.ts).
- Security: include prior authz/scoping changes across API routes, ephemeral pre-auth keys, ACL policy, internal API key.
- Update SUIVI_VPN_ONDEMAND.md and docs/ONBOARDING_CLIENT.md.
- Bump agent version to 0.3.5.
2026-06-25 22:59:09 +00:00

283 lines
7.3 KiB
Go

package main
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"sync"
"syscall"
"time"
)
// Package-level state describing the single tailscaled daemon managed by this
// file. The start/stop/status/serve helpers below take tsCmdMu before touching
// any of these variables.
var (
// tsCmd is the tailscaled process launched by startTailscale; it is reset to
// nil when the daemon is stopped or fails to come up.
tsCmd *exec.Cmd
// tsCmdMu serializes access to the variables in this block.
tsCmdMu sync.Mutex
// tsIP is the first Tailscale IP reported by `tailscale status --json`;
// cleared when the daemon is stopped.
tsIP string
// tsDataDir is the "tailscale" sub-directory of the agent data dir holding
// the daemon's state, log, pid file and (on non-Windows) its socket.
tsDataDir string
// tsSocket is the IPC endpoint passed via --socket: a named pipe on Windows,
// a Unix socket path elsewhere.
tsSocket string
)
// tailscaleStatus is the subset of the `tailscale status --json` output that
// this file decodes: only the local node's Tailscale IP addresses are read.
type tailscaleStatus struct {
// Self describes the local node.
Self struct {
// TailscaleIPs lists the addresses assigned to the local node; the first
// entry is the one startTailscale stores as the node's IP.
TailscaleIPs []string `json:"TailscaleIPs"`
} `json:"Self"`
}
// tailscaleBin resolves the path of a Tailscale executable ("tailscale" or
// "tailscaled").
//
// Resolution order:
//  1. a binary bundled next to the agent executable, under
//     tailscale-bin/<GOOS>/<name> (with ".exe" appended on Windows);
//  2. whatever exec.LookPath finds for name;
//  3. name itself, unchanged, leaving resolution to the eventual exec call.
func tailscaleBin(name string) string {
	if exePath, err := os.Executable(); err == nil {
		candidate := filepath.Join(filepath.Dir(exePath), "tailscale-bin", runtime.GOOS, name)
		if runtime.GOOS == "windows" {
			candidate += ".exe"
		}
		if _, statErr := os.Stat(candidate); statErr == nil {
			return candidate
		}
	}

	resolved, err := exec.LookPath(name)
	if err != nil {
		return name
	}
	return resolved
}
// startTailscale launches tailscaled in userspace-networking mode, brings the
// node up against headscaleURL and waits until the daemon reports a Tailscale
// IP address.
//
// Parameters:
//   - dataDir: agent data directory; state, log, pid file and (on non-Windows)
//     the socket live in its "tailscale" sub-directory. Must not be empty.
//   - nodeID: used as the Tailscale hostname.
//   - headscaleURL: passed as --login-server.
//   - authKey: pre-auth key; when empty it is omitted so the persisted state
//     is reused.
//
// It returns the first Tailscale IP of the node. If a daemon is already
// tracked in tsCmd, the cached IP is returned without starting anything.
// `tailscale up` and the wait for an IP share a single 120 s deadline. On any
// failure after the daemon was started, the daemon is killed, tsCmd is reset
// and the pid file is removed.
func startTailscale(dataDir, nodeID, headscaleURL, authKey string) (string, error) {
	tsCmdMu.Lock()
	defer tsCmdMu.Unlock()

	if tsCmd != nil {
		return tsIP, nil
	}
	if dataDir == "" {
		return "", fmt.Errorf("tailscale data dir is empty")
	}
	tsDataDir = filepath.Join(dataDir, "tailscale")
	if err := os.MkdirAll(tsDataDir, 0700); err != nil {
		return "", fmt.Errorf("create tailscale dir: %w", err)
	}

	// Make sure a previous tailscaled (e.g. left behind after a crash or
	// force-kill) does not block the new daemon on the same socket/state.
	killStaleTailscaled(tsDataDir)

	if runtime.GOOS == "windows" {
		// Windows uses named pipes for tailscaled IPC, not Unix sockets.
		tsSocket = `\\.\pipe\studioe5-tailscaled`
	} else {
		tsSocket = filepath.Join(tsDataDir, "tailscaled.sock")
	}
	stateFile := filepath.Join(tsDataDir, "tailscaled.state")
	pidFile := filepath.Join(tsDataDir, "tailscaled.pid")

	log.Printf("Starting tailscaled for node %s (socket=%s)", nodeID, tsSocket)
	tsCmd = exec.Command(tailscaleBin("tailscaled"),
		"--state="+stateFile,
		"--socket="+tsSocket,
		"--tun=userspace-networking",
	)
	hideWindow(tsCmd)

	// Redirect tailscaled output to a dedicated log file.
	tsLogPath := filepath.Join(tsDataDir, "tailscaled.log")
	tsLogFile, logErr := os.OpenFile(tsLogPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if logErr == nil {
		tsCmd.Stdout = tsLogFile
		tsCmd.Stderr = tsLogFile
	} else {
		log.Printf("Cannot open tailscaled log file %s: %v", tsLogPath, logErr)
	}

	startErr := tsCmd.Start()
	if logErr == nil {
		// The child owns its own copy of the descriptor once Start has
		// returned (or never got one on failure); closing ours avoids leaking
		// one handle per daemon start.
		_ = tsLogFile.Close()
	}
	if startErr != nil {
		tsCmd = nil
		return "", fmt.Errorf("start tailscaled: %w", startErr)
	}

	if err := os.WriteFile(pidFile, []byte(strconv.Itoa(tsCmd.Process.Pid)), 0644); err != nil {
		log.Printf("Cannot write tailscaled pid file: %v", err)
	}

	// abort tears down a daemon that was started but could not be brought up.
	// The pid file is removed as well: leaving it behind would make the next
	// killStaleTailscaled target whatever process later reuses that PID.
	abort := func() {
		_ = tsCmd.Process.Kill()
		_ = tsCmd.Wait()
		tsCmd = nil
		_ = os.Remove(pidFile)
	}

	// Give tailscaled a moment to start listening.
	time.Sleep(1 * time.Second)

	// Bring the interface up with the auth key.
	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
	defer cancel()

	upArgs := []string{
		"--socket=" + tsSocket,
		"up",
		"--login-server=" + headscaleURL,
		"--hostname=" + nodeID,
		"--accept-dns=false",
	}
	// The auth key is omitted on reconnect: Tailscale reuses the existing state.
	if authKey != "" {
		upArgs = append(upArgs, "--authkey="+authKey)
	}
	if runtime.GOOS == "windows" {
		// On Windows, keep the VPN up even after the tailscale.exe CLI client disconnects.
		upArgs = append(upArgs, "--unattended")
	} else {
		// --operator is only meaningful on Unix systems.
		upArgs = append(upArgs, "--operator=root")
	}

	upCmd := exec.CommandContext(ctx, tailscaleBin("tailscale"), upArgs...)
	hideWindow(upCmd)
	upCmd.Stdout = log.Writer()
	upCmd.Stderr = log.Writer()
	if err := upCmd.Run(); err != nil {
		abort()
		return "", fmt.Errorf("tailscale up: %w", err)
	}

	// Poll the daemon once per second until it reports an IP address or the
	// shared deadline expires.
	for {
		statusCmd := exec.CommandContext(ctx, tailscaleBin("tailscale"),
			"--socket="+tsSocket,
			"status", "--json",
		)
		hideWindow(statusCmd)
		out, err := statusCmd.Output()
		if err == nil {
			var st tailscaleStatus
			if json.Unmarshal(out, &st) == nil && len(st.Self.TailscaleIPs) > 0 {
				tsIP = st.Self.TailscaleIPs[0]
				break
			}
		}

		select {
		case <-ctx.Done():
			abort()
			if err != nil {
				return "", fmt.Errorf("tailscale status: %w", err)
			}
			return "", fmt.Errorf("tailscale IP timeout")
		case <-time.After(1 * time.Second):
			// Retry.
		}
	}

	log.Printf("Tailscale started with IP: %s", tsIP)
	return tsIP, nil
}
// stopTailscale shuts down the managed tailscaled daemon, if any. It is the
// locking entry point: it takes tsCmdMu and delegates the actual teardown to
// stopTailscaleLocked.
func stopTailscale() {
	tsCmdMu.Lock()
	defer tsCmdMu.Unlock()

	stopTailscaleLocked()
}
// stopTailscaleLocked tears down the managed tailscaled daemon. The caller
// must already hold tsCmdMu.
//
// It first asks the daemon to disconnect via `tailscale down` (best effort,
// bounded by a timeout so a hung daemon cannot block shutdown while the mutex
// is held), then kills and reaps the process, clears tsCmd and tsIP, and
// removes the pid file. It is a no-op when no daemon is tracked.
func stopTailscaleLocked() {
	if tsCmd == nil || tsCmd.Process == nil {
		return
	}

	if tsSocket != "" {
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		downCmd := exec.CommandContext(ctx, tailscaleBin("tailscale"), "--socket="+tsSocket, "down")
		hideWindow(downCmd)
		if err := downCmd.Run(); err != nil {
			// Best effort only: the daemon is killed below regardless.
			log.Printf("tailscale down failed, continuing shutdown: %v", err)
		}
		cancel()
	}

	_ = tsCmd.Process.Kill()
	_ = tsCmd.Wait()
	tsCmd = nil
	tsIP = ""

	if tsDataDir != "" {
		_ = os.Remove(filepath.Join(tsDataDir, "tailscaled.pid"))
	}
	log.Printf("Tailscale stopped")
}
// isTailscaleRunning reports whether the tailscaled daemon started by this
// agent is still considered alive.
//
// On Unix it probes the process with signal 0, which checks existence without
// affecting the process. On Windows os.Process.Signal only implements Kill and
// returns an error for every other signal, so that probe would always report
// false; there the function falls back to "started and not yet reaped".
//
// NOTE(review): neither check detects a daemon that exited but has not been
// waited on yet (on Unix signal 0 presumably still succeeds for such a
// process) — confirm whether callers need a stronger liveness check.
func isTailscaleRunning() bool {
	tsCmdMu.Lock()
	defer tsCmdMu.Unlock()

	if tsCmd == nil || tsCmd.Process == nil {
		return false
	}
	if runtime.GOOS == "windows" {
		// ProcessState is only populated once Wait has returned.
		return tsCmd.ProcessState == nil
	}
	// Signal 0 checks process existence without affecting it.
	return tsCmd.Process.Signal(syscall.Signal(0)) == nil
}
// getTailscaleIP returns the Tailscale IP recorded by the last successful
// startTailscale, or the empty string when none is set. The value is read
// under tsCmdMu.
func getTailscaleIP() string {
	tsCmdMu.Lock()
	ip := tsIP
	tsCmdMu.Unlock()
	return ip
}
// setupTailscaleServe makes Tailscale forward inbound Tailnet TCP traffic on
// port to localhost:<port>. Userspace networking on Windows does not deliver
// incoming connections to loopback by default, hence the explicit proxy.
//
// Any existing serve entry for the port is switched off first (errors from
// that step are ignored), then the new entry is registered in the background.
// It returns an error when the socket has not been initialized or when the
// `tailscale serve` command fails; the command output is included in the
// error.
func setupTailscaleServe(port int) error {
	tsCmdMu.Lock()
	defer tsCmdMu.Unlock()

	if tsSocket == "" {
		return fmt.Errorf("tailscale socket not initialized")
	}

	portStr := strconv.Itoa(port)
	socketArg := "--socket=" + tsSocket
	tcpArg := "--tcp=" + portStr

	// Clean up any stale config for this port first.
	resetCmd := exec.Command(tailscaleBin("tailscale"), socketArg, "serve", "--bg", tcpArg, "off")
	hideWindow(resetCmd)
	_ = resetCmd.Run()

	enableCmd := exec.Command(tailscaleBin("tailscale"), socketArg, "serve", "--bg", tcpArg, "tcp://localhost:"+portStr)
	hideWindow(enableCmd)
	if output, err := enableCmd.CombinedOutput(); err != nil {
		return fmt.Errorf("tailscale serve: %w: %s", err, string(output))
	}

	log.Printf("Tailscale serve configured for port %s", portStr)
	return nil
}
// removeTailscaleServe removes the Tailscale serve proxy for a port when an
// instance is stopped or deleted.
//
// Removal is best effort: a failing `tailscale serve ... off` is logged
// together with the command output instead of being reported as a success,
// and nothing is returned to the caller. It is a no-op when the socket has
// not been initialized.
func removeTailscaleServe(port int) {
	tsCmdMu.Lock()
	defer tsCmdMu.Unlock()

	if tsSocket == "" {
		return
	}

	portStr := strconv.Itoa(port)
	offCmd := exec.Command(tailscaleBin("tailscale"), "--socket="+tsSocket, "serve", "--bg", "--tcp="+portStr, "off")
	hideWindow(offCmd)
	if out, err := offCmd.CombinedOutput(); err != nil {
		log.Printf("Tailscale serve removal for port %s returned an error: %v: %s", portStr, err, string(out))
		return
	}
	log.Printf("Tailscale serve removed for port %s", portStr)
}
// killStaleTailscaled terminates a previously started tailscaled process that
// may have been left running after the agent was force-killed.
//
// The PID is read from <tsDataDir>/tailscaled.pid. A missing or unparsable
// pid file is left alone. Once a PID has been parsed the pid file is always
// removed, whether or not a process was killed.
//
// Safety guards:
//   - non-positive PIDs are never signalled (on Unix, negative values passed
//     to kill(2) address process groups);
//   - the agent's own PID is never killed, which could otherwise happen when
//     the recorded PID is recycled (e.g. after a reboot).
//
// NOTE(review): the PID is not verified to belong to tailscaled, so another
// process that reused the recorded PID would still be killed.
func killStaleTailscaled(tsDataDir string) {
	pidFile := filepath.Join(tsDataDir, "tailscaled.pid")
	data, err := os.ReadFile(pidFile)
	if err != nil {
		return
	}
	var pid int
	if _, err := fmt.Sscanf(string(data), "%d", &pid); err != nil {
		return
	}
	// From here on the recorded PID is consumed, whatever the outcome.
	defer func() { _ = os.Remove(pidFile) }()

	if pid <= 0 || pid == os.Getpid() {
		return
	}

	proc, err := os.FindProcess(pid)
	if err != nil {
		// On Windows this means the process could not be opened (typically
		// because it no longer exists); nothing to kill.
		return
	}

	// On Windows os.Process.Signal only implements Kill, so a signal-0 probe
	// always fails there; a successful FindProcess is the existence check
	// instead. On Unix FindProcess always succeeds, so probe with signal 0.
	alive := runtime.GOOS == "windows" || proc.Signal(syscall.Signal(0)) == nil
	if alive {
		log.Printf("Killing stale tailscaled process %d", pid)
		_ = proc.Kill()
		_, _ = proc.Wait()
	}
}