Files
edubox/agent/tailscale.go
T

310 lines
8.0 KiB
Go

package main
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"sync"
"syscall"
"time"
)
// Package-level state for the single tailscaled daemon managed by this file.
// The functions in this file read and write these variables while holding
// tsCmdMu.
var (
// tsCmd is the tailscaled process started by startTailscale; it is nil
// while no daemon is tracked.
tsCmd *exec.Cmd
// tsCmdMu guards the other variables in this block.
tsCmdMu sync.Mutex
// tsIP is the first Tailscale IP reported by "tailscale status --json"
// during the last successful start; it is reset to "" on stop.
tsIP string
// tsDataDir is the "tailscale" subdirectory of the agent data directory;
// it holds the state, log and pid files (and the socket on non-Windows).
tsDataDir string
// tsSocket is the IPC endpoint passed to tailscaled and the tailscale CLI:
// a named pipe on Windows, a Unix socket path elsewhere.
tsSocket string
)
// tailscaleStatus is the subset of the "tailscale status --json" output that
// this file decodes: only the local node's Tailscale IP addresses.
type tailscaleStatus struct {
Self struct {
// TailscaleIPs lists the addresses assigned to this node; the first
// entry is the one recorded in tsIP.
TailscaleIPs []string `json:"TailscaleIPs"`
} `json:"Self"`
}
// tailscaleBin resolves the executable to run for the given Tailscale tool
// name ("tailscaled" or "tailscale").
//
// Lookup order:
//  1. a bundled copy at <agent dir>/tailscale-bin/<GOOS>/<name> (with a
//     ".exe" suffix on Windows), if that file exists;
//  2. the first match on PATH;
//  3. the bare name, leaving resolution to whoever executes it.
func tailscaleBin(name string) string {
	if exePath, exeErr := os.Executable(); exeErr == nil {
		candidate := filepath.Join(filepath.Dir(exePath), "tailscale-bin", runtime.GOOS, name)
		if runtime.GOOS == "windows" {
			candidate += ".exe"
		}
		if _, statErr := os.Stat(candidate); statErr == nil {
			return candidate
		}
	}

	resolved, lookErr := exec.LookPath(name)
	if lookErr != nil {
		return name
	}
	return resolved
}
// startTailscale launches a private tailscaled daemon in userspace-networking
// mode, joins it to the given login server and waits for a Tailscale IP.
//
// Parameters:
//   - dataDir: agent data directory; daemon files live in dataDir/tailscale.
//   - nodeID: used as the Tailscale hostname.
//   - headscaleURL: passed to "tailscale up" as --login-server.
//   - authKey: pre-auth key; pass "" on reconnect to reuse the stored state.
//
// It returns the node's first Tailscale IP. If a daemon is already tracked
// the cached IP is returned immediately. On any failure after the daemon was
// spawned, the daemon is killed, its pid file removed and an error returned.
//
// tsCmdMu is held for the whole call, so other accessors in this file block
// until startup finishes; "tailscale up" and the IP polling share a single
// 120 second budget.
//
// NOTE(review): the auth key is passed on the command line and may be visible
// in the process list; consider an alternative if the CLI offers one.
func startTailscale(dataDir, nodeID, headscaleURL, authKey string) (string, error) {
	tsCmdMu.Lock()
	defer tsCmdMu.Unlock()
	if tsCmd != nil {
		return tsIP, nil
	}
	if dataDir == "" {
		return "", fmt.Errorf("tailscale data dir is empty")
	}
	tsDataDir = filepath.Join(dataDir, "tailscale")
	if err := os.MkdirAll(tsDataDir, 0700); err != nil {
		return "", fmt.Errorf("create tailscale dir: %w", err)
	}
	// Make sure a previous tailscaled (e.g. left behind after a crash or
	// force-kill) does not block the new daemon on the same socket/state.
	killStaleTailscaled(tsDataDir)
	if runtime.GOOS == "windows" {
		// Windows uses named pipes for tailscaled IPC, not Unix sockets.
		tsSocket = `\\.\pipe\studioe5-tailscaled`
	} else {
		tsSocket = filepath.Join(tsDataDir, "tailscaled.sock")
	}
	stateFile := filepath.Join(tsDataDir, "tailscaled.state")
	pidFile := filepath.Join(tsDataDir, "tailscaled.pid")

	log.Printf("Starting tailscaled for node %s (socket=%s)", nodeID, tsSocket)
	tsCmd = exec.Command(tailscaleBin("tailscaled"),
		"--state="+stateFile,
		"--socket="+tsSocket,
		"--tun=userspace-networking",
	)
	hideWindow(tsCmd)

	// Redirect tailscaled output to a dedicated log file. A failure to open
	// the file is not fatal: the daemon then runs with its output discarded.
	tsLogPath := filepath.Join(tsDataDir, "tailscaled.log")
	tsLogFile, logErr := os.OpenFile(tsLogPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if logErr != nil {
		log.Printf("Cannot open tailscaled log file %s: %v", tsLogPath, logErr)
	} else {
		tsCmd.Stdout = tsLogFile
		tsCmd.Stderr = tsLogFile
	}

	startErr := tsCmd.Start()
	if logErr == nil {
		// An *os.File is handed to the child directly, so once Start has
		// returned the parent's handle is no longer needed. Closing it here
		// avoids leaking one descriptor per start attempt.
		_ = tsLogFile.Close()
	}
	if startErr != nil {
		tsCmd = nil
		return "", fmt.Errorf("start tailscaled: %w", startErr)
	}
	if err := os.WriteFile(pidFile, []byte(strconv.Itoa(tsCmd.Process.Pid)), 0644); err != nil {
		log.Printf("Cannot write tailscaled pid file: %v", err)
	}

	// abort tears down the freshly spawned daemon and reports err. The pid
	// file is removed as well so that a later start does not try to kill an
	// unrelated process that happens to reuse the pid.
	abort := func(err error) (string, error) {
		_ = tsCmd.Process.Kill()
		_ = tsCmd.Wait()
		tsCmd = nil
		_ = os.Remove(pidFile)
		return "", err
	}

	// Give tailscaled a moment to start listening.
	time.Sleep(1 * time.Second)

	// Bring the interface up with the auth key.
	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
	defer cancel()
	upArgs := []string{
		"--socket=" + tsSocket,
		"up",
		"--login-server=" + headscaleURL,
		"--hostname=" + nodeID,
		"--accept-dns=false",
	}
	// The auth key is omitted on reconnect: Tailscale reuses the existing state.
	if authKey != "" {
		upArgs = append(upArgs, "--authkey="+authKey)
	}
	if runtime.GOOS == "windows" {
		// On Windows, keep the VPN up even after the tailscale.exe CLI client disconnects.
		upArgs = append(upArgs, "--unattended")
	} else {
		// --operator is only meaningful on Unix systems.
		upArgs = append(upArgs, "--operator=root")
	}
	upCmd := exec.CommandContext(ctx, tailscaleBin("tailscale"), upArgs...)
	hideWindow(upCmd)
	upCmd.Stdout = log.Writer()
	upCmd.Stderr = log.Writer()
	if err := upCmd.Run(); err != nil {
		return abort(fmt.Errorf("tailscale up: %w", err))
	}

	// Poll the daemon once per second until it reports an IP address or the
	// shared deadline expires.
	for {
		statusCmd := exec.CommandContext(ctx, tailscaleBin("tailscale"),
			"--socket="+tsSocket,
			"status", "--json",
		)
		hideWindow(statusCmd)
		out, statusErr := statusCmd.Output()
		if statusErr == nil {
			var st tailscaleStatus
			if err := json.Unmarshal(out, &st); err == nil && len(st.Self.TailscaleIPs) > 0 {
				tsIP = st.Self.TailscaleIPs[0]
				break
			}
		}
		// Wait before retrying, but stop as soon as the deadline passes
		// instead of sleeping through it.
		select {
		case <-ctx.Done():
			if statusErr != nil {
				return abort(fmt.Errorf("tailscale status: %w", statusErr))
			}
			return abort(fmt.Errorf("tailscale IP timeout"))
		case <-time.After(1 * time.Second):
		}
	}

	log.Printf("Tailscale started with IP: %s", tsIP)
	return tsIP, nil
}
// stopTailscale shuts down the tracked tailscaled daemon, if any. It takes
// tsCmdMu and delegates to stopTailscaleLocked.
func stopTailscale() {
tsCmdMu.Lock()
defer tsCmdMu.Unlock()
stopTailscaleLocked()
}
// stopTailscaleLocked shuts down the tracked tailscaled daemon. It does
// nothing when no daemon is tracked. The caller must hold tsCmdMu.
//
// It first asks the daemon to disconnect via "tailscale down" (best effort),
// then kills and reaps the process, clears tsCmd and tsIP and removes the
// pid file.
func stopTailscaleLocked() {
	if tsCmd == nil || tsCmd.Process == nil {
		return
	}
	if tsSocket != "" {
		// Bound the graceful disconnect: the lock is held here, so a CLI
		// call that hangs on an unresponsive daemon would otherwise block
		// shutdown and every other accessor forever.
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		downCmd := exec.CommandContext(ctx, tailscaleBin("tailscale"), "--socket="+tsSocket, "down")
		hideWindow(downCmd)
		// Best effort: the daemon is killed below regardless of the outcome.
		_ = downCmd.Run()
		cancel()
	}
	// Errors are ignored on purpose: the process may already have exited.
	_ = tsCmd.Process.Kill()
	_ = tsCmd.Wait()
	tsCmd = nil
	tsIP = ""
	if tsDataDir != "" {
		_ = os.Remove(filepath.Join(tsDataDir, "tailscaled.pid"))
	}
	log.Printf("Tailscale stopped")
}
// isTailscaleRunning reports whether a tailscaled process started by
// startTailscale is currently tracked and, where the platform allows probing
// it, still accepts signals.
//
// On Windows os.Process.Signal only implements os.Kill and rejects every
// other signal, so a signal-0 probe would always fail and this function
// would always report false. There the function reports true whenever a
// process is tracked; use isTailscaleReady for a check that actually talks
// to the daemon.
//
// NOTE(review): on Unix a child that has exited but has not been waited on
// still accepts signal 0, so a crashed daemon may be reported as running
// until it is reaped — confirm whether callers rely on crash detection here.
func isTailscaleRunning() bool {
	tsCmdMu.Lock()
	defer tsCmdMu.Unlock()
	if tsCmd == nil || tsCmd.Process == nil {
		return false
	}
	if runtime.GOOS == "windows" {
		// Liveness cannot be probed with signals on Windows.
		return true
	}
	// Signal 0 checks process existence without affecting it.
	return tsCmd.Process.Signal(syscall.Signal(0)) == nil
}
// getTailscaleIP returns the Tailscale IP recorded by the last successful
// startTailscale, or "" if none has been recorded or the daemon was stopped.
// It reads the cached value under tsCmdMu and does not query the daemon.
func getTailscaleIP() string {
tsCmdMu.Lock()
defer tsCmdMu.Unlock()
return tsIP
}
// isTailscaleReady reports whether the daemon is running and has joined the
// tailnet, i.e. "tailscale status --json" succeeds and lists at least one
// Tailscale IP for this node. It is stricter than isTailscaleRunning, which
// only looks at the process itself.
func isTailscaleReady() bool {
	if !isTailscaleRunning() {
		return false
	}

	// Copy the socket path under the lock, then query the daemon without
	// holding it.
	socket := func() string {
		tsCmdMu.Lock()
		defer tsCmdMu.Unlock()
		return tsSocket
	}()
	if socket == "" {
		return false
	}

	query := exec.Command(tailscaleBin("tailscale"), "--socket="+socket, "status", "--json")
	hideWindow(query)
	raw, runErr := query.Output()
	if runErr != nil {
		return false
	}

	var status tailscaleStatus
	return json.Unmarshal(raw, &status) == nil && len(status.Self.TailscaleIPs) > 0
}
// setupTailscaleServe makes Tailscale forward inbound tailnet TCP traffic on
// port to localhost:<port>. This is required on Windows because userspace
// networking does not forward incoming connections to loopback by default.
//
// Any existing serve entry for the port is switched off first (errors from
// that step are ignored). It returns an error if the socket has not been
// initialized or if the serve command fails; the latter includes the
// command's combined output.
func setupTailscaleServe(port int) error {
	tsCmdMu.Lock()
	defer tsCmdMu.Unlock()
	if tsSocket == "" {
		return fmt.Errorf("tailscale socket not initialized")
	}

	portStr := strconv.Itoa(port)
	// newServeCmd builds "tailscale serve --bg --tcp=<port> <target>".
	newServeCmd := func(target string) *exec.Cmd {
		c := exec.Command(tailscaleBin("tailscale"), "--socket="+tsSocket, "serve", "--bg", "--tcp="+portStr, target)
		hideWindow(c)
		return c
	}

	// Clean up any stale config for this port first.
	_ = newServeCmd("off").Run()

	if output, runErr := newServeCmd("tcp://localhost:" + portStr).CombinedOutput(); runErr != nil {
		return fmt.Errorf("tailscale serve: %w: %s", runErr, string(output))
	}
	log.Printf("Tailscale serve configured for port %s", portStr)
	return nil
}
// removeTailscaleServe switches off the Tailscale serve proxy for port; it is
// meant to be called when an instance is stopped or deleted. It is a no-op
// when the socket has not been initialized. The result of the CLI call is
// ignored, so the log line is written whether or not an entry existed.
func removeTailscaleServe(port int) {
	tsCmdMu.Lock()
	defer tsCmdMu.Unlock()
	if tsSocket == "" {
		return
	}

	tcpPort := strconv.Itoa(port)
	args := []string{"--socket=" + tsSocket, "serve", "--bg", "--tcp=" + tcpPort, "off"}
	cmd := exec.Command(tailscaleBin("tailscale"), args...)
	hideWindow(cmd)
	_ = cmd.Run()
	log.Printf("Tailscale serve removed for port %s", tcpPort)
}
// killStaleTailscaled terminates a previously started tailscaled process that
// may have been left running after the agent was force-killed.
//
// It reads the pid from <tsDataDir>/tailscaled.pid. A missing or unparsable
// pid file is left alone. Otherwise the process is killed if it still exists
// and the pid file is removed.
//
// NOTE(review): the pid is trusted as-is; if it has been reused by an
// unrelated process since the crash, that process is killed instead —
// confirm whether an identity check is needed.
func killStaleTailscaled(tsDataDir string) {
	pidFile := filepath.Join(tsDataDir, "tailscaled.pid")
	data, err := os.ReadFile(pidFile)
	if err != nil {
		return
	}
	var pid int
	if _, err := fmt.Sscanf(string(data), "%d", &pid); err != nil {
		return
	}
	if pid <= 0 {
		// Never signal non-positive pids: on Unix they can address process
		// groups rather than a single process. The file is useless, drop it.
		_ = os.Remove(pidFile)
		return
	}
	proc, err := os.FindProcess(pid)
	if err != nil {
		return
	}

	// On Unix, signal 0 checks existence without affecting the process. On
	// Windows os.Process.Signal supports only os.Kill, so the probe would
	// always fail; there a successful FindProcess already means the process
	// could be opened.
	alive := runtime.GOOS == "windows" || proc.Signal(syscall.Signal(0)) == nil
	if alive {
		log.Printf("Killing stale tailscaled process %d", pid)
		_ = proc.Kill()
		_, _ = proc.Wait()
		if runtime.GOOS != "windows" {
			// The stale daemon is not our child, so Wait returns at once
			// without waiting for it. Poll briefly until the process is
			// gone so the new daemon does not race it for socket and state.
			deadline := time.Now().Add(3 * time.Second)
			for proc.Signal(syscall.Signal(0)) == nil && time.Now().Before(deadline) {
				time.Sleep(50 * time.Millisecond)
			}
		}
	}
	_ = os.Remove(pidFile)
}