Skip to content

Commit cdd5978

Browse files
drewpimlock
andauthored
feat(sandbox): inject host gateway hostAliases into sandbox pods (#306)
* feat(sandbox): inject host gateway hostAliases into sandbox pods Sandbox pods running in the k3s cluster cannot resolve host.docker.internal by default, preventing them from reaching services on the Docker host. Detect the host gateway IP (default route) in the cluster entrypoint, thread it through the Helm chart to the gateway server, and inject hostAliases entries (host.docker.internal, host.openshell.internal) into every sandbox pod spec. The injection is conditional -- when the IP is empty (non-Docker deployments), no hostAliases are added. --------- Co-authored-by: Piotr Mlocek <pmlocek@nvidia.com>
1 parent a590549 commit cdd5978

File tree

11 files changed

+514
-1
lines changed

11 files changed

+514
-1
lines changed

architecture/gateway-single-node.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ For the target daemon (local or remote):
170170
- Volume bind mount: `openshell-cluster-{name}:/var/lib/rancher/k3s`.
171171
- Network: `openshell-cluster-{name}` (per-gateway bridge network).
172172
- Extra host: `host.docker.internal:host-gateway`.
173+
- When populating sandbox pod `hostAliases`, the cluster entrypoint uses the IPv4 address that `host.docker.internal` resolves to, and falls back to the container's default gateway only when that lookup returns nothing. This keeps sandbox host aliases working on Docker Desktop, where the host-reachable IP differs from the bridge gateway.
173174
- Port mappings:
174175

175176
| Container Port | Host Port | Purpose |

crates/openshell-core/src/config.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ pub struct Config {
7777
/// the server over mTLS.
7878
#[serde(default)]
7979
pub client_tls_secret_name: String,
80+
81+
/// Host gateway IP for sandbox pod hostAliases.
82+
/// When set, sandbox pods get hostAliases entries mapping
83+
/// `host.docker.internal` and `host.openshell.internal` to this IP,
84+
/// allowing them to reach services running on the Docker host.
85+
#[serde(default)]
86+
pub host_gateway_ip: String,
8087
}
8188

8289
/// TLS configuration.
@@ -125,6 +132,7 @@ impl Config {
125132
ssh_handshake_skew_secs: default_ssh_handshake_skew_secs(),
126133
ssh_session_ttl_secs: default_ssh_session_ttl_secs(),
127134
client_tls_secret_name: String::new(),
135+
host_gateway_ip: String::new(),
128136
}
129137
}
130138

@@ -232,6 +240,13 @@ impl Config {
232240
self.client_tls_secret_name = name.into();
233241
self
234242
}
243+
244+
/// Set the host gateway IP for sandbox pod hostAliases.
245+
#[must_use]
246+
pub fn with_host_gateway_ip(mut self, ip: impl Into<String>) -> Self {
247+
self.host_gateway_ip = ip.into();
248+
self
249+
}
235250
}
236251

237252
fn default_bind_address() -> SocketAddr {

crates/openshell-server/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ pub async fn run_server(config: Config, tracing_log_bus: TracingLogBus) -> Resul
119119
config.ssh_handshake_secret.clone(),
120120
config.ssh_handshake_skew_secs,
121121
config.client_tls_secret_name.clone(),
122+
config.host_gateway_ip.clone(),
122123
)
123124
.await
124125
.map_err(|e| Error::execution(format!("failed to create kubernetes client: {e}")))?;

crates/openshell-server/src/main.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@ struct Args {
9191
#[arg(long, env = "OPENSHELL_CLIENT_TLS_SECRET_NAME")]
9292
client_tls_secret_name: Option<String>,
9393

94+
/// Host gateway IP for sandbox pod hostAliases.
95+
/// When set, sandbox pods get hostAliases entries mapping
96+
/// host.docker.internal and host.openshell.internal to this IP.
97+
#[arg(long, env = "OPENSHELL_HOST_GATEWAY_IP")]
98+
host_gateway_ip: Option<String>,
99+
94100
/// Disable TLS entirely — listen on plaintext HTTP.
95101
/// Use this when the gateway sits behind a reverse proxy or tunnel
96102
/// (e.g. Cloudflare Tunnel) that terminates TLS at the edge.
@@ -178,6 +184,10 @@ async fn main() -> Result<()> {
178184
config = config.with_client_tls_secret_name(name);
179185
}
180186

187+
if let Some(ip) = args.host_gateway_ip {
188+
config = config.with_host_gateway_ip(ip);
189+
}
190+
181191
if args.disable_tls {
182192
info!("TLS disabled — listening on plaintext HTTP");
183193
} else if args.disable_gateway_auth {

crates/openshell-server/src/sandbox/mod.rs

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ pub struct SandboxClient {
4949
ssh_handshake_skew_secs: u64,
5050
/// When non-empty, sandbox pods get this K8s secret mounted for mTLS to the server.
5151
client_tls_secret_name: String,
52+
/// When non-empty, sandbox pods get `hostAliases` entries mapping
53+
/// `host.docker.internal` and `host.openshell.internal` to this IP.
54+
host_gateway_ip: String,
5255
}
5356

5457
impl std::fmt::Debug for SandboxClient {
@@ -71,6 +74,7 @@ impl SandboxClient {
7174
ssh_handshake_secret: String,
7275
ssh_handshake_skew_secs: u64,
7376
client_tls_secret_name: String,
77+
host_gateway_ip: String,
7478
) -> Result<Self, KubeError> {
7579
let mut config = match kube::Config::incluster() {
7680
Ok(c) => c,
@@ -92,6 +96,7 @@ impl SandboxClient {
9296
ssh_handshake_secret,
9397
ssh_handshake_skew_secs,
9498
client_tls_secret_name,
99+
host_gateway_ip,
95100
})
96101
}
97102

@@ -206,6 +211,7 @@ impl SandboxClient {
206211
self.ssh_handshake_secret(),
207212
self.ssh_handshake_skew_secs(),
208213
&self.client_tls_secret_name,
214+
&self.host_gateway_ip,
209215
);
210216
let api = self.api();
211217

@@ -759,6 +765,7 @@ fn sandbox_to_k8s_spec(
759765
ssh_handshake_secret: &str,
760766
ssh_handshake_skew_secs: u64,
761767
client_tls_secret_name: &str,
768+
host_gateway_ip: &str,
762769
) -> serde_json::Value {
763770
let mut root = serde_json::Map::new();
764771
if let Some(spec) = spec {
@@ -787,6 +794,7 @@ fn sandbox_to_k8s_spec(
787794
ssh_handshake_skew_secs,
788795
&spec.environment,
789796
client_tls_secret_name,
797+
host_gateway_ip,
790798
),
791799
);
792800
if !template.agent_socket.is_empty() {
@@ -820,6 +828,7 @@ fn sandbox_to_k8s_spec(
820828
ssh_handshake_skew_secs,
821829
spec_env,
822830
client_tls_secret_name,
831+
host_gateway_ip,
823832
),
824833
);
825834
}
@@ -843,6 +852,7 @@ fn sandbox_template_to_k8s(
843852
ssh_handshake_skew_secs: u64,
844853
spec_environment: &std::collections::HashMap<String, String>,
845854
client_tls_secret_name: &str,
855+
host_gateway_ip: &str,
846856
) -> serde_json::Value {
847857
if let Some(pod_template) = struct_to_json(&template.pod_template) {
848858
return inject_pod_template(
@@ -859,6 +869,7 @@ fn sandbox_template_to_k8s(
859869
ssh_handshake_skew_secs,
860870
spec_environment,
861871
client_tls_secret_name,
872+
host_gateway_ip,
862873
);
863874
}
864875

@@ -968,6 +979,17 @@ fn sandbox_template_to_k8s(
968979
);
969980
}
970981

982+
// Add hostAliases so sandbox pods can reach the Docker host.
983+
if !host_gateway_ip.is_empty() {
984+
spec.insert(
985+
"hostAliases".to_string(),
986+
serde_json::json!([{
987+
"ip": host_gateway_ip,
988+
"hostnames": ["host.docker.internal", "host.openshell.internal"]
989+
}]),
990+
);
991+
}
992+
971993
let mut template_value = serde_json::Map::new();
972994
if !metadata.is_empty() {
973995
template_value.insert("metadata".to_string(), serde_json::Value::Object(metadata));
@@ -997,6 +1019,7 @@ fn inject_pod_template(
9971019
ssh_handshake_skew_secs: u64,
9981020
spec_environment: &std::collections::HashMap<String, String>,
9991021
client_tls_secret_name: &str,
1022+
host_gateway_ip: &str,
10001023
) -> serde_json::Value {
10011024
let Some(spec) = pod_template
10021025
.get_mut("spec")
@@ -1012,6 +1035,17 @@ fn inject_pod_template(
10121035
);
10131036
}
10141037

1038+
// Add hostAliases so sandbox pods can reach the Docker host.
1039+
if !host_gateway_ip.is_empty() {
1040+
spec.insert(
1041+
"hostAliases".to_string(),
1042+
serde_json::json!([{
1043+
"ip": host_gateway_ip,
1044+
"hostnames": ["host.docker.internal", "host.openshell.internal"]
1045+
}]),
1046+
);
1047+
}
1048+
10151049
// Inject TLS volume at the pod spec level.
10161050
if !client_tls_secret_name.is_empty() {
10171051
let volumes = spec
@@ -1806,6 +1840,7 @@ mod tests {
18061840
300,
18071841
&std::collections::HashMap::new(),
18081842
"",
1843+
"",
18091844
);
18101845

18111846
assert_eq!(
@@ -1851,6 +1886,7 @@ mod tests {
18511886
300,
18521887
&std::collections::HashMap::new(),
18531888
"",
1889+
"",
18541890
);
18551891

18561892
let limits = &pod_template["spec"]["containers"][0]["resources"]["limits"];
@@ -1910,6 +1946,7 @@ mod tests {
19101946
300,
19111947
&std::collections::HashMap::new(),
19121948
"",
1949+
"",
19131950
);
19141951

19151952
assert_eq!(
@@ -1921,4 +1958,116 @@ mod tests {
19211958
serde_json::json!(GPU_RESOURCE_QUANTITY)
19221959
);
19231960
}
1961+
1962+
/// A non-empty gateway IP must yield exactly one hostAliases entry that maps
/// both well-known host names to that IP in the default pod template.
#[test]
fn host_aliases_injected_when_gateway_ip_set() {
    let no_env = std::collections::HashMap::new();
    let gateway_ip = "172.17.0.1";

    let pod_template = sandbox_template_to_k8s(
        &SandboxTemplate::default(),
        false,
        "openshell/sandbox:latest",
        "",
        "sandbox-id",
        "sandbox-name",
        "https://gateway.example.com",
        "0.0.0.0:2222",
        "secret",
        300,
        &no_env,
        "",
        gateway_ip,
    );

    let host_aliases = pod_template["spec"]["hostAliases"]
        .as_array()
        .expect("hostAliases should exist");
    assert_eq!(host_aliases.len(), 1);

    let alias = &host_aliases[0];
    assert_eq!(alias["ip"], gateway_ip);

    let hostnames = alias["hostnames"]
        .as_array()
        .expect("hostnames should exist");
    for expected in ["host.docker.internal", "host.openshell.internal"] {
        assert!(hostnames.contains(&serde_json::json!(expected)));
    }
}
1991+
1992+
/// With an empty gateway IP the feature is off: the generated pod spec must
/// not carry a hostAliases key at all.
#[test]
fn host_aliases_not_injected_when_gateway_ip_empty() {
    let no_env = std::collections::HashMap::new();
    let disabled_gateway_ip = "";

    let pod_template = sandbox_template_to_k8s(
        &SandboxTemplate::default(),
        false,
        "openshell/sandbox:latest",
        "",
        "sandbox-id",
        "sandbox-name",
        "https://gateway.example.com",
        "0.0.0.0:2222",
        "secret",
        300,
        &no_env,
        "",
        disabled_gateway_ip,
    );

    // Indexing a missing key on a JSON value yields Null.
    let host_aliases = &pod_template["spec"]["hostAliases"];
    assert!(
        host_aliases.is_null(),
        "hostAliases should not be present when host_gateway_ip is empty"
    );
}
2015+
2016+
/// When the sandbox template carries its own pod template, the gateway
/// hostAliases entry must still be injected into that template's spec.
///
/// Mirrors the default-template test: checks the entry count, the IP, and
/// both host names rather than the IP alone.
#[test]
fn host_aliases_injected_in_custom_pod_template() {
    // Minimal caller-supplied pod template: spec.containers = [{ name: "agent" }].
    let container = Struct {
        fields: [("name".to_string(), string_value("agent"))]
            .into_iter()
            .collect(),
    };
    let containers = Value {
        kind: Some(Kind::ListValue(prost_types::ListValue {
            values: vec![Value {
                kind: Some(Kind::StructValue(container)),
            }],
        })),
    };
    let spec = Struct {
        fields: [("containers".to_string(), containers)]
            .into_iter()
            .collect(),
    };
    let template = SandboxTemplate {
        pod_template: Some(Struct {
            fields: [(
                "spec".to_string(),
                Value {
                    kind: Some(Kind::StructValue(spec)),
                },
            )]
            .into_iter()
            .collect(),
        }),
        ..SandboxTemplate::default()
    };

    let pod_template = sandbox_template_to_k8s(
        &template,
        false,
        "openshell/sandbox:latest",
        "",
        "sandbox-id",
        "sandbox-name",
        "https://gateway.example.com",
        "0.0.0.0:2222",
        "secret",
        300,
        &std::collections::HashMap::new(),
        "",
        "192.168.65.2",
    );

    let host_aliases = pod_template["spec"]["hostAliases"]
        .as_array()
        .expect("hostAliases should exist in custom pod template");
    // The supplied template declares no hostAliases, so only the gateway entry exists.
    assert_eq!(host_aliases.len(), 1);
    assert_eq!(host_aliases[0]["ip"], "192.168.65.2");

    let hostnames = host_aliases[0]["hostnames"]
        .as_array()
        .expect("hostnames should exist in custom pod template");
    assert!(hostnames.contains(&serde_json::json!("host.docker.internal")));
    assert!(hostnames.contains(&serde_json::json!("host.openshell.internal")));
}
19242073
}

deploy/docker/cluster-entrypoint.sh

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,27 @@ if [ "${GPU_ENABLED:-}" = "true" ]; then
330330
fi
331331
fi
332332

333+
# ---------------------------------------------------------------------------
# Detect host gateway IP for sandbox pod hostAliases
# ---------------------------------------------------------------------------
# Sandbox pods must be able to reach services on the Docker host (for example
# provider endpoints during local development).
#
# Lookup order:
#   1. The IPv4 address host.docker.internal resolves to. On Docker Desktop
#      this is a special host-reachable IP that is NOT the bridge default
#      gateway, so Docker's own resolution is preferred when available.
#   2. The container default gateway, for Linux engines where
#      host.docker.internal commonly maps to the bridge gateway anyway.
#
# HOST_GATEWAY_IP is left empty when neither lookup yields an address.
HOST_GATEWAY_IP=$(getent ahostsv4 host.docker.internal 2>/dev/null | awk 'NR == 1 { print $1; exit }')
if [ -z "$HOST_GATEWAY_IP" ]; then
    HOST_GATEWAY_IP=$(ip -4 route | awk '/default/ { print $3; exit }')
    if [ -z "$HOST_GATEWAY_IP" ]; then
        echo "Warning: Could not detect host gateway IP from host.docker.internal or default route"
    else
        echo "Detected host gateway IP from default route: $HOST_GATEWAY_IP"
    fi
else
    echo "Detected host gateway IP from host.docker.internal: $HOST_GATEWAY_IP"
fi
353+
333354
# ---------------------------------------------------------------------------
334355
# Override image tag and pull policy for local development
335356
# ---------------------------------------------------------------------------
@@ -428,6 +449,16 @@ if [ -f "$HELMCHART" ]; then
428449
fi
429450
fi
430451

452+
# Inject host gateway IP into the HelmChart manifest so sandbox pods can
# reach services on the Docker host via host.docker.internal / host.openshell.internal.
#
# Only touch the manifest when it actually exists: running `sed -i` on a
# missing file fails, which would abort the entrypoint under `set -e`.
if [ -f "$HELMCHART" ]; then
    if [ -n "$HOST_GATEWAY_IP" ]; then
        echo "Setting host gateway IP: $HOST_GATEWAY_IP"
        sed -i "s|__HOST_GATEWAY_IP__|${HOST_GATEWAY_IP}|g" "$HELMCHART"
    else
        # Clear the placeholder so the server gets an empty string (feature disabled)
        sed -i "s|hostGatewayIP: __HOST_GATEWAY_IP__|hostGatewayIP: \"\"|g" "$HELMCHART"
    fi
fi
461+
431462
# Inject chart checksum into the HelmChart manifest so that a changed chart
432463
# tarball causes the HelmChart CR spec to differ, forcing the k3s Helm
433464
# controller to upgrade the release.

deploy/helm/openshell/templates/statefulset.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ spec:
6565
- name: OPENSHELL_SSH_GATEWAY_PORT
6666
value: {{ .Values.server.sshGatewayPort | quote }}
6767
{{- end }}
68+
{{- if .Values.server.hostGatewayIP }}
69+
- name: OPENSHELL_HOST_GATEWAY_IP
70+
value: {{ .Values.server.hostGatewayIP | quote }}
71+
{{- end }}
6872
- name: OPENSHELL_SSH_HANDSHAKE_SECRET
6973
value: {{ required "server.sshHandshakeSecret is required" .Values.server.sshHandshakeSecret | quote }}
7074
{{- if .Values.server.disableTls }}

0 commit comments

Comments
 (0)