Running Boundary on Nomad

4 min read
Stephan Hochdörfer
Head of IT Business Operations

HashiCorp Boundary provides access to applications and critical systems with fine-grained authorizations without managing credentials or exposing your network internals.

Since most of our internal applications run on our Nomad cluster, the goal was also to deploy Boundary there.

Boundary consists of two components, controller and worker instances:

  • Controller instances are what users authenticate to when using the Boundary client, and they contain Boundary's resources and permissions
  • Worker instances are primarily used as network proxies for Boundary sessions, allowing you to access private targets

For our test setup, we decided to create two distinct Nomad jobs so that the controller and worker nodes can be deployed independently, based on various constraints in our Nomad cluster.
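
If you want to pin the controller or worker allocations to specific parts of your cluster, such constraints can be added at the job, group, or task level. The following is only a minimal, hypothetical sketch (the node class "backend" is made up and not part of our actual setup):

# Hypothetical placement constraint: only schedule on nodes of class "backend"
constraint {
  attribute = "${node.class}"
  value     = "backend"
}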

The Nomad job file for the Boundary controller looks like this:

job "boundary.prod" {
datacenters = ["dc1"]
type = "service"

update {
stagger = "10s"
max_parallel = 1
}

group "boundary-controller.prod" {
count = 1

network {
port "api" {
to = 9200
}
port "cluster" {
to = 9201
}
}

task "boundary-controller" {
driver = "docker"

vault {
change_mode = "signal"
change_signal = "SIGUSR1"
}

template {
data = <<EOH
disable_mlock = true

# API listener configuration block
listener "tcp" {
  tls_disable = true
  address     = "0.0.0.0:9200"
  purpose     = "api"
}

# Cluster listener configuration block
listener "tcp" {
  address = "0.0.0.0:9201"
  purpose = "cluster"
}

controller {
  name                            = "controller"
  description                     = "Boundary Controller Nomad"
  public_cluster_addr             = "{{ env "NOMAD_ADDR_cluster" }}"
  graceful_shutdown_wait_duration = "10s"

  database {
    url = "" # TODO: Enter PostgreSQL connection details here
  }
}

# Event/audit logging configuration
events {
  audit_enabled        = true
  sysevents_enabled    = true
  observations_enabled = true

  sink "stderr" {
    name        = "all-events"
    description = "All events sent to stderr"
    event_types = ["*"]
    format      = "cloudevents-json"
  }
}

# Root key for Boundary's internal key hierarchy
kms "transit" {
  purpose         = "root"
  token           = "" # TODO: Enter Vault token here
  address         = "https://vault.loc"
  disable_renewal = "false"
  key_name        = "Boundary.root"
  mount_path      = "transit/"
  tls_skip_verify = "true"
}

# Recovery key for rescue/recovery operations
kms "transit" {
  purpose         = "recovery"
  token           = "" # TODO: Enter Vault token here
  address         = "https://vault.loc"
  disable_renewal = "false"
  key_name        = "Boundary.recovery"
  mount_path      = "transit/"
  tls_skip_verify = "true"
}

# Key used to authenticate workers against the controller
kms "transit" {
  purpose         = "worker-auth"
  token           = "" # TODO: Enter Vault token here
  address         = "https://vault.loc"
  disable_renewal = "false"
  key_name        = "Boundary.worker.auth"
  mount_path      = "transit/"
  tls_skip_verify = "true"
}
EOH
        destination = "local/boundary.hcl"
      }

      config {
        image      = "hashicorp/boundary:0.16"
        force_pull = true
        privileged = true
        ports      = ["api", "cluster"]
        volumes = [
          "local/boundary.hcl:/boundary/boundary.hcl"
        ]
        args = [
          "boundary",
          "server",
          "-config",
          "/boundary/boundary.hcl"
        ]
      }

      resources {
        cpu    = 256
        memory = 1028
      }

      service {
        name     = "boundary-api-service"
        provider = "nomad"
        tags = [
          "traefik.enable=true",
          "traefik.http.routers.boundary-prod.entrypoints=websecure",
          "traefik.http.routers.boundary-prod.rule=Host(`boundary.loc`)",
        ]
        port = "api"

        check {
          name     = "alive"
          type     = "tcp"
          interval = "10s"
          timeout  = "2s"
        }
      }

      service {
        name     = "boundary-cluster-service"
        provider = "nomad"
        port     = "cluster"
        tags = [
          "traefik.enable=true",
          "traefik.tcp.routers.boundary-controller.rule=HostSNI(`*`)",
          "traefik.tcp.routers.boundary-controller.entrypoints=boundary-controller",
        ]

        check {
          name     = "alive"
          type     = "tcp"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}
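
One thing to keep in mind: the controller expects the PostgreSQL schema to already exist. Boundary ships with the boundary database init (first-time setup) and boundary database migrate (schema upgrades) commands for this. As a minimal, hypothetical sketch, this could be wired into the same group as a prestart task; it assumes the same template block as the controller task (omitted here for brevity) and is not part of our actual job file:

task "boundary-db-init" {
  driver = "docker"

  # Run once before the controller task starts
  lifecycle {
    hook    = "prestart"
    sidecar = false
  }

  # Re-use the same template {} block as the controller task here,
  # so /boundary/boundary.hcl contains the database connection URL.

  config {
    image = "hashicorp/boundary:0.16"
    volumes = [
      "local/boundary.hcl:/boundary/boundary.hcl"
    ]
    args = [
      "boundary",
      "database",
      "init",
      "-config",
      "/boundary/boundary.hcl"
    ]
  }
}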

For the worker process, we are using the following job file:

job "boundary-worker.prod" {
datacenters = ["dc1"]
type = "service"

update {
stagger = "10s"
max_parallel = 1
}
group "boundary-worker.prod" {
count = 1

network {
port "worker" {
to = 9202
}
}

task "boundary-worker" {
driver = "docker"
vault {
change_mode = "signal"
change_signal = "SIGUSR1"
}

template {
data = <<EOH
listener "tcp" {
purpose = "proxy"
address = "0.0.0.0:9202"
}

worker {
name = "{{ env "NOMAD_TASK_NAME" }}-{{env "NOMAD_SHORT_ALLOC_ID"}}"
public_addr = "boundary-worker.loc"
initial_upstreams = ["{{ range nomadService "boundary-cluster-service" }}{{ .Address }}:{{ .Port }}{{ end }}"]
tags {
type = ["nomad", "dc1"]
region = ["eu-1"]
}
}

kms "transit" {
purpose = "worker-auth"
token = "" # TODO: Enter Vault token here
address = "https://vault.loc"
disable_renewal = "false"
key_name = "Boundary.worker.auth"
mount_path = "transit/"
tls_skip_verify = "true"
}
EOH
destination = "local/boundary.hcl"
}

config {
image = "hashicorp/boundary:0.16"
force_pull = true
privileged = true
ports = ["worker"]
volumes = [
"local/boundary.hcl:/boundary/boundary.hcl"
]
args = [
"boundary",
"server",
"-config",
"/boundary/boundary.hcl"
]
}

resources {
cpu = 256
memory = 516
}

service {
name = "boundary-worker-service"
provider = "nomad"
port = "worker"
tags = [
"traefik.enable=true",
"traefik.tcp.routers.boundary-worker.rule=HostSNI(`*`)",
"traefik.tcp.routers.boundary-worker.entrypoints=boundary-worker",
]

check {
name = "alive"
type = "tcp"
interval = "10s"
timeout = "2s"
}
}
}
}
}

Both Nomad job files use Vault's transit secrets engine for key management, but you don't have to: Boundary also supports other KMS providers. Pick whichever makes the most sense for you.
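
If you do stick with Vault, the tokens referenced in the kms "transit" blocks only need to encrypt and decrypt with the corresponding transit keys. A minimal policy sketch (key names taken from the job files above, capabilities based on Vault's transit API) could look roughly like this:

# Controller token: needs the root, recovery and worker-auth keys
path "transit/encrypt/Boundary.root" {
  capabilities = ["update"]
}
path "transit/decrypt/Boundary.root" {
  capabilities = ["update"]
}

path "transit/encrypt/Boundary.recovery" {
  capabilities = ["update"]
}
path "transit/decrypt/Boundary.recovery" {
  capabilities = ["update"]
}

# Worker token: only needs the worker-auth key
path "transit/encrypt/Boundary.worker.auth" {
  capabilities = ["update"]
}
path "transit/decrypt/Boundary.worker.auth" {
  capabilities = ["update"]
}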

Also, since we are using Traefik for request routing, we needed to extend the Traefik configuration and expose two new entrypoints: one for the controller node (boundary-controller) and one for the worker node (boundary-worker):

[entryPoints]
  [entryPoints.boundary-controller]
    address = ":9201"

  [entryPoints.boundary-worker]
    address = ":9202"