强曰为道

与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

第 14 章:Docker 与 Kubernetes

第 14 章:Docker 与 Kubernetes

本章介绍如何在容器化环境中部署生产级 RabbitMQ,包括单节点、集群和 Kubernetes 方案。


14.1 Docker 单节点部署

基本部署

# Start one RabbitMQ broker (management image) in the background.
# Published ports: 5672 (AMQP), 15672 (management UI), 15692 (Prometheus).
# Data and logs live in the named volumes rabbitmq_data / rabbitmq_log.
docker run \
  --detach \
  --name rabbitmq \
  --publish 5672:5672 \
  --publish 15672:15672 \
  --publish 15692:15692 \
  --env RABBITMQ_DEFAULT_USER=admin \
  --env RABBITMQ_DEFAULT_PASS=admin123 \
  --volume rabbitmq_data:/var/lib/rabbitmq \
  --volume rabbitmq_log:/var/log/rabbitmq \
  rabbitmq:4-management

推荐的完整配置

# Recommended single-node launch: fixed hostname, resource limits, config
# files mounted read-only, a restart policy and a container health check.
#
# Bind-mount sources are written as absolute paths (${PWD}/...) because Docker
# releases before 23.0 reject relative host paths such as ./rabbitmq.conf in
# -v. Run this from the directory that holds rabbitmq.conf and enabled_plugins.
#
# NOTE(review): 5552 is the default stream-protocol port; it presumably only
# serves traffic when rabbitmq_stream is listed in enabled_plugins - confirm.
# NOTE(review): the password and Erlang cookie below are sample values; replace
# them before using this outside a local test.
docker run -d \
  --name rabbitmq \
  --hostname rabbitmq-node1 \
  -p 5672:5672 \
  -p 15672:15672 \
  -p 15692:15692 \
  -p 5552:5552 \
  -e RABBITMQ_DEFAULT_USER=admin \
  -e RABBITMQ_DEFAULT_PASS=admin123 \
  -e RABBITMQ_DEFAULT_VHOST=/ \
  -e RABBITMQ_ERLANG_COOKIE="SWQOKODSQALRPCLNMEQG" \
  -e RABBITMQ_LOG_LEVEL=info \
  --ulimit nofile=65536:65536 \
  --memory=2g \
  --cpus=2 \
  -v rabbitmq_data:/var/lib/rabbitmq \
  -v "${PWD}/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro" \
  -v "${PWD}/enabled_plugins:/etc/rabbitmq/enabled_plugins:ro" \
  --restart unless-stopped \
  --health-cmd "rabbitmq-diagnostics -q check_running && rabbitmq-diagnostics -q check_local_alarms" \
  --health-interval 30s \
  --health-timeout 10s \
  --health-retries 3 \
  --health-start-period 60s \
  rabbitmq:4-management

自定义配置文件

# rabbitmq.conf

# Heartbeat interval plus memory / disk alarm thresholds
heartbeat = 60
vm_memory_high_watermark.relative = 0.6
disk_free_limit.absolute = 1GB

# Keep the guest account in the loopback-only user list
loopback_users.guest = true

# Listener ports: AMQP, management UI, Prometheus metrics
listeners.tcp.default = 5672
management.tcp.port = 15672
prometheus.tcp.port = 15692

% enabled_plugins
% This file is an Erlang term, so comments must start with '%', not '#'.
% NOTE(review): rabbitmq_delayed_message_exchange is a community plugin and,
% as far as I know, is not bundled with the rabbitmq:4-management image.
% Install its .ez archive into the plugins directory first, or remove it from
% the list below.
[rabbitmq_management,rabbitmq_prometheus,rabbitmq_delayed_message_exchange].

14.2 Docker Compose 单节点

# Compose file: a single RabbitMQ node with the management UI.
version: '3.8'

services:
  rabbitmq:
    image: rabbitmq:4-management
    hostname: rabbitmq
    container_name: rabbitmq
    restart: unless-stopped
    environment:
      - RABBITMQ_DEFAULT_USER=admin
      - RABBITMQ_DEFAULT_PASS=admin123
      - RABBITMQ_LOG_LEVEL=info
    ports:
      - "5672:5672"    # AMQP
      - "15672:15672"  # management UI
      - "15692:15692"  # Prometheus metrics
    volumes:
      # Broker data in a named volume; config files bind-mounted read-only
      - rabbitmq_data:/var/lib/rabbitmq
      - ./rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./enabled_plugins:/etc/rabbitmq/enabled_plugins:ro
    ulimits:
      nofile:
        hard: 65536
        soft: 65536
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 2G
    healthcheck:
      test:
        - "CMD"
        - "rabbitmq-diagnostics"
        - "-q"
        - "check_running"
      start_period: 60s
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  rabbitmq_data:
    driver: local

14.3 Docker Compose 集群

# Compose file: three RabbitMQ nodes on one bridge network, fronted by HAProxy.
# All nodes carry the same Erlang cookie value. rabbitmq2 and rabbitmq3 start
# only after rabbitmq1 reports healthy.
version: '3.8'

services:
  rabbitmq1:
    image: rabbitmq:4-management
    hostname: rabbitmq1
    container_name: rabbitmq1
    networks:
      - rabbit_net
    ports:
      - "5672:5672"
      - "15672:15672"
    environment:
      - RABBITMQ_ERLANG_COOKIE=SWQOKODSQALRPCLNMEQG
      - RABBITMQ_DEFAULT_USER=admin
      - RABBITMQ_DEFAULT_PASS=admin123
    volumes:
      - rabbitmq1_data:/var/lib/rabbitmq
      - ./rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./enabled_plugins:/etc/rabbitmq/enabled_plugins:ro
    healthcheck:
      test:
        - "CMD"
        - "rabbitmq-diagnostics"
        - "-q"
        - "check_running"
      start_period: 60s
      interval: 30s
      timeout: 10s
      retries: 5

  rabbitmq2:
    image: rabbitmq:4-management
    hostname: rabbitmq2
    container_name: rabbitmq2
    networks:
      - rabbit_net
    ports:
      - "5673:5672"
      - "15673:15672"
    environment:
      - RABBITMQ_ERLANG_COOKIE=SWQOKODSQALRPCLNMEQG
    volumes:
      - rabbitmq2_data:/var/lib/rabbitmq
      - ./rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./enabled_plugins:/etc/rabbitmq/enabled_plugins:ro
    depends_on:
      rabbitmq1:
        condition: service_healthy

  rabbitmq3:
    image: rabbitmq:4-management
    hostname: rabbitmq3
    container_name: rabbitmq3
    networks:
      - rabbit_net
    ports:
      - "5674:5672"
      - "15674:15672"
    environment:
      - RABBITMQ_ERLANG_COOKIE=SWQOKODSQALRPCLNMEQG
    volumes:
      - rabbitmq3_data:/var/lib/rabbitmq
      - ./rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./enabled_plugins:/etc/rabbitmq/enabled_plugins:ro
    depends_on:
      rabbitmq1:
        condition: service_healthy

  # HAProxy load balancer
  haproxy:
    image: haproxy:2.8
    container_name: rabbitmq_haproxy
    networks:
      - rabbit_net
    ports:
      - "5670:5672"   # AMQP
      - "15670:15672" # Management UI
    volumes:
      - ./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro
    depends_on:
      - rabbitmq1
      - rabbitmq2
      - rabbitmq3

networks:
  rabbit_net:
    driver: bridge

volumes:
  rabbitmq1_data:
  rabbitmq2_data:
  rabbitmq3_data:

集群初始化脚本

#!/bin/bash
# init_cluster.sh
#
# Joins the rabbitmq2 and rabbitmq3 containers to the cluster seeded by
# rabbitmq1, then prints the resulting cluster status.
#
# Requires: the docker CLI and the three running containers named
# rabbitmq1, rabbitmq2 and rabbitmq3.
# Exit status: non-zero as soon as any step fails or a node never becomes ready.

set -euo pipefail

readonly SEED_NODE="rabbitmq1"
readonly JOINING_NODES=("rabbitmq2" "rabbitmq3")
readonly STARTUP_RETRIES=30
readonly STARTUP_DELAY_SECONDS=2

#######################################
# Polls a container until its broker reports that it is running.
# Arguments: $1 - container name
# Returns:   0 once ready, 1 after STARTUP_RETRIES failed attempts
#######################################
wait_for_node() {
  local container=$1
  local attempt
  for ((attempt = 1; attempt <= STARTUP_RETRIES; attempt++)); do
    # Probe output is discarded on purpose: failures are expected while booting.
    if docker exec "$container" rabbitmq-diagnostics -q check_running \
      >/dev/null 2>&1; then
      return 0
    fi
    sleep "$STARTUP_DELAY_SECONDS"
  done
  printf 'node %s did not become ready in time\n' "$container" >&2
  return 1
}

#######################################
# Stops the RabbitMQ app in a container, resets it, joins it to SEED_NODE
# and starts the app again.
# Arguments: $1 - container name
#######################################
join_seed() {
  local container=$1
  docker exec "$container" rabbitmqctl stop_app
  docker exec "$container" rabbitmqctl reset
  docker exec "$container" rabbitmqctl join_cluster "rabbit@${SEED_NODE}"
  docker exec "$container" rabbitmqctl start_app
}

main() {
  local node

  echo "等待 RabbitMQ 节点启动..."
  # Poll every node instead of sleeping for a fixed time: a slow host would
  # otherwise reach join_cluster before the brokers are up.
  for node in "$SEED_NODE" "${JOINING_NODES[@]}"; do
    wait_for_node "$node"
  done

  echo "组建集群..."
  for node in "${JOINING_NODES[@]}"; do
    join_seed "$node"
  done

  echo "设置仲裁队列策略..."
  # Nothing is applied here. RabbitMQ 4.x does NOT switch queues to the quorum
  # type on its own: a queue is classic unless it is declared with
  # x-queue-type=quorum or a default queue type has been configured. The queue
  # type is fixed at declaration time and cannot be changed by a policy.

  echo "验证集群状态..."
  docker exec "$SEED_NODE" rabbitmqctl cluster_status

  echo "集群搭建完成!"
}

main "$@"

HAProxy 配置

# haproxy.cfg
# Load balancer in front of the rabbitmq1..3 nodes: one TCP listener for AMQP
# and one HTTP listener for the management UI.
global
    # Write log lines to stdout in raw format.
    log stdout format raw local0

defaults
    log global
    # TCP is the default proxy mode; the management listener overrides it.
    mode tcp
    timeout connect 5s
    timeout client 120s
    timeout server 120s

# AMQP load balancing
listen rabbitmq_amqp
    bind *:5672
    balance roundrobin
    # Enable TCP keepalive on the proxied connections.
    option tcpka
    # Health-check each node every 5s; 2 successes mark it up, 3 failures down.
    server rabbitmq1 rabbitmq1:5672 check inter 5s rise 2 fall 3
    server rabbitmq2 rabbitmq2:5672 check inter 5s rise 2 fall 3
    server rabbitmq3 rabbitmq3:5672 check inter 5s rise 2 fall 3

# Management UI load balancing
listen rabbitmq_management
    bind *:15672
    # HTTP mode for this listener only (defaults use tcp).
    mode http
    balance roundrobin
    server rabbitmq1 rabbitmq1:15672 check
    server rabbitmq2 rabbitmq2:15672 check
    server rabbitmq3 rabbitmq3:15672 check

14.4 持久化配置

数据卷策略

| 数据 | 路径 | 持久化方式 |
| --- | --- | --- |
| 消息数据 | /var/lib/rabbitmq | Named Volume / PVC |
| 日志 | /var/log/rabbitmq | 可选持久化 |
| 配置 | /etc/rabbitmq | 只读挂载 |

生产级挂载

# Long-syntax mount list for the RabbitMQ service: one named volume for the
# data directory and three read-only bind mounts for configuration files.
volumes:
  # Named volume mounted at the broker's data directory.
  - type: volume
    source: rabbitmq_data
    target: /var/lib/rabbitmq
    volume:
      nocopy: false
  # Main configuration file, read-only.
  - type: bind
    source: ./rabbitmq.conf
    target: /etc/rabbitmq/rabbitmq.conf
    read_only: true
  # advanced.config, read-only.
  - type: bind
    source: ./advanced.config
    target: /etc/rabbitmq/advanced.config
    read_only: true
  # Plugin list, read-only.
  - type: bind
    source: ./enabled_plugins
    target: /etc/rabbitmq/enabled_plugins
    read_only: true

14.5 健康检查

Docker 健康检查

# Container health probe: run `rabbitmq-diagnostics -q check_running` directly
# (exec form, no shell).
healthcheck:
  test: ["CMD", "rabbitmq-diagnostics", "-q", "check_running"]
  start_period: 60s
  interval: 30s
  timeout: 10s
  retries: 3

常用健康检查命令

| 命令 | 说明 | 适用场景 |
| --- | --- | --- |
| check_running | 检查节点是否运行 | 基础检查 |
| check_local_alarms | 检查本地告警 | 内存/磁盘检查 |
| check_port_connectivity | 检查端口连通性 | 网络检查 |
| check_if_node_is_quorum_critical | 检查仲裁队列状态 | 集群检查 |

组合健康检查

# Combined probe: the shell form is needed so that `&&` can chain the
# running check with the local-alarms check.
healthcheck:
  test:
    - "CMD-SHELL"
    - "rabbitmq-diagnostics -q check_running && rabbitmq-diagnostics -q check_local_alarms"
  start_period: 60s
  interval: 30s
  timeout: 10s
  retries: 3

14.6 Kubernetes 部署

使用 RabbitMQ Cluster Operator

# Install the RabbitMQ Cluster Operator from its latest release manifest
kubectl apply --filename "https://github.com/rabbitmq/cluster-operator/releases/latest/download/cluster-operator.yml"

# Verify the installation by listing the pods in the operator namespace
kubectl --namespace rabbitmq-system get pods

RabbitmqCluster CRD

# RabbitmqCluster custom resource: a three-replica cluster named
# production-rabbitmq in the messaging namespace.
apiVersion: rabbitmq.com/v1beta1
kind: RabbitmqCluster
metadata:
  namespace: messaging
  name: production-rabbitmq
spec:
  image: rabbitmq:4-management
  replicas: 3

  # Per-pod CPU / memory requests and limits
  resources:
    limits:
      cpu: "2"
      memory: "4Gi"
    requests:
      cpu: "500m"
      memory: "1Gi"

  # Persistent storage per replica
  persistence:
    storage: 50Gi
    storageClassName: fast-ssd

  # Secrets holding the server certificate and the CA
  tls:
    caSecretName: rabbitmq-ca
    secretName: rabbitmq-tls

  # Prefer (not require) scheduling replicas on different hosts
  affinity:
    podAntiAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          podAffinityTerm:
            topologyKey: kubernetes.io/hostname
            labelSelector:
              matchLabels:
                app.kubernetes.io/name: production-rabbitmq

  # Extra broker configuration and plugins
  rabbitmq:
    additionalPlugins:
      - rabbitmq_prometheus
      - rabbitmq_delayed_message_exchange
    additionalConfig: |
      vm_memory_high_watermark.relative = 0.6
      disk_free_limit.absolute = 2GB
      heartbeat = 60
      queue_leader_locator = balanced

  # StatefulSet patch: default user name inline, password read from the
  # "password" key of the rabbitmq-secret Secret
  override:
    statefulSet:
      spec:
        template:
          spec:
            containers:
              - name: rabbitmq
                env:
                  - name: RABBITMQ_DEFAULT_USER
                    value: admin
                  - name: RABBITMQ_DEFAULT_PASS
                    valueFrom:
                      secretKeyRef:
                        name: rabbitmq-secret
                        key: password

部署和管理

# Deploy the cluster
kubectl apply --filename rabbitmq-cluster.yaml

# Check the custom resource and its pods
kubectl --namespace messaging get rabbitmqclusters
kubectl --namespace messaging get pods --selector app.kubernetes.io/name=production-rabbitmq

# Read the access password from the default-user secret
kubectl --namespace messaging get secret production-rabbitmq-default-user \
  --output jsonpath='{.data.password}' | base64 --decode

# Port-forward the management port (debugging)
kubectl --namespace messaging port-forward svc/production-rabbitmq 15672:15672

# Show the cluster status from the first server pod
kubectl --namespace messaging exec -it production-rabbitmq-server-0 -- \
  rabbitmqctl cluster_status

Service 配置

# ClusterIP Service selecting the production-rabbitmq pods and exposing the
# AMQP, management and Prometheus ports.
apiVersion: v1
kind: Service
metadata:
  namespace: messaging
  name: rabbitmq-service
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/name: production-rabbitmq
  ports:
    - name: amqp
      targetPort: 5672
      port: 5672
    - name: management
      targetPort: 15672
      port: 15672
    - name: prometheus
      targetPort: 15692
      port: 15692

14.7 K8s 资源配置

ConfigMap

# ConfigMap carrying two files: the plugin list and rabbitmq.conf.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: messaging
  name: rabbitmq-config
data:
  enabled_plugins: |
    [rabbitmq_management,rabbitmq_prometheus].
  rabbitmq.conf: |
    listeners.tcp.default = 5672
    management.tcp.port = 15672
    vm_memory_high_watermark.relative = 0.6
    disk_free_limit.absolute = 2GB
    heartbeat = 60

Secret

# Opaque Secret with the broker password and the Erlang cookie, given as
# plain strings via stringData.
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  namespace: messaging
  name: rabbitmq-secret
stringData:
  erlang_cookie: "SWQOKODSQALRPCLNMEQG"
  password: "secure_password_here"

14.8 注意事项

⚠️ 文件描述符限制

Docker 容器默认的文件描述符上限(ulimit nofile)可能不够,必须设置 --ulimit nofile=65536:65536。

⚠️ Erlang Cookie 一致性

集群中所有节点的 Erlang Cookie 必须完全一致。

⚠️ 时钟同步

集群节点之间需要时钟同步(NTP),否则可能出现消息顺序问题。

⚠️ Pod 反亲和性

在 K8s 中使用反亲和性确保 RabbitMQ Pod 分布在不同节点上。

⚠️ 存储性能

RabbitMQ 对磁盘 I/O 较敏感,建议使用 SSD 存储。

🔥 最佳实践: 生产环境使用 RabbitMQ Cluster Operator + 3 副本 + SSD 持久化 + Pod 反亲和性 + Prometheus 监控。


14.9 扩展阅读


下一章: 第 15 章:故障排查 — 快速定位和解决常见问题。