强曰为道

与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

第 12 章:运维规范与最佳实践

第 12 章:运维规范与最佳实践

12.1 配置管理规范

配置文件组织

# 推荐的配置目录结构
/etc/NetworkManager/
├── NetworkManager.conf          # 主配置(保持默认或最小化)
├── conf.d/                      # 配置片段
│   ├── 00-dns.conf             # DNS 后端配置
│   ├── 10-mac.conf             # MAC 地址策略
│   ├── 20-unmanaged.conf       # 不管理的设备
│   ├── 30-connectivity.conf    # 连通性检查
│   └── 99-logging.conf         # 日志级别
├── dispatcher.d/                # 事件脚本
│   ├── 50-logger.sh            # 日志记录
│   ├── 55-firewall.sh          # 防火墙更新
│   └── 60-auto-vpn.sh          # VPN 自动连接
└── system-connections/          # 连接配置(keyfile 格式)
    ├── eth0-static.nmconnection
    ├── wifi-home.nmconnection
    └── vpn-office.nmconnection

命名规范

| 项目 | 规范 | 示例 |
| --- | --- | --- |
| 连接名 | {位置}-{用途}-{接口} | dc1-web-eth0, home-wifi |
| 配置文件 | {优先级}-{功能}.conf | 00-dns.conf, 20-unmanaged.conf |
| Dispatcher 脚本 | {优先级}-{功能}.sh | 50-logger.sh, 55-firewall.sh |

版本控制

# Put the NetworkManager configuration under Git.
cd /etc
git init

# Write the ignore rule BEFORE the first `git add`: connection profiles can
# hold Wi-Fi PSKs and VPN secrets, and a rule added after the commit does not
# untrack files that are already in history.
echo "NetworkManager/system-connections/*.nmconnection" >> /etc/.gitignore
# Alternative: keep the profiles tracked, but encrypt the sensitive ones with
# git-crypt or sops.

git add .gitignore NetworkManager/
git commit -m "feat: initial NM configuration"

12.2 服务器网络规范

服务器配置模板

#!/bin/bash
# server-network-setup.sh - initial network setup for a server
#
# Usage: server-network-setup.sh HOSTNAME STATIC_IP GATEWAY [DNS1] [DNS2]
#   HOSTNAME   host name to set through NetworkManager
#   STATIC_IP  IPv4 address without prefix length (/24 is appended below)
#   GATEWAY    IPv4 default gateway
#   DNS1 DNS2  resolvers; default to 8.8.8.8 and 8.8.4.4

set -euo pipefail

# Fail with a usage line instead of bash's "unbound variable" message.
if [ "$#" -lt 3 ]; then
    echo "Usage: $0 HOSTNAME STATIC_IP GATEWAY [DNS1] [DNS2]" >&2
    exit 2
fi

HOSTNAME="$1"
STATIC_IP="$2"
GATEWAY="$3"
DNS1="${4:-8.8.8.8}"
DNS2="${5:-8.8.4.4}"
CON_NAME="server-eth0"

# 1. Set the host name
sudo nmcli general hostname "$HOSTNAME"

# 2. Disable MAC randomisation (a server does not need it).
#    wifi.scan-rand-mac-address is a [device] key; ethernet.cloned-mac-address
#    is a connection default and therefore belongs in [connection] only.
#    The empty uri and interval=0 switch connectivity checking off.
sudo tee /etc/NetworkManager/conf.d/00-server.conf << 'EOF'
[device]
wifi.scan-rand-mac-address=no

[connection]
ethernet.cloned-mac-address=preserve

[connectivity]
uri=
interval=0
EOF

# 3. Create (or, on a rerun, update) the static-IP profile.
#    `hostname -d` may print nothing or fail when no domain is configured;
#    that must not abort the script under `set -e`.
dns_search="$(hostname -d || true)"

conn_settings=(
    ipv4.method manual
    ipv4.addresses "${STATIC_IP}/24"
    ipv4.gateway "$GATEWAY"
    ipv4.dns "${DNS1},${DNS2}"
    ipv4.dns-search "$dns_search"
    ipv6.method disabled
    connection.autoconnect yes
    connection.autoconnect-priority 100
    ethernet.mtu 1500
)

# `nmcli connection show <name>` exits non-zero when the profile is missing.
# Modifying an existing profile avoids a second profile with the same name.
if nmcli connection show "$CON_NAME" > /dev/null 2>&1; then
    sudo nmcli connection modify "$CON_NAME" "${conn_settings[@]}"
else
    sudo nmcli connection add \
        type ethernet \
        con-name "$CON_NAME" \
        ifname eth0 \
        "${conn_settings[@]}"
fi

# 4. Select the DNS backend
sudo tee /etc/NetworkManager/conf.d/00-dns.conf << 'EOF'
[main]
dns=systemd-resolved
EOF

# 5. Install a dispatcher script that logs every event to syslog.
#    The quoted delimiter keeps $1, $2 and the ${...} expansions literal so
#    they are evaluated when the dispatcher runs the script, not now.
sudo tee /etc/NetworkManager/dispatcher.d/50-logger.sh << 'SCRIPT'
#!/bin/bash
INTERFACE=$1
EVENT=$2
logger -t nm-dispatcher "$EVENT on $INTERFACE (${CONNECTION_ID:-N/A}) IP:${IP4_ADDRESS_0:-N/A}"
SCRIPT
sudo chmod 755 /etc/NetworkManager/dispatcher.d/50-logger.sh

# 6. Reload configuration, restart the daemon, activate the profile
sudo nmcli general reload conf
sudo systemctl restart NetworkManager
sudo nmcli connection up "$CON_NAME"

# 7. Show the resulting state
echo "=== 网络配置验证 ==="
nmcli general status
nmcli device status
nmcli connection show --active
ip addr show eth0
ip route show
resolvectl status

Bond 高可用服务器配置

#!/bin/bash
# server-bond-setup.sh - dual-NIC bond (active-backup) for a server
#
# Creates the bond profile "server-bond0" on bond0, attaches IFACE1 and
# IFACE2 as ports, activates the bond and prints its state.

# Stop at the first failure: without this, a failed bond creation would still
# be followed by the port profiles and the activation attempt.
set -euo pipefail

IFACE1="eth0"
IFACE2="eth1"
BOND_IP="192.168.1.100/24"
BOND_GW="192.168.1.1"
BOND_DNS="8.8.8.8,8.8.4.4"

# Create the bond (active-backup, link check every 100 ms, IFACE1 preferred)
nmcli connection add \
    type bond \
    con-name "server-bond0" \
    ifname bond0 \
    bond.options "mode=active-backup,miimon=100,primary=${IFACE1}" \
    ipv4.method manual \
    ipv4.addresses "$BOND_IP" \
    ipv4.gateway "$BOND_GW" \
    ipv4.dns "$BOND_DNS" \
    ipv6.method disabled \
    connection.autoconnect yes

# Attach both NICs as ports; the profiles are named bond0-<interface>.
for port_if in "$IFACE1" "$IFACE2"; do
    nmcli connection add \
        type ethernet \
        con-name "bond0-${port_if}" \
        ifname "$port_if" \
        master bond0 \
        slave-type bond
done

# Activate
nmcli connection up "server-bond0"

# Show the bond state as seen by the kernel and by NetworkManager
cat /proc/net/bonding/bond0
nmcli connection show "server-bond0" | grep bond

VLAN 多网段服务器

#!/bin/bash
# server-vlan-setup.sh - several VLANs on one server
#
# Creates one VLAN profile per entry of VLANS on top of PARENT_IF.
# Requires bash 4+ (associative array).

set -euo pipefail

PARENT_IF="bond0"  # VLANs are stacked on the bond

# VLAN id -> address/prefix.
# VLAN 300 uses 10.30.0.1: "10.300.0.1" is not a valid IPv4 address (an octet
# cannot exceed 255), so the id cannot be copied into the second octet here.
declare -A VLANS=(
    [100]="10.100.0.1/24"
    [200]="10.200.0.1/24"
    [300]="10.30.0.1/24"
)

# Iteration order of an associative array is unspecified; the profiles are
# independent of each other, so the order does not matter.
for vid in "${!VLANS[@]}"; do
    ip="${VLANS[$vid]}"
    con_name="vlan${vid}"
    vlan_iface="${PARENT_IF}.${vid}"

    nmcli connection add \
        type vlan \
        con-name "$con_name" \
        ifname "$vlan_iface" \
        vlan.parent "$PARENT_IF" \
        vlan.id "$vid" \
        ipv4.method manual \
        ipv4.addresses "$ip" \
        ipv6.method disabled \
        connection.autoconnect yes

    echo "已创建 VLAN $vid: $vlan_iface -> $ip"
done

nmcli connection reload

12.3 笔记本配置规范

多网络环境管理

#!/bin/bash
# laptop-network-setup.sh - profiles for a laptop that moves between networks
#
# Credentials are read from the environment so that real secrets do not have
# to be edited into this file. Each variable falls back to the placeholder
# used in the examples:
#   OFFICE_WIFI_IDENTITY  802.1X user name      (default: username)
#   OFFICE_WIFI_PASSWORD  802.1X password       (default: password)
#   HOME_WIFI_PSK         WPA pre-shared key    (default: password)
# NOTE: the values are still passed to nmcli as arguments and are therefore
# visible in the process list while the command runs.

set -euo pipefail

OFFICE_WIFI_IDENTITY="${OFFICE_WIFI_IDENTITY:-username}"
OFFICE_WIFI_PASSWORD="${OFFICE_WIFI_PASSWORD:-password}"
HOME_WIFI_PSK="${HOME_WIFI_PSK:-password}"

# 1. Office network (static IP; the VPN is started by the dispatcher below)
nmcli connection add \
    type ethernet \
    con-name "office-eth" \
    ifname eth0 \
    ipv4.method manual \
    ipv4.addresses "10.0.1.50/24" \
    ipv4.gateway "10.0.1.1" \
    ipv4.dns "10.0.1.1" \
    connection.autoconnect-priority 100 \
    connection.autoconnect yes

# 2. Home network (DHCP)
# NOTE(review): office-eth and home-eth are both bound to eth0 and both may
# autoconnect; the higher-priority office-eth is presumably preferred on any
# wired link, including at home - confirm this is the intended behaviour.
nmcli connection add \
    type ethernet \
    con-name "home-eth" \
    ifname eth0 \
    ipv4.method auto \
    connection.autoconnect-priority 50

# 3. Wi-Fi profiles
nmcli connection add \
    type wifi \
    con-name "office-wifi" \
    ifname wlan0 \
    ssid "CorpWiFi" \
    wifi-sec.key-mgmt wpa-eap \
    802-1x.eap peap \
    802-1x.phase2-auth mschapv2 \
    802-1x.identity "$OFFICE_WIFI_IDENTITY" \
    802-1x.password "$OFFICE_WIFI_PASSWORD" \
    connection.autoconnect-priority 90

nmcli connection add \
    type wifi \
    con-name "home-wifi" \
    ifname wlan0 \
    ssid "HomeNetwork" \
    wifi-sec.key-mgmt wpa-psk \
    wifi-sec.psk "$HOME_WIFI_PSK" \
    connection.autoconnect-priority 40

# 4. Start the VPN automatically through a dispatcher script.
#    The quoted delimiter keeps $1, $2 and $CONNECTION_ID literal; they are
#    evaluated when the dispatcher runs the script. On an "up" event for one
#    of the office profiles the script waits 3 seconds and then brings up
#    "Corp-VPN" in the background.
sudo tee /etc/NetworkManager/dispatcher.d/60-auto-vpn.sh << 'SCRIPT'
#!/bin/bash
INTERFACE=$1
EVENT=$2

if [ "$EVENT" = "up" ]; then
    case "$CONNECTION_ID" in
        "office-eth"|"office-wifi")
            sleep 3
            nmcli connection up "Corp-VPN" &
            ;;
    esac
fi
SCRIPT
sudo chmod 755 /etc/NetworkManager/dispatcher.d/60-auto-vpn.sh

电源管理优化

# Wi-Fi power saving (extends battery life on a laptop)
nmcli connection modify "home-wifi" \
    wifi.powersave 3  # 3 = enable power saving

# Ethernet energy saving (if the driver supports it): advertise only
# 10baseT/Full (mask 0x002) so the link negotiates down to the lowest rate.
sudo ethtool -s eth0 advertise 0x002

# Switch the Wi-Fi radio off while on a wired link. Marking wlan0 as
# unmanaged would only make NetworkManager ignore the device; it would not
# power the radio down. Re-enable with: nmcli radio wifi on
sudo nmcli radio wifi off

# Wake-on-LAN: wake on magic packet
sudo ethtool -s eth0 wol g

12.4 安全加固规范

MAC 地址安全

# Use random MAC addresses by default (public places).
# The here-document is written verbatim to the drop-in file.
sudo tee /etc/NetworkManager/conf.d/mac-security.conf << 'EOF'
[device]
wifi.scan-rand-mac-address=yes

[connection]
# 公共 WiFi 使用随机 MAC
wifi.cloned-mac-address=random
# 受信任网络使用稳定 MAC
# 可在每个连接中单独设置
EOF

# Fixed MAC for one profile (office network).
# nmcli takes "property value" pairs, not "property=value".
# "permanent" selects the adapter's hardware address, which is what a
# MAC-based allow-list expects; "preserve" only means "do not touch the
# address on activation" and so does not guarantee a fixed one.
nmcli connection modify "office-wifi" \
    wifi.cloned-mac-address permanent

DNS 安全

# DNS over TLS through systemd-resolved.
# The drop-in directory may not exist yet; without it `tee` fails with
# "No such file or directory".
sudo mkdir -p /etc/systemd/resolved.conf.d
sudo tee /etc/systemd/resolved.conf.d/security.conf << 'EOF'
[Resolve]
DNS=1.1.1.1#cloudflare-dns.com 8.8.8.8#dns.google
FallbackDNS=9.9.9.9#dns.quad9.net
DNSSEC=allow-downgrade
DNSOverTLS=opportunistic
EOF
sudo systemctl restart systemd-resolved

防火墙集成

#!/bin/bash
# firewall-nm-integration.sh - switch the firewalld default zone per network
#
# Installs a dispatcher script that, on every "up" event, picks a firewalld
# default zone from the name of the connection that came up:
#   home-*            -> home
#   office-*          -> work
#   public-*, cafe-*  -> public
#
# The case patterns inside the script must stay unquoted: a quoted "home-*"
# is compared literally and never matches a profile such as home-wifi.
# No `firewall-cmd --reload` follows: --set-default-zone already applies to
# the running firewall, and a reload on every "up" event would drop any
# runtime-only rules.
sudo tee /etc/NetworkManager/dispatcher.d/55-firewall.sh << 'SCRIPT'
#!/bin/bash
INTERFACE=$1
EVENT=$2

if [ "$EVENT" = "up" ]; then
    # 根据网络类型应用不同规则
    case "$CONNECTION_ID" in
        home-*)
            # 家庭网络:宽松规则
            firewall-cmd --set-default-zone=home
            ;;
        office-*)
            # 公司网络:严格规则
            firewall-cmd --set-default-zone=work
            ;;
        public-*|cafe-*)
            # 公共网络:最严格规则
            firewall-cmd --set-default-zone=public
            ;;
    esac
fi
SCRIPT
sudo chmod 755 /etc/NetworkManager/dispatcher.d/55-firewall.sh

VPN 安全规范

| 实践 | 说明 |
| --- | --- |
| 公共 WiFi 必须 VPN | 使用 Dispatcher 自动连接 VPN |
| 证书验证 | 始终指定 CA 证书,不要跳过验证 |
| Kill Switch | 配置 VPN 断开时阻断流量 |
| Split Tunnel | 敏感流量走 VPN,普通流量直连 |

# VPN "kill switch" by routing: send everything through the tunnel.
#   ipv4.never-default no  - the VPN may take the default route (full tunnel)
#   ipv4.dns-priority -50  - negative priority: DNS goes through the VPN too
# The explanations live up here because a comment placed after a trailing
# backslash ends the command, and the next line would then run on its own.
# NOTE(review): these settings only steer traffic while the VPN is up; they
# do not by themselves block traffic after the VPN drops - confirm whether an
# additional firewall rule is expected for that.
nmcli connection modify "Corp-VPN" \
    ipv4.never-default no \
    ipv4.dns-priority -50

12.5 自动化部署

Ansible Playbook

# ansible/playbooks/network.yml
#
# Configures NetworkManager on the hosts in the "servers" group: installs the
# package (Debian family only), makes sure the service runs, writes two
# conf.d drop-ins and creates a static-IP profile for the default interface.
#
# Variables used but not defined in this playbook: server_ip, server_netmask,
# server_gateway, server_dns - presumably supplied by inventory or group vars.
---
- name: 配置服务器网络
  hosts: servers
  become: true
  tasks:
    # Skipped on every host whose OS family is not Debian (see `when`);
    # no install step exists here for other families.
    - name: 安装 NetworkManager
      apt:
        name: network-manager
        state: present
      when: ansible_os_family == "Debian"

    # Start the service now and enable it at boot.
    - name: 确保 NM 服务运行
      systemd:
        name: NetworkManager
        state: started
        enabled: true

    # Drop-in that selects systemd-resolved as the DNS backend; a change
    # triggers the "reload nm" handler.
    - name: 配置 DNS 后端
      copy:
        content: |
          [main]
          dns=systemd-resolved
        dest: /etc/NetworkManager/conf.d/00-dns.conf
      notify: reload nm

    # Drop-in that turns MAC randomisation off; a change triggers the
    # "reload nm" handler.
    # NOTE(review): ethernet.cloned-mac-address is written under [device]
    # here - confirm NetworkManager accepts that key in this section.
    - name: 禁用 MAC 随机化
      copy:
        content: |
          [device]
          wifi.scan-rand-mac-address=no
          ethernet.cloned-mac-address=preserve
        dest: /etc/NetworkManager/conf.d/10-mac.conf
      notify: reload nm

    # Creates the profile "server-<default interface>". `creates` skips the
    # command once the named keyfile exists, so a rerun does not add the
    # profile again.
    # NOTE(review): server_netmask is placed after "/", so it presumably has
    # to be a prefix length such as 24 - confirm against the inventory.
    - name: 配置静态 IP
      command: >
        nmcli connection add
        type ethernet
        con-name "server-{{ ansible_default_ipv4.interface }}"
        ifname {{ ansible_default_ipv4.interface }}
        ipv4.method manual
        ipv4.addresses {{ server_ip }}/{{ server_netmask }}
        ipv4.gateway {{ server_gateway }}
        ipv4.dns {{ server_dns }}
        connection.autoconnect yes
      args:
        creates: /etc/NetworkManager/system-connections/server-{{ ansible_default_ipv4.interface }}.nmconnection
      notify: activate connection

  # Handlers run only when a task that notifies them reports a change.
  handlers:
    # Re-read the NetworkManager configuration files.
    - name: reload nm
      command: nmcli general reload conf

    # Bring up the profile created by the static-IP task.
    - name: activate connection
      command: nmcli connection up "server-{{ ansible_default_ipv4.interface }}"

Shell 自动化脚本

#!/bin/bash
# deploy-network-config.sh - push network settings to a list of servers
#
# For every entry of SERVERS the script logs in over ssh, rewrites the
# "server-eth0" profile with the given address and gateway, activates it and
# prints the resulting address and default route.

set -euo pipefail

# One entry per server: "<ssh host>|<address/prefix>|<gateway>"
SERVERS=(
    "192.168.1.10|10.0.0.10/24|10.0.0.1"
    "192.168.1.11|10.0.0.11/24|10.0.0.1"
    "192.168.1.12|10.0.0.12/24|10.0.0.1"
)

for server_config in "${SERVERS[@]}"; do
    IFS='|' read -r host ip gw <<< "$server_config"

    echo "配置服务器: $host"

    # The delimiter is quoted, so nothing in the here-document is expanded
    # locally; address and gateway travel as positional parameters of the
    # remote shell instead. The remote shell runs in strict mode, so a failing
    # nmcli call makes ssh return non-zero and `set -e` stops this script
    # instead of reporting the host as done.
    # ssh joins its arguments with spaces before handing them to the remote
    # shell, so the values must not contain whitespace or shell metacharacters.
    ssh "$host" bash -s -- "$ip" "$gw" << 'REMOTE'
        set -euo pipefail
        new_addr="$1"
        new_gw="$2"

        # Apply the NetworkManager settings
        nmcli connection modify "server-eth0" \
            ipv4.method manual \
            ipv4.addresses "$new_addr" \
            ipv4.gateway "$new_gw"

        nmcli connection up "server-eth0"

        # Show the result
        echo "IP: $(ip -4 addr show eth0 | grep inet)"
        echo "GW: $(ip route | grep default)"
REMOTE

    echo "  完成: $host"
done

12.6 监控与告警

网络监控脚本

#!/bin/bash
# nm-health-check.sh - NetworkManager health check
#
# Runs three probes (NM connectivity state, DNS lookup, gateway ping), appends
# one line per run to LOG_FILE and logs an ALERT line once ALERT_THRESHOLD
# consecutive runs have failed. The consecutive-failure count is kept in
# STATE_FILE between runs.

LOG_FILE="/var/log/nm-health.log"
ALERT_THRESHOLD=3  # consecutive failed runs before an alert is logged
STATE_FILE="/var/run/nm-health.failcount"

# Prints the connectivity state reported by NetworkManager (e.g. "full").
check_connectivity() {
    nmcli networking connectivity check 2>/dev/null
}

# Returns 0 when example.com can be resolved.
check_dns() {
    nslookup example.com &>/dev/null
}

# Returns 0 when a default gateway exists and answers one ping within 2 s.
check_gateway() {
    local gw
    gw=$(ip route | awk '/default/ {print $3; exit}')
    [ -n "$gw" ] && ping -c 1 -W 2 "$gw" &>/dev/null
}

# Main check
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
connectivity=$(check_connectivity)
if check_dns; then dns="ok"; else dns="fail"; fi
if check_gateway; then gateway="ok"; else gateway="fail"; fi

echo "$timestamp | connectivity=$connectivity dns=$dns gateway=$gateway" >> "$LOG_FILE"

# Load the previous failure count; anything that is not a number counts as 0.
failures=0
if [ -r "$STATE_FILE" ]; then
    read -r failures < "$STATE_FILE" || failures=0
fi
case "$failures" in
    '' | *[!0-9]*) failures=0 ;;
esac

if [ "$connectivity" = "full" ] && [ "$dns" = "ok" ] && [ "$gateway" = "ok" ]; then
    failures=0
else
    failures=$((failures + 1))
fi
echo "$failures" > "$STATE_FILE"

if [ "$failures" -ge "$ALERT_THRESHOLD" ]; then
    # Record the alert
    echo "$timestamp | ALERT: Connectivity is $connectivity (dns=$dns, gateway=$gateway, consecutive failures: $failures)" >> "$LOG_FILE"

    # An alert could be sent from here (Slack/Email/etc.)
    # curl -s -X POST "webhook_url" -d "{\"text\": \"⚠️ 网络异常: $connectivity\"}"
fi

Prometheus 指标导出

#!/bin/bash
# nm-exporter.sh - print NetworkManager state as Prometheus text metrics
#
# Writes two gauges to stdout:
#   nm_connection_status{name,type,device} 1   one line per active connection
#   nm_device_state{device,state} <value>      one line per device

# Escape a string for use inside a Prometheus label value: backslash and
# double quote each get a leading backslash.
prom_escape() {
    local s=$1
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    printf '%s' "$s"
}

cat << 'METRICS'
# HELP nm_connection_status NetworkManager connection status
# TYPE nm_connection_status gauge
METRICS

# NAME is requested last and escaping is switched off (-e no): a ':' inside a
# connection name then stays in the final `read` field instead of shifting
# the type and device columns.
nmcli -t -e no -f TYPE,DEVICE,NAME connection show --active |
    while IFS=: read -r type device name; do
        echo "nm_connection_status{name=\"$(prom_escape "$name")\",type=\"$type\",device=\"$device\"} 1"
    done

cat << 'METRICS'
# HELP nm_device_state NetworkManager device state
# TYPE nm_device_state gauge
METRICS

# Map the textual state to a number; any state not listed becomes 50.
nmcli -t -f DEVICE,STATE device status |
    while IFS=: read -r device state; do
        case "$state" in
            connected) value=100 ;;
            disconnected) value=30 ;;
            unavailable) value=10 ;;
            unmanaged) value=0 ;;
            *) value=50 ;;
        esac
        echo "nm_device_state{device=\"$device\",state=\"$(prom_escape "$state")\"} $value"
    done

12.7 备份与灾难恢复

完整备份方案

#!/bin/bash
# nm-full-backup.sh - full NetworkManager backup
#
# Copies /etc/NetworkManager, exports connections, saves the DNS
# configuration, generates a restore.sh, packs everything into
# BACKUP_ROOT/<timestamp>.tar.gz and removes archives older than 30 days.

set -euo pipefail

# The backup contains Wi-Fi keys and VPN secrets: keep every file and
# directory created below readable by the owner only.
umask 077

BACKUP_ROOT="/backup/networkmanager"
DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="${BACKUP_ROOT}/${DATE}"

# connections/ must exist before the export loop redirects into it.
mkdir -p "$BACKUP_DIR/connections"

# 1. Copy the configuration tree
cp -a /etc/NetworkManager/ "$BACKUP_DIR/etc/"

# 2. Export the connections.
#    Names are read line by line so that a name with spaces stays in one
#    piece; -e no prints names without nmcli's terse-mode escaping. A '/' in
#    a name is replaced by '_' in the file name. A failed export leaves no
#    empty file behind.
#    NOTE(review): `nmcli connection export` is documented for VPN profiles;
#    other profile types are presumably covered only by the copy in step 1.
while IFS= read -r conn; do
    [ -n "$conn" ] || continue
    out="$BACKUP_DIR/connections/${conn//\//_}.nmconnection"
    if ! nmcli connection export "$conn" > "$out" 2>/dev/null; then
        rm -f -- "$out"
    fi
done < <(nmcli -t -e no -f NAME connection show)

# 3. Save the DNS configuration (best effort: either path may be absent)
cp /etc/resolv.conf "$BACKUP_DIR/" 2>/dev/null || true
cp -a /etc/systemd/resolved.conf.d/ "$BACKUP_DIR/resolved.d/" 2>/dev/null || true

# 4. Generate the restore script. The quoted delimiter writes the text
#    verbatim; nothing in it is expanded now.
cat > "$BACKUP_DIR/restore.sh" << 'RESTORE'
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

echo "恢复 NM 配置..."
sudo cp -a "$SCRIPT_DIR/etc/system-connections/"* /etc/NetworkManager/system-connections/
sudo cp -a "$SCRIPT_DIR/etc/conf.d/"* /etc/NetworkManager/conf.d/
sudo cp -a "$SCRIPT_DIR/etc/dispatcher.d/"* /etc/NetworkManager/dispatcher.d/

sudo chown root:root /etc/NetworkManager/system-connections/*
sudo chmod 600 /etc/NetworkManager/system-connections/*

sudo nmcli connection reload
echo "恢复完成。请手动激活连接。"
RESTORE

chmod +x "$BACKUP_DIR/restore.sh"

# 5. Pack the backup and remove the staging directory.
#    ${BACKUP_DIR:?} aborts instead of running `rm -rf` on an empty path.
tar czf "${BACKUP_DIR}.tar.gz" -C "$BACKUP_ROOT" "$DATE"
rm -rf -- "${BACKUP_DIR:?}"

echo "备份完成: ${BACKUP_DIR}.tar.gz"

# 6. Remove archives older than 30 days
find "$BACKUP_ROOT" -name "*.tar.gz" -mtime +30 -delete

12.8 运维检查清单

日常检查

| 检查项 | 命令 | 频率 |
| --- | --- | --- |
| NM 服务状态 | systemctl status NetworkManager | 每日 |
| 连接状态 | nmcli connection show --active | 每日 |
| 设备状态 | nmcli device status | 每日 |
| 日志异常 | journalctl -u NetworkManager -p err --since today | 每日 |
| DNS 解析 | resolvectl statistics | 每周 |
| Bond 状态 | cat /proc/net/bonding/bond0 | 每周 |

变更检查

| 检查项 | 命令 | 时机 |
| --- | --- | --- |
| 配置备份 | 备份脚本执行 | 变更前 |
| 连接验证 | nmcli connection up <name> | 配置修改后 |
| DNS 验证 | nslookup 测试 | DNS 变更后 |
| 连通性 | ping 测试 | 任何网络变更后 |
| 日志检查 | journalctl -u NetworkManager -f | 变更期间 |

12.9 本章小结

| 类别 | 要点 |
| --- | --- |
| 配置规范 | 使用 conf.d/ 片段,命名规范,版本控制 |
| 服务器 | 静态 IP、Bond 高可用、VLAN 隔离、禁用随机化 |
| 笔记本 | 多环境连接优先级、Dispatcher 自动 VPN、电源管理 |
| 安全 | MAC 随机化、DNS over TLS、防火墙集成、Kill Switch |
| 自动化 | Ansible、Shell 脚本、Prometheus 监控 |
| 备份 | 定期备份、恢复脚本、保留策略 |

扩展阅读