强曰为道

与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

第7章:命令与插件开发

第7章:命令与插件开发

命令(Command)是连接 Nagios 核心与插件的桥梁。本章详细讲解命令定义、插件开发规范、返回值标准、性能数据格式以及远程监控协议 NRPE 和 NSCA。


一、命令定义

1.1 命令对象语法

define command {
    # command_name: unique identifier that host/service definitions refer to.
    # Inline remarks in object files must use ';' - a '#' is only a comment
    # at the start of a line and would otherwise become part of the value.
    command_name    check_example       ; unique command name
    command_line    $USER1$/check_example -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$
    # command_line: the command actually executed (macros are substituted first)
}

1.2 命令分类

| 类型 | 命名约定 | 用途 | 示例 |
| --- | --- | --- | --- |
| 检查命令 | check_* | 主机/服务检查 | check_ping, check_http |
| 通知命令 | notify-* | 发送通知 | notify-service-by-email |
| 事件处理 | event-* | 事件处理脚本 | event-restart-service |
| 性能数据 | process-* | 处理性能数据 | process-service-perfdata |

1.3 常用检查命令定义

########################################
# Host check commands
########################################

# ICMP ping check: $ARG1$ is passed to -w and $ARG2$ to -c
define command {
    command_name    check_ping
    command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
}

# TCP port check: $ARG1$ is the port passed to -p
define command {
    command_name    check_tcp
    command_line    $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ -t 10
}

# SSH check: $ARG1$ is the port passed to -p
define command {
    command_name    check_ssh
    command_line    $USER1$/check_ssh -H $HOSTADDRESS$ -p $ARG1$
}

# NRPE remote check: $ARG1$ is the remote command (-c), $ARG2$ its arguments (-a)
define command {
    command_name    check_nrpe
    command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ -a $ARG2$
}

########################################
# Web service check commands
########################################

# HTTP check: $ARG1$ is the port (-p), $ARG2$ the URL path (-u)
define command {
    command_name    check_http
    command_line    $USER1$/check_http -H $HOSTADDRESS$ -p $ARG1$ -u $ARG2$ -e 200
}

# HTTPS check: same arguments as check_http, with -S and --sni added
define command {
    command_name    check_https
    command_line    $USER1$/check_http -H $HOSTADDRESS$ -S -p $ARG1$ -u $ARG2$ --sni -e 200
}

# SSL certificate check: $ARG1$,$ARG2$ are passed as the comma-separated value of -C
define command {
    command_name    check_ssl_cert
    command_line    $USER1$/check_http -H $HOSTADDRESS$ -C $ARG1$,$ARG2$
}

# URL content check: $ARG1$ is the URL path (-u), $ARG2$ the quoted string passed to -s
define command {
    command_name    check_url_content
    command_line    $USER1$/check_http -H $HOSTADDRESS$ -u $ARG1$ -s "$ARG2$"
}

########################################
# System resource check commands
# (none of these pass $HOSTADDRESS$ to the plugin)
########################################

# Disk usage: $ARG1$ -> -w, $ARG2$ -> -c, $ARG3$ -> -p
define command {
    command_name    check_disk
    command_line    $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
}

# CPU load: $ARG1$ -> -w, $ARG2$ -> -c
define command {
    command_name    check_load
    command_line    $USER1$/check_load -w $ARG1$ -c $ARG2$
}

# Memory usage (requires the check_mem plugin)
define command {
    command_name    check_mem
    command_line    $USER1$/check_mem -w $ARG1$ -c $ARG2$ -f
}

# Process count: $ARG1$ -> -w, $ARG2$ -> -c, $ARG3$ -> -s
define command {
    command_name    check_procs
    command_line    $USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
}

# Logged-in user count
define command {
    command_name    check_users
    command_line    $USER1$/check_users -w $ARG1$ -c $ARG2$
}

# Swap usage
define command {
    command_name    check_swap
    command_line    $USER1$/check_swap -w $ARG1$ -c $ARG2$
}

# File descriptors
# NOTE(review): check_file_handles is presumably a custom plugin - confirm it is installed under $USER1$
define command {
    command_name    check_file_handles
    command_line    $USER1$/check_file_handles -w $ARG1$ -c $ARG2$
}

########################################
# Network service check commands
########################################

# DNS check: $ARG1$ is passed to -H and the monitored host's address to -s
define command {
    command_name    check_dns
    command_line    $USER1$/check_dns -H $ARG1$ -s $HOSTADDRESS$ -t 10
}

# SMTP check: $ARG1$ is the port passed to -p
define command {
    command_name    check_smtp
    command_line    $USER1$/check_smtp -H $HOSTADDRESS$ -p $ARG1$
}

# POP3 check (port fixed at 110)
define command {
    command_name    check_pop
    command_line    $USER1$/check_pop -H $HOSTADDRESS$ -p 110
}

# IMAP check (port fixed at 143)
define command {
    command_name    check_imap
    command_line    $USER1$/check_imap -H $HOSTADDRESS$ -p 143
}

# NTP check: $ARG1$ -> -w, $ARG2$ -> -c
define command {
    command_name    check_ntp
    command_line    $USER1$/check_ntp -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$
}

########################################
# Database check commands
########################################

# MySQL check: $USER5$ is passed to -u, $USER6$ to -p and $ARG1$ to -d
define command {
    command_name    check_mysql
    command_line    $USER1$/check_mysql -H $HOSTADDRESS$ -u $USER5$ -p $USER6$ -d $ARG1$
}

# PostgreSQL check: $USER5$ is passed to -l and $ARG1$ to -p
# NOTE(review): in check_pgsql lowercase -p is presumably the password option
# (the port would be -P) - confirm what $ARG1$ is meant to carry here.
define command {
    command_name    check_pgsql
    command_line    $USER1$/check_pgsql -H $HOSTADDRESS$ -l $USER5$ -p $ARG1$
}

########################################
# Notification commands
########################################

# E-mail notification for hosts: printf builds the message body, which is
# piped to /usr/bin/mail and sent to $CONTACTEMAIL$
define command {
    command_name    notify-host-by-email
    command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /usr/bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
}

# E-mail notification for services: same mechanism, with the service
# description, state and plugin output in the body
define command {
    command_name    notify-service-by-email
    command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTNAME$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /usr/bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTNAME$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
}

########################################
# Event handler commands
########################################

# Automatically restart a service.
# The macros are quoted so that a service description containing spaces
# (e.g. "Disk Space") reaches the script as one argument instead of being
# split by the shell.
# NOTE(review): the restart_service.sh shown in section 7.2 expects
# state, state type and description as its arguments - confirm which
# script this definition is meant to call.
define command {
    command_name    event-restart-service
    command_line    $USER2$/restart_service.sh "$HOSTNAME$" "$SERVICEDESC$"
}

# Performance data processing: appends one tab-separated record per service
# check to /var/log/nagios/service-perfdata.out
define command {
    command_name    process-service-perfdata
    command_line    /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /var/log/nagios/service-perfdata.out
}

二、插件开发规范

2.1 插件输出规范

Nagios 插件必须遵循严格的输出规范:

# 标准输出格式(第一行为状态文本,可附带性能数据;其后为可选的多行长文本)
TEXT OUTPUT | OPTIONAL PERFDATA
LONG TEXT LINE 1
LONG TEXT LINE 2
...

# 示例
PING OK - Packet Loss = 0%, RTA = 10.50ms | rta=10.50ms;100.00;200.00;0; pl=0%;20;50;0;100

2.2 退出码规范

| 退出码 | 状态 | 含义 |
| --- | --- | --- |
| 0 | OK | 正常 |
| 1 | WARNING | 警告 |
| 2 | CRITICAL | 严重 |
| 3 | UNKNOWN | 未知 |

2.3 性能数据格式

# 格式:
# label=value[UOM];[warn];[crit];[min];[max]

# 示例:
rta=10.50ms;100.00;200.00;0;          # RTA 延迟
pl=0%;20;50;0;100                      # 丢包率
time=0.15s;1;5;0;                      # 响应时间
users=5;10;20;0;                       # 用户数
size=1024B;2048;4096;0;                # 文件大小
temperature=45;60;80;0;100             # 温度

# 单位(UOM):
# (无) = 无单位(整数/浮点数)
# s = 秒(也可使用 us、ms,如上例中的 rta=10.50ms)
# % = 百分比
# B = 字节
# KB, MB, GB, TB = 存储单位
# c = 计数器

2.4 性能数据多行输出

# 第一行:状态输出和主要性能数据
HTTP OK - Response time = 0.15s | time=0.15s;1;5;0; size=15234B;;;0;

# 长输出(可选)
HTTP/1.1 200 OK
Content-Length: 15234
Server: Apache/2.4.41

# 多性能数据项
| time=0.15s;1;5;0; size=15234B;;;0; pages=25;;;0;

三、Shell 插件开发

3.1 基本框架

#!/bin/bash
# check_example.sh - example check plugin
# Usage: check_example.sh -H <host> -w <warning> -c <critical>

########################################
# Exit codes (0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN)
########################################
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

########################################
# Defaults; HOST, WARNING and CRITICAL are filled in from -H, -w and -c,
# VERBOSE is switched to 1 by -v
########################################
HOST=""
WARNING=""
CRITICAL=""
VERBOSE=0

########################################
# Usage text
########################################
# Print the usage summary on stdout and terminate the plugin with the
# UNKNOWN exit code.
print_help() {
    cat <<EOF
Usage: $0 -H <host> -w <warning> -c <critical>

Options:
  -H    Host address
  -w    Warning threshold
  -c    Critical threshold
  -v    Verbose output
  -h    Show this help
EOF
    exit $STATE_UNKNOWN
}

########################################
# Command-line parsing
########################################
while getopts "H:w:c:vh" flag; do
    case "$flag" in
        H) HOST="$OPTARG" ;;
        w) WARNING="$OPTARG" ;;
        c) CRITICAL="$OPTARG" ;;
        v) VERBOSE=1 ;;
        # -h and every unrecognised option both end in the usage text
        *) print_help ;;
    esac
done

# -H, -w and -c are all mandatory
if [[ -z $HOST || -z $WARNING || -z $CRITICAL ]]; then
    echo "UNKNOWN: Missing required parameters"
    print_help
fi

########################################
# Check logic
########################################
# Implement the actual probe here; some_check_command is a placeholder that
# is expected to print a single numeric value for the given host.
# "$HOST" is quoted so the value is passed as exactly one argument.
METRIC=$(some_check_command "$HOST")

########################################
# State evaluation
########################################
if [ -z "$METRIC" ]; then
    echo "UNKNOWN: Unable to retrieve metric"
    exit $STATE_UNKNOWN
fi

# Reject non-numeric input explicitly. Without this guard a failed numeric
# comparison evaluates as "not greater" and the plugin would report OK.
NUMBER_RE='^-?[0-9]+([.][0-9]+)?$'
for value in "$METRIC" "$WARNING" "$CRITICAL"; do
    if ! [[ $value =~ $NUMBER_RE ]]; then
        echo "UNKNOWN: Non-numeric metric or threshold (metric=$METRIC, warning=$WARNING, critical=$CRITICAL)"
        exit $STATE_UNKNOWN
    fi
done

# Floating-point "a > b" using awk, so the plugin does not depend on bc
# being installed. Succeeds (status 0) when $1 is greater than $2.
exceeds() {
    awk -v value="$1" -v limit="$2" 'BEGIN { exit !((value + 0) > (limit + 0)) }'
}

PERFDATA="metric=$METRIC;$WARNING;$CRITICAL"

if exceeds "$METRIC" "$CRITICAL"; then
    STATE=$STATE_CRITICAL
    STATUS_TEXT="CRITICAL: Metric=$METRIC (threshold=$CRITICAL)"
elif exceeds "$METRIC" "$WARNING"; then
    STATE=$STATE_WARNING
    STATUS_TEXT="WARNING: Metric=$METRIC (threshold=$WARNING)"
else
    STATE=$STATE_OK
    STATUS_TEXT="OK: Metric=$METRIC"
fi

# The status text and perfdata must be the FIRST line of output.
echo "$STATUS_TEXT | $PERFDATA"

# Verbose details follow the status line as long text, so they can never be
# mistaken for the status itself.
if [ "${VERBOSE:-0}" -eq 1 ]; then
    echo "Metric: $METRIC, Warning: $WARNING, Critical: $CRITICAL"
fi

exit $STATE

3.2 实用示例:磁盘 I/O 检查

#!/bin/bash
# check_disk_io.sh - check disk I/O activity
# Usage: check_disk_io.sh -d <device> -r <read_warn>,<read_crit> -w <write_warn>,<write_crit>

# Exit codes (0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN)
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

# Device name and thresholds; filled in from -d, -r and -w.
# An empty threshold disables the corresponding comparison.
DEVICE=""
READ_WARN=""
READ_CRIT=""
WRITE_WARN=""
WRITE_CRIT=""

# Print the usage line plus one example on stdout, then terminate with the
# UNKNOWN exit code.
print_help() {
    cat <<EOF
Usage: $0 -d <device> -r <read_warn>,<read_crit> -w <write_warn>,<write_crit>
Example: $0 -d sda -r 50,100 -w 50,100
EOF
    exit $STATE_UNKNOWN
}

# Parse command-line options.
#   -d <device>        device to inspect (required)
#   -r <warn>,<crit>   read thresholds
#   -w <warn>,<crit>   write thresholds
#   -v                 verbose flag (accepted; sets VERBOSE=1)
#   -h                 show usage
while getopts "d:r:w:vh" opt; do
    case $opt in
        d) DEVICE=$OPTARG ;;
        # "$OPTARG" is quoted so that whitespace or glob characters in the
        # argument are not expanded by the shell before cut sees them.
        r) READ_WARN=$(printf '%s\n' "$OPTARG" | cut -d',' -f1)
           READ_CRIT=$(printf '%s\n' "$OPTARG" | cut -d',' -f2) ;;
        w) WRITE_WARN=$(printf '%s\n' "$OPTARG" | cut -d',' -f1)
           WRITE_CRIT=$(printf '%s\n' "$OPTARG" | cut -d',' -f2) ;;
        # -v is declared in the option string; without its own branch it
        # fell through to the catch-all and aborted with the usage text.
        v) VERBOSE=1 ;;
        h) print_help ;;
        *) print_help ;;
    esac
done

# The device is the only mandatory option
if [ -z "$DEVICE" ]; then
    echo "UNKNOWN: Device not specified"
    print_help
fi

# Succeeds when $1 is a non-negative decimal number.
NUMBER_RE='^[0-9]+([.][0-9]+)?$'
is_number() {
    [[ $1 =~ $NUMBER_RE ]]
}

# Floating-point "a > b" via awk (already required below), so bc is not
# needed. Succeeds (status 0) when $1 is greater than $2.
exceeds() {
    awk -v value="$1" -v limit="$2" 'BEGIN { exit !((value + 0) > (limit + 0)) }'
}

if ! command -v iostat >/dev/null 2>&1; then
    echo "UNKNOWN: iostat command not found"
    exit $STATE_UNKNOWN
fi

# Thresholds are optional, but when given they must be numeric; otherwise
# the comparisons below would silently evaluate as "not exceeded".
for threshold in "$READ_WARN" "$READ_CRIT" "$WRITE_WARN" "$WRITE_CRIT"; do
    if [ -n "$threshold" ] && ! is_number "$threshold"; then
        echo "UNKNOWN: Non-numeric threshold '$threshold'"
        exit $STATE_UNKNOWN
    fi
done

# Collect disk I/O figures with iostat.
#  - iostat lists devices without the /dev/ prefix, so strip it for matching.
#  - "1 2" produces two reports; the first is the since-boot average, the
#    second covers the one-second interval. Keeping the LAST row whose
#    first column is the device selects the interval report. Taking the last
#    output line instead does not work, because iostat ends each report with
#    a blank line.
#  - LC_ALL=C keeps the decimal separator a dot; -k requests kilobytes.
DEV_NAME=${DEVICE#/dev/}
read -r READ_KB WRITE_KB < <(LC_ALL=C iostat -d -k "$DEV_NAME" 1 2 2>/dev/null |
    awk -v dev="$DEV_NAME" '$1 == dev { r = $3; w = $4 } END { print r, w }')

if ! is_number "$READ_KB" || ! is_number "$WRITE_KB"; then
    echo "UNKNOWN: Unable to get I/O stats for $DEVICE"
    exit $STATE_UNKNOWN
fi

# Evaluate the state; critical thresholds take precedence over warnings,
# and an empty threshold disables its comparison.
STATE=$STATE_OK
OUTPUT="OK"

if [ -n "$READ_CRIT" ] && exceeds "$READ_KB" "$READ_CRIT"; then
    STATE=$STATE_CRITICAL
    OUTPUT="CRITICAL: Read=$READ_KB KB/s > $READ_CRIT"
elif [ -n "$WRITE_CRIT" ] && exceeds "$WRITE_KB" "$WRITE_CRIT"; then
    STATE=$STATE_CRITICAL
    OUTPUT="CRITICAL: Write=$WRITE_KB KB/s > $WRITE_CRIT"
elif [ -n "$READ_WARN" ] && exceeds "$READ_KB" "$READ_WARN"; then
    STATE=$STATE_WARNING
    OUTPUT="WARNING: Read=$READ_KB KB/s > $READ_WARN"
elif [ -n "$WRITE_WARN" ] && exceeds "$WRITE_KB" "$WRITE_WARN"; then
    STATE=$STATE_WARNING
    OUTPUT="WARNING: Write=$WRITE_KB KB/s > $WRITE_WARN"
else
    OUTPUT="OK: Read=$READ_KB KB/s, Write=$WRITE_KB KB/s"
fi

# Perfdata uses the plain "KB" unit: "KB/s" is not one of the units of
# measurement allowed by the perfdata format (none, s, %, B/KB/MB/GB/TB, c).
echo "$OUTPUT | read=${READ_KB}KB;${READ_WARN};${READ_CRIT};0; write=${WRITE_KB}KB;${WRITE_WARN};${WRITE_CRIT};0"
exit $STATE

四、Python 插件开发

4.1 基本框架

#!/usr/bin/env python3
# check_example.py - Python 插件示例

import sys
import argparse
import subprocess

# Exit codes the plugin reports back to Nagios
STATE_OK = 0
STATE_WARNING = 1
STATE_CRITICAL = 2
STATE_UNKNOWN = 3

def parse_args():
    """Build the command-line parser and return the parsed arguments.

    Host and both thresholds are mandatory; port defaults to 80, timeout to
    10 seconds and verbose to False.
    """
    cli = argparse.ArgumentParser(description='Nagios check plugin example')

    # (flags, keyword options) in the order they appear in --help
    option_table = (
        (('-H', '--host'), dict(required=True, help='Host address')),
        (('-w', '--warning'), dict(type=float, required=True, help='Warning threshold')),
        (('-c', '--critical'), dict(type=float, required=True, help='Critical threshold')),
        (('-p', '--port'), dict(type=int, default=80, help='Port number')),
        (('-t', '--timeout'), dict(type=int, default=10, help='Timeout in seconds')),
        (('-v', '--verbose'), dict(action='store_true', help='Verbose output')),
    )
    for flags, options in option_table:
        cli.add_argument(*flags, **options)

    return cli.parse_args()

def check_metric(host, port, timeout):
    """Measure how long it takes to open a TCP connection to ``host:port``.

    Args:
        host: Host name or IP address to connect to.
        port: TCP port number.
        timeout: Connection timeout in seconds.

    Returns:
        The connect time in milliseconds as a float, or ``None`` when the
        connection could not be established (refused, timed out, name
        resolution failed, invalid host or port).
    """
    import socket
    import time

    try:
        # perf_counter is monotonic, so the measurement cannot be distorted
        # by wall-clock adjustments while the connect is in flight.
        start = time.perf_counter()
        # create_connection resolves the name and tries each returned address
        # (IPv4 and IPv6); the context manager closes the socket on every
        # path, including when an exception is raised.
        with socket.create_connection((host, port), timeout=timeout):
            elapsed = time.perf_counter() - start
    except (OSError, OverflowError, ValueError):
        # OSError covers refused/timeout/DNS errors; OverflowError and
        # ValueError cover out-of-range ports and malformed host names.
        return None

    return elapsed * 1000  # seconds -> milliseconds

def main():
    """Run the check, print one status line with perfdata and exit.

    Exits UNKNOWN when no measurement is available, CRITICAL or WARNING
    when the response time reaches the respective threshold, OK otherwise.
    """
    opts = parse_args()
    latency_ms = check_metric(opts.host, opts.port, opts.timeout)

    # No measurement at all: nothing to compare against the thresholds
    if latency_ms is None:
        print(f"UNKNOWN: Unable to connect to {opts.host}:{opts.port}")
        sys.exit(STATE_UNKNOWN)

    perfdata = f"response_time={latency_ms:.2f}ms;{opts.warning};{opts.critical};0;"

    # Pick the exit code and summary first, then emit a single status line
    if latency_ms >= opts.critical:
        exit_code = STATE_CRITICAL
        summary = f"CRITICAL: Response time = {latency_ms:.2f}ms (threshold = {opts.critical}ms)"
    elif latency_ms >= opts.warning:
        exit_code = STATE_WARNING
        summary = f"WARNING: Response time = {latency_ms:.2f}ms (threshold = {opts.warning}ms)"
    else:
        exit_code = STATE_OK
        summary = f"OK: Response time = {latency_ms:.2f}ms"

    print(f"{summary} | {perfdata}")
    sys.exit(exit_code)


if __name__ == '__main__':
    main()

4.2 HTTP API 检查插件

#!/usr/bin/env python3
# check_rest_api.py - REST API 健康检查插件

import sys
import argparse
import requests
import time
import json

# Exit codes the plugin reports back to Nagios
STATE_OK = 0
STATE_WARNING = 1
STATE_CRITICAL = 2
STATE_UNKNOWN = 3

def parse_args():
    """Build the command-line parser for the REST API check and parse argv.

    Only the URL is mandatory. ``--ssl-verify`` and ``--no-ssl-verify``
    both write to ``ssl_verify``, which defaults to True.
    """
    cli = argparse.ArgumentParser(description='REST API health check')

    # (flags, keyword options) in the order they appear in --help
    option_table = (
        (('-u', '--url'), dict(required=True, help='API URL')),
        (('-m', '--method'), dict(default='GET', help='HTTP method')),
        (('-e', '--expected'), dict(default='200', help='Expected status code')),
        (('-s', '--string'), dict(help='Expected string in response')),
        (('-w', '--warning'), dict(type=float, default=5.0, help='Response time warning (seconds)')),
        (('-c', '--critical'), dict(type=float, default=10.0, help='Response time critical (seconds)')),
        (('--header',), dict(action='append', help='HTTP header (format: Key: Value)')),
        (('--data',), dict(help='POST data')),
        (('--timeout',), dict(type=int, default=30, help='Request timeout')),
        (('--ssl-verify',), dict(action='store_true', default=True, help='Verify SSL')),
        (('--no-ssl-verify',), dict(dest='ssl_verify', action='store_false', help='Disable SSL verify')),
    )
    for flags, options in option_table:
        cli.add_argument(*flags, **options)

    return cli.parse_args()

def _single_line(text):
    """Return ``text`` made safe for the human-readable part of plugin output.

    The pipe character separates status text from perfdata and a newline
    ends the status line, so both are replaced.
    """
    return str(text).replace('|', '/').replace('\r', ' ').replace('\n', ' ')


def _finish(state, message, perfdata=None):
    """Print one status line (optionally with perfdata) and exit with ``state``."""
    line = _single_line(message)
    if perfdata:
        line = f"{line} | {perfdata}"
    print(line)
    sys.exit(state)


def main():
    """Call the configured URL and report its health as a plugin result.

    Exit codes:
        UNKNOWN  - invalid command-line input or an unexpected error
        CRITICAL - timeout, connection failure, unexpected status code,
                   expected string missing, or response time >= critical
        WARNING  - response time >= warning
        OK       - otherwise
    """
    args = parse_args()

    # Validate user input before doing any I/O. An uncaught exception would
    # terminate Python with exit status 1, which is the WARNING exit code.
    headers = {}
    for raw in args.header or []:
        name, separator, value = raw.partition(':')
        if not separator or not name.strip():
            _finish(STATE_UNKNOWN, f"UNKNOWN: Invalid header '{raw}' (expected 'Key: Value')")
        headers[name.strip()] = value.strip()

    try:
        expected_codes = [int(c) for c in args.expected.split(',')]
    except ValueError:
        _finish(STATE_UNKNOWN, f"UNKNOWN: Invalid expected status code list '{args.expected}'")

    thresholds = f"{args.warning};{args.critical}"

    try:
        # perf_counter is monotonic, so wall-clock adjustments cannot skew
        # the measured response time.
        start = time.perf_counter()
        response = requests.request(
            method=args.method,
            url=args.url,
            headers=headers,
            data=args.data,
            timeout=args.timeout,
            verify=args.ssl_verify
        )
        elapsed = time.perf_counter() - start
    except requests.exceptions.Timeout:
        _finish(STATE_CRITICAL,
                f"CRITICAL: Connection timeout after {args.timeout}s",
                f"time={args.timeout}s;{thresholds}")
    except requests.exceptions.ConnectionError as e:
        _finish(STATE_CRITICAL,
                f"CRITICAL: Connection failed - {e}",
                f"time=0;{thresholds}")
    except Exception as e:
        _finish(STATE_UNKNOWN, f"UNKNOWN: {e}")

    # Status code check
    if response.status_code not in expected_codes:
        _finish(STATE_CRITICAL,
                f"CRITICAL: HTTP {response.status_code} (expected {args.expected})",
                f"time={elapsed:.3f}s;{thresholds}")

    # Response body check
    if args.string and args.string not in response.text:
        _finish(STATE_CRITICAL,
                f"CRITICAL: Expected string '{args.string}' not found",
                f"time={elapsed:.3f}s;{thresholds}")

    # Response time check
    perfdata = f"time={elapsed:.3f}s;{thresholds};0;"

    if elapsed >= args.critical:
        _finish(STATE_CRITICAL, f"CRITICAL: Response time = {elapsed:.3f}s", perfdata)
    elif elapsed >= args.warning:
        _finish(STATE_WARNING, f"WARNING: Response time = {elapsed:.3f}s", perfdata)
    else:
        _finish(STATE_OK,
                f"OK: HTTP {response.status_code}, Response time = {elapsed:.3f}s",
                perfdata)


if __name__ == '__main__':
    main()

五、返回值与状态处理

5.1 返回码处理流程

插件执行
    │
    ▼
返回码判断
    │
    ├─ 0 (OK) → 状态设为 OK
    │
    ├─ 1 (WARNING) → 状态设为 WARNING
    │     │
    │     ├─ 软状态 → 重试检查
    │     └─ 硬状态 → 触发通知
    │
    ├─ 2 (CRITICAL) → 状态设为 CRITICAL
    │     │
    │     ├─ 软状态 → 重试检查
    │     └─ 硬状态 → 触发通知
    │
    └─ 3 (UNKNOWN) → 状态设为 UNKNOWN
          │
          ├─ 软状态 → 重试检查
          └─ 硬状态 → 触发通知

5.2 状态类型(Soft/Hard)

# 软状态(SOFT):未达到 max_check_attempts
# 不会触发通知,只记录日志
# 持续重试直到达到阈值或状态恢复

# 硬状态(HARD):达到 max_check_attempts
# 触发通知
# 后续检查如果仍为异常,继续通知(根据 notification_interval)

# 示例:max_check_attempts = 3
# 检查1: CRITICAL → SOFT 1
# 检查2: CRITICAL → SOFT 2
# 检查3: CRITICAL → HARD 1 → 发送通知
# 检查4: CRITICAL → HARD 2 → 按 notification_interval 发送
# 检查5: OK → RECOVERY → 发送恢复通知

六、NRPE 命令定义

6.1 NRPE 远程检查

# NRPE command definition (on the monitoring server):
# $ARG1$ is the remote command name (-c), $ARG2$ its arguments (-a)
define command {
    command_name    check_nrpe
    command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ -a $ARG2$
}

# NRPE command with an explicit 30-second timeout, using SSL/TLS.
# -a must be the last option on the check_nrpe command line: everything
# after it is forwarded to the remote command as arguments, so -t has to
# come before it.
# check_nrpe encrypts by default (-n / --no-ssl turns it off), so no extra
# flag is needed to enable SSL.
define command {
    command_name    check_nrpe_ssl
    command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -t 30 -c $ARG1$ -a $ARG2$
}

# Usage example: check_command supplies "check_disk" as $ARG1$ and
# "-w 20% -c 10% -p /" as $ARG2$ of the check_nrpe command
define service {
    use                 generic-service
    host_name           remote-server-01
    service_description Disk Space
    check_command       check_nrpe!check_disk!-w 20% -c 10% -p /
}

6.2 被动检查命令(NSCA)

# NSCA submission command: formats host name, service description, state ID
# and plugin output as one tab-separated line and pipes it to send_nsca
define command {
    command_name    submit_service_check
    command_line    /usr/bin/printf "%s\t%s\t%s\t%s\n" "$HOSTNAME$" "$SERVICEDESC$" "$SERVICESTATEID$" "$SERVICEOUTPUT$" | /usr/local/nagios/bin/send_nsca -H nagios-server -c /etc/send_nsca.cfg
}

# Placeholder command for passively checked services: passive checks do not
# need to execute anything, so the command simply succeeds.
# The remark sits on its own line because '#' only starts a comment at the
# beginning of a line; trailing text would become part of command_line.
define command {
    command_name    process_passive_check
    command_line    /bin/true
}

七、事件处理命令

7.1 事件处理器定义

# Automatically restart a failed service.
# The handler script reads the description as its third positional argument,
# so "$SERVICEDESC$" is quoted: a description containing spaces
# (e.g. "Disk Space") would otherwise be split and only its first word
# would arrive as $3.
define command {
    command_name    restart-service
    command_line    $USER2$/event_handlers/restart_service.sh $SERVICESTATE$ $SERVICESTATETYPE$ "$SERVICEDESC$"
}

# Automatically restart a host: passes host state, state type and host name
# to restart_host.sh
define command {
    command_name    restart-host
    command_line    $USER2$/event_handlers/restart_host.sh $HOSTSTATE$ $HOSTSTATETYPE$ $HOSTNAME$
}

7.2 事件处理脚本示例

#!/bin/bash
# restart_service.sh - event handler that restarts a failed service
#
# Arguments:
#   $1  service state       (e.g. CRITICAL)
#   $2  service state type  (SOFT or HARD)
#   $3  service description (Apache, HTTP, MySQL or SSH are handled)
#
# Every invocation is logged to $LOGFILE. A restart is attempted only for a
# HARD CRITICAL state of a service that has a unit mapping below.

SERVICESTATE=$1
SERVICESTATETYPE=$2
SERVICEDESC=$3
LOGFILE="/var/log/nagios/event_handlers.log"

# Append one timestamped line to the log file.
log() {
    echo "$(date): $*" >> "$LOGFILE"
}

log "$SERVICEDESC is $SERVICESTATE ($SERVICESTATETYPE)"

# Only act on a hard CRITICAL state
if [ "$SERVICESTATE" = "CRITICAL" ] && [ "$SERVICESTATETYPE" = "HARD" ]; then
    log "Attempting to restart $SERVICEDESC"

    # Map the service description to the systemd unit. The SAME unit is
    # restarted and verified afterwards; the lower-cased description
    # ("apache", "mysql", "ssh") is not the name of the restarted unit.
    case "$SERVICEDESC" in
        "Apache" | "HTTP")
            UNIT="httpd"
            ;;
        "MySQL")
            UNIT="mysqld"
            ;;
        "SSH")
            UNIT="sshd"
            ;;
        *)
            log "No restart handler for $SERVICEDESC"
            exit 0
            ;;
    esac

    # -n makes sudo fail immediately instead of waiting for a password
    # prompt that nobody can answer when run as an event handler.
    sudo -n systemctl restart "$UNIT"

    # Give the service time to come up
    sleep 5

    # Verify the unit that was just restarted
    if systemctl is-active --quiet "$UNIT"; then
        log "$SERVICEDESC restarted successfully"
    else
        log "Failed to restart $SERVICEDESC"
    fi
fi

八、注意事项

| 注意事项 | 说明 |
| --- | --- |
| 插件权限 | 插件文件需要执行权限 chmod +x |
| 插件超时 | 设置合理的插件超时避免卡死 |
| 返回码 | 必须正确返回 0-3 的退出码 |
| 性能数据 | 格式必须严格遵循规范 |
| 安全性 | 插件中不要硬编码密码 |
| 日志记录 | 插件应输出有意义的状态信息 |

九、本章小结

  1. 命令定义连接 Nagios 和插件,支持宏变量替换
  2. 插件输出必须遵循文本和性能数据规范
  3. 返回码决定服务/主机状态(0-3)
  4. Shell/Python 是常用的插件开发语言
  5. NRPE/NSCA 实现远程和被动检查

下一章:第8章:插件体系详解 - 深入了解 Nagios 插件生态系统。