Erlang/OTP 完全指南 / 16 - 错误处理

第 16 章：错误处理

Erlang 的错误处理哲学是 “Let it crash”，配合 Supervisor 自动重启。本章学习 try/catch、exit 信号、link 和 monitor 机制。

16.1 Erlang 错误类型

16.1.1 三种异常

类型	触发方式	说明
`error`	`erlang:error/1,2`	运行时错误（除零、badarg 等）
`exit`	`exit/1,2`	进程退出信号
`throw`	`throw/1`	用户抛出的异常

%% error class
1/0.                          % raises error:badarith
erlang:error(badarg).         % raises an error with a caller-chosen reason
element(5, {1, 2, 3}).       % raises error:badarg

%% exit class
exit(normal).                 % normal exit
exit(shutdown).               % shutdown
exit({reason, "something"}).  % custom exit reason

%% throw class
throw(my_error).              % exception thrown by user code

16.2 try…catch

16.2.1 基本语法

%% General shape of a try expression.
try Expression of
    %% Matched against the value of Expression when it raised nothing.
    %% These bodies are NOT protected by the catch section below.
    Pattern1 -> Body1;
    Pattern2 -> Body2
catch
    %% Type is the exception class: error, exit or throw.
    Type1:Reason1:Stacktrace1 -> Handler1;
    Type2:Reason2:Stacktrace2 -> Handler2
after
    %% Runs whether the expression succeeded or failed.
    CleanupCode
end.

16.2.2 使用示例

%% Divide A by B, turning division by zero into a tagged error.
%% Returns {ok, Quotient} or {error, division_by_zero}.
%% Only the badarith caused by a zero divisor is handled. A badarith from
%% non-numeric operands, and every other exception, still propagates so the
%% caller is not told "division by zero" for an unrelated mistake.
safe_divide(A, B) ->
    try A / B of
        Quotient -> {ok, Quotient}
    catch
        %% `==` (not `=:=`) so that both 0 and 0.0 count as a zero divisor.
        error:badarith when is_number(A), B == 0 -> {error, division_by_zero}
    end.

%% Read the file at Path and run process/1 on its contents.
%% Returns:
%%   {ok, Processed}                  - the file was read and processed
%%   {error, ReadError}               - file:read_file/1 returned an error
%%   {error, invalid_data}            - throw:bad_data was raised
%%   {error, {runtime_error, Reason}} - any error-class exception was raised
%% process/1 is called inside the protected section on purpose: the clauses
%% of `try ... of` are not covered by `catch`, so calling it there would let
%% its exceptions escape both handlers.
process_file(Path) ->
    try
        case file:read_file(Path) of
            {ok, Data} -> {ok, process(Data)};
            {error, ReadError} -> {error, ReadError}
        end
    catch
        throw:bad_data -> {error, invalid_data};
        error:Reason -> {error, {runtime_error, Reason}}
    end.

%% `after` clause (cleanup code)
%% Opens Path for reading, reads up to 1024 units from it and returns
%% whatever file:read/2 returned. Crashes with badmatch if the open fails.
read_and_close(Path) ->
    {ok, Fd} = file:open(Path, [read]),
    try file:read(Fd, 1024) of
        ReadResult -> ReadResult
    after
        %% Executed whether the read succeeded or raised.
        file:close(Fd)
    end.

16.2.3 获取堆栈跟踪

%% On Erlang/OTP 21 and later the stack trace is bound by a third
%% `:Variable` part of the catch pattern.
try dangerous_operation() of
    Value -> Value
catch
    Class:Why:Trace ->
        io:format("Error: ~p:~p~nStack: ~p~n", [Class, Why, Trace])
end.

16.2.4 不带 of 子句

%% Form to use when the result does not need to be pattern-matched
try
    do_something(),
    do_another_thing()
catch
    _Class:Why ->
        io:format("Error: ~p~n", [Why])
end.

16.3 Exit 信号

16.3.1 进程退出

%% Normal exit
exit(normal).
exit(self(), normal).

%% Abnormal exit
%% exit/1 with reason kill terminates the CALLING process with reason kill.
%% A linked process that traps exits still receives {'EXIT', Pid, kill}.
exit(kill).
%% exit/2 with reason kill sends the untrappable kill signal to Pid;
%% Pid terminates with reason killed even if it traps exits.
exit(Pid, kill).
exit({shutdown, "server stop"}). % exit with a tagged reason

16.3.2 Link 与 Exit 传播

%% With trap_exit enabled, the exit of a linked process arrives as an
%% 'EXIT' message instead of terminating the receiver.
process_flag(trap_exit, true),

Pid = spawn_link(fun() ->
    timer:sleep(1000),
    exit(crash_reason)
end),

receive
    {'EXIT', Pid, ExitReason} ->
        case ExitReason of
            normal -> io:format("Normal exit~n");
            _ -> io:format("Crashed: ~p~n", [ExitReason])
        end
end.

16.3.3 Exit 传播规则

发送者	接收者	接收者 trap_exit = false	接收者 trap_exit = true
`exit(normal)`	链接	不影响	收到 `{'EXIT', Pid, normal}`
`exit(Reason)`	链接	双方崩溃	收到 `{'EXIT', Pid, Reason}`
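`exit(kill)`	链接	双方崩溃	收到 `{'EXIT', Pid, kill}`（经链接传播的 kill 可以被捕获）
`exit(Pid, kill)`	目标进程 Pid	Pid 被强制终止（退出原因为 `killed`）	Pid 仍被强制终止（不可捕获）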

16.4 Monitor

16.4.1 基本使用

%% Start monitoring another process
Ref = monitor(process, SpawnedPid),

%% A 'DOWN' message is delivered when the monitored process exits
receive
    {'DOWN', Ref, process, Pid, DownReason} ->
        case DownReason of
            normal ->
                io:format("Process ~p exited normally~n", [Pid]);
            _ ->
                io:format("Process ~p crashed: ~p~n", [Pid, DownReason])
        end
end.

%% Stop monitoring
demonitor(Ref).
demonitor(Ref, [flush]).  % also removes a 'DOWN' message that was already delivered
16.4.2 Monitor vs Link

特性	Link	Monitor
方向	双向	单向
默认行为	双方崩溃	仅收到消息
需要 trap_exit	是	否
适用场景	紧密耦合	监控/观察

16.5 Let it crash 哲学

16.5.1 传统防御性编程 vs Let it crash

%% ❌ Defensive programming (verbose, easy to miss a case)
%% Validates the request, processes it, and converts every possible return
%% value of both steps into {ok, Result} or {error, Reason}; anything
%% unrecognised becomes {error, {unexpected, Other}}.
handle_request(Request) ->
    case validate_request(Request) of
        {ok, ValidReq} ->
            case process_request(ValidReq) of
                {ok, Result} -> {ok, Result};
                {error, Reason} -> {error, Reason};
                Other -> {error, {unexpected, Other}}
            end;
        {error, Reason} -> {error, Reason};
        Other -> {error, {unexpected, Other}}
    end.

%% ✅ Let it crash (concise; failures are left to the Supervisor)
%% Each step asserts the success shape by matching on {ok, _}. An
%% {error, _} or any unexpected return value fails the match with badmatch
%% and crashes the process, instead of being wrapped as {ok, {error, _}}.
handle_request(Request) ->
    {ok, ValidReq} = validate_request(Request),
    {ok, Result} = process_request(ValidReq),
    {ok, Result}.
%% If either step crashes, the Supervisor restarts the process

16.5.2 何时使用 try/catch

场景	是否用 try/catch
输入验证	是（用户输入不可信）
外部 IO	是（文件、网络可能失败）
第三方库	是（可能抛异常）
内部逻辑	否（Let it crash）
OTP 回调	否（框架已处理）

16.6 实战：健壮的文件处理器

%% safe_file.erl
-module(safe_file).
-export([read_safe/1, write_safe/2, with_file/3]).

%% Read the whole file at Path.
%% Returns {ok, Data} on success; a failure is tagged with the path as
%% {error, {file_error, Path, Reason}}.
-spec read_safe(string()) -> {ok, binary()} | {error, term()}.
read_safe(Path) ->
    case file:read_file(Path) of
        {error, Why} -> {error, {file_error, Path, Why}};
        {ok, _Contents} = Ok -> Ok
    end.

%% Write Data to the file at Path.
%% Returns ok, or {error, {write_failed, Path, Reason}} on failure.
%% file:write_file/2 reports ordinary failures as an {error, Reason} return
%% value rather than by raising, so that value is wrapped here. The catch
%% clause is kept for error-class exceptions raised by the call itself.
-spec write_safe(string(), binary()) -> ok | {error, term()}.
write_safe(Path, Data) ->
    try file:write_file(Path, Data) of
        ok -> ok;
        {error, WriteError} -> {error, {write_failed, Path, WriteError}}
    catch
        error:Raised -> {error, {write_failed, Path, Raised}}
    end.

%% Open Path with Mode, apply Fun to the handle and always close the handle.
%% Mode is passed unchanged to file:open/2, which normally takes a list of
%% modes such as [read] or [write, binary]; the spec still accepts a single
%% atom so existing callers keep type-checking.
%% Returns:
%%   {ok, Result}                   - Fun returned Result
%%   {error, {Type, Reason, Stack}} - Fun raised an exception of any class
%%   {error, {open_failed, Path, Reason}} - the file could not be opened
-spec with_file(string(), atom() | [file:mode()], fun((file:io_device()) -> A)) ->
    {ok, A} | {error, term()}.
with_file(Path, Mode, Fun) ->
    case file:open(Path, Mode) of
        {ok, Handle} ->
            %% Only the call to Fun is protected; building the {ok, _}
            %% tuple happens in the `of` section.
            try Fun(Handle) of
                Result -> {ok, Result}
            catch
                Type:Reason:Stack ->
                    {error, {Type, Reason, Stack}}
            after
                file:close(Handle)
            end;
        {error, Reason} ->
            {error, {open_failed, Path, Reason}}
    end.

16.7 实战：带重试的操作

%% retry.erl
-module(retry).
-export([with_retry/2, with_retry/3]).

%% Same as with_retry/3 with an initial delay of 1000.
-spec with_retry(fun(() -> A), non_neg_integer()) -> A | {error, max_retries}.
with_retry(Fun, MaxRetries) ->
    InitialDelay = 1000,
    with_retry(Fun, MaxRetries, InitialDelay).

%% Call Fun up to Retries times with exponential backoff.
%% After a failed attempt that still has retries left, sleeps Delay
%% milliseconds and tries again with the delay doubled.
%% Returns Fun's result from the first attempt that does not raise, or
%% {error, max_retries} once every attempt has failed (immediately when
%% Retries is 0, without calling Fun).
%% An exception of any class counts as a failed attempt; the exception
%% itself is not reported to the caller.
%% A Retries value that is not a non-negative integer fails with
%% function_clause instead of recursing without bound.
-spec with_retry(fun(() -> A), non_neg_integer(), non_neg_integer()) -> A | {error, max_retries}.
with_retry(_Fun, 0, _Delay) ->
    {error, max_retries};
with_retry(Fun, Retries, Delay) when is_integer(Retries), Retries > 0 ->
    try
        Fun()
    catch
        _:_ when Retries =:= 1 ->
            %% Last attempt failed: give up now rather than sleeping first.
            {error, max_retries};
        _:_ ->
            timer:sleep(Delay),
            with_retry(Fun, Retries - 1, Delay * 2)  %% exponential backoff
    end.

%% Usage example (a shell expression, not part of the retry module):
%% at most 3 attempts, with a delay that starts at 500 and doubles on each retry.
retry:with_retry(fun() ->
    http_request("https://api.example.com/data")
end, 3, 500).

16.8 注意事项

⚠️ 常见陷阱

不要在 after 中抛出异常（会覆盖原始异常）
exit(Pid, kill) 不可被 trap_exit 捕获（而进程自身调用 exit(kill) 时，经链接传播的退出信号可以被捕获）
try/catch 有性能开销，不要滥用
堆栈跟踪可能很大，注意内存
过度使用 try/catch 会隐藏 bug

💡 最佳实践

对外部输入使用 try/catch 验证
内部逻辑让进程崩溃，由 Supervisor 重启
使用 Monitor 而不是 Link 监控外部进程
永远在 after 中清理资源
使用 logger 模块记录错误和堆栈

16.9 扩展阅读

上一章：15 - IO 与网络 下一章：17 - 测试