强曰为道

与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧.
文档目录

16 - 错误处理

第 16 章:错误处理

Erlang 的错误处理哲学是 “Let it crash”,配合 Supervisor 自动重启。本章学习 try/catch、exit 信号、link 和 monitor 机制。


16.1 Erlang 错误类型

16.1.1 三种异常

| 类型 | 触发方式 | 说明 |
| --- | --- | --- |
| error | erlang:error/1,2 | 运行时错误(除零、badarg 等) |
| exit | exit/1,2 | 进程退出信号 |
| throw | throw/1 | 用户抛出的异常 |
%% error class: raised by the runtime or explicitly via erlang:error/1,2
1/0.                          % raises error:badarith (arithmetic error)
erlang:error(badarg).         % raises an error with a caller-chosen reason
element(5, {1, 2, 3}).       % raises error:badarg (index outside the tuple)

%% exit class: exit/1 terminates the calling process with the given reason
exit(normal).                 % normal exit
exit(shutdown).               % shutdown
exit({reason, "something"}).  % custom exit reason

%% throw class
throw(my_error).              % user-thrown exception, meant to be caught by the caller

16.2 try…catch

16.2.1 基本语法

%% General shape of a try expression. The `of` section and the `after`
%% section are optional, and so is the `:Stacktrace` part of each handler
%% pattern. Only Expression is protected by `catch`; the bodies that
%% follow `of` are not.
try Expression of
    Pattern1 -> Body1;
    Pattern2 -> Body2
catch
    Type1:Reason1:Stacktrace1 -> Handler1;
    Type2:Reason2:Stacktrace2 -> Handler2
after
    CleanupCode
end.

16.2.2 使用示例

%% Catch one specific error: divide A by B and turn the arithmetic error
%% into a tagged tuple. Only error:badarith is handled here; every other
%% exception propagates to the caller.
%% NOTE(review): badarith is also raised for non-numeric operands, so the
%% division_by_zero reason can be inaccurate for such input.
safe_divide(A, B) ->
    try A / B of
        Result -> {ok, Result}
    catch
        error:badarith -> {error, division_by_zero}
    end.

%% Catch specific exception classes: read Path and run process/1 on its
%% contents.
%%
%% Returns {ok, Processed} on success, {error, Reason} when the file cannot
%% be read, {error, invalid_data} when a bad_data throw is caught, and
%% {error, {runtime_error, Reason}} for any error-class exception.
%%
%% process(Data) is evaluated inside the protected body on purpose: the
%% clauses that follow `of` are NOT covered by `catch`, so calling it there
%% would let throw:bad_data escape uncaught.
process_file(Path) ->
    try
        case file:read_file(Path) of
            {ok, Data} -> {ok, process(Data)};
            {error, Reason} -> {error, Reason}
        end
    catch
        throw:bad_data -> {error, invalid_data};
        error:Error -> {error, {runtime_error, Error}}
    end.

%% `after` clause (cleanup code): open Path for reading, read up to 1024
%% bytes and return the result of file:read/2.
%% The {ok, Handle} match is assertive: a failed open raises badmatch
%% before the try is entered, so there is nothing to close in that case.
read_and_close(Path) ->
    {ok, Handle} = file:open(Path, [read]),
    try
        file:read(Handle, 1024)
    after
        file:close(Handle)  %% runs whether the read succeeded or raised
    end.

16.2.3 获取堆栈跟踪

%% Erlang/OTP 21+: bind the stack trace with a third `:Stacktrace` pattern
%% in the handler head. Here every exception class is matched and its
%% class, reason and stack trace are printed.
try dangerous_operation()
catch
    Type:Reason:Stacktrace ->
        io:format("Error: ~p:~p~nStack: ~p~n", [Type, Reason, Stacktrace])
end.

16.2.4 不带 of 子句

%% When the return value does not need to be matched, `of` can be omitted.
%% Both calls are inside the protected body, so an exception from either
%% one reaches the handler below.
try
    do_something(),
    do_another_thing()
catch
    %% `_` matches any exception class; E is bound to the reason only.
    _:E -> io:format("Error: ~p~n", [E])
end.

16.3 Exit 信号

16.3.1 进程退出

%% Normal exit
exit(normal).
exit(self(), normal).

%% Abnormal exit
%% Only the two-argument form sends the untrappable kill signal: Pid is
%% terminated unconditionally and its exit reason becomes `killed`.
%% exit(kill) (one argument) is just an ordinary exit whose reason happens
%% to be the atom kill, and it can be caught or trapped like any other.
exit(Pid, kill).      % untrappable exit signal sent to Pid
exit({shutdown, "server stop"}). % exit with a reason
%% A linked process receives an 'EXIT' message instead of dying, provided
%% it has enabled trap_exit before the linked process exits.
process_flag(trap_exit, true),

%% The child is linked at spawn time and exits abnormally after one second.
Pid = spawn_link(fun() ->
    timer:sleep(1000),
    exit(crash_reason)
end),

%% Pid is already bound, so both patterns only match messages from that child.
receive
    {'EXIT', Pid, normal} ->
        io:format("Normal exit~n");
    {'EXIT', Pid, Reason} ->
        io:format("Crashed: ~p~n", [Reason])
end.

16.3.3 Exit 传播规则

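| 发送者 | 接收者 | 接收者 trap_exit = false | 接收者 trap_exit = true |
| --- | --- | --- | --- |
| exit(normal) | 链接 | 不影响 | 收到 {'EXIT', Pid, normal} |
| exit(Reason) | 链接 | 双方崩溃 | 收到 {'EXIT', Pid, Reason} |
| exit(Pid, kill) | 目标进程 Pid | 被强制终止(退出原因变为 killed) | 同样被强制终止(不可捕获) |

注意:不可捕获的只有 exit(Pid, kill) 发出的信号。进程自己调用 exit(kill) 只是以 kill 为原因的普通退出,开启 trap_exit 的链接进程会收到 {'EXIT', Pid, kill};被 exit(Pid, kill) 杀死的进程,其链接进程收到的退出原因是 killed,同样可以被捕获。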

16.4 Monitor

16.4.1 基本使用

%% Monitor another process; the returned reference identifies this monitor.
Ref = monitor(process, SpawnedPid),

%% A 'DOWN' message arrives when the monitored process exits.
%% Matching on Ref ties the message to the monitor created above.
receive
    {'DOWN', Ref, process, Pid, normal} ->
        io:format("Process ~p exited normally~n", [Pid]);
    {'DOWN', Ref, process, Pid, Reason} ->
        io:format("Process ~p crashed: ~p~n", [Pid, Reason])
end.

%% Cancel the monitor
demonitor(Ref).
demonitor(Ref, [flush]).  % also removes a 'DOWN' message already in the mailbox
| 特性 | Link | Monitor |
| --- | --- | --- |
| 方向 | 双向 | 单向 |
| 默认行为 | 双方崩溃 | 仅收到消息 |
| 需要 trap_exit | 是(否则随对方一起崩溃) | 否 |
| 适用场景 | 紧密耦合 | 监控/观察 |

16.5 Let it crash 哲学

16.5.1 传统防御性编程 vs Let it crash

%% ❌ Defensive programming (verbose, and cases are easy to miss)
%% Every step is wrapped in a case with an explicit catch-all branch, so
%% unexpected results are converted to {error, {unexpected, Other}}
%% instead of crashing the process.
handle_request(Request) ->
    case validate_request(Request) of
        {ok, ValidReq} ->
            case process_request(ValidReq) of
                {ok, Result} -> {ok, Result};
                {error, Reason} -> {error, Reason};
                Other -> {error, {unexpected, Other}}
            end;
        {error, Reason} -> {error, Reason};
        Other -> {error, {unexpected, Other}}
    end.

%% ✅ Let it crash (concise; failures are left to the supervisor)
%% Each step is an assertive match on {ok, _}: anything else, including
%% {error, Reason}, raises badmatch and crashes the process. Matching
%% instead of wrapping also avoids returning a nested {ok, {ok, Result}}.
handle_request(Request) ->
    {ok, ValidReq} = validate_request(Request),
    {ok, Result} = process_request(ValidReq),
    {ok, Result}.
%% If either step fails the process crashes; when it runs under a
%% supervisor, the supervisor restarts it.

16.5.2 何时使用 try/catch

| 场景 | 是否用 try/catch |
| --- | --- |
| 输入验证 | 是(用户输入不可信) |
| 外部 IO | 是(文件、网络可能失败) |
| 第三方库 | 是(可能抛异常) |
| 内部逻辑 | 否(Let it crash) |
| OTP 回调 | 否(框架已处理) |

16.6 实战:健壮的文件处理器

%% safe_file.erl
-module(safe_file).
-export([read_safe/1, write_safe/2, with_file/3]).

%% Read the whole file at Path.
%% A successful read is returned unchanged as {ok, Binary}; a failure is
%% tagged with the path so the caller can tell which file was involved.
-spec read_safe(string()) -> {ok, binary()} | {error, term()}.
read_safe(Path) ->
    case file:read_file(Path) of
        {error, Why} -> {error, {file_error, Path, Why}};
        {ok, _Contents} = Success -> Success
    end.

%% Write Data to the file at Path.
%% Returns ok on success and {error, {write_failed, Path, Reason}} on any
%% failure. file:write_file/2 reports ordinary failures by RETURNING
%% {error, Reason} rather than raising, so that return value is tagged
%% here; the catch clause is kept for error-class exceptions so both
%% failure paths produce the same shape.
-spec write_safe(string(), binary()) -> ok | {error, term()}.
write_safe(Path, Data) ->
    try file:write_file(Path, Data) of
        ok -> ok;
        {error, Posix} -> {error, {write_failed, Path, Posix}}
    catch
        error:Raised -> {error, {write_failed, Path, Raised}}
    end.

%% Open Path with Mode, apply Fun to the file handle and always close the
%% handle afterwards.
%%
%% Mode is passed straight to file:open/2, so it may be a list of modes
%% such as [read, binary]; the spec keeps atom() as well so existing
%% callers that pass a single atom still type-check.
%%
%% Returns {ok, FunResult} on success, {error, {open_failed, Path, Reason}}
%% when the file cannot be opened, and {error, {Class, Reason, Stack}} when
%% Fun raises any exception.
-spec with_file(string(), atom() | [file:mode()], fun((file:io_device()) -> A)) ->
    {ok, A} | {error, term()}.
with_file(Path, Mode, Fun) ->
    case file:open(Path, Mode) of
        {ok, Handle} ->
            try
                Result = Fun(Handle),
                {ok, Result}
            catch
                Type:Reason:Stack ->
                    {error, {Type, Reason, Stack}}
            after
                %% Runs on both the success and the exception path.
                file:close(Handle)
            end;
        {error, Reason} ->
            {error, {open_failed, Path, Reason}}
    end.

16.7 实战:带重试的操作

%% retry.erl
-module(retry).
-export([with_retry/2, with_retry/3]).

%% Run Fun with up to MaxRetries attempts, starting with a 1000 ms delay
%% between attempts.
-spec with_retry(fun(() -> A), non_neg_integer()) -> A | {error, max_retries}.
with_retry(Fun, MaxRetries) ->
    with_retry(Fun, MaxRetries, 1000).

%% Run Fun with up to Retries attempts. An attempt fails when Fun raises an
%% exception of any class; the result of the first attempt that returns is
%% passed back unchanged. Between attempts the process sleeps Delay
%% milliseconds and the delay doubles each time (exponential backoff).
%% Returns {error, max_retries} once every attempt has failed.
-spec with_retry(fun(() -> A), non_neg_integer(), non_neg_integer()) -> A | {error, max_retries}.
with_retry(_Fun, 0, _Delay) ->
    {error, max_retries};
with_retry(Fun, 1, _Delay) ->
    %% Last attempt: nothing follows it, so give up immediately instead of
    %% sleeping before reporting the failure.
    try Fun() of
        Result -> Result
    catch
        _:_ -> {error, max_retries}
    end;
with_retry(Fun, Retries, Delay) ->
    try Fun() of
        Result -> Result
    catch
        _:_ ->
            timer:sleep(Delay),
            with_retry(Fun, Retries - 1, Delay * 2)  %% exponential backoff
    end.
%% Usage example: up to 3 attempts, starting with a 500 ms delay that
%% doubles after each failed attempt.
retry:with_retry(fun() ->
    http_request("https://api.example.com/data")
end, 3, 500).

16.8 注意事项

⚠️ 常见陷阱

  1. 不要在 after 中抛出异常(会覆盖原始异常)
  2. exit(Pid, kill) 不可被 trap_exit 捕获;而 exit(kill) 只是以 kill 为原因的普通退出,可以被捕获
  3. try/catch 有性能开销,不要滥用
  4. 堆栈跟踪可能很大,注意内存
  5. 过度使用 try/catch 会隐藏 bug

💡 最佳实践

  1. 对外部输入使用 try/catch 验证
  2. 内部逻辑让进程崩溃,由 Supervisor 重启
  3. 使用 Monitor 而不是 Link 监控外部进程
  4. 永远在 after 中清理资源
  5. 使用 logger 模块记录错误和堆栈

16.9 扩展阅读


上一章:15 - IO 与网络 下一章:17 - 测试