强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧.
文档目录

OCaml 教程 / OCaml 格式化输出 Format 模块

OCaml 格式化输出 Format 模块

Format 模块是 OCaml 中用于美观打印(Pretty Printing)的强大工具。它提供了盒子模型(Box Model)来自动处理缩进和换行,广泛用于编译器输出、错误消息格式化和 AST 打印。

Format 模块概述

与 Printf 不同,Format 会根据输出宽度自动换行和缩进,产生结构化的可读输出。

(* Printf vs Format: the same ten integers printed through both modules *)

(* Printf: the text is written exactly as formatted; no line break is
   inserted by the library, whatever the output width *)
let () =
  Printf.printf "List: [%d, %d, %d, %d, %d, %d, %d, %d, %d, %d]\n"
    1 2 3 4 5 6 7 8 9 10

(* Format: each "@ " inside the "@[<hov 2>" box marks a place where the line
   may be split; a continuation line is indented by 2.  The final "@." ends
   the line and flushes the formatter. *)
let () =
  Format.printf "@[<hov 2>list: [%d;@ %d;@ %d;@ %d;@ %d;@ %d;@ %d;@ %d;@ %d;@ %d]@]@."
    1 2 3 4 5 6 7 8 9 10

Format 核心概念

概念 | 说明
Formatter | 格式化器,输出的目标
Box | 盒子,控制缩进和换行的逻辑单元
Break hint | 换行提示,告诉格式化器可以在此处换行
Indentation | 缩进量,换行后的对齐位置

格式化器 Formatter

formatter 是 Format 模块的核心类型,它封装了输出通道和格式化状态。

(* Predefined formatters *)
(* Format.std_formatter  -- writes to stdout *)
(* Format.err_formatter  -- writes to stderr *)
(* Format.str_formatter  -- writes into a string buffer *)

(* Print through the standard formatter; "@." ends the line and flushes *)
let () =
  Format.fprintf Format.std_formatter "Hello, %s!@." "world"

(* Format straight into a string with [Format.asprintf].  The format has no
   trailing "@." because [print_endline] below already terminates the line;
   keeping both would print an extra blank line after the greeting. *)
let result =
  Format.asprintf "Hello, %s! You are %d years old." "Alice" 30

let () = print_endline result

(* A custom formatter: everything printed through [fmt] accumulates in [buf]. *)
let buf = Buffer.create 64
let fmt = Format.formatter_of_buffer buf

(* Write two lines into the buffer, flush the formatter so that nothing is
   left pending in it, then show what the buffer received. *)
let () =
  let captured =
    Format.fprintf fmt "Score: %d@." 100;
    Format.fprintf fmt "Name: %s@." "Alice";
    Format.pp_print_flush fmt ();
    Buffer.contents buf
  in
  Printf.printf "Buffer content:\n%s" captured

常用格式化器函数

函数 | 类型 | 说明
fprintf | formatter -> ('a, formatter, unit) format -> 'a | 格式化输出到指定的 formatter
printf | ('a, formatter, unit) format -> 'a | 输出到 stdout
eprintf | ('a, formatter, unit) format -> 'a | 输出到 stderr
asprintf | ('a, formatter, unit, string) format4 -> 'a | 输出到字符串
sprintf | ('a, unit, string) format -> 'a | 不推荐,请使用 asprintf 替代
fprintf | formatter -> ('a, formatter, unit) format -> 'a | 通用格式化
pp_print_flush | formatter -> unit -> unit | 刷新格式化器
pp_print_newline | formatter -> unit -> unit | 打印换行并刷新
pp_print_string | formatter -> string -> unit | 打印字符串
pp_print_int | formatter -> int -> unit | 打印整数
pp_print_float | formatter -> float -> unit | 打印浮点数
pp_print_char | formatter -> char -> unit | 打印字符
pp_print_bool | formatter -> bool -> unit | 打印布尔值
pp_print_list | ?pp_sep:(formatter -> unit -> unit) -> (formatter -> 'a -> unit) -> formatter -> 'a list -> unit | 打印列表
pp_print_text | formatter -> string -> unit | 打印文本(文本中的空格和换行符被当作 break hint,可自动换行)

@[ ] 盒子模型

盒子(Box)是 Format 的核心抽象,控制内容的换行和缩进行为。

(* @[<hov n> ... @]  "horizontal or vertical" packing box: items fill the
   current line, and a break hint becomes a newline only when the next item
   no longer fits *)
let () =
  Format.printf "@[<hov 2>list: [%d;@ %d;@ %d;@ %d;@ %d;@ %d]@]@."
    1 2 3 4 5 6

(* @[<h n> ... @]  horizontal box: break hints never split the line *)
let () =
  Format.printf "@[<h>%d@ %d@ %d@ %d@]@." 1 2 3 4

(* @[<v n> ... @]  vertical box: every break hint becomes a newline *)
let () =
  Format.printf "@[<v>Item 1:@ %d@ Item 2:@ %d@ Item 3:@ %d@]@." 10 20 30

(* @[<hv n> ... @]  horizontal/vertical box: everything stays on one line if
   it fits; otherwise every break hint in the box becomes a newline *)
let () =
  Format.printf "@[<hv 2>{ %d;@ %d;@ %d;@ %d }@]@." 1 2 3 4

盒子类型总结

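盒子类型 | 语法 | 行为
水平盒子 | @[<h>...@] | 强制水平排列,break hint 不会换行
垂直盒子 | @[<v n>...@] | 强制垂直排列,每个 break hint 处都换行
水平或垂直(填充)盒子 | @[<hov n>...@] | 尽量填满当前行,只有放不下时才在该 break hint 处换行
水平/垂直盒子 | @[<hv n>...@] | 整体放得下就全部在一行;否则盒子内每个 break hint 都换行
结构盒子 | @[<b n>...@] | 类似 hov,但如果换行能使缩进减小,即使当前行还有空间也会换行;@[ 不写类型时默认就是这种盒子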

💡 提示:@[<hov n> 是最常用的盒子类型。n 表示盒子内换行后,相对于盒子起始列的缩进量。

@, 换行

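@, 是一个"cut"换行提示,而不是强制换行:不换行时它什么也不输出(零宽度),是否换行由所在的盒子决定——在垂直盒子中总会换行,在 hov/hv 盒子中只有放不下时才换行,在水平盒子中不换行。真正的强制换行应使用 @\n,或使用会同时刷新输出的 @.。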

(* Two hov boxes stacked inside a vertical box.  "@," is a cut: a break hint
   that prints nothing when the line is not split.  Inside "@[<v 2>" it is
   always split, and the line that follows is indented by 2 relative to the
   column where the v box was opened. *)
let () =
  Format.printf "@[<v 2>@[<hov>List:@ %d;@ %d;@ %d@]@,@[<hov>Array:@ %d;@ %d;@ %d@]@]@."
    1 2 3 4 5 6

输出:

List: 1; 2; 3
  Array: 4; 5; 6

(第一行从盒子的起始列开始输出;只有换行之后的行才会按 <v 2> 缩进 2 格。)

@; 空格

@; 是完整形式的 break hint,可以写成 @;<n m>:不换行时输出 n 个空格;换行时,新行的缩进为盒子的缩进再加上 m。不带参数的 @; 等价于 "@ "(@ 加空格),即不换行时输出一个空格。

(* "@;" written without a "<n m>" argument behaves like "@ ": one space when
   the line is not split.  With "<hov 0>" a continuation line starts at the
   column where the box was opened. *)
let () =
  Format.printf "@[<hov 0>%s@;%s@;%s@;%s@;%s@;%s@;%s@;%s@;%s@;%s@]@."
    "one" "two" "three" "four" "five" "six" "seven" "eight" "nine" "ten"

break hint 对比

标记 | 不换行时 | 换行时
"@ "(@ 加空格) | 打印 1 个空格 | 换行 + 盒子缩进
@, | 不打印任何内容 | 换行 + 盒子缩进
@; | 打印 1 个空格(等价于 "@ ") | 换行 + 盒子缩进
@;<n m> | 打印 n 个空格 | 换行 + 盒子缩进 + m

自定义打印器

(* Arithmetic expression tree used by the printer below. *)
type expr =
  | Num of int
  | Add of expr * expr
  | Mul of expr * expr
  | Var of string

(* [pp_expr fmt e] prints [e] fully parenthesised.  Every binary node sits in
   its own hov box with a break hint in front of the operator, so a long
   expression may wrap there with an indentation of 2. *)
let rec pp_expr fmt e =
  (* Both operators take the same arguments and differ only in the layout. *)
  let binary (layout : (_, _, _) format) lhs rhs =
    Format.fprintf fmt layout pp_expr lhs pp_expr rhs
  in
  match e with
  | Var name -> Format.pp_print_string fmt name
  | Num value -> Format.fprintf fmt "%d" value
  | Mul (lhs, rhs) -> binary "@[<hov 2>(%a@ * %a)@]" lhs rhs
  | Add (lhs, rhs) -> binary "@[<hov 2>(%a@ + %a)@]" lhs rhs

(* Print (x * 2) + (1 + (y * 3)). *)
let () =
  let product = Mul (Var "x", Num 2) in
  let sum = Add (Num 1, Mul (Var "y", Num 3)) in
  Format.printf "Expr: %a@." pp_expr (Add (product, sum))

输出:

Expr: ((x * 2) + (1 + (y * 3)))

嵌套结构打印

(* Minimal JSON value tree. *)
type json =
  | JNull
  | JBool of bool
  | JNum of float
  | JStr of string
  | JArr of json list
  | JObj of (string * json) list

(* Separator between the elements of an array or object: a comma followed by
   a cut, which always splits the line inside the enclosing vertical box. *)
let pp_json_comma fmt () = Format.fprintf fmt ",@,"

(* [pp_json fmt v] prints [v] with one element per line and two spaces of
   indentation per nesting level.  Empty arrays and objects print as "[]"
   and "{}".  A key and the start of its value always share a line.
   NOTE(review): strings and keys are written between double quotes without
   any escaping, so a value containing a double quote or a backslash does
   not produce valid JSON. *)
let rec pp_json fmt json = pp_json_prefixed fmt "" json

(* Prints [prefix] immediately followed by the value.  For a non-empty
   container the vertical box is opened before [prefix], so the elements are
   indented from the column where the key (or array item) starts rather than
   from the column of the opening bracket.  The final "@;<0 -2>" break
   cancels the box indentation, which puts the closing bracket back on that
   starting column. *)
and pp_json_prefixed fmt prefix = function
  | JNull -> Format.fprintf fmt "%snull" prefix
  | JBool b -> Format.fprintf fmt "%s%b" prefix b
  | JNum f -> Format.fprintf fmt "%s%g" prefix f
  | JStr s -> Format.fprintf fmt "%s\"%s\"" prefix s
  | JArr [] -> Format.fprintf fmt "%s[]" prefix
  | JArr items ->
    Format.fprintf fmt "@[<v 2>%s[@,%a@;<0 -2>]@]" prefix
      (Format.pp_print_list ~pp_sep:pp_json_comma pp_json) items
  | JObj [] -> Format.fprintf fmt "%s{}" prefix
  | JObj pairs ->
    Format.fprintf fmt "@[<v 2>%s{@,%a@;<0 -2>}@]" prefix
      (Format.pp_print_list ~pp_sep:pp_json_comma pp_json_member) pairs

(* One ["key": value] entry of an object. *)
and pp_json_member fmt (key, value) =
  pp_json_prefixed fmt (Printf.sprintf "\"%s\": " key) value

(* Sample document with scalar fields, an array and a nested object. *)
let sample_json = JObj [
  ("name", JStr "Alice");
  ("age", JNum 30.0);
  ("active", JBool true);
  ("scores", JArr [JNum 95.0; JNum 87.0; JNum 92.0]);
  ("address", JObj [
    ("city", JStr "Beijing");
    ("zip", JStr "100000");
  ]);
]

(* Print the sample on the line after the "JSON:" header. *)
let () =
  Format.printf "JSON:@.%a@." pp_json sample_json

pretty printing 原理

Format 模块的 pretty printing 算法基于 Derek Oppen 的论文 “Prettyprinting”(1980),核心思想是:

  1. 盒子嵌套:内容被组织在嵌套的盒子中
  2. Break hints:在可以换行的位置放置 "@ "(@ 加空格)、@, 等标记
  3. 空间计算:格式化器根据剩余空间决定是否换行
  4. 缩进传播:换行后自动缩进到当前盒子的缩进层级
(* Box nesting demo: a vertical box holding two hov boxes.  The cuts ("@,")
   sit only between the three lines; a cut after the last inner box would
   start one more line, indented and otherwise empty, before the outer box
   closes. *)
let pp_nested fmt () =
  Format.fprintf fmt "@[<v 2>Outer:@,";                   (* open the outer v box *)
  Format.fprintf fmt "@[<hov 2>Inner:@ %d;@ %d;@ %d@]@,"  (* first inner hov box *)
    100 200 300;
  Format.fprintf fmt "@[<hov 2>Another:@ %s;@ %s@]"       (* second inner hov box *)
    "hello" "world";
  Format.fprintf fmt "@]"                                 (* close the outer box *)

(* [pp_nested] already has the [formatter -> unit -> unit] shape that "%a"
   expects, so it is passed directly. *)
let () =
  Format.printf "Nested:@.%a@." pp_nested ()

输出:

Nested:
Outer:
  Inner: 100; 200; 300
  Another: hello; world

调试输出 pp_debug

(* Debug output that is flushed right away: the message is printed with a
   terminating "@." and the formatter is flushed once more afterwards. *)
let debug_print fmt =
  let flush_when_done ppf = Format.pp_print_flush ppf () in
  Format.kfprintf flush_when_done fmt "Debug: value=%d@." 42

let () =
  Format.std_formatter |> debug_print

(* [debug_message component msg] builds the one-line text "[component] msg".
   The string carries no trailing newline: the caller decides how the line
   ends ([print_endline] below adds it), so no blank line is printed between
   messages. *)
let debug_message component msg =
  Format.asprintf "[%s] %s" component msg

let () =
  print_endline (debug_message "Auth" "User logged in");
  print_endline (debug_message "DB" "Query executed in 42ms")

(* Formatted debug output.
   [pp_debug_pair pp_a pp_b fmt pair] prints [pair] as "(a, b)" inside a hov
   box, using [pp_a] and [pp_b] for the two components; the line may be split
   after the comma. *)
let pp_debug_pair pp_a pp_b fmt pair =
  let first, second = pair in
  Format.fprintf fmt "@[<hov 2>(%a,@ %a)@]" pp_a first pp_b second

(* [pp_debug_list pp_elem fmt xs] prints [xs] between square brackets, the
   elements separated by ";" and a break hint, all inside one hov box. *)
let pp_debug_list pp_elem fmt xs =
  let pp_semicolon ppf () = Format.fprintf ppf ";@ " in
  let pp_items = Format.pp_print_list ~pp_sep:pp_semicolon pp_elem in
  Format.fprintf fmt "@[<hov 2>[%a]@]" pp_items xs

(* Use the two combinators above through "%a". *)
let () =
  (* an int * string pair, each component printed with its stdlib printer *)
  let pair = (42, "hello") in
  Format.printf "Pair: %a@." (pp_debug_pair Format.pp_print_int Format.pp_print_string) pair;

  (* a list of ints *)
  let xs = [1; 2; 3; 4; 5] in
  Format.printf "List: %a@." (pp_debug_list Format.pp_print_int) xs

Format 与 printf 对比

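特性 | Printf | Format
自动换行 | ❌ | ✅
缩进管理 | ❌ 需手动处理 | ✅ 盒子模型
类型安全 | ✅ | ✅
性能 | 更快 | 稍慢
输出目标 | out_channel / Buffer / 字符串 | 任意 formatter
复杂布局 | 难以实现 | 原生支持
学习曲线 | 平缓 | 较陡
%a 格式 | 支持(打印函数接收 out_channel 等,无法参与布局) | 支持(打印函数接收 formatter,可与盒子组合)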

💡 提示:简单的一行输出用 Printf,需要格式化复杂结构(列表、嵌套对象、AST)时用 Format

实际应用:错误消息

(* Source position attached to every error. *)
type loc = {
  file : string;
  line : int;
  col : int;
}

(* Error kinds reported by the printers below; each one carries the data its
   message needs plus the location. *)
type error =
  | Type_mismatch of { expected : string; got : string; loc : loc }
  | Unbound_var of { name : string; loc : loc }
  | Parse_error of { msg : string; loc : loc }

(* [pp_loc fmt loc] prints a location as "file:line:col". *)
let pp_loc fmt { file; line; col } =
  Format.fprintf fmt "%s:%d:%d" file line col

(* [pp_error fmt err] prints one error as "Error at <loc>: <description>"
   inside a hov box, so a long message wraps at its break hints with an
   indentation of 2. *)
let pp_error fmt err =
  match err with
  | Parse_error { msg; loc } ->
    Format.fprintf fmt "@[<hov 2>Error at %a:@ Parse error: %s@]"
      pp_loc loc msg
  | Unbound_var { name; loc } ->
    Format.fprintf fmt "@[<hov 2>Error at %a:@ Unbound variable '%s'@]"
      pp_loc loc name
  | Type_mismatch { expected; got; loc } ->
    Format.fprintf fmt "@[<hov 2>Error at %a:@ Type mismatch:@ expected %s,@ got %s@]"
      pp_loc loc expected got

(* [pp_errors fmt errors] prints a "Found N error(s):" header followed by one
   "- <error>" line per element, indented by 2 inside a vertical box.
   The cut is emitted before each item rather than after it, so the box does
   not end with an extra, whitespace-only line; an empty list prints the
   header alone. *)
let pp_errors fmt errors =
  Format.fprintf fmt "@[<v 2>Found %d error(s):" (List.length errors);
  List.iter (fun err -> Format.fprintf fmt "@,- %a" pp_error err) errors;
  Format.fprintf fmt "@]"

(* Build one error of each kind and print the whole report. *)
let () =
  let errors = [
    Type_mismatch {
      expected = "int";
      got = "string";
      loc = { file = "main.ml"; line = 10; col = 5 }
    };
    Unbound_var {
      name = "x";
      loc = { file = "main.ml"; line = 15; col = 12 }
    };
    Parse_error {
      msg = "unexpected token ')'";
      loc = { file = "main.ml"; line = 20; col = 1 }
    };
  ] in
  Format.printf "%a@." pp_errors errors

实际应用:AST 打印

(* Types of the toy language. *)
type typ =
  | TInt
  | TBool
  | TString
  | TFun of typ * typ   (* argument type, result type *)
  | TList of typ
  | TOption of typ

(* Patterns used by lambda parameters, let bindings and match cases. *)
type pattern =
  | PVar of string
  | PWild
  | PNum of int
  | PBool of bool
  | PNil
  | PCons of pattern * pattern   (* head pattern, tail pattern *)

(* Expressions of the toy language. *)
type expr =
  | EInt of int
  | EBool of bool
  | EVar of string
  | EApp of expr * expr                     (* function, argument *)
  | ELam of pattern * expr                  (* parameter, body *)
  | ELet of pattern * expr * expr           (* bound pattern, value, body *)
  | EMatch of expr * (pattern * expr) list  (* scrutinee, cases *)
  | EIf of expr * expr * expr               (* condition, then, else *)
  | ENil
  | ECons of expr * expr                    (* head, tail *)

(* [pp_typ fmt ty] prints a type in OCaml-like syntax.  The left operand of
   an arrow and the argument of [list] / [option] go through
   [pp_typ_atomic], which adds parentheses when that operand is itself a
   compound type. *)
let rec pp_typ fmt ty =
  let keyword text = Format.pp_print_string fmt text in
  match ty with
  | TInt -> keyword "int"
  | TBool -> keyword "bool"
  | TString -> keyword "string"
  | TList elt -> Format.fprintf fmt "%a list" pp_typ_atomic elt
  | TOption elt -> Format.fprintf fmt "%a option" pp_typ_atomic elt
  | TFun (arg, res) ->
    Format.fprintf fmt "@[<hov 2>%a -> %a@]" pp_typ_atomic arg pp_typ res

(* [pp_typ_atomic fmt ty] prints base types bare and every compound type
   between parentheses. *)
and pp_typ_atomic fmt ty =
  match ty with
  | TInt | TBool | TString -> pp_typ fmt ty
  | TFun _ | TList _ | TOption _ -> Format.fprintf fmt "(%a)" pp_typ ty

(* [pp_pattern fmt pat] prints a pattern; a cons pattern is always written
   between parentheses. *)
let rec pp_pattern fmt pat =
  let literal text = Format.pp_print_string fmt text in
  match pat with
  | PWild -> literal "_"
  | PNil -> literal "[]"
  | PVar name -> literal name
  | PNum n -> Format.pp_print_int fmt n
  | PBool b -> Format.pp_print_bool fmt b
  | PCons (head, tail) ->
    Format.fprintf fmt "@[<hov 2>(%a :: %a)@]" pp_pattern head pp_pattern tail

(* [pp_expr fmt e] pretty-prints an expression of the toy language.
   Applications and cons cells are always parenthesised.  [let], [match] and
   [if] use a vertical box, so their parts are laid out on separate lines. *)
let rec pp_expr fmt = function
  | EInt n -> Format.pp_print_int fmt n
  | EBool b -> Format.pp_print_bool fmt b
  | EVar s -> Format.pp_print_string fmt s
  | ENil -> Format.pp_print_string fmt "[]"
  | ECons (hd, tl) ->
    Format.fprintf fmt "@[<hov 2>(%a :: %a)@]" pp_expr hd pp_expr tl
  | EApp (f, arg) ->
    Format.fprintf fmt "@[<hov 2>(%a@ %a)@]" pp_expr f pp_expr arg
  | ELam (param, body) ->
    Format.fprintf fmt "@[<hov 2>fun %a ->@ %a@]" pp_pattern param pp_expr body
  | ELet (pat, value, body) ->
    Format.fprintf fmt "@[<v 0>@[<hov 2>let %a =@ %a@ in@]@ %a@]"
      pp_pattern pat pp_expr value pp_expr body
  | EMatch (scrutinee, cases) ->
    Format.fprintf fmt "@[<v 0>@[<hov 2>match %a with@]" pp_expr scrutinee;
    (* The cut goes in front of each case.  Emitting it after each case would
       also emit one after the last case and leave an empty line inside
       whatever construct encloses the match. *)
    List.iter (fun (pat, body) ->
      Format.fprintf fmt "@,@[<hov 2>| %a ->@ %a@]" pp_pattern pat pp_expr body
    ) cases;
    Format.fprintf fmt "@]"
  | EIf (cond, then_e, else_e) ->
    Format.fprintf fmt "@[<v 0>@[<hov 2>if %a then@ %a@]@ @[<hov 2>else@ %a@]@]"
      pp_expr cond pp_expr then_e pp_expr else_e

(* AST of:
     let map = fun f -> fun xs ->
       match xs with
       | [] -> []
       | h :: t -> f h :: map f t
     in
     map (fun x -> x + 1) [1; 2; 3]
   where "+" is represented as a variable applied to two arguments. *)
let sample_expr =
  ELet (PVar "map",
    ELam (PVar "f",
      ELam (PVar "xs",
        EMatch (EVar "xs", [
          (PNil, ENil);
          (PCons (PVar "h", PVar "t"),
            ECons (EApp (EVar "f", EVar "h"),
                   EApp (EApp (EVar "map", EVar "f"), EVar "t")));
        ]))),
    EApp (EApp (EVar "map", ELam (PVar "x", EApp (EApp (EVar "+", EVar "x"), EInt 1))),
          ECons (EInt 1, ECons (EInt 2, ECons (EInt 3, ENil)))))

(* Print the sample on the line after the "AST:" header. *)
let () =
  Format.printf "AST:@.%a@." pp_expr sample_expr

⚠️ 注意:在编写 pp_* 系列函数时,原子类型(如 TInt、EVar)不需要括号,而复合类型(如 TFun、EApp)需要根据上下文决定是否添加括号。使用 pp_typ_atomic 等辅助函数来处理。

Format 高级技巧

格式化带标签的参数

(* [pp_labeled ~pp_value fmt label value] prints "label: value" in a hov box;
   the value may move to the next line, indented by 2. *)
let pp_labeled ~pp_value fmt label value =
  let open Format in
  fprintf fmt "@[<hov 2>%s:@ %a@]" label pp_value value

(* [pp_kv_pair pp_v fmt (key, value)] prints the key between double quotes,
   then a colon, then the value printed with [pp_v]. *)
let pp_kv_pair pp_v fmt binding =
  let key, value = binding in
  Format.fprintf fmt "@[<hov 2>\"%s\":@ %a@]" key pp_v value

(* [pp_record pp_fields fmt fields] prints the fields between braces, one per
   line, indented by 2 and separated by commas.  An empty list prints as
   "{}".  The "@;<0 -2>" break in front of the closing brace cancels the box
   indentation, so the brace lines up with the opening one instead of with
   the fields. *)
let pp_record pp_fields fmt = function
  | [] -> Format.pp_print_string fmt "{}"
  | fields ->
    let pp_sep fmt () = Format.fprintf fmt ",@," in
    Format.fprintf fmt "@[<v 2>{@,%a@;<0 -2>}@]"
      (Format.pp_print_list ~pp_sep pp_fields) fields

(* Print a list of string pairs as a record; each pair is printed by
   [pp_kv_pair] with the stdlib string printer for the value. *)
let () =
  let pairs = [("name", "Alice"); ("age", "30"); ("city", "Beijing")] in
  Format.printf "Record: %a@."
    (pp_record (pp_kv_pair Format.pp_print_string))
    pairs

条件缩进

(* [pp_block indent fmt content] prints the output of [content fmt ()]
   between braces.
   - [indent = true]: vertical layout, content indented by 2; the "@;<0 -2>"
     break cancels that indentation for the closing brace, so it lines up
     with the opening one.
   - [indent = false]: "{ ... }" in a hov box.
   [content] is the last parameter, so with "%a" the printer is
   [pp_block indent] and the printed value is the content function. *)
let pp_block indent fmt content =
  if indent then
    Format.fprintf fmt "@[<v 2>{@,%a@;<0 -2>}@]" content ()
  else
    Format.fprintf fmt "@[<hov>{ %a }@]" content ()

(* [pp_block] takes the formatter before the content function, so for "%a"
   the printer is [pp_block true] / [pp_block false] and the content function
   is the value being printed.  Applying [pp_block] to the content function
   first puts a function where the formatter is expected and does not
   type-check. *)
let () =
  Format.printf "Indented:@.%a@."
    (pp_block true)
    (fun fmt () -> Format.fprintf fmt "line 1@,line 2@,line 3");
  Format.printf "Inline: %a@."
    (pp_block false)
    (fun fmt () -> Format.fprintf fmt "item1; item2; item3")

扩展阅读