OCaml 教程 / 变体类型(Variant Types)
变体类型(Variant Types)
概述
变体类型(Variant Types)是 OCaml 类型系统的核心特性,也称为代数数据类型(Algebraic Data Types, ADT)。它们允许你定义一个类型,该类型的值可以是若干**构造器(Constructor)**之一,每个构造器可以携带不同类型和数量的数据。
变体类型可以看作是对其他语言中 enum 的强大泛化——不仅可以列举可能的值,每个构造器还可以附带数据。
简单变体
最简单的变体类似枚举:
(* A plain enumeration: three constant constructors, none carrying data. *)
type color = Red | Green | Blue

(* Constructors are used directly as values. *)
let r = Red
let g = Green

(* [color_to_string c] returns the Chinese display name of [c]. *)
let color_to_string = function
  | Red -> "红色"
  | Green -> "绿色"
  | Blue -> "蓝色"

let _ = color_to_string Red (* => "红色" *)

(* Equality and ordering are available without extra code. *)
let _ = (Red = Red) (* => true *)
let _ = compare Red Blue (* constructors compare in declaration order *)
带数据的变体
(* Each constructor may carry its own kind of payload. *)
type shape =
  | Circle of float (* radius *)
  | Rectangle of float * float (* width, height *)
  | Triangle of float * float * float (* the three side lengths *)

(* Building variant values. *)
let c = Circle 5.0
let r = Rectangle (3.0, 4.0)
let t = Triangle (3.0, 4.0, 5.0)

(* [area s] computes the area of [s] by destructuring its payload.
   Triangles use Heron's formula on the three side lengths. *)
let area = function
  | Circle radius -> Float.pi *. radius *. radius
  | Rectangle (width, height) -> width *. height
  | Triangle (a, b, c) ->
    let half = (a +. b +. c) /. 2.0 in
    sqrt (half *. (half -. a) *. (half -. b) *. (half -. c))

let _ = area c (* => 78.54... *)
let _ = area r (* => 12.0 *)
let _ = area t (* => 6.0 *)

(* [describe s] renders [s] as a label; a shape whose dimensions are not
   all strictly positive is reported as invalid. *)
let describe s =
  match s with
  | Circle r ->
    if r > 0.0 then Printf.sprintf "圆(半径=%.2f)" r else "无效圆"
  | Rectangle (w, h) ->
    if w > 0.0 && h > 0.0 then Printf.sprintf "矩形(%.2fx%.2f)" w h
    else "无效矩形"
  | Triangle (a, b, c) ->
    if a > 0.0 && b > 0.0 && c > 0.0 then
      Printf.sprintf "三角形(%.2f,%.2f,%.2f)" a b c
    else "无效三角形"
💡 提示:构造器的用法类似函数——它把参数包装成对应的变体值。注意这与"智能构造函数(Smart Constructor)"不是一回事:后者指在构造之前先做校验的普通函数(例如拒绝半径为负的 Circle)。
递归变体
变体类型可以递归引用自身:
(* Arithmetic expression tree: operator nodes hold their sub-expressions. *)
type expr =
  | Num of float
  | Add of expr * expr
  | Mul of expr * expr
  | Sub of expr * expr
  | Div of expr * expr
  | Neg of expr

(* The expression (1 + 2) * 3. *)
let expr1 = Mul (Add (Num 1.0, Num 2.0), Num 3.0)

(* [eval e] computes the floating-point value of [e] by structural
   recursion over the tree. *)
let rec eval e =
  (* Evaluate both operands, then combine them with [op]. *)
  let binary op lhs rhs = op (eval lhs) (eval rhs) in
  match e with
  | Num value -> value
  | Neg inner -> -.(eval inner)
  | Add (lhs, rhs) -> binary ( +. ) lhs rhs
  | Sub (lhs, rhs) -> binary ( -. ) lhs rhs
  | Mul (lhs, rhs) -> binary ( *. ) lhs rhs
  | Div (lhs, rhs) -> binary ( /. ) lhs rhs

let _ = eval expr1 (* => 9.0 *)
⚠️ **注意**:对变体(包括递归变体)做模式匹配时应覆盖所有构造器,否则编译器会发出"匹配不完整"的警告。当修改变体定义(添加新构造器)时,编译器会自动标记所有需要更新的匹配表达式——前提是这些匹配没有使用通配符 `_` 分支,因为通配符会吞掉新构造器而不产生任何警告。
(* [to_string e] renders [e] as a fully parenthesised infix string.
   Integer-valued numbers are printed without a fractional part; all other
   numbers are printed with two decimals. *)
let rec to_string e =
  match e with
  | Num n ->
    if Float.is_integer n then
      (* "%.0f" is used instead of [int_of_float], whose result is
         unspecified once [n] falls outside the range of [int].
         Adding 0.0 turns -0.0 into 0.0 so that it still prints as "0". *)
      Printf.sprintf "%.0f" (n +. 0.0)
    else Printf.sprintf "%.2f" n
  | Add (a, b) -> Printf.sprintf "(%s + %s)" (to_string a) (to_string b)
  | Mul (a, b) -> Printf.sprintf "(%s * %s)" (to_string a) (to_string b)
  | Sub (a, b) -> Printf.sprintf "(%s - %s)" (to_string a) (to_string b)
  | Div (a, b) -> Printf.sprintf "(%s / %s)" (to_string a) (to_string b)
  | Neg inner -> Printf.sprintf "(-%s)" (to_string inner)

let _ = to_string expr1 (* => "((1 + 2) * 3)" *)
链表
(* A hand-written singly linked list: the built-in list type has the same
   recursive shape. *)
type 'a my_list =
  | Nil
  | Cons of 'a * 'a my_list

(* [fold_left f acc l] folds [f] over [l] from the first element to the
   last, starting from [acc]. Tail-recursive. *)
let rec fold_left f acc = function
  | Nil -> acc
  | Cons (x, rest) -> fold_left f (f acc x) rest

(* [length l] is the number of elements of [l]. Implemented as a fold so
   that it runs in constant stack space on arbitrarily long lists. *)
let length l = fold_left (fun count _ -> count + 1) 0 l

(* [map f l] applies [f] to every element of [l], first to last, and
   returns the results in the same order. The result is built reversed
   and then flipped, so the function runs in constant stack space and the
   order of [f]'s side effects is well defined. *)
let map f l =
  let rev_mapped = fold_left (fun acc x -> Cons (f x, acc)) Nil l in
  fold_left (fun acc y -> Cons (y, acc)) Nil rev_mapped

(* Building a list. *)
let lst = Cons (1, Cons (2, Cons (3, Nil)))

let _ = length lst (* => 3 *)
let _ = map (fun x -> x * 2) lst
(* => Cons (2, Cons (4, Cons (6, Nil))) *)
二叉树
(* A binary tree: a node holds a left subtree, a value and a right
   subtree; [Leaf] is the empty tree. *)
type 'a tree =
  | Leaf
  | Node of 'a tree * 'a * 'a tree

(* A small example tree. *)
let sample_tree =
  Node (Node (Leaf, 1, Leaf), 2, Node (Node (Leaf, 3, Leaf), 4, Leaf))

(* [size t] is the number of nodes in [t]. *)
let rec size = function
  | Leaf -> 0
  | Node (l, _, r) -> 1 + size l + size r

(* [tree_max t] is the largest value stored in [t]. An empty tree yields
   [min_int], which also means this function only applies to [int tree]. *)
let rec tree_max = function
  | Leaf -> min_int
  | Node (l, v, r) -> max v (max (tree_max l) (tree_max r))

(* [inorder t] lists the values of [t] in left-to-right (in-order)
   sequence. Values are consed onto an accumulator while walking right to
   left, which avoids re-copying partial results with [@] at every node. *)
let inorder t =
  let rec collect acc = function
    | Leaf -> acc
    | Node (l, v, r) -> collect (v :: collect acc r) l
  in
  collect [] t

let _ = size sample_tree (* => 4 *)
let _ = tree_max sample_tree (* => 4 *)
let _ = inorder sample_tree (* => [1; 2; 3; 4] *)
变体与模式匹配
变体类型的真正威力在于与模式匹配的结合:
(* A variant with a type parameter. It has the same shape as OCaml's
   predefined [option]; declaring it here shadows that type for the code
   that follows. *)
type 'a option =
  | None
  | Some of 'a

(* [safe_divide a b] is [Some (a / b)], or [None] when [b] is zero. *)
let safe_divide a b =
  match b with
  | 0 -> None
  | _ -> Some (a / b)

let _ = safe_divide 10 3 (* => Some 3 *)
let _ = safe_divide 10 0 (* => None *)

(* [handle_result r] turns the outcome of a division into a message. *)
let handle_result = function
  | None -> "错误: 除以零"
  | Some v -> Printf.sprintf "结果: %d" v

(* [opt >>= f] chains optional computations: [f] runs only when [opt]
   holds a value; [None] is passed through untouched. *)
let ( >>= ) opt f =
  match opt with
  | Some v -> f v
  | None -> None

(* [safe_calc a b c] computes (a / b) / c, giving [None] as soon as either
   divisor is zero. *)
let safe_calc a b c =
  safe_divide a b >>= (fun quotient -> safe_divide quotient c)

let _ = safe_calc 100 2 5 (* => Some 10 *)
let _ = safe_calc 100 0 5 (* => None *)
Result 类型
(* Success-or-failure variant: [Ok] carries the result, [Error] carries
   the reason for failure. *)
type ('a, 'b) result =
  | Ok of 'a
  | Error of 'b

(* [parse_int s] converts [s] to an integer, returning an [Error] message
   when [int_of_string] rejects the input. *)
let parse_int s =
  match int_of_string s with
  | n -> Ok n
  | exception Failure _ -> Error (Printf.sprintf "无法解析: %s" s)

(* [r >>= f] chains fallible steps: [f] runs on the payload of [Ok]; an
   [Error] is propagated unchanged. *)
let ( >>= ) r f =
  match r with
  | Error e -> Error e
  | Ok v -> f v

(* [parse_and_double s] parses [s] and doubles it; negative numbers are
   rejected. *)
let parse_and_double s =
  parse_int s >>= fun n ->
  if n < 0 then Error "负数不支持" else Ok (n * 2)

let _ = parse_and_double "21" (* => Ok 42 *)
let _ = parse_and_double "abc" (* => Error "无法解析: abc" *)
let _ = parse_and_double "-5" (* => Error "负数不支持" *)
变体的编译表示
了解变体的内存布局有助于理解性能:
简单变体(无数据)
type color = Red | Green | Blue
(* Compiled to integer constants: Red=0, Green=1, Blue=2 *)
(* Each value occupies a single machine word *)
带数据的变体
type shape =
| Circle of float
| Rectangle of float * float
(* Circle r is compiled to: [tag=0; r]
Rectangle (w, h) is compiled to: [tag=1; w; h] *)
(* These values are heap-allocated blocks *)
优化:内联记录
(* Inline records: constructor arguments with named fields *)
type shape_optimized =
| Circle of { radius : float }
| Rectangle of { width : float; height : float }
(* Field names are erased at run time.
   NOTE(review): "more compact" presumably compares against a constructor
   that points to a separately declared record - confirm. *)
| 变体类型 | 内存布局 | 大小 |
|---|---|---|
| 无数据(如 Red) | 整数常量 | 1 字 |
| 单数据(如 Circle of float) | [tag, data] | 2 字 |
| 多数据(如 Rect of float * float) | [tag, d1, d2] | 3 字 |
| 内联记录 | 与多数据类似 | 类似 |
⚠️ 注意:无数据的变体构造器使用整数表示,因此比较操作非常高效(等同于整数比较)。带数据的构造器需要堆分配,涉及指针解引用。
JSON 抽象语法树实例
(* A complete JSON abstract syntax tree. *)
type json =
  | JNull
  | JBool of bool
  | JNumber of float
  | JString of string
  | JArray of json list
  | JObject of (string * json) list

(* [escape_json_string s] returns [s] as a quoted JSON string literal.
   Only the characters JSON requires to be escaped are rewritten (the
   double quote, the backslash and control characters below 0x20); every
   other byte is copied verbatim, so UTF-8 text such as Chinese survives
   unchanged. OCaml's "%S" is not suitable here: it applies OCaml escaping
   and turns non-ASCII bytes into decimal escapes that JSON does not
   understand. *)
let escape_json_string s =
  let buf = Buffer.create (String.length s + 2) in
  Buffer.add_char buf '"';
  String.iter
    (fun ch ->
      match ch with
      | '"' -> Buffer.add_string buf "\\\""
      | '\\' -> Buffer.add_string buf "\\\\"
      | '\n' -> Buffer.add_string buf "\\n"
      | '\r' -> Buffer.add_string buf "\\r"
      | '\t' -> Buffer.add_string buf "\\t"
      | '\b' -> Buffer.add_string buf "\\b"
      | '\012' -> Buffer.add_string buf "\\f"
      | c when Char.code c < 0x20 ->
        Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c))
      | c -> Buffer.add_char buf c)
    s;
  Buffer.add_char buf '"';
  Buffer.contents buf

(* [format_json_number n] renders [n] as a JSON number.
   - Infinities and NaN have no JSON representation and become "null".
   - Integer-valued floats are printed without a fractional part using
     "%.0f" rather than [int_of_float], whose result is unspecified
     outside the range of [int]; adding 0.0 makes -0.0 print as "0".
   - Other values use the shortest of 15 or 17 significant digits that
     reads back as exactly the same float. *)
let format_json_number n =
  if not (Float.is_finite n) then "null"
  else if Float.is_integer n then Printf.sprintf "%.0f" (n +. 0.0)
  else
    let short = Printf.sprintf "%.15g" n in
    if float_of_string short = n then short else Printf.sprintf "%.17g" n

(* [to_json_string j] serialises [j] to JSON text. Array elements and
   object members are separated by ", "; keys and values by ": ". *)
let rec to_json_string = function
  | JNull -> "null"
  | JBool true -> "true"
  | JBool false -> "false"
  | JNumber n -> format_json_number n
  | JString s -> escape_json_string s
  | JArray items ->
    "[" ^ String.concat ", " (List.map to_json_string items) ^ "]"
  | JObject pairs ->
    let member (k, v) = escape_json_string k ^ ": " ^ to_json_string v in
    "{" ^ String.concat ", " (List.map member pairs) ^ "}"
(* [get keys json] walks down nested objects following [keys] in order and
   returns the value reached. The result is [None] when a key is missing
   or when a non-object value is met while keys remain. An empty key list
   returns [json] itself. *)
let rec get keys json =
  match keys with
  | [] -> Some json
  | key :: rest ->
    (match json with
     | JObject pairs ->
       (match List.assoc_opt key pairs with
        | None -> None
        | Some child -> get rest child)
     | JNull | JBool _ | JNumber _ | JString _ | JArray _ -> None)

(* [json_number j] extracts the float held by a [JNumber]; any other
   constructor gives [None]. *)
let json_number j =
  match j with
  | JNumber n -> Some n
  | JNull | JBool _ | JString _ | JArray _ | JObject _ -> None

(* [json_string j] extracts the text held by a [JString]; any other
   constructor gives [None]. *)
let json_string j =
  match j with
  | JString s -> Some s
  | JNull | JBool _ | JNumber _ | JArray _ | JObject _ -> None
(* Sample document used by the demonstration below. *)
let data =
  JObject
    [
      ("name", JString "Alice");
      ("age", JNumber 30.0);
      ("active", JBool true);
      ("address", JObject [ ("city", JString "北京"); ("zip", JString "100000") ]);
      ("scores", JArray [ JNumber 95.0; JNumber 87.0 ]);
      ("notes", JNull);
    ]

(* Print the serialised document, then look up a nested string and an
   array, reporting whether each was found. *)
let () =
  data |> to_json_string |> print_endline;
  begin
    match get [ "address"; "city" ] data with
    | Some (JString city) -> Printf.printf "城市: %s\n" city
    | Some _ | None -> print_endline "未找到城市"
  end;
  begin
    match get [ "scores" ] data with
    | Some (JArray _) -> print_endline "有成绩数据"
    | Some _ | None -> print_endline "无成绩数据"
  end
类型别名与变体组合
(* Type aliases: new names for existing types. *)
type error_code = int
type error_message = string

(* A result whose error side is fixed to a (code, message) pair. *)
type 'a result = ('a, error_code * error_message) Stdlib.result

(* Several variants combined to model HTTP. *)
type http_method = GET | POST | PUT | DELETE | PATCH

type status_code =
  | OK
  | BadRequest
  | Unauthorized
  | NotFound
  | InternalError

type 'a http_response = {
  status : status_code;
  headers : (string * string) list;
  body : 'a;
}

(* [status_to_int s] maps a status constructor to its numeric code. *)
let status_to_int status =
  match status with
  | OK -> 200
  | BadRequest -> 400
  | Unauthorized -> 401
  | NotFound -> 404
  | InternalError -> 500

(* [method_to_string m] is the upper-case name of the HTTP method [m]. *)
let method_to_string meth =
  match meth with
  | GET -> "GET"
  | POST -> "POST"
  | PUT -> "PUT"
  | DELETE -> "DELETE"
  | PATCH -> "PATCH"
Polymorphic Variants(多态变体)
OCaml 还支持多态变体,使用反引号标记:
(* Polymorphic variants can be used without declaring a type first. *)
let red = `Red
let blue = `Blue

(* [color_to_int c] packs a colour into a 24-bit 0xRRGGBB integer. The
   [`RGB] components are shifted and or-ed together as given. *)
let color_to_int c =
  match c with
  | `RGB (r, g, b) -> (r lsl 16) lor (g lsl 8) lor b
  | `Red -> 0xFF0000
  | `Green -> 0x00FF00
  | `Blue -> 0x0000FF

let _ = color_to_int `Red (* => 16711680 *)
let _ = color_to_int (`RGB (255, 128, 0))
(* Polymorphic variant types can be "open": thanks to the final wildcard
   branch, [handle_value] accepts any tag, not only the four it names.
   [handle_value v] renders the payload of [`Int], [`Float], [`String] and
   [`Bool] as a string and returns a fixed placeholder for every other
   tag. *)
let handle_value v =
  match v with
  | `Int n -> string_of_int n
  | `Float f -> string_of_float f
  | `String s -> s
  | `Bool b -> string_of_bool b
  (* A wildcard rather than a named variable: the value is not used, and
     a named binding would raise an unused-variable warning. *)
  | _ -> "未知值"
💡 提示:多态变体不需要预定义类型,更灵活但类型检查较弱。在需要定义明确接口时使用常规变体,在需要灵活组合时使用多态变体。
⚠️ 注意:多态变体的类型推导可能导致意外的类型宽度。如果函数返回 `Red | `Blue,编译器会推导为包含这两种变体的开放类型,而不是精确的 color 类型。
变体的设计模式
状态机
(* Connection state machine: each state carries exactly the data that is
   meaningful in that state. *)
type connection_state =
  | Disconnected
  | Connecting of { host : string; timeout : int }
  | Connected of { socket_fd : int; since : float }
  | Error of { code : int; message : string }

(* [state_to_string s] renders [s], including its payload, as a label. *)
let state_to_string state =
  match state with
  | Disconnected -> "已断开"
  | Connecting { host = h; timeout = t } ->
    Printf.sprintf "连接中 (%s, timeout=%d)" h t
  | Connected { socket_fd = fd; since = started } ->
    Printf.sprintf "已连接 (fd=%d, since=%.0f)" fd started
  | Error { code = n; message = msg } ->
    Printf.sprintf "错误 (%d: %s)" n msg
命令模式
(* Commands against a key/value store, one constructor per operation. *)
type command =
  | Create of { name : string; data : string }
  | Read of { name : string }
  | Update of { name : string; data : string }
  | Delete of { name : string }
  | List

(* [execute_command store cmd] applies [cmd] to the association list
   [store], prints what it did, and returns the resulting store.
   [Read] and [List] return [store] unchanged; [Create] prepends a
   binding; [Update] replaces every binding of the name with one new
   binding; [Delete] removes every binding of the name. *)
let execute_command store cmd =
  (* The store with every binding of [key] removed. *)
  let without key = List.filter (fun (k, _) -> k <> key) store in
  match cmd with
  | List ->
    List.iter (fun (k, v) -> Printf.printf " %s: %s\n" k v) store;
    store
  | Read { name } ->
    let () =
      match List.assoc_opt name store with
      | None -> Printf.printf "未找到 %s\n" name
      | Some data -> Printf.printf "读取 %s = %s\n" name data
    in
    store
  | Create { name; data } ->
    Printf.printf "创建 %s = %s\n" name data;
    (name, data) :: store
  | Update { name; data } ->
    Printf.printf "更新 %s = %s\n" name data;
    (name, data) :: without name
  | Delete { name } ->
    Printf.printf "删除 %s\n" name;
    without name
业务场景
| 场景 | 变体设计 |
|---|---|
| AST | 递归变体 + 模式匹配 |
| 错误处理 | Result / 自定义错误变体 |
| 状态机 | 状态变体 + 转换函数 |
| 配置 | 变体表示不同配置类型 |
| 事件系统 | 事件变体 + 处理函数 |
| 解析器 | Token 变体 |