强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

OCaml 教程 / 变体类型(Variant Types)

变体类型(Variant Types)

概述

变体类型(Variant Types)是 OCaml 类型系统的核心特性,也称为代数数据类型(Algebraic Data Types, ADT)。它们允许你定义一个类型,该类型的值可以是若干**构造器(Constructor)**之一,每个构造器可以携带不同类型和数量的数据。

OCaml 的变体类型是对其他语言中 enum 的强大泛化——不仅能列举可能的值,每个构造器还可以附带数据。

简单变体

最简单的变体类似枚举:

(* A variant type with three constant (argument-less) constructors. *)
type color =
  | Red
  | Green
  | Blue

(* A constructor on its own is already a value of the type. *)
let r = Red
let g = Green

(* [color_to_string c] returns the Chinese display name of [c].
   There is one arm per constructor, so the match is exhaustive. *)
let color_to_string = function
  | Red -> "红色"
  | Green -> "绿色"
  | Blue -> "蓝色"

let _ = color_to_string Red  (* => "红色" *)

(* Equality and ordering work on variant values out of the box. *)
let _ = Red = Red            (* => true *)
let _ = compare Red Blue     (* constructors compare in declaration order *)

带数据的变体

(* Each constructor may carry a payload of its own type. *)
type shape =
  | Circle of float                    (* radius *)
  | Rectangle of float * float         (* width, height *)
  | Triangle of float * float * float  (* the three side lengths *)

(* Values are built by applying a constructor to its payload. *)
let c = Circle 5.0
let r = Rectangle (3.0, 4.0)
let t = Triangle (3.0, 4.0, 5.0)

(* [area s] computes the area of [s] by destructuring the payload of
   whichever constructor built it. The triangle arm applies Heron's
   formula to the semi-perimeter of the three sides. *)
let area = function
  | Circle radius -> Float.pi *. radius *. radius
  | Rectangle (width, height) -> width *. height
  | Triangle (a, b, c) ->
    let half = (a +. b +. c) /. 2.0 in
    sqrt (half *. (half -. a) *. (half -. b) *. (half -. c))

let _ = area c  (* => 78.54... *)
let _ = area r  (* => 12.0 *)
let _ = area t  (* => 6.0 *)

(* [describe s] renders [s] as a human-readable string.
   Each constructor has two arms: a guarded one ([when]) that accepts the
   shape only if all of its dimensions are strictly positive, followed by
   an unguarded one that reports the shape as invalid. *)
let describe s =
  let positive x = x > 0.0 in
  match s with
  | Circle radius when positive radius ->
    Printf.sprintf "圆(半径=%.2f)" radius
  | Circle _ -> "无效圆"
  | Rectangle (width, height) when positive width && positive height ->
    Printf.sprintf "矩形(%.2fx%.2f)" width height
  | Rectangle _ -> "无效矩形"
  | Triangle (a, b, c) when List.for_all positive [ a; b; c ] ->
    Printf.sprintf "三角形(%.2f,%.2f,%.2f)" a b c
  | Triangle _ -> "无效三角形"

💡 提示:构造器的用法类似函数调用——把参数交给它,它就把参数包装成对应的变体值。注意构造器本身不是普通函数,需要当作函数传递时要写成 `fun r -> Circle r`。"智能构造函数(Smart Constructor)"通常指在构造之前先做校验的普通函数,不要与构造器本身混淆。

递归变体

变体类型可以递归引用自身:

(* An arithmetic expression tree. Every operator node holds
   sub-expressions of the same [expr] type, which makes the type
   recursive. *)
type expr =
  | Num of float
  | Add of expr * expr
  | Mul of expr * expr
  | Sub of expr * expr
  | Div of expr * expr
  | Neg of expr

(* The expression (1 + 2) * 3. *)
let expr1 = Mul (Add (Num 1.0, Num 2.0), Num 3.0)

(* [eval e] evaluates [e] by structural recursion: a [Num] is its own
   value, and each operator node combines the values of its operands
   with the matching floating-point operation. *)
let rec eval = function
  | Num value -> value
  | Neg operand -> -. eval operand
  | Add (lhs, rhs) -> eval lhs +. eval rhs
  | Sub (lhs, rhs) -> eval lhs -. eval rhs
  | Mul (lhs, rhs) -> eval lhs *. eval rhs
  | Div (lhs, rhs) -> eval lhs /. eval rhs

let _ = eval expr1  (* => 9.0 *)

**注意**:递归变体的模式匹配必须覆盖所有构造器,否则编译器会发出警告。当修改变体定义、添加新构造器时,编译器会自动标记所有需要更新的匹配表达式。

(* [to_string e] renders [e] as a fully parenthesised infix string.

   Whole numbers are printed without a fractional part, every other
   number with two decimals. The whole-number case formats the float
   directly rather than converting through [int_of_float], whose result
   is unspecified once the value no longer fits in an [int]
   (for example [Num 1e30]). *)
let rec to_string e =
  match e with
  | Num n ->
    if Float.is_integer n then
      (* Adding 0.0 turns -0.0 into 0.0, so negative zero prints as "0". *)
      Printf.sprintf "%.0f" (n +. 0.0)
    else Printf.sprintf "%.2f" n
  | Add (a, b) -> Printf.sprintf "(%s + %s)" (to_string a) (to_string b)
  | Mul (a, b) -> Printf.sprintf "(%s * %s)" (to_string a) (to_string b)
  | Sub (a, b) -> Printf.sprintf "(%s - %s)" (to_string a) (to_string b)
  | Div (a, b) -> Printf.sprintf "(%s / %s)" (to_string a) (to_string b)
  | Neg e -> Printf.sprintf "(-%s)" (to_string e)

let _ = to_string expr1  (* => "((1 + 2) * 3)" *)

链表

(* A hand-written singly linked list with the same shape as OCaml's own
   list: [Nil] is the empty list and [Cons (x, rest)] puts [x] in front
   of [rest]. *)
type 'a my_list =
  | Nil
  | Cons of 'a * 'a my_list

(* [length l] counts the elements of [l]. *)
let rec length l =
  match l with
  | Nil -> 0
  | Cons (_, tl) -> 1 + length tl

(* [map f l] builds a new list by applying [f] to every element of [l]. *)
let rec map f l =
  match l with
  | Nil -> Nil
  | Cons (hd, tl) -> Cons (f hd, map f tl)

(* [fold_left f acc l] threads [acc] through [l] from the first element
   to the last, replacing it with [f acc x] at each element [x]. *)
let rec fold_left f acc l =
  match l with
  | Nil -> acc
  | Cons (hd, tl) -> fold_left f (f acc hd) tl

(* The list 1, 2, 3. *)
let lst = Cons (1, Cons (2, Cons (3, Nil)))
let _ = length lst    (* => 3 *)
let _ = map (fun x -> x * 2) lst
(* => Cons (2, Cons (4, Cons (6, Nil))) *)

二叉树

(* A binary tree: [Leaf] is the empty tree and [Node (l, v, r)] stores
   the value [v] between a left subtree [l] and a right subtree [r]. *)
type 'a tree =
  | Leaf
  | Node of 'a tree * 'a * 'a tree

(* A small example tree holding 1, 2, 3 and 4. *)
let sample_tree =
  Node (
    Node (Leaf, 1, Leaf),
    2,
    Node (Node (Leaf, 3, Leaf), 4, Leaf)
  )

(* [size t] is the number of [Node]s in [t]. *)
let rec size = function
  | Leaf -> 0
  | Node (l, _, r) -> 1 + size l + size r

(* [tree_max t] is the largest value stored in an [int] tree.
   [Leaf] contributes [min_int], which never wins against a stored
   value; as a consequence an empty tree also yields [min_int]. *)
let rec tree_max = function
  | Leaf -> min_int
  | Node (l, v, r) -> max v (max (tree_max l) (tree_max r))

(* [inorder t] lists the values of [t] in left-to-right (in-order)
   sequence.

   The traversal conses onto an accumulator and walks the right subtree
   before the left one, so each value is added exactly once. Joining the
   sub-results with [@] instead would re-copy the left result at every
   node, which is quadratic on left-leaning trees. *)
let inorder t =
  let rec collect acc = function
    | Leaf -> acc
    | Node (l, v, r) -> collect (v :: collect acc r) l
  in
  collect [] t

let _ = size sample_tree        (* => 4 *)
let _ = tree_max sample_tree    (* => 4 *)
let _ = inorder sample_tree     (* => [1; 2; 3; 4] *)

变体与模式匹配

变体类型的真正威力在于与模式匹配的结合:

(* A variant with a type parameter: an ['a option] is either [None] or
   [Some v] for some [v] of type ['a]. *)
type 'a option =
  | None
  | Some of 'a

(* [safe_divide a b] is the integer quotient [a / b] wrapped in [Some],
   or [None] when [b] is zero. *)
let safe_divide a b =
  match b with
  | 0 -> None
  | _ -> Some (a / b)

let _ = safe_divide 10 3  (* => Some 3 *)
let _ = safe_divide 10 0  (* => None *)

(* [handle_result r] turns the outcome of a division into a message. *)
let handle_result = function
  | None -> "错误: 除以零"
  | Some v -> Printf.sprintf "结果: %d" v

(* [opt >>= f] chains option-returning steps: [f] runs on the wrapped
   value if there is one, and [None] is passed along untouched. *)
let ( >>= ) opt f =
  match opt with
  | Some v -> f v
  | None -> None

(* [safe_calc a b c] computes [(a / b) / c], giving [None] as soon as
   either divisor is zero. *)
let safe_calc a b c =
  safe_divide a b >>= fun quotient -> safe_divide quotient c

let _ = safe_calc 100 2 5   (* => Some 10 *)
let _ = safe_calc 100 0 5   (* => None *)

Result 类型

(* A two-parameter variant: [Ok v] carries a success value and
   [Error e] carries a description of what went wrong. *)
type ('a, 'b) result =
  | Ok of 'a
  | Error of 'b

(* [parse_int s] converts [s] to an integer. The [Failure] raised by
   [int_of_string] is turned into an [Error] carrying a message that
   quotes the offending input. *)
let parse_int s =
  match int_of_string s with
  | n -> Ok n
  | exception Failure _ -> Error (Printf.sprintf "无法解析: %s" s)

(* [r >>= f] chains result-returning steps: [f] runs on the value of an
   [Ok], and an [Error] is passed along untouched. *)
let ( >>= ) r f =
  match r with
  | Error e -> Error e
  | Ok v -> f v

(* [parse_and_double s] parses [s] and doubles it; negative numbers are
   rejected with an [Error]. *)
let parse_and_double s =
  parse_int s >>= fun n ->
  if n < 0 then Error "负数不支持" else Ok (n * 2)

let _ = parse_and_double "21"    (* => Ok 42 *)
let _ = parse_and_double "abc"   (* => Error "无法解析: abc" *)
let _ = parse_and_double "-5"    (* => Error "负数不支持" *)

变体的编译表示

了解变体的内存布局有助于理解性能:

简单变体(无数据)

type color = Red | Green | Blue
(* Compiled to integer constants: Red=0, Green=1, Blue=2 *)
(* Each value occupies one machine word *)

带数据的变体

type shape =
  | Circle of float
  | Rectangle of float * float

(* Circle r is compiled to: [tag=0; r]
   Rectangle (w, h) is compiled to: [tag=1; w; h] *)
(* These values are heap-allocated blocks *)

优化:内联记录

(* Use inline records to optimise the memory layout: the constructor
   arguments get field names *)
type shape_optimized =
  | Circle of { radius : float }
  | Rectangle of { width : float; height : float }
(* Field names are erased at run time, which keeps the value compact.
   NOTE(review): "compact" is presumably relative to a constructor that
   points to a separately declared record type - confirm. *)

| 变体类型 | 内存布局 | 大小 |
| --- | --- | --- |
| 无数据(如 Red) | 整数常量 | 1 字 |
| 单数据(如 Circle of float) | [tag, data] | 2 字 |
| 多数据(如 Rect of float * float) | [tag, d1, d2] | 3 字 |
| 内联记录 | 与多数据类似 | 类似 |

⚠️ 注意:无数据的变体构造器使用整数表示,因此比较操作非常高效(等同于整数比较)。带数据的构造器需要堆分配,涉及指针解引用。

JSON 抽象语法树实例

(* A complete JSON abstract syntax tree. Objects keep their members as
   an association list, in source order. *)
type json =
  | JNull
  | JBool of bool
  | JNumber of float
  | JString of string
  | JArray of json list
  | JObject of (string * json) list

(* [quote_json_string s] wraps [s] in double quotes using JSON escaping
   rules: the quote, the backslash and control characters below 0x20 are
   escaped, and every other byte is copied through unchanged. Copying
   the remaining bytes verbatim keeps UTF-8 text such as "北京" readable;
   OCaml's own [%S] conversion would emit it as decimal [\ddd] escapes,
   which are not valid JSON. *)
let quote_json_string s =
  let buf = Buffer.create (String.length s + 2) in
  Buffer.add_char buf '"';
  String.iter
    (function
      | '"' -> Buffer.add_string buf "\\\""
      | '\\' -> Buffer.add_string buf "\\\\"
      | '\n' -> Buffer.add_string buf "\\n"
      | '\r' -> Buffer.add_string buf "\\r"
      | '\t' -> Buffer.add_string buf "\\t"
      | '\b' -> Buffer.add_string buf "\\b"
      | '\012' -> Buffer.add_string buf "\\f"
      | c when Char.code c < 0x20 ->
        Buffer.add_string buf (Printf.sprintf "\\u%04x" (Char.code c))
      | c -> Buffer.add_char buf c)
    s;
  Buffer.add_char buf '"';
  Buffer.contents buf

(* [to_json_string j] serialises [j] to JSON text on a single line, with
   ", " between elements and ": " after each object key.

   Numbers: NaN and the infinities have no JSON spelling and are written
   as [null]; whole numbers are printed without a fractional part,
   formatting the float directly so that values beyond the [int] range
   are not mangled; all other numbers use [%g]. *)
let rec to_json_string = function
  | JNull -> "null"
  | JBool true -> "true"
  | JBool false -> "false"
  | JNumber n ->
    if not (Float.is_finite n) then "null"
    else if Float.is_integer n then
      (* Adding 0.0 turns -0.0 into 0.0, so negative zero prints as "0". *)
      Printf.sprintf "%.0f" (n +. 0.0)
    else Printf.sprintf "%g" n
  | JString s -> quote_json_string s
  | JArray items ->
    "[" ^ String.concat ", " (List.map to_json_string items) ^ "]"
  | JObject pairs ->
    let kv = List.map (fun (k, v) ->
      quote_json_string k ^ ": " ^ to_json_string v
    ) pairs in
    "{" ^ String.concat ", " kv ^ "}"

(* [get keys json] follows [keys] through nested objects, one key per
   level. An empty path returns [json] itself. The result is [None] when
   a key is missing or when a non-object value is reached while keys
   remain. *)
let rec get keys json =
  match keys with
  | [] -> Some json
  | key :: rest ->
    (match json with
     | JObject pairs ->
       (match List.assoc_opt key pairs with
        | None -> None
        | Some v -> get rest v)
     | JNull | JBool _ | JNumber _ | JString _ | JArray _ -> None)

(* [json_number j] extracts the float from a [JNumber]; any other
   constructor gives [None]. *)
let json_number json =
  match json with
  | JNumber n -> Some n
  | JNull | JBool _ | JString _ | JArray _ | JObject _ -> None

(* [json_string j] extracts the text from a [JString]; any other
   constructor gives [None]. *)
let json_string json =
  match json with
  | JString s -> Some s
  | JNull | JBool _ | JNumber _ | JArray _ | JObject _ -> None

(* A sample document that uses every [json] constructor. *)
let data =
  JObject
    [
      ("name", JString "Alice");
      ("age", JNumber 30.0);
      ("active", JBool true);
      ( "address",
        JObject [ ("city", JString "北京"); ("zip", JString "100000") ] );
      ("scores", JArray [ JNumber 95.0; JNumber 87.0 ]);
      ("notes", JNull);
    ]

(* Prints the serialised document, then reports on two path lookups:
   the nested city name and the presence of the scores array. *)
let () =
  print_endline (to_json_string data);
  let city = get [ "address"; "city" ] data in
  (match city with
   | Some (JString name) -> Printf.printf "城市: %s\n" name
   | _ -> print_endline "未找到城市");
  let scores = get [ "scores" ] data in
  match scores with
  | Some (JArray _) -> print_endline "有成绩数据"
  | _ -> print_endline "无成绩数据"

类型别名与变体组合

(* Type aliases: new names for existing types. ['a result] fixes the
   error side of the standard result type to a (code, message) pair. *)
type error_code = int
type error_message = string
type 'a result = ('a, error_code * error_message) Stdlib.result

(* Several small variants combined into one response model. *)
type http_method =
  | GET
  | POST
  | PUT
  | DELETE
  | PATCH

type status_code =
  | OK
  | BadRequest
  | Unauthorized
  | NotFound
  | InternalError

(* A response record, polymorphic in the type of its body. *)
type 'a http_response = {
  status : status_code;
  headers : (string * string) list;
  body : 'a;
}

(* [status_to_int status] maps a status constructor to its numeric
   HTTP status code. *)
let status_to_int status =
  match status with
  | OK -> 200
  | BadRequest -> 400
  | Unauthorized -> 401
  | NotFound -> 404
  | InternalError -> 500

(* [method_to_string verb] gives the upper-case name of [verb]. *)
let method_to_string verb =
  match verb with
  | GET -> "GET"
  | POST -> "POST"
  | PUT -> "PUT"
  | DELETE -> "DELETE"
  | PATCH -> "PATCH"

Polymorphic Variants(多态变体)

OCaml 还支持多态变体,使用反引号标记:

(* Polymorphic variant tags can be used without declaring a type first. *)
let red = `Red
let blue = `Blue

(* [color_to_int color] converts a colour tag to a 24-bit RGB integer.
   The three named colours map to fixed constants; [`RGB (r, g, b)]
   packs its components as [r] in bits 16 and up, [g] in bits 8 and up,
   and [b] in the low bits. *)
let color_to_int color =
  match color with
  | `RGB (r, g, b) -> (r lsl 16) lor (g lsl 8) lor b
  | `Red -> 0xFF0000
  | `Green -> 0x00FF00
  | `Blue -> 0x0000FF

let _ = color_to_int `Red          (* => 16711680 *)
let _ = color_to_int (`RGB (255, 128, 0))

(* The type of a polymorphic variant can be "open".

   [handle_value v] converts the four known tags to a string. The final
   wildcard arm is what keeps the accepted type open: any other tag is
   allowed and is reported as unknown. The wildcard is written [_]
   rather than as a named variable because the value is never used, and
   an unused name triggers the compiler's unused-variable warning. *)
let handle_value v =
  match v with
  | `Int n -> string_of_int n
  | `Float f -> string_of_float f
  | `String s -> s
  | `Bool b -> string_of_bool b
  | _ -> "未知值"   (* matches every other tag *)

💡 提示:多态变体不需要预定义类型,更灵活但类型检查较弱。在需要定义明确接口时使用常规变体,在需要灵活组合时使用多态变体。

⚠️ 注意:多态变体的类型推导可能导致意外的类型宽度。如果函数返回 `Red | `Blue,编译器会推导为包含这两种变体的开放类型,而不是精确的 color 类型。

变体的设计模式

状态机

(* The states of a connection. Each state carries only the data that is
   meaningful in that state, as an inline record. *)
type connection_state =
  | Disconnected
  | Connecting of { host : string; timeout : int }
  | Connected of { socket_fd : int; since : float }
  | Error of { code : int; message : string }

(* [state_to_string state] renders [state], including its payload, as a
   display string. *)
let state_to_string state =
  match state with
  | Disconnected -> "已断开"
  | Connecting { host = target; timeout = limit } ->
    Printf.sprintf "连接中 (%s, timeout=%d)" target limit
  | Connected { socket_fd = fd; since = started } ->
    Printf.sprintf "已连接 (fd=%d, since=%.0f)" fd started
  | Error { code = number; message = text } ->
    Printf.sprintf "错误 (%d: %s)" number text

命令模式

(* The operations a client can request on a name -> data store. *)
type command =
  | Create of { name : string; data : string }
  | Read of { name : string }
  | Update of { name : string; data : string }
  | Delete of { name : string }
  | List

(* [execute_command store cmd] runs [cmd] against [store], an
   association list of (name, data) pairs. It prints a line describing
   the action and returns the resulting store:
   - [Create] adds the pair at the front, leaving existing entries alone;
   - [Read] and [List] only print and return [store] unchanged;
   - [Update] removes every entry for the name, then adds the new pair
     at the front;
   - [Delete] removes every entry for the name. *)
let execute_command store cmd =
  (* The store with every entry for [name] removed. *)
  let without name = List.filter (fun (key, _) -> key <> name) store in
  match cmd with
  | List ->
    List.iter (fun (key, value) -> Printf.printf "  %s: %s\n" key value) store;
    store
  | Read { name } ->
    let () =
      match List.assoc_opt name store with
      | None -> Printf.printf "未找到 %s\n" name
      | Some data -> Printf.printf "读取 %s = %s\n" name data
    in
    store
  | Create { name; data } ->
    Printf.printf "创建 %s = %s\n" name data;
    (name, data) :: store
  | Update { name; data } ->
    Printf.printf "更新 %s = %s\n" name data;
    (name, data) :: without name
  | Delete { name } ->
    Printf.printf "删除 %s\n" name;
    without name

业务场景

| 场景 | 变体设计 |
| --- | --- |
| AST | 递归变体 + 模式匹配 |
| 错误处理 | Result / 自定义错误变体 |
| 状态机 | 状态变体 + 转换函数 |
| 配置 | 变体表示不同配置类型 |
| 事件系统 | 事件变体 + 处理函数 |
| 解析器 | Token 变体 |

扩展阅读