OCaml 教程 / GADTs(广义代数数据类型)
GADTs(广义代数数据类型)
GADTs(Generalized Algebraic Data Types)是 OCaml 类型系统中最强大的特性之一。它允许你在构造器的返回类型上添加约束,从而实现更精确的类型控制。
1. 基本语法
1.1 普通 ADT vs GADT
(* 普通 ADT *)
type expr =
| Int of int
| Add of expr * expr
| Bool of bool
(* GADT 语法 *)
type 'a value =
| Int : int -> int value (* 返回类型是 int value *)
| Bool : bool -> bool value (* 返回类型是 bool value *)
| 特性 | 普通 ADT | GADT |
|---|---|---|
| 构造器返回类型 | 统一为 expr | 可以是 'a value 的不同实例 |
| 类型信息 | 丢失 | 在类型中保留 |
| 模式匹配后 | 需要运行时检查 | 编译器自动推导 |
| 语法 | Tag of arg_type | Tag : arg_type -> result_type |
1.2 GADT 声明
type _ term =
| Int : int -> int term
| Bool : bool -> bool term
| Add : int term * int term -> int term
| If : bool term * 'a term * 'a term -> 'a term
2. 类型安全的表达式求值器
2.1 不使用 GADT(有运行时错误风险)
type expr =
| EInt of int
| EBool of bool
| EAdd of expr * expr
type value =
| VInt of int
| VBool of bool
let rec eval = function
| EInt n -> VInt n
| EBool b -> VBool b
| EAdd (a, b) ->
match eval a, eval b with
| VInt x, VInt y -> VInt (x + y)
| _ -> failwith "type error" (* 运行时错误! *)
2.2 使用 GADT(编译期安全)
type _ value =
| Int : int -> int value
| Bool : bool -> bool value
type _ expr =
| Val : 'a value -> 'a expr
| Add : int expr * int expr -> int expr
| Equal : int expr * int expr -> bool expr
| If : bool expr * 'a expr * 'a expr -> 'a expr
let rec eval : type a. a expr -> a = function
| Val (Int n) -> n
| Val (Bool b) -> b
| Add (a, b) -> eval a + eval b
| Equal (a, b) -> eval a = eval b
| If (cond, then_, else_) ->
if eval cond then eval then_ else eval else_
💡 提示:let rec eval : type a. a expr -> a 中的 type a 是局部抽象类型(locally abstract type),是 GADT 模式匹配所必需的。
2.3 使用示例
let two : int = eval (Add (Val (Int 1), Val (Int 1)))
let is_true : bool = eval (Equal (Val (Int 1), Val (Int 1)))
let result : int = eval (If (Val (Bool true), Val (Int 42), Val (Int 0)))
⚠️ 注意点:eval (Add (Val (Bool true), Val (Int 1))) 会在编译期报错,因为 Add 的参数必须是 int expr,而 Bool true 是 bool expr。
3. 类型索引(Type Indexing)
GADTs 的核心概念是类型索引——用类型参数来"标记"不同的构造器:
type ('a, 'b) eq = Eq : ('a, 'a) eq (* 只有当 'a = 'b 时才能构造 *)
let cast : type a b. (a, b) eq -> a -> b = fun Eq x -> x
(* 编译器知道:当 Eq 匹配时,a = b *)
let x : string = cast Eq 42 (* 编译错误!int ≠ string *)
let y : int = cast Eq 42 (* OK *)
4. Phantom Types 与 GADTs
Phantom types(幻影类型)是不出现在值构造中的类型参数:
type locked = Locked
type unlocked = Unlocked
type 'state door =
| Closed : locked door
| Open : unlocked door
let open_door : locked door -> unlocked door = function
| Closed -> Open
let close_door : unlocked door -> locked door = function
| Open -> Closed
(* 类型安全:不能打开已经打开的门 *)
(* let bad = open_door Open (* 编译错误! *) *)
| 状态 | 构造器 | 类型 |
|---|---|---|
| 锁着的门 | Closed | locked door |
| 开着的门 | Open | unlocked door |
5. 类型安全的状态机
5.1 线性状态机
type initial = Initial
type connected = Connected
type authenticated = Authenticated
type _ connection =
| Connecting : initial connection
| Connected : string -> connected connection
| Authed : string * string -> authenticated connection
let connect : initial connection -> connected connection = function
| Connecting -> Connected "127.0.0.1"
let authenticate : connected connection -> string -> authenticated connection =
fun conn password ->
match conn with
| Connected host -> Authed (host, password)
let execute : authenticated connection -> string -> string =
fun (Authed (host, user)) cmd ->
host ^ ": executing " ^ cmd ^ " as " ^ user
(* ✅ 类型安全的调用链 *)
let session =
let conn = connect Connecting in
let authed = authenticate conn "secret" in
execute authed "SELECT *"
(* ❌ 不能跳过步骤 *)
(* let bad = execute (connect Connecting) "SELECT *" (* 编译错误! *) *)
6. Existential Types via GADTs
GADTs 可以编码存在类型(existential types):
type packed = Pack : 'a * ('a -> string) -> packed
let print_packed (Pack (x, show)) = show x
(* 异构集合 *)
let items : packed list = [
Pack (42, string_of_int);
Pack ("hello", fun s -> s);
Pack (3.14, string_of_float);
]
let () = List.iter (fun p -> print_endline (print_packed p)) items
6.1 类型擦除与存在类型
type any = Any : 'a -> any
let forget x = Any x
type showable = Show : 'a * ('a -> string) -> showable
let show (Show (x, f)) = f x
let items = [Show (42, string_of_int); Show ("hello", Fun.id)]
let () = List.iter (fun x -> print_endline (show x)) items
7. GADTs 与模式匹配
7.1 精确的模式匹配
type 'a typ =
| TInt : int typ
| TBool : bool typ
| TString : string typ
let default_value : type a. a typ -> a = function
| TInt -> 0
| TBool -> false
| TString -> ""
(* 编译器知道匹配 TInt 时 a = int,所以返回 int 是安全的 *)
7.2 不可穷举的模式
type (_, _) eq = Refl : ('a, 'a) eq
type ('a, 'b) either =
| Left : 'a -> ('a, 'b) either
| Right : 'b -> ('a, 'b) either
(* GADT 使得某些函数类型更加精确 *)
let apply_eq : type a b. (a, b) eq -> a -> b = fun Refl x -> x
⚠️ 注意点:GADT 模式匹配必须使用 let f : type a. ... = function ... 语法,或者在 .mli 文件中声明类型。
8. GADTs 设计模式
8.1 类型安全的 DSL
type _ expr =
| Lit : int -> int expr
| Neg : int expr -> int expr
| Add : int expr * int expr -> int expr
| And : bool expr * bool expr -> bool expr
| Not : bool expr -> bool expr
| Eq : int expr * int expr -> bool expr
let rec eval : type a. a expr -> a = function
| Lit n -> n
| Neg e -> -(eval e)
| Add (a, b) -> eval a + eval b
| And (a, b) -> eval a && eval b
| Not e -> not (eval e)
| Eq (a, b) -> eval a = eval b
let to_string : type a. a expr -> string = function
| Lit n -> string_of_int n
| Neg e -> "-(" ^ to_string e ^ ")"
| Add (a, b) -> "(" ^ to_string a ^ " + " ^ to_string b ^ ")"
| And (a, b) -> "(" ^ to_string a ^ " && " ^ to_string b ^ ")"
| Not e -> "!(" ^ to_string e ^ ")"
| Eq (a, b) -> "(" ^ to_string a ^ " = " ^ to_string b ^ ")"
8.2 类型见证(Type Witness)
type 'a witness =
| WInt : int witness
| WString : string witness
| WList : 'a witness -> 'a list witness
| WOption : 'a witness -> 'a option witness
let rec default : type a. a witness -> a = function
| WInt -> 0
| WString -> ""
| WList _ -> []
| WOption _ -> None
9. 限制与注意事项
9.1 性能考虑
| 方面 | 影响 |
|---|---|
| 编译时间 | GADT 推导较慢 |
| 运行时 | 通常无额外开销(类型擦除) |
| 代码大小 | 可能增加(需要更多模式匹配) |
9.2 常见陷阱
(* ❌ 陷阱1:忘记局部抽象类型 *)
(* let f (x : int expr) = match x with Lit n -> n | _ -> 0 *)
(* 错误:需要 type annotation *)
(* ✅ 正确写法 *)
let f : int expr -> int = function Lit n -> n | _ -> 0
(* ❌ 陷阱2:GADT 不支持 polymorphic recursion *)
(* 需要额外的类型注解 *)
9.3 何时使用 GADT
| 场景 | 推荐使用 |
|---|---|
| 类型安全的 DSL | ✅ 强烈推荐 |
| 有限状态机 | ✅ 推荐 |
| 类型安全的解释器 | ✅ 推荐 |
| 简单的树结构 | ❌ 普通 ADT 即可 |
| 数据库模型 | ⚠️ 视复杂度而定 |
10. 业务场景
10.1 类型安全的 SQL 查询构造
type _ query =
| Select : string list -> string list query
| Where : bool condition * 'a query -> 'a query
| Limit : int * 'a query -> 'a query
and _ condition =
| Eq : string * string -> bool condition
| Gt : string * int -> bool condition
| And : bool condition * bool condition -> bool condition
let rec string_of_query : type a. a query -> string = function
| Select cols -> "SELECT " ^ String.concat ", " cols
| Where (cond, q) ->
string_of_query q ^ " WHERE " ^ string_of_condition cond
| Limit (n, q) ->
string_of_query q ^ " LIMIT " ^ string_of_int n
and string_of_condition : type a. a condition -> string = function
| Eq (col, val_) -> col ^ " = '" ^ val_ ^ "'"
| Gt (col, n) -> col ^ " > " ^ string_of_int n
| And (a, b) ->
"(" ^ string_of_condition a ^ " AND " ^ string_of_condition b ^ ")"
10.2 类型安全的 API 端点定义
type _ endpoint =
| GetUsers : user list endpoint
| GetUser : int -> user option endpoint
| CreateUser : user -> user endpoint
and user = { id : int; name : string }
let handle_request : type a. a endpoint -> a = function
| GetUsers -> [{ id = 1; name = "Alice" }]
| GetUser id -> Some { id; name = "User " ^ string_of_int id }
| CreateUser u -> { u with id = 42 }
11. 扩展阅读
| 资源 | 说明 |
|---|---|
| OCaml Manual: GADTs | v2.ocaml.org |
| “Fun with Type Functions” | Simon Peyton Jones et al. |
| Real World OCaml: GADTs | Chapter 11 |
| GADTs in Haskell | GHC User’s Guide |
💡 提示:GADTs 是高级类型系统特性,初学者建议先掌握普通 ADT 和模式匹配,再逐步学习 GADTs。从简单的 phantom type 开始练习,再尝试类型安全的 DSL。