强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧.
文档目录

OCaml 教程 / GADTs(广义代数数据类型)

GADTs(广义代数数据类型)

GADTs(Generalized Algebraic Data Types)是 OCaml 类型系统中最强大的特性之一。它允许你在构造器的返回类型上添加约束,从而实现更精确的类型控制。


1. 基本语法

1.1 普通 ADT vs GADT

(* 普通 ADT *)
type expr =
  | Int of int
  | Add of expr * expr
  | Bool of bool

(* GADT 语法 *)
type 'a value =
  | Int : int -> int value       (* 返回类型是 int value *)
  | Bool : bool -> bool value   (* 返回类型是 bool value *)
特性普通 ADTGADT
构造器返回类型统一为 expr可以是 'a value 的不同实例
类型信息丢失在类型中保留
模式匹配后需要运行时检查编译器自动推导
语法Tag of arg_typeTag : arg_type -> result_type

1.2 GADT 声明

type _ term =
  | Int : int -> int term
  | Bool : bool -> bool term
  | Add : int term * int term -> int term
  | If : bool term * 'a term * 'a term -> 'a term

2. 类型安全的表达式求值器

2.1 不使用 GADT(有运行时错误风险)

type expr =
  | EInt of int
  | EBool of bool
  | EAdd of expr * expr

type value =
  | VInt of int
  | VBool of bool

let rec eval = function
  | EInt n -> VInt n
  | EBool b -> VBool b
  | EAdd (a, b) ->
    match eval a, eval b with
    | VInt x, VInt y -> VInt (x + y)
    | _ -> failwith "type error"  (* 运行时错误! *)

2.2 使用 GADT(编译期安全)

type _ value =
  | Int : int -> int value
  | Bool : bool -> bool value

type _ expr =
  | Val : 'a value -> 'a expr
  | Add : int expr * int expr -> int expr
  | Equal : int expr * int expr -> bool expr
  | If : bool expr * 'a expr * 'a expr -> 'a expr

let rec eval : type a. a expr -> a = function
  | Val (Int n) -> n
  | Val (Bool b) -> b
  | Add (a, b) -> eval a + eval b
  | Equal (a, b) -> eval a = eval b
  | If (cond, then_, else_) ->
    if eval cond then eval then_ else eval else_

💡 提示let rec eval : type a. a expr -> a 中的 type a 是局部抽象类型(locally abstract type),是 GADT 模式匹配所必需的。

2.3 使用示例

let two : int = eval (Add (Val (Int 1), Val (Int 1)))
let is_true : bool = eval (Equal (Val (Int 1), Val (Int 1)))
let result : int = eval (If (Val (Bool true), Val (Int 42), Val (Int 0)))

⚠️ 注意点eval (Add (Val (Bool true), Val (Int 1))) 会在编译期报错,因为 Add 的参数必须是 int expr,而 Bool truebool expr


3. 类型索引(Type Indexing)

GADTs 的核心概念是类型索引——用类型参数来"标记"不同的构造器:

type ('a, 'b) eq = Eq : ('a, 'a) eq  (* 只有当 'a = 'b 时才能构造 *)

let cast : type a b. (a, b) eq -> a -> b = fun Eq x -> x
(* 编译器知道:当 Eq 匹配时,a = b *)

let x : string = cast Eq 42   (* 编译错误!int ≠ string *)
let y : int = cast Eq 42      (* OK *)

4. Phantom Types 与 GADTs

Phantom types(幻影类型)是不出现在值构造中的类型参数:

type locked = Locked
type unlocked = Unlocked

type 'state door =
  | Closed : locked door
  | Open : unlocked door

let open_door : locked door -> unlocked door = function
  | Closed -> Open

let close_door : unlocked door -> locked door = function
  | Open -> Closed

(* 类型安全:不能打开已经打开的门 *)
(* let bad = open_door Open  (* 编译错误! *) *)
状态构造器类型
锁着的门Closedlocked door
开着的门Openunlocked door

5. 类型安全的状态机

5.1 线性状态机

type initial = Initial
type connected = Connected
type authenticated = Authenticated

type _ connection =
  | Connecting : initial connection
  | Connected : string -> connected connection
  | Authed : string * string -> authenticated connection

let connect : initial connection -> connected connection = function
  | Connecting -> Connected "127.0.0.1"

let authenticate : connected connection -> string -> authenticated connection =
  fun conn password ->
    match conn with
    | Connected host -> Authed (host, password)

let execute : authenticated connection -> string -> string =
  fun (Authed (host, user)) cmd ->
    host ^ ": executing " ^ cmd ^ " as " ^ user

(* ✅ 类型安全的调用链 *)
let session =
  let conn = connect Connecting in
  let authed = authenticate conn "secret" in
  execute authed "SELECT *"

(* ❌ 不能跳过步骤 *)
(* let bad = execute (connect Connecting) "SELECT *"  (* 编译错误! *) *)

6. Existential Types via GADTs

GADTs 可以编码存在类型(existential types):

type packed = Pack : 'a * ('a -> string) -> packed

let print_packed (Pack (x, show)) = show x

(* 异构集合 *)
let items : packed list = [
  Pack (42, string_of_int);
  Pack ("hello", fun s -> s);
  Pack (3.14, string_of_float);
]

let () = List.iter (fun p -> print_endline (print_packed p)) items

6.1 类型擦除与存在类型

type any = Any : 'a -> any

let forget x = Any x

type showable = Show : 'a * ('a -> string) -> showable

let show (Show (x, f)) = f x

let items = [Show (42, string_of_int); Show ("hello", Fun.id)]
let () = List.iter (fun x -> print_endline (show x)) items

7. GADTs 与模式匹配

7.1 精确的模式匹配

type 'a typ =
  | TInt : int typ
  | TBool : bool typ
  | TString : string typ

let default_value : type a. a typ -> a = function
  | TInt -> 0
  | TBool -> false
  | TString -> ""

(* 编译器知道匹配 TInt 时 a = int,所以返回 int 是安全的 *)

7.2 不可穷举的模式

type (_, _) eq = Refl : ('a, 'a) eq

type ('a, 'b) either =
  | Left : 'a -> ('a, 'b) either
  | Right : 'b -> ('a, 'b) either

(* GADT 使得某些函数类型更加精确 *)
let apply_eq : type a b. (a, b) eq -> a -> b = fun Refl x -> x

⚠️ 注意点:GADT 模式匹配必须使用 let f : type a. ... = function ... 语法,或者在 .mli 文件中声明类型。


8. GADTs 设计模式

8.1 类型安全的 DSL

type _ expr =
  | Lit : int -> int expr
  | Neg : int expr -> int expr
  | Add : int expr * int expr -> int expr
  | And : bool expr * bool expr -> bool expr
  | Not : bool expr -> bool expr
  | Eq : int expr * int expr -> bool expr

let rec eval : type a. a expr -> a = function
  | Lit n -> n
  | Neg e -> -(eval e)
  | Add (a, b) -> eval a + eval b
  | And (a, b) -> eval a && eval b
  | Not e -> not (eval e)
  | Eq (a, b) -> eval a = eval b

let to_string : type a. a expr -> string = function
  | Lit n -> string_of_int n
  | Neg e -> "-(" ^ to_string e ^ ")"
  | Add (a, b) -> "(" ^ to_string a ^ " + " ^ to_string b ^ ")"
  | And (a, b) -> "(" ^ to_string a ^ " && " ^ to_string b ^ ")"
  | Not e -> "!(" ^ to_string e ^ ")"
  | Eq (a, b) -> "(" ^ to_string a ^ " = " ^ to_string b ^ ")"

8.2 类型见证(Type Witness)

type 'a witness =
  | WInt : int witness
  | WString : string witness
  | WList : 'a witness -> 'a list witness
  | WOption : 'a witness -> 'a option witness

let rec default : type a. a witness -> a = function
  | WInt -> 0
  | WString -> ""
  | WList _ -> []
  | WOption _ -> None

9. 限制与注意事项

9.1 性能考虑

方面影响
编译时间GADT 推导较慢
运行时通常无额外开销(类型擦除)
代码大小可能增加(需要更多模式匹配)

9.2 常见陷阱

(* ❌ 陷阱1:忘记局部抽象类型 *)
(* let f (x : int expr) = match x with Lit n -> n | _ -> 0 *)
(* 错误:需要 type annotation *)

(* ✅ 正确写法 *)
let f : int expr -> int = function Lit n -> n | _ -> 0

(* ❌ 陷阱2:GADT 不支持 polymorphic recursion *)
(* 需要额外的类型注解 *)

9.3 何时使用 GADT

场景推荐使用
类型安全的 DSL✅ 强烈推荐
有限状态机✅ 推荐
类型安全的解释器✅ 推荐
简单的树结构❌ 普通 ADT 即可
数据库模型⚠️ 视复杂度而定

10. 业务场景

10.1 类型安全的 SQL 查询构造

type _ query =
  | Select : string list -> string list query
  | Where : bool condition * 'a query -> 'a query
  | Limit : int * 'a query -> 'a query

and _ condition =
  | Eq : string * string -> bool condition
  | Gt : string * int -> bool condition
  | And : bool condition * bool condition -> bool condition

let rec string_of_query : type a. a query -> string = function
  | Select cols -> "SELECT " ^ String.concat ", " cols
  | Where (cond, q) ->
    string_of_query q ^ " WHERE " ^ string_of_condition cond
  | Limit (n, q) ->
    string_of_query q ^ " LIMIT " ^ string_of_int n

and string_of_condition : type a. a condition -> string = function
  | Eq (col, val_) -> col ^ " = '" ^ val_ ^ "'"
  | Gt (col, n) -> col ^ " > " ^ string_of_int n
  | And (a, b) ->
    "(" ^ string_of_condition a ^ " AND " ^ string_of_condition b ^ ")"

10.2 类型安全的 API 端点定义

type _ endpoint =
  | GetUsers : user list endpoint
  | GetUser : int -> user option endpoint
  | CreateUser : user -> user endpoint

and user = { id : int; name : string }

let handle_request : type a. a endpoint -> a = function
  | GetUsers -> [{ id = 1; name = "Alice" }]
  | GetUser id -> Some { id; name = "User " ^ string_of_int id }
  | CreateUser u -> { u with id = 42 }

11. 扩展阅读

资源说明
OCaml Manual: GADTsv2.ocaml.org
“Fun with Type Functions”Simon Peyton Jones et al.
Real World OCaml: GADTsChapter 11
GADTs in HaskellGHC User’s Guide

💡 提示:GADTs 是高级类型系统特性,初学者建议先掌握普通 ADT 和模式匹配,再逐步学习 GADTs。从简单的 phantom type 开始练习,再尝试类型安全的 DSL。