OCaml 教程 / 依赖记录与类型级编程
依赖记录与类型级编程
OCaml 并不是一门依赖类型语言,但通过 GADTs 和模块系统,我们可以在一定程度上模拟依赖类型、类型级自然数和长度索引向量等高级特性。
1. 依赖函数类型(Dependent Function Types)
1.1 什么是依赖类型
依赖类型(Dependent Types)是指类型的定义依赖于值:
-- 伪代码(Agda/Idris 风格)
replicate : (n : Nat) -> (a : A) -> Vec A n
-- 返回类型 Vec A n 依赖于值 n
| 概念 | 说明 | OCaml 支持程度 |
|---|---|---|
| 依赖函数类型 | 返回类型依赖于参数值 | ⚠️ 通过 GADTs 部分支持 |
| 依赖对 (Σ类型) | 第二个元素的类型依赖于第一个元素的值 | ✅ 通过 GADTs |
| 向量 (Vec A n) | 长度索引的列表 | ✅ 通过 GADTs |
| 类型级自然数 | Peano 数编码 | ✅ 通过类型 |
1.2 OCaml 中的近似
(* OCaml 不能直接写出 replicate : (n:int) -> 'a -> 'a list
其中返回类型依赖于 n 的值,但可以用 GADTs 模拟 *)
(* 先引入类型级自然数 *)
type zero = Zero
type 'n succ = Succ
type _ nat =
| Z : zero nat
| S : 'n nat -> 'n succ nat
(* 长度索引向量 *)
type (_, _) vec =
| Nil : (zero, 'a) vec
| Cons : 'a * ('n, 'a) vec -> ('n succ, 'a) vec
let x : (zero succ succ, int) vec = Cons (1, Cons (2, Nil))
(* x 是长度为 2 的 int 向量 *)
2. 用 GADTs 编码依赖类型
2.1 类型级自然数
type zero = Zero
type 'n succ = Succ
type _ nat =
| Z : zero nat
| S : 'n nat -> 'n succ nat
(* 类型级自然数:0, 1, 2, 3, ... *)
let n0 : zero nat = Z
let n1 : zero succ nat = S Z
let n2 : zero succ succ nat = S (S Z)
let n3 : zero succ succ succ nat = S (S (S Z))
2.2 类型级加法
(* 类型级加法证明 *)
type (_, _, _) plus =
| PlusZ : (zero, 'n, 'n) plus
| PlusS : ('m, 'n, 'p) plus
-> ('m succ, 'n, 'p succ) plus
(* 证明 2 + 1 = 3 *)
let proof_2_plus_1 : (zero succ succ, zero succ, zero succ succ succ) plus =
PlusS (PlusS PlusZ)
(* 运行时加法 *)
let rec nat_to_int : type n. n nat -> int = function
| Z -> 0
| S n -> 1 + nat_to_int n
let rec add_nat : type m n. m nat -> n nat -> (m, n, _) nat =
fun m n -> match m with
| Z -> n
| S m' -> S (add_nat m' n)
let result = add_nat n2 n1 (* 类型: zero succ succ succ nat *)
let () = Printf.printf "2 + 1 = %d\n" (nat_to_int result)
2.3 依赖对(Σ 类型)
(* ∃(n:nat). vec(n, int) *)
type packed_vec = PV : 'n nat * ('n, int) vec -> packed_vec
let pv1 = PV (S (S Z), Cons (1, Cons (2, Nil)))
let pv2 = PV (S (S (S Z)), Cons (3, Cons (4, Cons (5, Nil))))
(* 获取长度 *)
let pv_length (PV (n, _)) = nat_to_int n
let () = Printf.printf "pv1 length: %d\n" (pv_length pv1) (* 2 *)
let () = Printf.printf "pv2 length: %d\n" (pv_length pv2) (* 3 *)
3. 类型级编程
3.1 类型级布尔
type true_ = True
type false_ = False
type (_, _, _) and_ =
| AndTT : (true_, true_, true_) and_
| AndTF : (true_, false_, false_) and_
| AndFT : (false_, true_, false_) and_
| AndFF : (false_, false_, false_) and_
type (_, _, _) or_ =
| OrTT : (true_, true_, true_) or_
| OrTF : (true_, false_, true_) or_
| OrFT : (false_, true_, true_) or_
| OrFF : (false_, false_, false_) or_
type _ not =
| NotT : true_ not -> false_
| NotF : false_ not -> true_
3.2 类型级列表
type nil = Nil
type ('h, 't) cons = Cons
type _ tlist =
| TNil : nil tlist
| TCons : 'h * 't tlist -> ('h, 't) cons tlist
(* 类型级列表操作 *)
type (_, _, _) tappend =
| TAppendNil : (nil, 'b, 'b) tappend
| TAppendCons : ('t, 'b, 'c) tappend
-> (('h, 't) cons, 'b, ('h, 'c) cons) tappend
3.3 类型级相等
type (_, _) eq = Refl : ('a, 'a) eq
let cast : type a b. (a, b) eq -> a -> b = fun Refl x -> x
(* 证明 int = int *)
let int_eq_int : (int, int) eq = Refl
(* 对称性 *)
let sym : type a b. (a, b) eq -> (b, a) eq = fun Refl -> Refl
(* 传递性 *)
let trans : type a b c. (a, b) eq -> (b, c) eq -> (a, c) eq =
fun Refl Refl -> Refl
4. 向量类型(长度索引)
4.1 基本向量操作
type zero = Zero
type 'n succ = Succ
type (_, _) vec =
| Nil : (zero, 'a) vec
| Cons : 'a * ('n, 'a) vec -> ('n succ, 'a) vec
(* 安全的 head —— 非空向量一定有 head *)
let head : type n. (n succ, 'a) vec -> 'a = fun (Cons (x, _)) -> x
(* 安全的 tail *)
let tail : type n. (n succ, 'a) vec -> (n, 'a) vec = fun (Cons (_, xs)) -> xs
(* 长度 *)
let rec length : type n a. (n, a) vec -> int = function
| Nil -> 0
| Cons (_, xs) -> 1 + length xs
(* 映射 *)
let rec vmap : type n a b. (a -> b) -> (n, a) vec -> (n, b) vec =
fun f -> function
| Nil -> Nil
| Cons (x, xs) -> Cons (f x, vmap f xs)
(* zip —— 两个相同长度的向量 *)
let rec vzip : type n a b. (n, a) vec -> (n, b) vec -> (n, a * b) vec =
fun xs ys -> match xs, ys with
| Nil, Nil -> Nil
| Cons (x, xs'), Cons (y, ys') -> Cons ((x, y), vzip xs' ys')
4.2 使用示例
let v1 : (zero succ succ succ, int) vec =
Cons (1, Cons (2, Cons (3, Nil)))
let v2 : (zero succ succ succ, string) vec =
Cons ("a", Cons ("b", Cons ("c", Nil)))
let zipped = vzip v1 v2
(* 类型保证:只能 zip 相同长度的向量 *)
let () =
let (Cons ((1, "a"), Cons ((2, "b"), Cons ((3, "c"), Nil)))) = zipped in
print_endline "zipped OK"
(* 编译时安全检查 *)
(* let bad = vzip v1 (Cons (1, Nil)) (* 编译错误!长度不匹配 *) *)
⚠️ 注意点:类型级向量的模式匹配需要精确的类型注解。如果类型不匹配,错误消息可能很复杂。
4.3 安全的索引
(* 使用类型级自然数作为索引 *)
type (_, _) fin =
| FZ : ('n succ, zero) fin
| FS : ('n, 'i) fin -> ('n succ, 'i succ) fin
(* fin 类型:fin(n, i) 表示 i 是 0..n-1 范围内的索引 *)
(* 安全的向量索引访问 *)
let rec vget : type n a i. (n, a) vec -> (n, i) fin -> a =
fun vec idx -> match vec, idx with
| Cons (x, _), FZ -> x
| Cons (_, xs), FS i -> vget xs i
| Nil, _ -> . (* 不可能:fin 保证索引在范围内 *)
(* 示例 *)
let v : (zero succ succ succ, int) vec = Cons (10, Cons (20, Cons (30, Nil)))
let idx0 : (zero succ succ succ, zero) fin = FZ
let idx1 : (zero succ succ succ, zero succ) fin = FS FZ
let idx2 : (zero succ succ succ, zero succ succ) fin = FS (FS FZ)
let () =
Printf.printf "v[0] = %d\n" (vget v idx0); (* 10 *)
Printf.printf "v[1] = %d\n" (vget v idx1); (* 20 *)
Printf.printf "v[2] = %d\n" (vget v idx2) (* 30 *)
(* 编译时安全:不能越界 *)
(* let idx3 = FS (FS (FS FZ)) (* 编译时无法匹配 3 元素向量 *) *)
5. 类型安全的格式化字符串
5.1 格式化类型
(* 类型级格式化规范 *)
type (_, _) fmt =
| FEnd : (string, string) fmt
| FInt : ('r, 's) fmt -> (int -> 'r, 's) fmt
| FStr : ('r, 's) fmt -> (string -> 'r, 's) fmt
| FLit : string * ('r, 's) fmt -> ('r, 's) fmt
(* 格式化函数 *)
let rec sprintf : type r s. (r, s) fmt -> r = function
| FEnd -> ""
| FInt rest -> fun n -> string_of_int n ^ sprintf rest
| FStr rest -> fun s -> s ^ sprintf rest
| FLit (lit, rest) -> lit ^ sprintf rest
(* 格式化字符串解析器 *)
let rec format : type r s. (r, s) fmt -> r -> s = function
| FEnd -> fun acc -> acc
| FInt rest -> fun f n -> format rest (f n)
| FStr rest -> fun f s -> format rest (f s)
| FLit (_, rest) -> fun f -> format rest f
(* 构建格式化规范 *)
let hello_fmt : (int -> string -> string, string) fmt =
FLit ("Hello, ", FInt (FLit ("! Your name is ", FStr (FLit (".", FEnd)))))
(* 使用 *)
let result = format hello_fmt (fun n name ->
Printf.sprintf "Hello, %d! Your name is %s." n name
) 42 "Alice"
5.2 简化的类型安全格式化
(* 更实用的版本 *)
type ('a, 'b) fmt =
| Lit : string * ('a, 'b) fmt -> ('a, 'b) fmt
| Int : ('a, 'b) fmt -> (int -> 'a, 'b) fmt
| Str : ('a, 'b) fmt -> (string -> 'a, 'b) fmt
| End : (string, string) fmt
let rec ksprintf : type a b. (string -> b) -> (a, b) fmt -> a = fun k -> function
| End -> k ""
| Lit (s, rest) -> ksprintf (fun acc -> k (s ^ acc)) rest
| Int rest -> fun n -> ksprintf (fun acc -> k (string_of_int n ^ acc)) rest
| Str rest -> fun s -> ksprintf (fun acc -> k (s ^ acc)) rest
let sprintf fmt = ksprintf Fun.id fmt
(* 使用 *)
let my_fmt : (int -> string -> string, string) fmt =
Lit ("User ", Int (Lit (" is ", Str (End))))
let () = print_endline (sprintf my_fmt 42 "Alice")
(* 输出: "User 42 is Alice" *)
6. 类型安全的状态机
6.1 状态转移
type locked = Locked
type unlocked = Unlocked
type (_, _) action =
| InsertCoin : (locked, unlocked) action
| Push : (unlocked, locked) action
type 'state machine = {
state : string;
process : 'state;
}
let insert_coin : locked machine -> unlocked machine = fun m ->
{ state = "unlocked (" ^ m.state ^ ")"; process = Unlocked }
let push : unlocked machine -> locked machine = fun m ->
{ state = "locked (" ^ m.state ^ ")"; process = Locked }
(* 使用 *)
let start : locked machine = { state = "initial"; process = Locked }
let opened = insert_coin start
let closed = push opened
let opened_again = insert_coin closed
let () = Printf.printf "Final state: %s\n" opened_again.state
6.2 网络协议状态机
type disconnected = Disconnected
type connected = Connected
type authenticated = Authenticated
type (_, _) protocol =
| Connect : (disconnected, connected) protocol
| Authenticate : string -> (connected, authenticated) protocol
| Disconnect : (_, disconnected) protocol
| Send : string -> (authenticated, authenticated) protocol
| Receive : (authenticated, string * authenticated) protocol
type 'state connection = {
state : string;
data : 'state;
}
let connect : disconnected connection -> connected connection = fun c ->
{ state = "connected"; data = Connected }
let authenticate : string -> connected connection -> authenticated connection =
fun _pass c ->
{ state = "authenticated"; data = Authenticated }
let send : string -> authenticated connection -> authenticated connection =
fun _msg c -> c
let disconnect : type s. s connection -> disconnected connection = fun _c ->
{ state = "disconnected"; data = Disconnected }
(* 使用 *)
let session =
let c0 : disconnected connection = { state = "init"; data = Disconnected } in
let c1 = connect c0 in
let c2 = authenticate "password" c1 in
let c3 = send "Hello" c2 in
disconnect c3
let () = Printf.printf "Final: %s\n" session.state
7. 限制与实际应用
7.1 OCaml 依赖类型的限制
| 限制 | 说明 |
|---|---|
| 没有完整的依赖类型 | 类型不能依赖任意运行时值 |
| GADT 模式匹配限制 | 需要局部抽象类型注解 |
| 类型级编程 | 非常笨拙,代码量大 |
| 错误消息 | 类型级编程的错误消息几乎不可读 |
| 编译时间 | 复杂 GADT 会显著增加编译时间 |
| 没有类型类 | 需要手动传递操作字典 |
7.2 何时使用类型级编程
| 场景 | 推荐度 | 原因 |
|---|---|---|
| 长度索引容器 | ⭐⭐⭐ | 编译时边界检查 |
| 类型安全 DSL | ⭐⭐⭐⭐ | 强烈推荐 |
| 状态机 | ⭐⭐⭐⭐ | 防止非法状态转移 |
| 类型安全格式化 | ⭐⭐ | 实用但编写困难 |
| 通用类型级编程 | ⭐ | 不推荐,太复杂 |
7.3 实际建议
(* ✅ 好的使用场景:简单的 phantom type *)
type 'a validated = Validated of 'a
type unvalidated
type validated
let validate : unvalidated validated -> validated validated =
fun (Validated x) -> Validated x
(* ✅ 好的使用场景:有限状态机 *)
type off = Off
type on = On
type 'state light = { state : string }
(* ⚠️ 谨慎使用:复杂的类型级计算 *)
(* 当代码变得不可读时,考虑用运行时检查替代 *)
8. 扩展阅读
| 资源 | 说明 |
|---|---|
| “Dependent Types in OCaml” | 社区博客文章 |
| Agda/Idris 文档 | 理解真正的依赖类型 |
| “Singleton Types” | Eisenberg, Stolarek |
| OCaml GADTs tutorial | v2.ocaml.org |
| “Lightweight Dependent-Type Programming” | Chen & Dunfield (2013) |
💡 提示:类型级编程在 OCaml 中是一种权衡。它提供了强大的编译时安全保证,但代价是代码复杂度和可读性下降。从简单的 phantom types 和 GADTs 开始,只在真正需要时才深入类型级编程。记住:运行时检查 + 测试在大多数情况下是更务实的选择。