强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧.
文档目录

OCaml 教程 / 依赖记录与类型级编程

依赖记录与类型级编程

OCaml 并不是一门依赖类型语言,但通过 GADTs 和模块系统,我们可以在一定程度上模拟依赖类型、类型级自然数和长度索引向量等高级特性。


1. 依赖函数类型(Dependent Function Types)

1.1 什么是依赖类型

依赖类型(Dependent Types)是指类型的定义依赖于值:

-- 伪代码(Agda/Idris 风格)
replicate : (n : Nat) -> (a : A) -> Vec A n
-- 返回类型 Vec A n 依赖于值 n
概念说明OCaml 支持程度
依赖函数类型返回类型依赖于参数值⚠️ 通过 GADTs 部分支持
依赖对 (Σ类型)第二个元素的类型依赖于第一个元素的值✅ 通过 GADTs
向量 (Vec A n)长度索引的列表✅ 通过 GADTs
类型级自然数Peano 数编码✅ 通过类型

1.2 OCaml 中的近似

(* OCaml 不能直接写出 replicate : (n:int) -> 'a -> 'a list
   其中返回类型依赖于 n 的值,但可以用 GADTs 模拟 *)

(* 先引入类型级自然数 *)
type zero = Zero
type 'n succ = Succ

type _ nat =
  | Z : zero nat
  | S : 'n nat -> 'n succ nat

(* 长度索引向量 *)
type (_, _) vec =
  | Nil : (zero, 'a) vec
  | Cons : 'a * ('n, 'a) vec -> ('n succ, 'a) vec

let x : (zero succ succ, int) vec = Cons (1, Cons (2, Nil))
(* x 是长度为 2 的 int 向量 *)

2. 用 GADTs 编码依赖类型

2.1 类型级自然数

type zero = Zero
type 'n succ = Succ

type _ nat =
  | Z : zero nat
  | S : 'n nat -> 'n succ nat

(* 类型级自然数:0, 1, 2, 3, ... *)
let n0 : zero nat = Z
let n1 : zero succ nat = S Z
let n2 : zero succ succ nat = S (S Z)
let n3 : zero succ succ succ nat = S (S (S Z))

2.2 类型级加法

(* 类型级加法证明 *)
type (_, _, _) plus =
  | PlusZ : (zero, 'n, 'n) plus
  | PlusS : ('m, 'n, 'p) plus
    -> ('m succ, 'n, 'p succ) plus

(* 证明 2 + 1 = 3 *)
let proof_2_plus_1 : (zero succ succ, zero succ, zero succ succ succ) plus =
  PlusS (PlusS PlusZ)

(* 运行时加法 *)
let rec nat_to_int : type n. n nat -> int = function
  | Z -> 0
  | S n -> 1 + nat_to_int n

let rec add_nat : type m n. m nat -> n nat -> (m, n, _) nat =
  fun m n -> match m with
  | Z -> n
  | S m' -> S (add_nat m' n)

let result = add_nat n2 n1  (* 类型: zero succ succ succ nat *)
let () = Printf.printf "2 + 1 = %d\n" (nat_to_int result)

2.3 依赖对(Σ 类型)

(* ∃(n:nat). vec(n, int) *)
type packed_vec = PV : 'n nat * ('n, int) vec -> packed_vec

let pv1 = PV (S (S Z), Cons (1, Cons (2, Nil)))
let pv2 = PV (S (S (S Z)), Cons (3, Cons (4, Cons (5, Nil))))

(* 获取长度 *)
let pv_length (PV (n, _)) = nat_to_int n

let () = Printf.printf "pv1 length: %d\n" (pv_length pv1)  (* 2 *)
let () = Printf.printf "pv2 length: %d\n" (pv_length pv2)  (* 3 *)

3. 类型级编程

3.1 类型级布尔

type true_ = True
type false_ = False

type (_, _, _) and_ =
  | AndTT : (true_, true_, true_) and_
  | AndTF : (true_, false_, false_) and_
  | AndFT : (false_, true_, false_) and_
  | AndFF : (false_, false_, false_) and_

type (_, _, _) or_ =
  | OrTT : (true_, true_, true_) or_
  | OrTF : (true_, false_, true_) or_
  | OrFT : (false_, true_, true_) or_
  | OrFF : (false_, false_, false_) or_

type _ not =
  | NotT : true_ not -> false_
  | NotF : false_ not -> true_

3.2 类型级列表

type nil = Nil
type ('h, 't) cons = Cons

type _ tlist =
  | TNil : nil tlist
  | TCons : 'h * 't tlist -> ('h, 't) cons tlist

(* 类型级列表操作 *)
type (_, _, _) tappend =
  | TAppendNil : (nil, 'b, 'b) tappend
  | TAppendCons : ('t, 'b, 'c) tappend
    -> (('h, 't) cons, 'b, ('h, 'c) cons) tappend

3.3 类型级相等

type (_, _) eq = Refl : ('a, 'a) eq

let cast : type a b. (a, b) eq -> a -> b = fun Refl x -> x

(* 证明 int = int *)
let int_eq_int : (int, int) eq = Refl

(* 对称性 *)
let sym : type a b. (a, b) eq -> (b, a) eq = fun Refl -> Refl

(* 传递性 *)
let trans : type a b c. (a, b) eq -> (b, c) eq -> (a, c) eq =
  fun Refl Refl -> Refl

4. 向量类型(长度索引)

4.1 基本向量操作

type zero = Zero
type 'n succ = Succ

type (_, _) vec =
  | Nil : (zero, 'a) vec
  | Cons : 'a * ('n, 'a) vec -> ('n succ, 'a) vec

(* 安全的 head —— 非空向量一定有 head *)
let head : type n. (n succ, 'a) vec -> 'a = fun (Cons (x, _)) -> x

(* 安全的 tail *)
let tail : type n. (n succ, 'a) vec -> (n, 'a) vec = fun (Cons (_, xs)) -> xs

(* 长度 *)
let rec length : type n a. (n, a) vec -> int = function
  | Nil -> 0
  | Cons (_, xs) -> 1 + length xs

(* 映射 *)
let rec vmap : type n a b. (a -> b) -> (n, a) vec -> (n, b) vec =
  fun f -> function
  | Nil -> Nil
  | Cons (x, xs) -> Cons (f x, vmap f xs)

(* zip —— 两个相同长度的向量 *)
let rec vzip : type n a b. (n, a) vec -> (n, b) vec -> (n, a * b) vec =
  fun xs ys -> match xs, ys with
  | Nil, Nil -> Nil
  | Cons (x, xs'), Cons (y, ys') -> Cons ((x, y), vzip xs' ys')

4.2 使用示例

let v1 : (zero succ succ succ, int) vec =
  Cons (1, Cons (2, Cons (3, Nil)))

let v2 : (zero succ succ succ, string) vec =
  Cons ("a", Cons ("b", Cons ("c", Nil)))

let zipped = vzip v1 v2
(* 类型保证:只能 zip 相同长度的向量 *)

let () =
  let (Cons ((1, "a"), Cons ((2, "b"), Cons ((3, "c"), Nil)))) = zipped in
  print_endline "zipped OK"

(* 编译时安全检查 *)
(* let bad = vzip v1 (Cons (1, Nil))  (* 编译错误!长度不匹配 *) *)

⚠️ 注意点:类型级向量的模式匹配需要精确的类型注解。如果类型不匹配,错误消息可能很复杂。

4.3 安全的索引

(* 使用类型级自然数作为索引 *)
type (_, _) fin =
  | FZ : ('n succ, zero) fin
  | FS : ('n, 'i) fin -> ('n succ, 'i succ) fin

(* fin 类型:fin(n, i) 表示 i 是 0..n-1 范围内的索引 *)

(* 安全的向量索引访问 *)
let rec vget : type n a i. (n, a) vec -> (n, i) fin -> a =
  fun vec idx -> match vec, idx with
  | Cons (x, _), FZ -> x
  | Cons (_, xs), FS i -> vget xs i
  | Nil, _ -> .  (* 不可能:fin 保证索引在范围内 *)

(* 示例 *)
let v : (zero succ succ succ, int) vec = Cons (10, Cons (20, Cons (30, Nil)))

let idx0 : (zero succ succ succ, zero) fin = FZ
let idx1 : (zero succ succ succ, zero succ) fin = FS FZ
let idx2 : (zero succ succ succ, zero succ succ) fin = FS (FS FZ)

let () =
  Printf.printf "v[0] = %d\n" (vget v idx0);  (* 10 *)
  Printf.printf "v[1] = %d\n" (vget v idx1);  (* 20 *)
  Printf.printf "v[2] = %d\n" (vget v idx2)   (* 30 *)

(* 编译时安全:不能越界 *)
(* let idx3 = FS (FS (FS FZ))  (* 编译时无法匹配 3 元素向量 *) *)

5. 类型安全的格式化字符串

5.1 格式化类型

(* 类型级格式化规范 *)
type (_, _) fmt =
  | FEnd : (string, string) fmt
  | FInt : ('r, 's) fmt -> (int -> 'r, 's) fmt
  | FStr : ('r, 's) fmt -> (string -> 'r, 's) fmt
  | FLit : string * ('r, 's) fmt -> ('r, 's) fmt

(* 格式化函数 *)
let rec sprintf : type r s. (r, s) fmt -> r = function
  | FEnd -> ""
  | FInt rest -> fun n -> string_of_int n ^ sprintf rest
  | FStr rest -> fun s -> s ^ sprintf rest
  | FLit (lit, rest) -> lit ^ sprintf rest

(* 格式化字符串解析器 *)
let rec format : type r s. (r, s) fmt -> r -> s = function
  | FEnd -> fun acc -> acc
  | FInt rest -> fun f n -> format rest (f n)
  | FStr rest -> fun f s -> format rest (f s)
  | FLit (_, rest) -> fun f -> format rest f

(* 构建格式化规范 *)
let hello_fmt : (int -> string -> string, string) fmt =
  FLit ("Hello, ", FInt (FLit ("! Your name is ", FStr (FLit (".", FEnd)))))

(* 使用 *)
let result = format hello_fmt (fun n name ->
  Printf.sprintf "Hello, %d! Your name is %s." n name
) 42 "Alice"

5.2 简化的类型安全格式化

(* 更实用的版本 *)
type ('a, 'b) fmt =
  | Lit : string * ('a, 'b) fmt -> ('a, 'b) fmt
  | Int : ('a, 'b) fmt -> (int -> 'a, 'b) fmt
  | Str : ('a, 'b) fmt -> (string -> 'a, 'b) fmt
  | End : (string, string) fmt

let rec ksprintf : type a b. (string -> b) -> (a, b) fmt -> a = fun k -> function
  | End -> k ""
  | Lit (s, rest) -> ksprintf (fun acc -> k (s ^ acc)) rest
  | Int rest -> fun n -> ksprintf (fun acc -> k (string_of_int n ^ acc)) rest
  | Str rest -> fun s -> ksprintf (fun acc -> k (s ^ acc)) rest

let sprintf fmt = ksprintf Fun.id fmt

(* 使用 *)
let my_fmt : (int -> string -> string, string) fmt =
  Lit ("User ", Int (Lit (" is ", Str (End))))

let () = print_endline (sprintf my_fmt 42 "Alice")
(* 输出: "User 42 is Alice" *)

6. 类型安全的状态机

6.1 状态转移

type locked = Locked
type unlocked = Unlocked

type (_, _) action =
  | InsertCoin : (locked, unlocked) action
  | Push : (unlocked, locked) action

type 'state machine = {
  state : string;
  process : 'state;
}

let insert_coin : locked machine -> unlocked machine = fun m ->
  { state = "unlocked (" ^ m.state ^ ")"; process = Unlocked }

let push : unlocked machine -> locked machine = fun m ->
  { state = "locked (" ^ m.state ^ ")"; process = Locked }

(* 使用 *)
let start : locked machine = { state = "initial"; process = Locked }
let opened = insert_coin start
let closed = push opened
let opened_again = insert_coin closed

let () = Printf.printf "Final state: %s\n" opened_again.state

6.2 网络协议状态机

type disconnected = Disconnected
type connected = Connected
type authenticated = Authenticated

type (_, _) protocol =
  | Connect : (disconnected, connected) protocol
  | Authenticate : string -> (connected, authenticated) protocol
  | Disconnect : (_, disconnected) protocol
  | Send : string -> (authenticated, authenticated) protocol
  | Receive : (authenticated, string * authenticated) protocol

type 'state connection = {
  state : string;
  data : 'state;
}

let connect : disconnected connection -> connected connection = fun c ->
  { state = "connected"; data = Connected }

let authenticate : string -> connected connection -> authenticated connection =
  fun _pass c ->
    { state = "authenticated"; data = Authenticated }

let send : string -> authenticated connection -> authenticated connection =
  fun _msg c -> c

let disconnect : type s. s connection -> disconnected connection = fun _c ->
  { state = "disconnected"; data = Disconnected }

(* 使用 *)
let session =
  let c0 : disconnected connection = { state = "init"; data = Disconnected } in
  let c1 = connect c0 in
  let c2 = authenticate "password" c1 in
  let c3 = send "Hello" c2 in
  disconnect c3

let () = Printf.printf "Final: %s\n" session.state

7. 限制与实际应用

7.1 OCaml 依赖类型的限制

限制说明
没有完整的依赖类型类型不能依赖任意运行时值
GADT 模式匹配限制需要局部抽象类型注解
类型级编程非常笨拙,代码量大
错误消息类型级编程的错误消息几乎不可读
编译时间复杂 GADT 会显著增加编译时间
没有类型类需要手动传递操作字典

7.2 何时使用类型级编程

场景推荐度原因
长度索引容器⭐⭐⭐编译时边界检查
类型安全 DSL⭐⭐⭐⭐强烈推荐
状态机⭐⭐⭐⭐防止非法状态转移
类型安全格式化⭐⭐实用但编写困难
通用类型级编程不推荐,太复杂

7.3 实际建议

(* ✅ 好的使用场景:简单的 phantom type *)
type 'a validated = Validated of 'a
type unvalidated
type validated

let validate : unvalidated validated -> validated validated =
  fun (Validated x) -> Validated x

(* ✅ 好的使用场景:有限状态机 *)
type off = Off
type on = On
type 'state light = { state : string }

(* ⚠️ 谨慎使用:复杂的类型级计算 *)
(* 当代码变得不可读时,考虑用运行时检查替代 *)

8. 扩展阅读

资源说明
“Dependent Types in OCaml”社区博客文章
Agda/Idris 文档理解真正的依赖类型
“Singleton Types”Eisenberg, Stolarek
OCaml GADTs tutorialv2.ocaml.org
“Lightweight Dependent-Type Programming”Chen & Dunfield (2013)

💡 提示:类型级编程在 OCaml 中是一种权衡。它提供了强大的编译时安全保证,但代价是代码复杂度和可读性下降。从简单的 phantom types 和 GADTs 开始,只在真正需要时才深入类型级编程。记住:运行时检查 + 测试在大多数情况下是更务实的选择。