OCaml 教程 / 数据序列化(JSON/Protobuf)
数据序列化(JSON/Protobuf)
数据序列化是将内存中的数据结构转换为可存储或传输的格式。OCaml 支持多种序列化方式。
Yojson 库
Yojson 是 OCaml 最流行的 JSON 库。
opam install yojson ppx_deriving_yojson
解析 JSON
open Yojson.Safe
(* Parse a JSON document held in an in-memory string. *)
let json =
  {|{"name": "Alice", "age": 30, "scores": [90, 85, 95]}|} |> from_string

(* Parse a JSON document read from a file; this rebinds [json]. *)
let json = "config.json" |> from_file
(* Shape of the JSON tree type. [Yojson.Safe.t] is a polymorphic variant, so
   every tag carries a leading backtick, exactly as in the values built with
   [`Assoc], [`String], ... elsewhere in this tutorial.
   NOTE(review): [`Tuple] and [`Variant] are non-standard JSON extensions;
   they are believed to have been removed in Yojson 3.0 -- confirm against
   the installed version. *)
type t =
  [ `Assoc of (string * t) list  (* JSON object: ordered key/value pairs *)
  | `Bool of bool
  | `Float of float
  | `Int of int
  | `Intlit of string  (* integer literal kept as text *)
  | `List of t list  (* JSON array *)
  | `Null
  | `String of string
  | `Tuple of t list
  | `Variant of string * t option ]
| JSON 类型 | OCaml 类型 | 示例 |
|---|---|---|
| object | `Assoc | {"key": "value"} |
| array | `List | [1, 2, 3] |
| string | `String | "hello" |
| number | `Int / `Float(超出 int 范围的整数为 `Intlit) | 42, 3.14 |
| boolean | `Bool | true, false |
| null | `Null | null |
生成 JSON
open Yojson.Safe
(* Assemble a JSON object by hand from polymorphic-variant constructors.
   The nested values are named first so the top-level object reads as a
   flat list of fields. *)
let json =
  let address =
    `Assoc [ ("city", `String "Beijing"); ("zip", `String "100000") ]
  in
  let scores = `List [ `Int 90; `Int 85; `Int 95 ] in
  `Assoc
    [
      ("name", `String "Alice");
      ("age", `Int 30);
      ("active", `Bool true);
      ("address", address);
      ("scores", scores);
    ]

(* Render to a string with [to_string]. *)
let s = json |> to_string

(* Render to a string with [pretty_to_string]. *)
let s_pretty = json |> pretty_to_string

(* Write the document to a file. *)
let () = json |> to_file "output.json"
💡 提示:to_string 输出紧凑格式,pretty_to_string 输出美化格式(便于调试)。
查询 JSON
open Yojson.Safe
(* Sample document: an object with a "users" array of {name, age} objects. *)
let json = from_string {|{
"users": [
{"name": "Alice", "age": 30},
{"name": "Bob", "age": 25}
]
}|}

(* Path-style query: collect the "name" of every element of "users".
   The per-element extraction must be a function of the element; writing
   [member "name" |> to_string] would apply [to_string] to the partially
   applied [member "name"] and does not type-check. *)
let names =
  Util.(
    json |> member "users" |> to_list
    |> List.map (fun user -> user |> member "name" |> to_string))

(* Option-returning query: age of the first user, converted with
   [to_int_option] instead of [to_int]. *)
let age =
  Util.(json |> member "users" |> index 0 |> member "age" |> to_int_option)
(* Error handling.
   [get_string_default json key default] returns the string stored under
   [key] in [json]. It returns [default] when the lookup or the conversion
   raises [Util.Type_error]. Only that exception is caught; any other
   exception propagates instead of being silently replaced by [default]. *)
let get_string_default json key default =
  match Util.(json |> member key |> to_string) with
  | value -> value
  | exception Util.Type_error _ -> default
(* [get_nested json path] follows the keys of [path] from left to right,
   starting at [json]. It returns [None] as soon as a lookup raises
   [Util.Type_error], and [Some value] otherwise.
   NOTE(review): [Util.member] is documented to return [`Null] for an
   absent key, so a missing key would yield [Some `Null] rather than
   [None] -- confirm this is the intended contract.
   Only [Util.Type_error] is caught; other exceptions propagate. *)
let get_nested json path =
  let step current key =
    Option.bind current (fun node ->
        match Util.member key node with
        | child -> Some child
        | exception Util.Type_error _ -> None)
  in
  List.fold_left step (Some json) path
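⚠️ 注意:member 在键不存在时返回 `Null,并不会抛出异常;但当被查询的值不是对象,或 to_string、to_int 等转换函数遇到类型不符的值(包括 `Null)时,会抛出 Util.Type_error。请使用 to_int_option 等 _option 版本(遇到 `Null 返回 None),或显式捕获 Util.Type_error。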
ppx_deriving_yojson 自动序列化
(* Define types and have their JSON converters generated automatically. *)
(* Postal address record; used as the [address] field of [user].
   [country] is optional. The [@@deriving yojson] attribute requests
   generated JSON converters for this type. *)
type address = {
city: string;
zip: string;
country: string option;
} [@@deriving yojson]
(* A user record embedding an [address] and a list of string tags.
   [@@deriving yojson] requests generated JSON converters for this type. *)
type user = {
id: int;
name: string;
email: string;
age: int;
address: address;
tags: string list;
active: bool;
} [@@deriving yojson]
(* Envelope for an API reply carrying a list of users.
   [message] is optional and declares [None] as its default value via
   [@default None]. *)
type api_response = {
success: bool;
data: user list;
message: string option [@default None];
total: int;
} [@@deriving yojson]
(* 自动生成的函数 *)
(* val user_to_yojson : user -> Yojson.Safe.t *)
(* val user_of_yojson : Yojson.Safe.t -> (user, string) result *)
(* val api_response_to_yojson : api_response -> Yojson.Safe.t *)
(* val api_response_of_yojson : Yojson.Safe.t -> (api_response, string) result *)
(* Usage: build a [user] value, convert it to JSON, pretty-print it, then
   parse the text back into a [user].
   The email literal was garbled by e-mail obfuscation in the page source
   ("[email protected]"); a real placeholder address is restored here. *)
let user = {
  id = 1;
  name = "Alice";
  email = "alice@example.com";
  age = 30;
  address = { city = "Beijing"; zip = "100000"; country = Some "CN" };
  tags = ["admin"; "user"];
  active = true;
}

let json = user_to_yojson user
let s = Yojson.Safe.pretty_to_string json

(* Parse from JSON: feed the rendered text back through the derived
   decoder. *)
let parsed = user_of_yojson (Yojson.Safe.from_string s)
| ppx 属性 | 说明 | 示例 |
|---|---|---|
| [@key "name"] | 自定义 JSON 字段名 | {name: string [@key "user_name"]} |
| [@default value] | 默认值 | {x: int [@default 0]} |
| [@yojson_option] | 可选字段 | {x: int option [@yojson_option]} |
数据模型设计
(* E-commerce API data model *)
(* Users *)
(* Role of a [user]: one of three constant constructors. *)
type role = Admin | User | Guest [@@deriving yojson]
(* A user account in the e-commerce model.
   [created_at] is kept as a string (its format is not fixed by this type).
   [metadata] is an optional free-form JSON value and declares [None] as
   its default via [@default None]. *)
type user = {
id: int;
username: string;
email: string;
role: role;
created_at: string;
metadata: Yojson.Safe.t option [@default None];
} [@@deriving yojson]
(* Products *)
(* A monetary amount stored as an integer together with its currency. *)
type price = {
amount: int; (* in fen, the minor currency unit *)
currency: string; (* e.g. "CNY" *)
} [@@deriving yojson]
(* A catalogue product: descriptive fields, a [price], a stock count, and
   lists of tags and images (both lists of strings). *)
type product = {
id: int;
name: string;
description: string;
price: price;
category: string;
tags: string list;
stock: int;
images: string list;
} [@@deriving yojson]
(* Orders *)
(* Status of an [order]: one of five constant constructors. *)
type order_status = Pending | Paid | Shipped | Delivered | Cancelled [@@deriving yojson]
(* One line of an order: a product id, a quantity, and a [price] recorded
   as [unit_price]. *)
type order_item = {
product_id: int;
quantity: int;
unit_price: price;
} [@@deriving yojson]
(* An order: its items, total price, status, shipping address, and two
   timestamps kept as strings.
   NOTE(review): [address] is not declared in this snippet; presumably it
   is the [address] record from the ppx_deriving_yojson section -- confirm
   it is in scope. *)
type order = {
id: string;
user_id: int;
items: order_item list;
total: price;
status: order_status;
shipping_address: address;
created_at: string;
updated_at: string;
} [@@deriving yojson]
(* API responses *)
(* A page of results, polymorphic in the element type ['a].
   [data] holds the elements; [total], [page] and [per_page] are integer
   paging fields. *)
type 'a paginated_response = {
data: 'a list;
total: int;
page: int;
per_page: int;
} [@@deriving yojson]
💡 提示:使用类型系统确保数据一致性。金额用整数(分)而非浮点数,避免精度问题。
Protocol Buffers (ocaml-protoc)
opam install ocaml-protoc
定义 .proto 文件
// user.proto
// Schema for a user record, its postal address, and a user-lookup RPC.
syntax = "proto3";
// A user, with an embedded Address and a repeated list of tags.
message User {
int32 id = 1;
string name = 2;
string email = 3;
int32 age = 4;
Address address = 5; // message type declared below
repeated string tags = 6;
}
// Postal address; all three fields are plain strings.
message Address {
string city = 1;
string zip = 2;
string country = 3;
}
// Request for GetUser: identifies the user by id.
message GetUserRequest {
int32 user_id = 1;
}
// Response for GetUser: carries a User and an error string.
message GetUserResponse {
User user = 1;
string error = 2;
}
// Service exposing a single unary RPC.
service UserService {
rpc GetUser (GetUserRequest) returns (GetUserResponse);
}
编译 .proto
ocaml-protoc --binary --ml_out . user.proto
OCaml 代码
(* 编译生成的类型 *)
(*
type user = {
id: int32;
name: string;
email: string;
age: int32;
address: address option;
tags: string list;
}
and address = {
city: string;
zip: string;
country: string;
}
*)
(* Serialization: build a protobuf [user] value and encode it to bytes.
   Fixes relative to the earlier draft of this snippet:
   - the encode call is wrapped in [let () = ...]; as a bare expression it
     was parsed as extra arguments to the preceding [let] binding;
   - the nested address record is written directly, because the generated
     [address] type lives in the same [User] module (there is no [Address]
     module);
   - the unused [Buffer] was dropped;
   - the e-mail literal, garbled by obfuscation in the page source, is
     restored.
   NOTE(review): the function names follow ocaml-protoc's
   [encode_pb_<type>] / [decode_pb_<type>] naming -- confirm against the
   generated user.ml for the installed version. *)
let user = User.({
  id = 1l;
  name = "Alice";
  email = "alice@example.com";
  age = 30l;
  address = Some { city = "Beijing"; zip = "100000"; country = "CN" };
  tags = ["admin"; "user"];
})

let encoder = Pbrt.Encoder.create ()
let () = User.encode_pb_user user encoder
let bytes = Pbrt.Encoder.to_bytes encoder

(* Deserialization: decode the bytes back into a [user] value. *)
let decoder = Pbrt.Decoder.of_bytes bytes
let decoded = User.decode_pb_user decoder
⚠️ 注意:Protobuf 的 int32 在 OCaml 中是 Int32.t(字面量需加 l 后缀,如 30l),不是普通 int。
性能对比
| 格式 | 序列化速度 | 反序列化速度 | 大小 | 可读性 | 类型安全 |
|---|---|---|---|---|---|
| JSON (Yojson) | 中 | 中 | 大 | 高 | 低 |
| JSON (ppx) | 快 | 快 | 大 | 高 | 高 |
| Protobuf | 很快 | 很快 | 小 | 低 | 高 |
| Marshal | 最快 | 最快 | 中 | 无 | 低 |
| Sexplib | 快 | 快 | 中 | 中 | 高 |
(* Simple benchmark.
   [benchmark name f iterations] calls [f ()] [iterations] times, discarding
   each result, then prints the elapsed wall-clock time in milliseconds
   together with [name] and the iteration count. *)
let benchmark name f iterations =
  let started_at = Unix.gettimeofday () in
  let rec repeat remaining =
    if remaining > 0 then begin
      ignore (f ());
      repeat (remaining - 1)
    end
  in
  repeat iterations;
  let elapsed_ms = (Unix.gettimeofday () -. started_at) *. 1000.0 in
  Printf.printf "%s: %.3f ms (%d iterations)\n" name elapsed_ms iterations
(* JSON vs Protobuf.
   The JSON benchmarks use the JSON-derived [user] record. The protobuf
   benchmark needs a value of the protobuf-generated record type, which is
   a different type, so a separate [pb_user] is built here instead of
   reusing [user] for both.
   NOTE(review): [User.encode_pb_user] follows ocaml-protoc's
   [encode_pb_<type>] naming -- confirm against the generated module. *)
let () =
  let json_str = Yojson.Safe.to_string (user_to_yojson user) in
  let pb_user =
    User.{
      id = 1l;
      name = "Alice";
      email = "alice@example.com";
      age = 30l;
      address = Some { city = "Beijing"; zip = "100000"; country = "CN" };
      tags = ["admin"; "user"];
    }
  in
  benchmark "JSON 序列化"
    (fun () -> Yojson.Safe.to_string (user_to_yojson user))
    10000;
  benchmark "JSON 反序列化"
    (fun () -> user_of_yojson (Yojson.Safe.from_string json_str))
    10000;
  benchmark "Protobuf 序列化"
    (fun () ->
      (* A fresh encoder per iteration, so each run encodes from scratch. *)
      let enc = Pbrt.Encoder.create () in
      User.encode_pb_user pb_user enc;
      Pbrt.Encoder.to_bytes enc)
    10000
向后兼容
(* Version 1 *)
(* First version of the user schema: only [id] and [name]. *)
type user_v1 = {
id: int;
name: string;
} [@@deriving yojson]
(* Version 2: adds the [email] and [age] fields to the version-1 schema.
   The deriving attribute must be written [@@deriving yojson]; the doubled
   brackets [[@deriving yojson]] are a syntax error. *)
type user_v2 = {
  id: int;
  name: string;
  email: string; (* new in v2 *)
  age: int; (* new in v2 *)
} [@@deriving yojson]
(* Compatibility handling *)
(* Same fields as [user_v2], but the two newer fields declare defaults
   ([""] for [email], [0] for [age]) via [@default]. *)
type user_compat = {
id: int;
name: string;
email: string [@default ""];
age: int [@default 0];
} [@@deriving yojson]
(* Version migration.
   [migrate_v1_to_v2 v1] copies [id] and [name] from the version-1 record
   and fills the fields introduced in version 2 with [""] and [0]. *)
let migrate_v1_to_v2 (v1 : user_v1) : user_v2 =
  let ({ id; name } : user_v1) = v1 in
  { id; name; email = ""; age = 0 }
(* Protobuf 天然向后兼容:未知字段被忽略 *)
💡 提示:添加新字段时始终使用默认值,这样旧版本的 JSON 数据仍然可以正常解析。
二进制序列化 Marshal
(* The Marshal module: fast, but not type-safe, serialization. *)

(* Sample payload: an int array, a float timestamp and a list of labels. *)
type data = {
  values: int array;
  timestamp: float;
  labels: string list;
}

let d = {
  values = [|1; 2; 3; 4; 5|];
  timestamp = 1234567890.0;
  labels = ["a"; "b"; "c"];
}

(* Serialization.
   No flags are passed: [d] contains no functional values, so
   [Marshal.Closures] is not needed. *)
let bytes : bytes = Marshal.to_bytes d []

(* Write [d] to a file. The write is wrapped in [let () = ...]; as bare
   expressions, the original [Marshal.to_channel] and [close_out] calls were
   parsed as extra arguments to the preceding [let]. [Fun.protect] closes
   the channel even if the write raises. *)
let oc = open_out_bin "data.bin"
let () =
  Fun.protect
    ~finally:(fun () -> close_out oc)
    (fun () -> Marshal.to_channel oc d [])

(* Deserialization. The type annotation on [d2] is the only thing that
   gives the unmarshalled value its type; nothing is checked at run time. *)
let ic = open_in_bin "data.bin"
let d2 : data =
  Fun.protect
    ~finally:(fun () -> close_in ic)
    (fun () -> Marshal.from_channel ic)
(* Version check.
   [marshal_with_version version data] marshals [version] and [data]
   separately (no flags) and returns the two encodings concatenated, with
   the version first. *)
let marshal_with_version version data =
  let encode value = Marshal.to_bytes value [] in
  Bytes.concat Bytes.empty [ encode version; encode data ]
(* [unmarshal_with_version bytes] reads back a buffer made of two
   consecutive marshalled values: an [int] version followed by a payload.
   It returns [(version, data)].
   The payload is decoded in place at the offset just past the version
   ([Marshal.total_size bytes 0]) instead of first copying the tail of the
   buffer with [Bytes.sub].
   The type of [data] is fixed only by the caller's annotation; Marshal
   performs no run-time type check. *)
let unmarshal_with_version bytes =
  let body_offset = Marshal.total_size bytes 0 in
  let version : int = Marshal.from_bytes bytes 0 in
  let data = Marshal.from_bytes bytes body_offset in
  (version, data)
⚠️ 注意:Marshal 不适合持久化存储或网络传输,因为二进制格式依赖 OCaml 版本和平台;并且反序列化时不做任何类型检查,切勿反序列化来源不可信的数据。仅用于进程间通信或缓存。
实际应用
配置文件
(* config.ml *)
(* Database connection settings. All fields are plain strings except
   [port], which is an int. *)
type database_config = {
host: string;
port: int;
name: string;
user: string;
password: string;
} [@@deriving yojson]
(* Server settings: host, port and a debug flag.
   Note that [host] and [port] share their names with the fields of
   [database_config]. *)
type server_config = {
host: string;
port: int;
debug: bool;
} [@@deriving yojson]
(* Top-level application configuration: the database and server sections
   plus a log level kept as a string. *)
type app_config = {
database: database_config;
server: server_config;
log_level: string;
} [@@deriving yojson]
(* Load configuration.
   [load_config path] reads the JSON file at [path] and passes it to
   [app_config_of_yojson], returning that function's result. A
   [Yojson.Json_error] or [Sys_error] raised along the way is turned into
   [Error msg] carrying the exception's message. *)
let load_config path =
  match app_config_of_yojson (Yojson.Safe.from_file path) with
  | parsed -> parsed
  | exception Yojson.Json_error reason -> Error reason
  | exception Sys_error reason -> Error reason
(* Default configuration.
   The two sections are built as annotated local values first, then
   assembled into the final record. *)
let default_config =
  let database : database_config =
    {
      host = "localhost";
      port = 5432;
      name = "mydb";
      user = "postgres";
      password = "";
    }
  in
  let server : server_config =
    { host = "0.0.0.0"; port = 8080; debug = false }
  in
  { database; server; log_level = "info" }
(* Merge configuration.
   [merge_config default loaded] builds a configuration field by field:
   a string field of [loaded] equal to [""] or an int field equal to [0]
   is replaced by the corresponding field of [default]; every other value
   is taken from [loaded]. [server.debug] always comes from [loaded]. *)
let merge_config default loaded =
  let non_empty ~fallback value = if value = "" then fallback else value in
  let non_zero ~fallback value = if value = 0 then fallback else value in
  let database : database_config =
    let d = default.database and l = loaded.database in
    {
      host = non_empty ~fallback:d.host l.host;
      port = non_zero ~fallback:d.port l.port;
      name = non_empty ~fallback:d.name l.name;
      user = non_empty ~fallback:d.user l.user;
      password = non_empty ~fallback:d.password l.password;
    }
  in
  let server : server_config =
    let d = default.server and l = loaded.server in
    {
      host = non_empty ~fallback:d.host l.host;
      port = non_zero ~fallback:d.port l.port;
      debug = l.debug;
    }
  in
  {
    database;
    server;
    log_level = non_empty ~fallback:default.log_level loaded.log_level;
  }
日志
(* One structured log record.
   [context] is a list of (key, JSON value) pairs; [trace_id] is optional.
   [timestamp] and [level] are kept as strings. *)
type log_entry = {
timestamp: string;
level: string;
message: string;
context: (string * Yojson.Safe.t) list;
trace_id: string option;
} [@@deriving yojson]
(* [log ~level ~message ?context ()] prints one log entry to stdout as a
   single line of JSON. The timestamp is the current time rendered with
   [Ptime.to_rfc3339]; an omitted [context] becomes the empty list;
   [trace_id] is always [None]. *)
let log ~level ~message ?context () =
  let context =
    match context with
    | Some pairs -> pairs
    | None -> []
  in
  let timestamp = Ptime.to_rfc3339 (Ptime_clock.now ()) in
  { timestamp; level; message; context; trace_id = None }
  |> log_entry_to_yojson
  |> Yojson.Safe.to_string
  |> Printf.printf "%s\n"
(* Example call: emit one "info" entry with two context pairs. *)
let () =
  let context = [ ("user_id", `Int 123); ("ip", `String "192.168.1.1") ] in
  log ~level:"info" ~message:"用户登录" ~context ()
扩展阅读
上一节:CLI 工具开发 下一节:数据库操作(Caqti)