强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

OCaml 教程 / 数据序列化(JSON/Protobuf)

数据序列化(JSON/Protobuf)

数据序列化是将内存中的数据结构转换为可存储或传输的格式。OCaml 支持多种序列化方式。

Yojson 库

Yojson 是 OCaml 最流行的 JSON 库。

opam install yojson ppx_deriving_yojson

解析 JSON

open Yojson.Safe

(* Parse a JSON document held in an in-memory string. *)
let json =
  {|{"name": "Alice", "age": 30, "scores": [90, 85, 95]}|} |> from_string

(* Parse a JSON document from a file; this binding shadows the previous
   [json]. *)
let json = "config.json" |> from_file

(* Basic type: the JSON tree. The constructors are polymorphic variants
   (note the leading backquote), which is the form the values built
   elsewhere on this page use.
   NOTE(review): the Tuple and Variant cases are non-standard JSON
   extensions and are reportedly removed in Yojson 3.0 - confirm against
   the installed version. *)
type t = [
  | `Assoc of (string * t) list
  | `Bool of bool
  | `Float of float
  | `Int of int
  | `Intlit of string
  | `List of t list
  | `Null
  | `String of string
  | `Tuple of t list
  | `Variant of string * t option
]
| JSON 类型 | OCaml 类型 | 示例 |
| --- | --- | --- |
| object | Assoc | {"key": "value"} |
| array | List | [1, 2, 3] |
| string | String | "hello" |
| number | Int/Float | 42, 3.14 |
| boolean | Bool | true, false |
| null | Null | null |

生成 JSON

open Yojson.Safe

(* Build the document by hand from polymorphic-variant constructors; the
   nested pieces are named first and then assembled into the top-level
   object. *)
let json =
  let address =
    `Assoc [ ("city", `String "Beijing"); ("zip", `String "100000") ]
  in
  let scores = `List [ `Int 90; `Int 85; `Int 95 ] in
  `Assoc
    [
      ("name", `String "Alice");
      ("age", `Int 30);
      ("active", `Bool true);
      ("address", address);
      ("scores", scores);
    ]

(* Render the document as text: compact form first, then the
   pretty-printed form. *)
let s = json |> to_string
let s_pretty = json |> pretty_to_string

(* Write the document to a file. *)
let () = json |> to_file "output.json"

💡 提示:to_string 输出紧凑格式,pretty_to_string 输出美化格式(便于调试)。

查询 JSON

open Yojson.Safe

(* Sample document: an object whose "users" member is an array of two
   objects, each with a name and an age. *)
let json =
  {|{
  "users": [
    {"name": "Alice", "age": 30},
    {"name": "Bob", "age": 25}
  ]
}|}
  |> from_string

(* Path query: collect the "name" member of every element of "users".
   The per-element step is an explicit function, because [List.map] needs a
   function from a JSON value to a string. *)
let names =
  Util.(
    json
    |> member "users"
    |> to_list
    |> List.map (fun user -> user |> member "name" |> to_string))

(* Safe query: the final conversion is [to_int_option], so the result is an
   option rather than a plain int.
   NOTE(review): the earlier [member] / [index] steps may still raise on a
   malformed document - confirm against the Yojson Util documentation. *)
let age =
  let open Util in
  let first_user = json |> member "users" |> index 0 in
  first_user |> member "age" |> to_int_option

(* Error handling *)
(* [get_string_default json key default] returns the string stored under
   [key] in [json]. When the lookup or the string conversion fails with
   [Util.Type_error], it returns [default] instead. Only that exception is
   handled, so unrelated failures are not silently swallowed. *)
let get_string_default json key default =
  match Util.(json |> member key |> to_string) with
  | value -> value
  | exception Util.Type_error _ -> default

(* [get_nested json path] walks [path], a list of member names, starting at
   [json] and applying [Util.member] once per key. It returns [Some node]
   for the value reached, or [None] as soon as one step fails with
   [Util.Type_error]. An empty [path] yields [Some json].
   Only [Util.Type_error] is handled; other exceptions propagate. *)
let get_nested json path =
  let descend current key =
    match current with
    | None -> None
    | Some node -> (
        match Util.member key node with
        | child -> Some child
        | exception Util.Type_error _ -> None)
  in
  List.fold_left descend (Some json) path

⚠️ 注意:Util 模块的查询函数在类型不匹配时会抛出 Type_error 异常(member 对不存在的键返回 Null,之后的 to_string、to_int 等转换才会失败)。使用 to_int_option 等安全版本或手动处理异常。

ppx_deriving_yojson 自动序列化

(* Define types and have their JSON conversions generated automatically. *)
(* Postal address record; [country] is optional. The deriving attribute
   requests the yojson converters for this type. *)
type address = {
  city: string;
  zip: string;
  country: string option;
} [@@deriving yojson]

(* User record; embeds an [address] and carries a list of string tags.
   The deriving attribute requests the yojson converters for this type. *)
type user = {
  id: int;
  name: string;
  email: string;
  age: int;
  address: address;
  tags: string list;
  active: bool;
} [@@deriving yojson]

(* API response envelope wrapping a list of users.
   [message] is annotated with a default of [None].
   NOTE(review): presumably this lets the field be absent from incoming
   JSON - confirm against the ppx_deriving_yojson documentation. *)
type api_response = {
  success: bool;
  data: user list;
  message: string option [@default None];
  total: int;
} [@@deriving yojson]

(* 自动生成的函数 *)
(* val user_to_yojson : user -> Yojson.Safe.t *)
(* val user_of_yojson : Yojson.Safe.t -> (user, string) result *)
(* val api_response_to_yojson : api_response -> Yojson.Safe.t *)
(* val api_response_of_yojson : Yojson.Safe.t -> (api_response, string) result *)

(* Usage *)
(* Sample [user] value fed to the generated converters below. *)
let user = {
  id = 1;
  name = "Alice";
  email = "alice@example.com";
  age = 30;
  address = { city = "Beijing"; zip = "100000"; country = Some "CN" };
  tags = ["admin"; "user"];
  active = true;
}

(* Encode the record as a JSON tree, then render it as indented text. *)
let json = user |> user_to_yojson
let s = json |> Yojson.Safe.pretty_to_string

(* Parse the text back; [user_of_yojson] returns a result, not a bare
   record. *)
let parsed = s |> Yojson.Safe.from_string |> user_of_yojson
| ppx 属性 | 说明 | 示例 |
| --- | --- | --- |
| [@key "name"] | 自定义 JSON 字段名 | {name: string [@key "user_name"]} |
| [@default value] | 默认值 | {x: int [@default 0]} |
| [@yojson_option] | 可选字段 | {x: int option [@yojson_option]} |

数据模型设计

(* 电商 API 数据模型 *)

(* User *)
(* Role of a user: one of three constant constructors. *)
type role = Admin | User | Guest [@@deriving yojson]

(* User account record. [metadata] is an optional raw JSON value annotated
   with a default of [None]. [created_at] is kept as a string; its format
   is not fixed by this definition. *)
type user = {
  id: int;
  username: string;
  email: string;
  role: role;
  created_at: string;
  metadata: Yojson.Safe.t option [@default None];
} [@@deriving yojson]

(* Product *)
(* Monetary amount stored as an integer together with a currency code. *)
type price = {
  amount: int;       (* in fen, the 1/100 unit *)
  currency: string;  (* "CNY" *)
} [@@deriving yojson]

(* Catalogue entry: descriptive fields, a [price], a stock count, and lists
   of tags and image strings. *)
type product = {
  id: int;
  name: string;
  description: string;
  price: price;
  category: string;
  tags: string list;
  stock: int;
  images: string list;
} [@@deriving yojson]

(* Order *)
(* Lifecycle state of an order: one of five constant constructors. *)
type order_status = Pending | Paid | Shipped | Delivered | Cancelled [@@deriving yojson]

(* One line of an order: which product, how many, and the unit price. *)
type order_item = {
  product_id: int;
  quantity: int;
  unit_price: price;
} [@@deriving yojson]

(* Order record tying a user to a list of items, a total and a status.
   NOTE(review): [address] is not defined in this snippet; presumably it is
   the address record from the earlier ppx example - confirm. *)
type order = {
  id: string;
  user_id: int;
  items: order_item list;
  total: price;
  status: order_status;
  shipping_address: address;
  created_at: string;
  updated_at: string;
} [@@deriving yojson]

(* API response *)
(* Page of results, polymorphic in the element type: the items themselves
   plus the total count, page number and page size. *)
type 'a paginated_response = {
  data: 'a list;
  total: int;
  page: int;
  per_page: int;
} [@@deriving yojson]

💡 提示:使用类型系统确保数据一致性。金额用整数(分)而非浮点数,避免精度问题。

Protocol Buffers (ocaml-protoc)

opam install ocaml-protoc

定义 .proto 文件

// user.proto
// Schema for the user service: a User message with a nested Address, plus
// the request/response pair used by the GetUser RPC.
syntax = "proto3";

// A user; tags is a repeated field and address is a nested message.
message User {
  int32 id = 1;
  string name = 2;
  string email = 3;
  int32 age = 4;
  Address address = 5;
  repeated string tags = 6;
}

// Postal address; all three fields are plain strings.
message Address {
  string city = 1;
  string zip = 2;
  string country = 3;
}

// Request for GetUser: identifies the user by numeric id.
message GetUserRequest {
  int32 user_id = 1;
}

// Response for GetUser: carries a user and an error string.
message GetUserResponse {
  User user = 1;
  string error = 2;
}

// Service exposing a single unary RPC.
service UserService {
  rpc GetUser (GetUserRequest) returns (GetUserResponse);
}

编译 .proto

ocaml-protoc -ml_out . user.proto

OCaml 代码

(* 编译生成的类型 *)
(*
type user = {
  id: int32;
  name: string;
  email: string;
  age: int32;
  address: address option;
  tags: string list;
}

and address = {
  city: string;
  zip: string;
  country: string;
}
*)

(* Serialization *)
(* Sample message value. The int32 fields take Int32 literals (the [l]
   suffix) and [address] is wrapped in [Some].
   NOTE(review): the [User.(...)] and [Address.(...)] module paths depend on
   how ocaml-protoc names the generated modules - confirm against the
   generated file. *)
let user = User.({
  id = 1l;
  name = "Alice";
  email = "alice@example.com";
  age = 30l;
  address = Some Address.({ city = "Beijing"; zip = "100000"; country = "CN" });
  tags = ["admin"; "user"];
})

(* NOTE(review): [buf] is not used by the encoding steps below. *)
let buf = Buffer.create 256
let encoder = Pbrt.Encoder.create ()

(* The encoding call is a unit-returning side effect, so it is bound with
   [let () = ...]. A bare expression here would be parsed as further
   arguments to the preceding definition.
   NOTE(review): the exact names of the generated encode/decode functions
   depend on the ocaml-protoc version - confirm against the generated
   file. *)
let () = User.encode_pb user encoder
let bytes = Pbrt.Encoder.to_bytes encoder

(* Deserialization: build a decoder over the encoded bytes and decode. *)
let decoder = Pbrt.Decoder.of_bytes bytes
let decoded = User.decode_pb decoder

⚠️ 注意:Protobuf 的 int32 在 OCaml 中是 Int32.t(如 32l),不是普通 int。

性能对比

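| 格式 | 序列化速度 | 反序列化速度 | 大小 | 可读性 | 类型安全 |
| --- | --- | --- | --- | --- | --- |
| JSON (Yojson) | | | | | |
| JSON (ppx) | | | | | |
| Protobuf | 很快 | 很快 | | | |
| Marshal | 最快 | 最快 | | | |
| Sexplib | | | | | |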
(* Simple benchmark helper. *)
(* [benchmark name f iterations] calls [f ()] [iterations] times, discarding
   each result, then prints the elapsed wall-clock time in milliseconds
   together with [name] and the iteration count. *)
let benchmark name f iterations =
  let started_at = Unix.gettimeofday () in
  let rec run remaining =
    if remaining > 0 then begin
      ignore (f ());
      run (remaining - 1)
    end
  in
  run iterations;
  let elapsed_ms = (Unix.gettimeofday () -. started_at) *. 1000.0 in
  Printf.printf "%s: %.3f ms (%d iterations)\n" name elapsed_ms iterations

(* JSON vs Protobuf: time JSON encoding, JSON decoding and Protobuf
   encoding, 10000 iterations each.
   NOTE(review): the same [user] value is passed to both the JSON and the
   Protobuf encoders here - confirm which definition of [user] is meant. *)
let () =
  let iterations = 10000 in
  let json_str = Yojson.Safe.to_string (user_to_yojson user) in
  let encode_json () = user |> user_to_yojson |> Yojson.Safe.to_string in
  let decode_json () = json_str |> Yojson.Safe.from_string |> user_of_yojson in
  let encode_protobuf () =
    let enc = Pbrt.Encoder.create () in
    User.encode_pb user enc;
    Pbrt.Encoder.to_bytes enc
  in
  benchmark "JSON 序列化" encode_json iterations;
  benchmark "JSON 反序列化" decode_json iterations;
  benchmark "Protobuf 序列化" encode_protobuf iterations

向后兼容

(* Version 1 *)
(* First schema version: only an id and a name. *)
type user_v1 = {
  id: int;
  name: string;
} [@@deriving yojson]

(* Version 2: adds fields. *)
(* Second schema version: the version 1 fields plus [email] and [age],
   neither of which carries a default. *)
type user_v2 = {
  id: int;
  name: string;
  email: string;           (* new in version 2 *)
  age: int;                (* new in version 2 *)
} [@@deriving yojson]

(* Compatibility handling *)
(* Same four fields as version 2, but [email] and [age] carry default
   annotations (the empty string and 0 respectively). *)
type user_compat = {
  id: int;
  name: string;
  email: string [@default ""];
  age: int [@default 0];
} [@@deriving yojson]

(* Version migration *)
(* [migrate_v1_to_v2 v1] copies [id] and [name] from the version 1 record
   and fills the version 2 additions with an empty email and age 0. *)
let migrate_v1_to_v2 ({ id; name } : user_v1) : user_v2 =
  { id; name; email = ""; age = 0 }

(* Protobuf 天然向后兼容:未知字段被忽略 *)

💡 提示:添加新字段时始终使用默认值,这样旧版本的 JSON 数据仍然可以正常解析。

二进制序列化 Marshal

(* Marshal 模块:快速但不安全的序列化 *)

(* Serialization *)
(* Sample payload type: an int array, a float timestamp and a list of
   string labels. *)
type data = {
  values: int array;
  timestamp: float;
  labels: string list;
}

(* Sample value that the marshalling examples operate on. *)
let d =
  let labels = [ "a"; "b"; "c" ] in
  let values = [| 1; 2; 3; 4; 5 |] in
  { timestamp = 1234567890.0; values; labels }

(* Marshal [d] into an in-memory byte sequence. *)
let bytes : bytes = Marshal.to_bytes d [Marshal.Closures]

(* Marshal [d] into data.bin. The write is a unit-returning side effect, so
   it is bound with [let () = ...]; a bare expression here would be parsed
   as further arguments to the preceding definition. [Fun.protect] closes
   the channel whether or not the write raises. *)
let oc = open_out_bin "data.bin"
let () =
  Fun.protect
    ~finally:(fun () -> close_out oc)
    (fun () -> Marshal.to_channel oc d [Marshal.Closures])

(* Deserialization *)
(* Read the value back from data.bin. The type annotation on [d2] is what
   gives the unmarshalled value its type. [Fun.protect] closes the channel
   whether or not the read raises; keeping the close inside the definition
   also stops it from being parsed as extra arguments to
   [Marshal.from_channel]. *)
let ic = open_in_bin "data.bin"
let d2 : data =
  Fun.protect
    ~finally:(fun () -> close_in ic)
    (fun () -> Marshal.from_channel ic)

(* Version check *)
(* [marshal_with_version version data] marshals [version] and [data]
   separately, each with no flags, and returns the two byte sequences
   concatenated with the version first. *)
let marshal_with_version version data =
  let encode value = Marshal.to_bytes value [] in
  Bytes.cat (encode version) (encode data)

(* [unmarshal_with_version bytes] reads back a pair of marshalled values laid
   out back to back: first an [int] version, then the payload. It returns
   [(version, data)].
   [Marshal.total_size bytes 0] gives the length of the first marshalled
   value, which is the offset at which the payload starts. The payload is
   decoded in place at that offset, without copying it into a new buffer.
   The type of [data] is not checked; the caller fixes it by annotation. *)
let unmarshal_with_version bytes =
  let version_size = Marshal.total_size bytes 0 in
  let version : int = Marshal.from_bytes bytes 0 in
  let data = Marshal.from_bytes bytes version_size in
  (version, data)

⚠️ 注意:Marshal 不适合持久化存储或网络传输,因为二进制格式依赖 OCaml 版本和平台。仅用于进程间通信或缓存。

实际应用

配置文件

(* config.ml *)
(* Database connection settings: host, port, database name and
   credentials. *)
type database_config = {
  host: string;
  port: int;
  name: string;
  user: string;
  password: string;
} [@@deriving yojson]

(* Server settings: host, port and a debug flag. Shares the field names
   [host] and [port] with [database_config]. *)
type server_config = {
  host: string;
  port: int;
  debug: bool;
} [@@deriving yojson]

(* Top-level application configuration: the database and server sections
   plus a log level kept as a string. *)
type app_config = {
  database: database_config;
  server: server_config;
  log_level: string;
} [@@deriving yojson]

(* Load configuration *)
(* [load_config path] reads the JSON file at [path] and decodes it with
   [app_config_of_yojson], returning that function's result. A
   [Yojson.Json_error] or [Sys_error] raised along the way is turned into
   [Error] carrying the exception's message. *)
let load_config path =
  try path |> Yojson.Safe.from_file |> app_config_of_yojson with
  | Yojson.Json_error reason | Sys_error reason -> Error reason

(* Default configuration *)
(* Baseline settings; each section is built separately and then assembled
   into the top-level record. *)
let default_config =
  let database : database_config =
    {
      host = "localhost";
      port = 5432;
      name = "mydb";
      user = "postgres";
      password = "";
    }
  in
  let server : server_config =
    { host = "0.0.0.0"; port = 8080; debug = false }
  in
  { database; server; log_level = "info" }

(* Merge configuration *)
(* [merge_config default loaded] builds a configuration field by field.
   A string field of [loaded] equal to the empty string, or an int field
   equal to 0, is replaced by the corresponding field of [default]; any
   other value is kept. [server.debug] is always taken from [loaded]. *)
let merge_config default loaded =
  (* [pick ~unset fallback candidate] keeps [candidate] unless it equals the
     [unset] sentinel. *)
  let pick ~unset fallback candidate =
    if candidate = unset then fallback else candidate
  in
  let text = pick ~unset:"" in
  let number = pick ~unset:0 in
  {
    database = {
      host = text default.database.host loaded.database.host;
      port = number default.database.port loaded.database.port;
      name = text default.database.name loaded.database.name;
      user = text default.database.user loaded.database.user;
      password = text default.database.password loaded.database.password;
    };
    server = {
      host = text default.server.host loaded.server.host;
      port = number default.server.port loaded.server.port;
      debug = loaded.server.debug;
    };
    log_level = text default.log_level loaded.log_level;
  }

日志

(* One structured log record: timestamp, level and message as strings, a
   list of named JSON values as context, and an optional trace id. *)
type log_entry = {
  timestamp: string;
  level: string;
  message: string;
  context: (string * Yojson.Safe.t) list;
  trace_id: string option;
} [@@deriving yojson]

(* [log ~level ~message ?context ()] builds a [log_entry] stamped with the
   current time in RFC 3339 form, encodes it as JSON and prints it on one
   line to standard output. [context] defaults to the empty list and
   [trace_id] is always [None]. *)
let log ~level ~message ?context () =
  let context =
    match context with
    | None -> []
    | Some fields -> fields
  in
  let timestamp = Ptime.to_rfc3339 (Ptime_clock.now ()) in
  { timestamp; level; message; context; trace_id = None }
  |> log_entry_to_yojson
  |> Yojson.Safe.to_string
  |> Printf.printf "%s\n"

(* Example call: an info-level entry with two context fields. *)
let () =
  let context = [ ("user_id", `Int 123); ("ip", `String "192.168.1.1") ] in
  log ~level:"info" ~message:"用户登录" ~context ()

扩展阅读


上一节:CLI 工具开发 | 下一节:数据库操作(Caqti)