07 - Rust 实践 / Rust Implementation
Rust 实践 / Rust Implementation
本章介绍如何在 Rust 中使用 MessagePack,重点讲解 rmp-serde 生态、自定义序列化、零拷贝技术和性能优化。
This chapter covers using MessagePack in Rust, focusing on the rmp-serde ecosystem, custom serialization, zero-copy techniques, and performance optimization.
📖 库概览 / Library Overview
Rust 的 MessagePack 生态基于 serde 框架,核心库包括:
| 库 / Crate | 功能 / Purpose | 推荐度 |
|---|---|---|
| rmp-serde | serde 集成,高层 API | ⭐⭐⭐⭐⭐ |
| rmp | 底层编解码,低级 API | ⭐⭐⭐⭐ |
| rmpv | Value 类型(动态类型) | ⭐⭐⭐⭐ |
Cargo.toml
[dependencies]
rmp-serde = "1.3"
rmp = "0.8"
# Required by the dynamic `rmpv::Value` examples; the `with-serde` feature
# supplies the serde impls that `rmp_serde::to_vec(&value)` relies on.
rmpv = { version = "1.0", features = ["with-serde"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" # used for comparison
💻 基础使用 / Basic Usage
序列化 / 反序列化
use serde::{Deserialize, Serialize};
/// Sample record used by the basic serialize/deserialize example.
#[derive(Debug, Serialize, Deserialize)]
struct User {
id: u32,
name: String,
scores: Vec<u32>,
active: bool,
}
/// Round-trips a `User` through MessagePack, printing the encoded size and the decoded value.
fn main() {
// ========== Serialize ==========
let user = User {
id: 1001,
name: "Alice".to_string(),
scores: vec![95, 87, 92],
active: true,
};
let data = rmp_serde::to_vec(&user).unwrap();
// `to_vec` writes structs positionally (as an array, without field names), so this
// value takes 15 bytes; `to_vec_named` would add the field names (37 bytes).
println!("编码大小: {} bytes", data.len()); // 15 bytes
// ========== Deserialize ==========
let decoded: User = rmp_serde::from_slice(&data).unwrap();
println!("解码结果: {:?}", decoded);
// User { id: 1001, name: "Alice", scores: [95, 87, 92], active: true }
}
使用 HashMap
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
fn main() {
    // Build the map up front from a fixed list of entries.
    let fields = HashMap::from([("id", 1001), ("name_len", 5)]);

    // Encode the borrowed-key map ...
    let bytes = rmp_serde::to_vec(&fields).unwrap();

    // ... and decode it back into a map with owned keys.
    let round_tripped = rmp_serde::from_slice::<HashMap<String, i32>>(&bytes).unwrap();
    println!("{:?}", round_tripped);
}
使用 Value 类型(动态)
use rmpv::Value;
fn main() {
    // Assemble the nested array first so the map literal stays flat.
    let scores = Value::Array(vec![
        Value::Integer(95.into()),
        Value::Integer(87.into()),
        Value::Integer(92.into()),
    ]);

    // Dynamic map value: id, name and the scores array, in that order.
    let record = Value::Map(vec![
        (Value::String("id".into()), Value::Integer(1001.into())),
        (Value::String("name".into()), Value::String("Alice".into())),
        (Value::String("scores".into()), scores),
    ]);

    // Encode the dynamic value, then decode it back into a `Value`.
    let bytes = rmp_serde::to_vec(&record).unwrap();
    let restored = rmp_serde::from_slice::<Value>(&bytes).unwrap();
    println!("{:?}", restored);
}
📖 serde 属性详解 / serde Attributes
字段重命名
use serde::{Deserialize, Serialize};
/// Product record whose wire field names differ from the Rust field names.
// NOTE(review): field names only reach the wire when structs are encoded as maps
// (e.g. `rmp_serde::to_vec_named`); the positional `to_vec` form carries no names.
#[derive(Debug, Serialize, Deserialize)]
struct Product {
#[serde(rename = "product_id")]
id: u32,
#[serde(rename = "product_name")]
name: String,
#[serde(rename = "unit_price")]
price: f64,
// `rename` sets the name "quantity" for both serialization and deserialization;
// `alias` additionally accepts "qty" when deserializing.
#[serde(alias = "qty", rename = "quantity")]
stock: u32,
}
可选字段
/// Connection settings demonstrating optional fields.
#[derive(Debug, Serialize, Deserialize)]
struct Config {
    host: String,
    port: u16,
    // Option type: `None` is serialized as nil.
    timeout: Option<u64>,
    // Omitted from the output when `None`. `default` is required so that decoding
    // still succeeds when the value is absent: in the positional (array) encoding
    // produced by `rmp_serde::to_vec`, a missing trailing element is otherwise an
    // "invalid length" error. Keep skipped fields last, or encode with
    // `rmp_serde::to_vec_named` so fields are matched by name.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    password: Option<String>,
}
默认值
/// Settings whose fields fall back to a default when missing from the input.
#[derive(Debug, Serialize, Deserialize)]
struct Settings {
#[serde(default)]
debug: bool, // defaults to false
#[serde(default = "default_port")]
port: u16, // defaults to 8080 (see `default_port`)
#[serde(default)]
tags: Vec<String>, // defaults to an empty Vec
}
/// Supplies the fallback port used when the `port` field is absent from the input.
fn default_port() -> u16 {
    const FALLBACK_PORT: u16 = 8080;
    FALLBACK_PORT
}
跳过字段
/// User record demonstrating the different ways to skip fields.
///
/// The skips are asymmetric (some fields are written but not read, and vice versa),
/// so the positional array encoding of `rmp_serde::to_vec` cannot round-trip this
/// type: element positions would no longer line up. Encode it with
/// `rmp_serde::to_vec_named` so fields are matched by name.
#[derive(Debug, Serialize, Deserialize)]
struct User {
    id: u32,
    name: String,
    // Skipped in both directions; filled with `Default::default()` when decoding.
    #[serde(skip)]
    internal_id: u64,
    // Never written. `default` lets decoding succeed when the field is absent,
    // which is always the case for data produced by this same type.
    #[serde(skip_serializing, default)]
    password: String,
    // Written but never read; filled with `Default::default()` when decoding.
    #[serde(skip_deserializing)]
    created_at: u64,
    // Omitted when zero. `default` restores the zero when the field is absent.
    #[serde(default, skip_serializing_if = "is_zero")]
    score: u32,
}
/// Predicate for `skip_serializing_if`: true exactly when the value is zero.
fn is_zero(val: &u32) -> bool {
    matches!(*val, 0)
}
枚举序列化
use serde::{Deserialize, Serialize};
// Default representation: externally tagged (the variant name wraps the content)
#[derive(Debug, Serialize, Deserialize)]
enum Shape {
Circle { radius: f64 },
Rectangle { width: f64, height: f64 },
}
// Internally tagged: the variant name is stored under the "type" key next to the fields
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
enum Event {
Login { user: String },
Logout { user: String },
Message { from: String, to: String, body: String },
}
// Adjacently tagged: the variant name goes under "type" and its content under "data"
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", content = "data")]
enum Command {
Ping,
Echo(String),
Move { x: f64, y: f64 },
}
// Untagged: no variant name is written, only the content itself
// NOTE(review): this local `Value` shadows nothing here, but would clash with
// `rmpv::Value` if both were imported into the same module.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
enum Value {
Int(i64),
Float(f64),
Text(String),
}
序列化为数组(元组结构体)
use serde::{Deserialize, Serialize};
// Named-field struct: encoded as a map only when field names are requested
// (`rmp_serde::to_vec_named`). Plain `rmp_serde::to_vec` writes named structs
// positionally, i.e. also as an array.
#[derive(Debug, Serialize, Deserialize)]
struct Point {
    x: f64,
    y: f64,
}

// Tuple struct: always encoded as an array, so no field names ever reach the wire.
// (`#[serde(rename)]` on tuple-struct fields does not turn it into a map.)
#[derive(Debug, Serialize, Deserialize)]
struct CompactPoint(f64, f64);

fn main() {
    let p = Point { x: 1.0, y: 2.0 };
    let named = rmp_serde::to_vec_named(&p).unwrap();
    // Point via to_vec_named: {"x": 1.0, "y": 2.0} (map form, 23 bytes)

    let cp = CompactPoint(1.0, 2.0);
    let compact = rmp_serde::to_vec(&cp).unwrap();
    // CompactPoint: [1.0, 2.0] (array form, 19 bytes, more compact)

    println!("named: {} bytes, compact: {} bytes", named.len(), compact.len());
}
💻 自定义序列化 / Custom Serialization
手动实现 Serialize / Deserialize(字段级的 #[serde(with)] 用法见下一小节)
use serde::{Deserialize, Serialize, Serializer, Deserializer};
use serde::ser::SerializeStruct;
// Custom timestamp serialization: a newtype around a u64 with hand-written serde impls
#[derive(Debug)]
struct Timestamp(u64);
impl Serialize for Timestamp {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
// 直接序列化为 u64
serializer.serialize_u64(self.0)
}
}
impl<'de> Deserialize<'de> for Timestamp {
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
let ts = u64::deserialize(deserializer)?;
Ok(Timestamp(ts))
}
}
/// Event record whose `created` field uses the hand-written `Timestamp` impls.
#[derive(Debug, Serialize, Deserialize)]
struct Event {
id: u32,
name: String,
created: Timestamp,
}
自定义字段序列化
use serde::{Deserialize, Serialize};
// Serializes a floating-point price as an integer number of cents.
mod cents {
    use serde::{self, Deserialize, Deserializer, Serializer};

    /// Writes `price` as a whole number of cents, rounded to the nearest cent.
    ///
    /// A signed integer is used so negative amounts (refunds, discounts) survive;
    /// the previous `as u64` cast silently turned them, and NaN, into 0.
    ///
    /// # Errors
    /// Returns a serializer error when `price` is NaN or infinite.
    pub fn serialize<S>(price: &f64, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let cents = (*price * 100.0).round();
        if !cents.is_finite() {
            return Err(serde::ser::Error::custom("price must be a finite number"));
        }
        // Float-to-int `as` saturates at the i64 bounds for out-of-range values.
        serializer.serialize_i64(cents as i64)
    }

    /// Reads a whole number of cents and converts it back to a floating-point price.
    pub fn deserialize<'de, D>(deserializer: D) -> Result<f64, D::Error>
    where
        D: Deserializer<'de>,
    {
        let cents = i64::deserialize(deserializer)?;
        Ok(cents as f64 / 100.0)
    }
}
/// Product whose price is stored on the wire through the `cents` module.
#[derive(Debug, Serialize, Deserialize)]
struct Product {
id: u32,
name: String,
#[serde(with = "cents")]
price: f64, // 99.99 → 9999
}
使用 serde_with 辅助库
[dependencies]
# The `base64` feature enables `serde_with::base64::Base64`, used in the example below.
serde_with = { version = "3.0", features = ["base64"] }
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr, DurationSeconds};
/// Configuration whose fields are converted by `serde_with` adapters.
#[serde_as]
#[derive(Debug, Serialize, Deserialize)]
struct Config {
// String <-> integer conversion
#[serde_as(as = "DisplayFromStr")]
port: u16, // "8080" <-> 8080
// Duration stored as a whole number of seconds
#[serde_as(as = "DurationSeconds<u64>")]
timeout: std::time::Duration,
// Base64 encoding
// NOTE(review): presumably needs serde_with's `base64` cargo feature — confirm in Cargo.toml
#[serde_as(as = "serde_with::base64::Base64")]
data: Vec<u8>,
}
💻 零拷贝技术 / Zero-Copy Techniques
使用 Cow 避免复制
use std::borrow::Cow;
use serde::{Deserialize, Serialize};
/// Message whose text and byte fields may borrow from the input buffer.
#[derive(Debug, Serialize, Deserialize)]
struct Message<'a> {
id: u32,
// With `borrow`, the Cow references the input instead of copying when possible
#[serde(borrow)]
topic: Cow<'a, str>,
// NOTE(review): without the `serde_bytes` crate, serde writes byte slices as a
// sequence of integers rather than MessagePack `bin`; confirm the sender emits
// `bin` for this field, otherwise it presumably cannot be borrowed here.
#[serde(borrow)]
payload: Cow<'a, [u8]>,
}
/// Decodes a `Message` from a byte buffer (the buffer contents are a placeholder).
fn main() {
let data = vec![/* MessagePack binary data */];
// Deserialize from a byte slice so string fields can avoid allocating
let msg: Message = rmp_serde::from_slice(&data).unwrap();
// topic and payload may reference the bytes in `data` directly
// instead of allocating a new String/Vec<u8>
}
使用 &[u8] 切片
use serde::Deserialize;
/// Packet whose body is a borrowed view into the input buffer.
#[derive(Debug, Deserialize)]
struct Packet<'a> {
id: u32,
#[serde(borrow)]
body: &'a [u8], // zero-copy reference into the original data
}
/// Decodes a `Packet` from `data` and prints the length of its body.
///
/// Panics if `data` cannot be decoded as a `Packet`.
fn process_packet(data: &[u8]) {
    let packet = rmp_serde::from_slice::<Packet>(data).unwrap();
    // `packet.body` points into `data`, so no allocation happens for the body.
    let body_len = packet.body.len();
    println!("Body 长度: {}", body_len);
}
避免不必要的 String 分配
use serde::Deserialize;
// ❌ Allocates a String for every text field on each deserialization
#[derive(Deserialize)]
struct UserOwned {
id: u32,
name: String, // allocates
email: String, // allocates
}
// ✅ Borrows the strings from the original input where possible.
// `Cow` is spelled with its full path because this snippet only imports
// `serde::Deserialize`; the bare name would not resolve.
#[derive(Deserialize)]
struct UserBorrowed<'a> {
    id: u32,
    #[serde(borrow)]
    name: std::borrow::Cow<'a, str>, // may be zero-copy
    #[serde(borrow)]
    email: std::borrow::Cow<'a, str>, // may be zero-copy
}
💻 性能优化 / Performance Optimization
基准测试
use criterion::{criterion_group, criterion_main, Criterion};
use serde::{Deserialize, Serialize};
/// Payload used by the serialization benchmarks: scalars, a string, a float vector and a string map.
#[derive(Debug, Serialize, Deserialize)]
struct BenchmarkData {
id: u64,
name: String,
values: Vec<f64>,
active: bool,
metadata: std::collections::HashMap<String, String>,
}
/// Builds the fixed `BenchmarkData` instance shared by all benchmarks.
fn create_test_data() -> BenchmarkData {
    // Two fixed key/value pairs, converted to owned strings.
    let metadata = [("key1", "value1"), ("key2", "value2")]
        .iter()
        .map(|&(key, value)| (key.to_string(), value.to_string()))
        .collect();

    BenchmarkData {
        id: 12345,
        name: String::from("benchmark test"),
        values: vec![1.1, 2.2, 3.3, 4.4, 5.5],
        active: true,
        metadata,
    }
}
/// Benchmarks encoding the shared test payload as MessagePack and as JSON.
fn benchmark_serialize(c: &mut Criterion) {
    let data = create_test_data();

    // The input goes through `black_box` so the optimizer cannot treat it as a
    // known constant and specialize or hoist the work being measured.
    c.bench_function("msgpack_serialize", |b| {
        b.iter(|| rmp_serde::to_vec(std::hint::black_box(&data)).unwrap())
    });
    c.bench_function("json_serialize", |b| {
        b.iter(|| serde_json::to_vec(std::hint::black_box(&data)).unwrap())
    });
}
/// Benchmarks decoding the shared test payload from MessagePack and from JSON.
fn benchmark_deserialize(c: &mut Criterion) {
    let data = create_test_data();
    // Encode once outside the timed closures so only decoding is measured.
    let msgpack_data = rmp_serde::to_vec(&data).unwrap();
    let json_data = serde_json::to_vec(&data).unwrap();

    // The input bytes go through `black_box` so the optimizer cannot treat them
    // as a known constant and specialize the work being measured.
    c.bench_function("msgpack_deserialize", |b| {
        b.iter(|| {
            rmp_serde::from_slice::<BenchmarkData>(std::hint::black_box(&msgpack_data)).unwrap()
        })
    });
    c.bench_function("json_deserialize", |b| {
        b.iter(|| {
            serde_json::from_slice::<BenchmarkData>(std::hint::black_box(&json_data)).unwrap()
        })
    });
}
// Register both benchmark functions under the `benches` group and hand it to `criterion_main!`.
criterion_group!(benches, benchmark_serialize, benchmark_deserialize);
criterion_main!(benches);
使用 Writer 避免中间 Vec
use std::io::Write;
// ❌ Not ideal: allocates an intermediate Vec
/// Encodes `value` into a freshly allocated byte vector; panics if encoding fails.
fn serialize_to_vec<T>(value: &T) -> Vec<u8>
where
    T: serde::Serialize,
{
    let encoded = rmp_serde::to_vec(value);
    encoded.unwrap()
}
// ✅ Better: write straight into the destination
/// Encodes `value` as MessagePack directly into `writer`, then flushes it.
///
/// `rmp_serde::encode::write` takes the writer by mutable reference, so the owned
/// writer is borrowed as `&mut writer` here (passing it by value does not compile).
/// The explicit flush surfaces write errors from buffered writers, which would
/// otherwise be silently dropped when a `BufWriter` goes out of scope.
///
/// Panics if encoding or flushing fails.
fn serialize_to_writer<W: Write, T: serde::Serialize>(mut writer: W, value: &T) {
    rmp_serde::encode::write(&mut writer, value).unwrap();
    writer.flush().unwrap();
}
// Usage example
/// Writes the same vector to a file and to a TCP connection via `serialize_to_writer`.
fn main() {
let data = vec![1, 2, 3, 4, 5];
// Write to a file (buffered)
let file = std::fs::File::create("data.msgpack").unwrap();
let writer = std::io::BufWriter::new(file);
serialize_to_writer(writer, &data);
// Write to a TCP stream; `connect` fails (and `unwrap` panics) if nothing accepts on this address
let stream = std::net::TcpStream::connect("127.0.0.1:8080").unwrap();
serialize_to_writer(stream, &data);
}
预分配缓冲区
use rmp_serde::Serializer;
use serde::Serialize;
/// Encodes `value` into a vector pre-allocated with `capacity` bytes.
///
/// Panics if encoding fails.
fn serialize_with_capacity<T: Serialize>(value: &T, capacity: usize) -> Vec<u8> {
    let mut out = Vec::with_capacity(capacity);
    // The serializer only borrows `out` for the duration of this statement.
    value.serialize(&mut Serializer::new(&mut out)).unwrap();
    out
}
fn main() {
    let values = vec![1u32; 1000];

    // Rough size estimate used as the buffer's initial capacity:
    // five bytes per element plus ten bytes of slack.
    let capacity_hint = values.len() * 5 + 10;

    let bytes = serialize_with_capacity(&values, capacity_hint);
    println!("编码大小: {} bytes", bytes.len());
}
💻 动态类型处理 / Dynamic Typing
使用 rmpv::Value
use rmpv::Value;
use std::collections::HashMap;
fn main() {
    // Build the nested pieces first, then assemble the top-level map.
    let tags = Value::Array(vec![
        Value::String("admin".into()),
        Value::String("dev".into()),
    ]);
    let address = Value::Map(vec![
        (Value::String("city".into()), Value::String("北京".into())),
        (Value::String("zip".into()), Value::String("100080".into())),
    ]);
    let user = Value::Map(vec![
        (Value::String("id".into()), Value::Integer(1001.into())),
        (Value::String("name".into()), Value::String("Alice".into())),
        (Value::String("tags".into()), tags),
        (Value::String("address".into()), address),
    ]);

    // Encode, then decode back into a dynamic value.
    let bytes = rmp_serde::to_vec(&user).unwrap();
    let restored = rmp_serde::from_slice::<Value>(&bytes).unwrap();

    // Walk the top-level entries when the decoded value is a map.
    if let Value::Map(entries) = &restored {
        entries
            .iter()
            .for_each(|(key, val)| println!("{:?}: {:?}", key, val));
    }
}
Value 与 serde_json::Value 转换
use rmpv::Value as MsgPackValue;
use serde_json::Value as JsonValue;
fn msgpack_to_json(value: &MsgPackValue) -> JsonValue {
match value {
MsgPackValue::Nil => JsonValue::Null,
MsgPackValue::Boolean(b) => JsonValue::Bool(*b),
MsgPackValue::Integer(i) => {
if let Some(n) = i.as_i64() {
JsonValue::Number(n.into())
} else if let Some(n) = i.as_u64() {
JsonValue::Number(n.into())
} else {
JsonValue::Null
}
}
MsgPackValue::F32(f) => {
serde_json::Number::from_f64(*f as f64)
.map(JsonValue::Number)
.unwrap_or(JsonValue::Null)
}
MsgPackValue::F64(f) => {
serde_json::Number::from_f64(*f)
.map(JsonValue::Number)
.unwrap_or(JsonValue::Null)
}
MsgPackValue::String(s) => JsonValue::String(s.to_string()),
MsgPackValue::Binary(b) => {
// Base64 编码
use base64::Engine;
JsonValue::String(base64::engine::general_purpose::STANDARD.encode(b))
}
MsgPackValue::Array(arr) => {
JsonValue::Array(arr.iter().map(msgpack_to_json).collect())
}
MsgPackValue::Map(map) => {
let obj: serde_json::Map<String, JsonValue> = map
.iter()
.filter_map(|(k, v)| {
let key = match k {
MsgPackValue::String(s) => s.to_string(),
other => format!("{:?}", other),
};
Some((key, msgpack_to_json(v)))
})
.collect();
JsonValue::Object(obj)
}
_ => JsonValue::Null,
}
}
⚠️ 注意事项 / Pitfalls
1. 整数类型选择
use serde::{Deserialize, Serialize};
/// Illustrates the trade-offs between integer field widths.
#[derive(Serialize, Deserialize)]
struct Data {
// ❌ u8 can only hold 0-255
small: u8,
// ✅ u32 or i64 is recommended
id: u32,
// ⚠️ u64 may not be compatible with some languages
big_id: u64,
}
2. 浮点数精度
// f32 itself has limited precision (roughly 7 significant decimal digits)
let val: f32 = 3.14159;
let data = rmp_serde::to_vec(&val).unwrap();
let decoded: f32 = rmp_serde::from_slice(&data).unwrap();
// The f32 is stored as-is, so the round-trip returns the same value; any
// deviation from the decimal literal comes from f32, not from the encoding.
println!("{}", decoded); // 3.14159
// Use f64 for higher precision
let val: f64 = 3.14159265358979;
3. &str 与 String
use serde::Deserialize;
// ⚠️ `&str` works with `from_slice` only because it borrows from the input: `data`
// must outlive `s`. Reader-based decoding (`rmp_serde::from_read`) needs owned types.
let s: &str = rmp_serde::from_slice(&data)?; // borrows from `data`, no allocation
// ✅ Use String when the value must outlive the input buffer
let s: String = rmp_serde::from_slice(&data)?;
// ⚠️ A bare `Cow<str>` target always deserializes as `Cow::Owned` (it allocates);
// borrowing needs a struct field annotated with `#[serde(borrow)]`.
use std::borrow::Cow;
let s: Cow<str> = rmp_serde::from_slice(&data)?;
4. 枚举编码差异
// Different serde tagging modes produce different MessagePack encodings
// (shapes below are in JSON-like notation, assuming structs are encoded as maps)
#[derive(Serialize)]
enum E {
A(i32),
B { x: i32 },
}
// Externally tagged (default): {"A": 1} or {"B": {"x": 1}}
// Internally tagged: {"type": "B", "x": 1}; `A(i32)` cannot be represented,
// because a bare integer has nowhere to carry the tag
// Adjacently tagged: {"type": "A", "data": 1}
// Untagged: 1 or {"x": 1}
5. HashMap 键顺序
use std::collections::HashMap;
// HashMap 不保证键顺序
// 相同数据可能产生不同的 MessagePack 字节序列
// 如果需要确定性输出,使用 BTreeMap
use std::collections::BTreeMap;
🔗 扩展阅读 / Further Reading
| 资源 | 链接 |
|---|---|
| rmp-serde 文档 | https://docs.rs/rmp-serde/ |
| rmp 文档 | https://docs.rs/rmp/ |
| serde 文档 | https://serde.rs/ |
| Rust 性能优化 | https://nnethercote.github.io/perf-book/ |
| 零拷贝技术 | https://doc.rust-lang.org/nomicon/ |
📝 下一章 / Next: 第 8 章 - 流式处理 / Streaming — 使用 MessagePack 进行流式消息处理和网络协议集成。