OCaml 教程 / OCaml 哈希表与数组
OCaml 哈希表与数组
本文全面介绍 OCaml 中的可变数据结构:Array(数组)、Hashtbl(哈希表)、Queue(队列)、Stack(栈),以及惰性序列 Seq。每种数据结构都配有性能分析和使用场景。
Array 模块基础
OCaml 的数组是固定大小、可变的同类型元素序列。
(* 创建数组 *)
let arr1 = [| 1; 2; 3; 4; 5 |] (* 字面量 *)
let arr2 = Array.make 5 0 (* 5 个 0 *)
let arr3 = Array.init 5 (fun i -> i * i) (* [0; 1; 4; 9; 16] *)
(* 访问和修改 *)
let () =
arr2.(0) <- 10; (* 修改第一个元素 *)
arr2.(1) <- 20;
Printf.printf "arr2[0] = %d\n" arr2.(0);
Printf.printf "arr1 length = %d\n" (Array.length arr1);
Printf.printf "arr3[3] = %d\n" arr3.(3)
Array 常用函数
| 函数 | 类型 | 说明 |
|---|---|---|
make | int -> 'a -> 'a array | 创建定长数组 |
init | int -> (int -> 'a) -> 'a array | 用函数初始化 |
length | 'a array -> int | 数组长度 |
get / .(i) | 'a array -> int -> 'a | 获取元素 |
set / .(i) <- | 'a array -> int -> 'a -> unit | 设置元素 |
sub | 'a array -> int -> int -> 'a array | 子数组 |
copy | 'a array -> 'a array | 浅拷贝 |
fill | 'a array -> int -> int -> 'a -> unit | 填充 |
blit | 'a array -> int -> 'a array -> int -> int -> unit | 批量复制 |
to_list | 'a array -> 'a list | 转为列表 |
of_list | 'a list -> 'a array | 从列表创建 |
map | ('a -> 'b) -> 'a array -> 'b array | 映射 |
mapi | (int -> 'a -> 'b) -> 'a array -> 'b array | 带索引映射 |
iter | ('a -> unit) -> 'a array -> unit | 遍历 |
iteri | (int -> 'a -> unit) -> 'a array -> unit | 带索引遍历 |
fold_left | ('b -> 'a -> 'b) -> 'b -> 'a array -> 'b | 左折叠 |
fold_right | ('a -> 'b -> 'b) -> 'a array -> 'b -> 'b | 右折叠 |
sort | ('a -> 'a -> int) -> 'a array -> unit | 原地排序 |
stable_sort | ('a -> 'a -> int) -> 'a array -> unit | 稳定排序 |
数组切片与子数组
let () =
let arr = [| 0; 1; 2; 3; 4; 5; 6; 7; 8; 9 |] in
let sub = Array.sub arr 2 5 in (* 从索引 2 取 5 个 *)
Array.iter (Printf.printf "%d ") sub;
print_newline ()
(* 输出: 2 3 4 5 6 *)
(* Array.blit: 在数组间复制 *)
let () =
let src = [| 10; 20; 30 |] in
let dst = Array.make 7 0 in
Array.blit src 0 dst 2 3;
Array.iter (Printf.printf "%d ") dst;
print_newline ()
(* 输出: 0 0 10 20 30 0 0 *)
可变数组与不可变
(* 可变修改 *)
let () =
let arr = [| 1; 2; 3 |] in
arr.(1) <- 20; (* 原地修改 *)
Printf.printf "arr[1] = %d\n" arr.(1)
(* 不可变风格:生成新数组 *)
let map_in_place f arr =
Array.iteri (fun i x -> arr.(i) <- f x) arr
let () =
let arr = [| 1; 2; 3; 4; 5 |] in
map_in_place (fun x -> x * 2) arr;
Array.iter (Printf.printf "%d ") arr;
print_newline ()
(* 输出: 2 4 6 8 10 *)
(* 使用 Array.map 创建新数组(不修改原数组) *)
let () =
let original = [| 1; 2; 3 |] in
let doubled = Array.map (fun x -> x * 2) original in
Printf.printf "Original: ";
Array.iter (Printf.printf "%d ") original;
Printf.printf "\nDoubled: ";
Array.iter (Printf.printf "%d ") doubled;
print_newline ()
⚠️ 注意:数组是可变的,多个引用指向同一数组时,修改会影响所有引用。如果需要不可变语义,使用
Array.map创建新数组或使用list。
Hashtbl 模块基础
Hashtbl 实现了哈希表(字典),键值对存储,O(1) 平均查找。
(* 创建哈希表 *)
let tbl = Hashtbl.create 16 (* 初始容量 *)
(* 添加元素 *)
let () =
Hashtbl.add tbl "name" "Alice";
Hashtbl.add tbl "age" "30";
Hashtbl.add tbl "city" "Beijing";
(* 查找元素 *)
Printf.printf "Name: %s\n" (Hashtbl.find tbl "name");
(* 安全查找 *)
match Hashtbl.find_opt tbl "email" with
| Some email -> Printf.printf "Email: %s\n" email
| None -> Printf.printf "Email not found\n";
(* 遍历 *)
Hashtbl.iter (fun k v ->
Printf.printf " %s = %s\n" k v
) tbl
Hashtbl 常用函数
| 函数 | 类型 | 说明 |
|---|---|---|
create | int -> ('a, 'b) t | 创建(初始容量) |
add | ('a, 'b) t -> 'a -> 'b -> unit | 添加(允许重复键) |
replace | ('a, 'b) t -> 'a -> 'b -> unit | 替换或添加 |
find | ('a, 'b) t -> 'a -> 'b | 查找(不存在抛异常) |
find_opt | ('a, 'b) t -> 'a -> 'b option | 安全查找 |
find_all | ('a, 'b) t -> 'a -> 'b list | 查找所有(同键多值) |
mem | ('a, 'b) t -> 'a -> bool | 键是否存在 |
remove | ('a, 'b) t -> 'a -> unit | 删除键 |
length | ('a, 'b) t -> int | 键值对数量 |
iter | ('a -> 'b -> unit) -> ('a, 'b) t -> unit | 遍历 |
fold | ('a -> 'b -> 'c -> 'c) -> ('a, 'b) t -> 'c -> 'c | 折叠 |
map | ('a -> 'b -> 'c) -> ('a, 'b) t -> ('a, 'c) t | 映射值 |
filter_map_inplace | ('a -> 'b -> 'b option) -> ('a, 'b) t -> unit | 就地过滤 |
to_seq | ('a, 'b) t -> ('a * 'b) Seq.t | 转为惰性序列 |
of_seq | ('a * 'b) Seq.t -> ('a, 'b) t | 从序列创建 |
add vs replace
let () =
let tbl = Hashtbl.create 16 in
Hashtbl.add tbl "key" "first";
Hashtbl.add tbl "key" "second";
Printf.printf "All values for 'key':\n";
Hashtbl.find_all tbl "key" |> List.iter (Printf.printf " %s\n");
(* 两个值都存在 *)
let tbl2 = Hashtbl.create 16 in
Hashtbl.replace tbl2 "key" "first";
Hashtbl.replace tbl2 "key" "second";
Printf.printf "After replace: %s\n" (Hashtbl.find tbl2 "key")
(* 只有 "second" *)
💡 提示:
add允许同键多值(用find_all查找所有),replace覆盖旧值。大多数场景应使用replace。
哈希函数
(* 默认使用 polymorphic hash *)
let tbl : (string, int) Hashtbl.t = Hashtbl.create 16
(* 自定义哈希函数 *)
module StringHash = Hashtbl.Make(struct
type t = string
let equal = String.equal
let hash s =
let h = ref 0 in
String.iter (fun c ->
h := (!h * 31 + Char.code c) land max_int
) s;
!h
end)
let () =
let tbl = StringHash.create 16 in
StringHash.add tbl "hello" 1;
StringHash.add tbl "world" 2;
Printf.printf "hello: %d\n" (StringHash.find tbl "hello")
哈希表性能
| 操作 | 平均时间复杂度 | 最坏情况 |
|---|---|---|
插入 add | O(1) | O(n) |
查找 find | O(1) | O(n) |
删除 remove | O(1) | O(n) |
遍历 iter | O(n) | O(n) |
⚠️ 注意:如果哈希函数分布不均匀,所有操作可能退化到 O(n)。标准库的默认哈希函数对大多数场景足够好。
Queue 模块
FIFO(先进先出)队列。
let () =
let q = Queue.create () in
Queue.push 1 q;
Queue.push 2 q;
Queue.push 3 q;
Printf.printf "Length: %d\n" (Queue.length q);
Printf.printf "Pop: %d\n" (Queue.pop q); (* 1 *)
Printf.printf "Pop: %d\n" (Queue.pop q); (* 2 *)
Printf.printf "Peek: %d\n" (Queue.peek q); (* 3 *)
Printf.printf "Length: %d\n" (Queue.length q)
(* Queue 的常用函数 *)
(*
Queue.create : unit -> 'a t
Queue.push : 'a -> 'a t -> unit
Queue.pop : 'a t -> 'a (* 抛出 Queue.Empty *)
Queue.peek : 'a t -> 'a (* 抛出 Queue.Empty *)
Queue.pop_opt : 'a t -> 'a option
Queue.peek_opt : 'a t -> 'a option
Queue.length : 'a t -> int
Queue.is_empty : 'a t -> bool
Queue.iter : ('a -> unit) -> 'a t -> unit
Queue.fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b
Queue.clear : 'a t -> unit
Queue.transfer : 'a t -> 'a t -> unit
*)
Stack 模块
LIFO(后进先出)栈。
let () =
let s = Stack.create () in
Stack.push 1 s;
Stack.push 2 s;
Stack.push 3 s;
Printf.printf "Length: %d\n" (Stack.length s);
Printf.printf "Pop: %d\n" (Stack.pop s); (* 3 *)
Printf.printf "Pop: %d\n" (Stack.pop s); (* 2 *)
Printf.printf "Top: %d\n" (Stack.top s); (* 1 *)
Printf.printf "Length: %d\n" (Stack.length s)
(* 用栈实现括号匹配 *)
let check_parens s =
let stack = Stack.create () in
let is_match open_c close_c =
match open_c, close_c with
| '(', ')' | '[', ']' | '{', '}' -> true
| _ -> false
in
try
String.iter (fun c ->
match c with
| '(' | '[' | '{' -> Stack.push c stack
| ')' | ']' | '}' ->
if Stack.is_empty stack || not (is_match (Stack.pop stack) c) then
raise Exit
| _ -> ()
) s;
Stack.is_empty stack
with
| Exit -> false
let () =
Printf.printf "(()): %b\n" (check_parens "(())");
Printf.printf "([{}]): %b\n" (check_parens "([{}])");
Printf.printf "((()): %b\n" (check_parens "((())")
Seq 惰性序列
Seq 是 OCaml 4.07+ 引入的惰性序列,按需计算元素。
(* 创建序列 *)
let naturals () =
let rec aux n () = Seq.Cons (n, aux (n + 1)) in
aux 0
let () =
naturals ()
|> Seq.take 10
|> Seq.iter (Printf.printf "%d ");
print_newline ()
(* 输出: 0 1 2 3 4 5 6 7 8 9 *)
(* 序列操作是惰性的 *)
let squares =
naturals ()
|> Seq.map (fun x -> x * x)
|> Seq.filter (fun x -> x mod 2 = 0)
let () =
squares
|> Seq.take 5
|> Seq.iter (Printf.printf "%d ");
print_newline ()
(* 输出: 0 4 16 36 64 *)
(* 无限斐波那契序列 *)
let fib () =
let rec aux a b () = Seq.Cons (a, aux b (a + b)) in
aux 0 1
let () =
fib ()
|> Seq.take 15
|> Seq.iter (Printf.printf "%d ");
print_newline ()
Seq 常用函数
| 函数 | 说明 |
|---|---|
Seq.map | 映射每个元素 |
Seq.filter | 过滤元素 |
Seq.take | 取前 n 个 |
Seq.drop | 跳过前 n 个 |
Seq.fold_left | 累积折叠 |
Seq.iter | 遍历执行 |
Seq.unfold | 展开生成序列 |
Seq.append | 连接两个序列 |
Seq.concat | 连接序列的序列 |
Seq.for_all | 全称检查 |
Seq.exists | 存在检查 |
数据结构选择指南
| 需求 | 推荐数据结构 | 原因 |
|---|---|---|
| 索引访问 | Array | O(1) 随机访问 |
| 键值查找 | Hashtbl | O(1) 平均查找 |
| 有序映射 | Map | O(log n) 有序遍历 |
| FIFO 队列 | Queue | O(1) 入队出队 |
| LIFO 栈 | Stack | O(1) 压栈弹栈 |
| 不可变列表 | list | 不可变,安全 |
| 惰性计算 | Seq | 按需计算,节省内存 |
| 大数值数组 | Bigarray | 与 C 互操作 |
Bigarray 简介
Bigarray 用于处理大型数值数组,内存由 C 管理,适合科学计算和与 C 库互操作。
(*
open Bigarray
(* 创建一维 Bigarray *)
let arr = Array1.create int c_layout 5 in
Array1.set arr 0 10;
Array1.set arr 1 20;
Array1.set arr 2 30;
Printf.printf "arr[1] = %d\n" (Array1.get arr 1);
Printf.printf "Length: %d\n" (Array1.dim arr)
(* 创建二维 Bigarray *)
let matrix = Array2.create float64 c_layout 3 4 in
Array2.set matrix 1 2 3.14;
Printf.printf "matrix[1][2] = %f\n" (Array2.get matrix 1 2)
*)
实际业务场景:词频统计
let word_freq text =
let tbl = Hashtbl.create 64 in
let words = String.split_on_char ' ' text in
List.iter (fun word ->
let word = String.lowercase_ascii (String.trim word) in
if word <> "" then begin
let count = Hashtbl.find_opt tbl word
|> Option.value ~default:0 in
Hashtbl.replace tbl word (count + 1)
end
) words;
(* 转为列表并排序 *)
Hashtbl.fold (fun word count acc -> (word, count) :: acc) tbl []
|> List.sort (fun (_, a) (_, b) -> compare b a)
let () =
let text = "the quick brown fox jumps over the lazy dog the fox the dog" in
let freq = word_freq text in
Printf.printf "Word frequencies:\n";
List.iter (fun (word, count) ->
Printf.printf " %-10s %d\n" word count
) (List.take 5 freq)