强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧.
文档目录

OCaml 教程 / OCaml 哈希表与数组

OCaml 哈希表与数组

本文全面介绍 OCaml 中的可变数据结构:Array(数组)、Hashtbl(哈希表)、Queue(队列)、Stack(栈),以及惰性序列 Seq。每种数据结构都配有性能分析和使用场景。

Array 模块基础

OCaml 的数组是固定大小、可变的同类型元素序列。

(* 创建数组 *)
let arr1 = [| 1; 2; 3; 4; 5 |]       (* 字面量 *)
let arr2 = Array.make 5 0             (* 5 个 0 *)
let arr3 = Array.init 5 (fun i -> i * i)  (* [0; 1; 4; 9; 16] *)

(* 访问和修改 *)
let () =
  arr2.(0) <- 10;     (* 修改第一个元素 *)
  arr2.(1) <- 20;
  Printf.printf "arr2[0] = %d\n" arr2.(0);
  Printf.printf "arr1 length = %d\n" (Array.length arr1);
  Printf.printf "arr3[3] = %d\n" arr3.(3)

Array 常用函数

函数类型说明
makeint -> 'a -> 'a array创建定长数组
initint -> (int -> 'a) -> 'a array用函数初始化
length'a array -> int数组长度
get / .(i)'a array -> int -> 'a获取元素
set / .(i) <-'a array -> int -> 'a -> unit设置元素
sub'a array -> int -> int -> 'a array子数组
copy'a array -> 'a array浅拷贝
fill'a array -> int -> int -> 'a -> unit填充
blit'a array -> int -> 'a array -> int -> int -> unit批量复制
to_list'a array -> 'a list转为列表
of_list'a list -> 'a array从列表创建
map('a -> 'b) -> 'a array -> 'b array映射
mapi(int -> 'a -> 'b) -> 'a array -> 'b array带索引映射
iter('a -> unit) -> 'a array -> unit遍历
iteri(int -> 'a -> unit) -> 'a array -> unit带索引遍历
fold_left('b -> 'a -> 'b) -> 'b -> 'a array -> 'b左折叠
fold_right('a -> 'b -> 'b) -> 'a array -> 'b -> 'b右折叠
sort('a -> 'a -> int) -> 'a array -> unit原地排序
stable_sort('a -> 'a -> int) -> 'a array -> unit稳定排序

数组切片与子数组

let () =
  let arr = [| 0; 1; 2; 3; 4; 5; 6; 7; 8; 9 |] in
  let sub = Array.sub arr 2 5 in  (* 从索引 2 取 5 个 *)
  Array.iter (Printf.printf "%d ") sub;
  print_newline ()
  (* 输出: 2 3 4 5 6 *)

(* Array.blit: 在数组间复制 *)
let () =
  let src = [| 10; 20; 30 |] in
  let dst = Array.make 7 0 in
  Array.blit src 0 dst 2 3;
  Array.iter (Printf.printf "%d ") dst;
  print_newline ()
  (* 输出: 0 0 10 20 30 0 0 *)

可变数组与不可变

(* 可变修改 *)
let () =
  let arr = [| 1; 2; 3 |] in
  arr.(1) <- 20;              (* 原地修改 *)
  Printf.printf "arr[1] = %d\n" arr.(1)

(* 不可变风格:生成新数组 *)
let map_in_place f arr =
  Array.iteri (fun i x -> arr.(i) <- f x) arr

let () =
  let arr = [| 1; 2; 3; 4; 5 |] in
  map_in_place (fun x -> x * 2) arr;
  Array.iter (Printf.printf "%d ") arr;
  print_newline ()
  (* 输出: 2 4 6 8 10 *)

(* 使用 Array.map 创建新数组(不修改原数组) *)
let () =
  let original = [| 1; 2; 3 |] in
  let doubled = Array.map (fun x -> x * 2) original in
  Printf.printf "Original: ";
  Array.iter (Printf.printf "%d ") original;
  Printf.printf "\nDoubled: ";
  Array.iter (Printf.printf "%d ") doubled;
  print_newline ()

⚠️ 注意:数组是可变的,多个引用指向同一数组时,修改会影响所有引用。如果需要不可变语义,使用 Array.map 创建新数组或使用 list

Hashtbl 模块基础

Hashtbl 实现了哈希表(字典),键值对存储,O(1) 平均查找。

(* 创建哈希表 *)
let tbl = Hashtbl.create 16    (* 初始容量 *)

(* 添加元素 *)
let () =
  Hashtbl.add tbl "name" "Alice";
  Hashtbl.add tbl "age" "30";
  Hashtbl.add tbl "city" "Beijing";

  (* 查找元素 *)
  Printf.printf "Name: %s\n" (Hashtbl.find tbl "name");

  (* 安全查找 *)
  match Hashtbl.find_opt tbl "email" with
  | Some email -> Printf.printf "Email: %s\n" email
  | None -> Printf.printf "Email not found\n";

  (* 遍历 *)
  Hashtbl.iter (fun k v ->
    Printf.printf "  %s = %s\n" k v
  ) tbl

Hashtbl 常用函数

函数类型说明
createint -> ('a, 'b) t创建(初始容量)
add('a, 'b) t -> 'a -> 'b -> unit添加(允许重复键)
replace('a, 'b) t -> 'a -> 'b -> unit替换或添加
find('a, 'b) t -> 'a -> 'b查找(不存在抛异常)
find_opt('a, 'b) t -> 'a -> 'b option安全查找
find_all('a, 'b) t -> 'a -> 'b list查找所有(同键多值)
mem('a, 'b) t -> 'a -> bool键是否存在
remove('a, 'b) t -> 'a -> unit删除键
length('a, 'b) t -> int键值对数量
iter('a -> 'b -> unit) -> ('a, 'b) t -> unit遍历
fold('a -> 'b -> 'c -> 'c) -> ('a, 'b) t -> 'c -> 'c折叠
map('a -> 'b -> 'c) -> ('a, 'b) t -> ('a, 'c) t映射值
filter_map_inplace('a -> 'b -> 'b option) -> ('a, 'b) t -> unit就地过滤
to_seq('a, 'b) t -> ('a * 'b) Seq.t转为惰性序列
of_seq('a * 'b) Seq.t -> ('a, 'b) t从序列创建

add vs replace

let () =
  let tbl = Hashtbl.create 16 in
  Hashtbl.add tbl "key" "first";
  Hashtbl.add tbl "key" "second";
  Printf.printf "All values for 'key':\n";
  Hashtbl.find_all tbl "key" |> List.iter (Printf.printf "  %s\n");
  (* 两个值都存在 *)

  let tbl2 = Hashtbl.create 16 in
  Hashtbl.replace tbl2 "key" "first";
  Hashtbl.replace tbl2 "key" "second";
  Printf.printf "After replace: %s\n" (Hashtbl.find tbl2 "key")
  (* 只有 "second" *)

💡 提示add 允许同键多值(用 find_all 查找所有),replace 覆盖旧值。大多数场景应使用 replace

哈希函数

(* 默认使用 polymorphic hash *)
let tbl : (string, int) Hashtbl.t = Hashtbl.create 16

(* 自定义哈希函数 *)
module StringHash = Hashtbl.Make(struct
  type t = string
  let equal = String.equal
  let hash s =
    let h = ref 0 in
    String.iter (fun c ->
      h := (!h * 31 + Char.code c) land max_int
    ) s;
    !h
end)

let () =
  let tbl = StringHash.create 16 in
  StringHash.add tbl "hello" 1;
  StringHash.add tbl "world" 2;
  Printf.printf "hello: %d\n" (StringHash.find tbl "hello")

哈希表性能

操作平均时间复杂度最坏情况
插入 addO(1)O(n)
查找 findO(1)O(n)
删除 removeO(1)O(n)
遍历 iterO(n)O(n)

⚠️ 注意:如果哈希函数分布不均匀,所有操作可能退化到 O(n)。标准库的默认哈希函数对大多数场景足够好。

Queue 模块

FIFO(先进先出)队列。

let () =
  let q = Queue.create () in
  Queue.push 1 q;
  Queue.push 2 q;
  Queue.push 3 q;

  Printf.printf "Length: %d\n" (Queue.length q);
  Printf.printf "Pop: %d\n" (Queue.pop q);    (* 1 *)
  Printf.printf "Pop: %d\n" (Queue.pop q);    (* 2 *)
  Printf.printf "Peek: %d\n" (Queue.peek q);  (* 3 *)
  Printf.printf "Length: %d\n" (Queue.length q)

(* Queue 的常用函数 *)
(*
  Queue.create : unit -> 'a t
  Queue.push   : 'a -> 'a t -> unit
  Queue.pop    : 'a t -> 'a          (* 抛出 Queue.Empty *)
  Queue.peek   : 'a t -> 'a          (* 抛出 Queue.Empty *)
  Queue.pop_opt : 'a t -> 'a option
  Queue.peek_opt : 'a t -> 'a option
  Queue.length : 'a t -> int
  Queue.is_empty : 'a t -> bool
  Queue.iter   : ('a -> unit) -> 'a t -> unit
  Queue.fold   : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b
  Queue.clear  : 'a t -> unit
  Queue.transfer : 'a t -> 'a t -> unit
*)

Stack 模块

LIFO(后进先出)栈。

let () =
  let s = Stack.create () in
  Stack.push 1 s;
  Stack.push 2 s;
  Stack.push 3 s;

  Printf.printf "Length: %d\n" (Stack.length s);
  Printf.printf "Pop: %d\n" (Stack.pop s);    (* 3 *)
  Printf.printf "Pop: %d\n" (Stack.pop s);    (* 2 *)
  Printf.printf "Top: %d\n" (Stack.top s);    (* 1 *)
  Printf.printf "Length: %d\n" (Stack.length s)

(* 用栈实现括号匹配 *)
let check_parens s =
  let stack = Stack.create () in
  let is_match open_c close_c =
    match open_c, close_c with
    | '(', ')' | '[', ']' | '{', '}' -> true
    | _ -> false
  in
  try
    String.iter (fun c ->
      match c with
      | '(' | '[' | '{' -> Stack.push c stack
      | ')' | ']' | '}' ->
        if Stack.is_empty stack || not (is_match (Stack.pop stack) c) then
          raise Exit
      | _ -> ()
    ) s;
    Stack.is_empty stack
  with
  | Exit -> false

let () =
  Printf.printf "(()): %b\n" (check_parens "(())");
  Printf.printf "([{}]): %b\n" (check_parens "([{}])");
  Printf.printf "((()): %b\n" (check_parens "((())")

Seq 惰性序列

Seq 是 OCaml 4.07+ 引入的惰性序列,按需计算元素。

(* 创建序列 *)
let naturals () =
  let rec aux n () = Seq.Cons (n, aux (n + 1)) in
  aux 0

let () =
  naturals ()
  |> Seq.take 10
  |> Seq.iter (Printf.printf "%d ");
  print_newline ()
  (* 输出: 0 1 2 3 4 5 6 7 8 9 *)

(* 序列操作是惰性的 *)
let squares =
  naturals ()
  |> Seq.map (fun x -> x * x)
  |> Seq.filter (fun x -> x mod 2 = 0)

let () =
  squares
  |> Seq.take 5
  |> Seq.iter (Printf.printf "%d ");
  print_newline ()
  (* 输出: 0 4 16 36 64 *)

(* 无限斐波那契序列 *)
let fib () =
  let rec aux a b () = Seq.Cons (a, aux b (a + b)) in
  aux 0 1

let () =
  fib ()
  |> Seq.take 15
  |> Seq.iter (Printf.printf "%d ");
  print_newline ()

Seq 常用函数

函数说明
Seq.map映射每个元素
Seq.filter过滤元素
Seq.take取前 n 个
Seq.drop跳过前 n 个
Seq.fold_left累积折叠
Seq.iter遍历执行
Seq.unfold展开生成序列
Seq.append连接两个序列
Seq.concat连接序列的序列
Seq.for_all全称检查
Seq.exists存在检查

数据结构选择指南

需求推荐数据结构原因
索引访问ArrayO(1) 随机访问
键值查找HashtblO(1) 平均查找
有序映射MapO(log n) 有序遍历
FIFO 队列QueueO(1) 入队出队
LIFO 栈StackO(1) 压栈弹栈
不可变列表list不可变,安全
惰性计算Seq按需计算,节省内存
大数值数组Bigarray与 C 互操作

Bigarray 简介

Bigarray 用于处理大型数值数组,内存由 C 管理,适合科学计算和与 C 库互操作。

(*
open Bigarray

(* 创建一维 Bigarray *)
let arr = Array1.create int c_layout 5 in
Array1.set arr 0 10;
Array1.set arr 1 20;
Array1.set arr 2 30;
Printf.printf "arr[1] = %d\n" (Array1.get arr 1);
Printf.printf "Length: %d\n" (Array1.dim arr)

(* 创建二维 Bigarray *)
let matrix = Array2.create float64 c_layout 3 4 in
Array2.set matrix 1 2 3.14;
Printf.printf "matrix[1][2] = %f\n" (Array2.get matrix 1 2)
*)

实际业务场景:词频统计

let word_freq text =
  let tbl = Hashtbl.create 64 in
  let words = String.split_on_char ' ' text in
  List.iter (fun word ->
    let word = String.lowercase_ascii (String.trim word) in
    if word <> "" then begin
      let count = Hashtbl.find_opt tbl word
        |> Option.value ~default:0 in
      Hashtbl.replace tbl word (count + 1)
    end
  ) words;
  (* 转为列表并排序 *)
  Hashtbl.fold (fun word count acc -> (word, count) :: acc) tbl []
  |> List.sort (fun (_, a) (_, b) -> compare b a)

let () =
  let text = "the quick brown fox jumps over the lazy dog the fox the dog" in
  let freq = word_freq text in
  Printf.printf "Word frequencies:\n";
  List.iter (fun (word, count) ->
    Printf.printf "  %-10s %d\n" word count
  ) (List.take 5 freq)

扩展阅读