OCaml 教程 / 基本类型与表达式

基本类型与表达式

概述

OCaml 拥有一套精心设计的基本类型系统。所有类型在编译期确定，编译器通过 Hindley-Milner 类型推导算法自动推导大部分类型，开发者无需处处标注。

基本类型一览

类型	关键字	示例	大小	说明
整数	`int`	`42`, `-7`, `0xFF`	63 位（64 位平台）	1 位用于 GC 标记
浮点数	`float`	`3.14`, `1e-3`	64 位（IEEE 754）	双精度
布尔	`bool`	`true`, `false`	-	用 0/1 表示
字符	`char`	`'a'`, `'\n'`	8 位	仅支持 Latin-1
字符串	`string`	`"hello"`	可变长度	不可变（默认）
单元	`unit`	`()`	-	只有唯一的值 `()`，常用于表示副作用

整数类型

OCaml 的 int 类型在 64 位平台上是 63 位有符号整数（最低位被运行时用作标记位，使 GC 能区分整数和指针）；在 32 位平台上则是 31 位。

(* Basic integer arithmetic *)
let a = 10 + 20        (* => 30 *)
let b = 100 - 42       (* => 58 *)
let c = 6 * 7          (* => 42 *)
let d = 100 / 3        (* => 33, integer division truncates toward zero *)
let e = 100 mod 3      (* => 1, remainder of the integer division *)

(* Hexadecimal, octal and binary literals *)
let hex = 0xFF         (* => 255 *)
let oct = 0o77         (* => 63 *)
let bin = 0b1010       (* => 10 *)

(* Boundary values. These rebind the standard names to themselves only to
   display them; the values shown are those of a 64-bit platform, where int
   is 63 bits wide. *)
let max_int = max_int  (* => 4611686018427387903 *)
let min_int = min_int  (* => -4611686018427387904 *)

⚠️ 注意：OCaml 的整数溢出是环绕行为（wrap around），不会抛出异常。这一点与 Java 的 int/long 相同（同样静默环绕），但不同于 Python：Python 的整数是任意精度的，不会溢出。

(* Overflow example *)
let overflow = max_int + 1  (* wraps around to min_int; no exception is raised *)

32 位整数与其他整数类型

(* 32-bit integers (used for FFI and other specific scenarios) *)
let x = Int32.of_int 42       (* int -> int32 *)
let y = Int32.add x 1l        (* the l suffix marks an int32 literal *)

(* 64-bit integers *)
let z = Int64.of_int 42       (* int -> int64 *)
let w = Int64.add z 1L        (* the L suffix marks an int64 literal *)

(* Native integers (same width as the platform word) *)
let n = Nativeint.of_int 42   (* int -> nativeint *)

类型	字面量后缀	模块
`int32`	`l`	`Int32`
`int64`	`L`	`Int64`
`nativeint`	`n`	`Nativeint`

浮点数类型

OCaml 的 float 类型是 64 位 IEEE 754 双精度浮点数。

(* Floating-point arithmetic: note the dedicated dotted operators *)
let pi = 3.141592653589793
let radius = 5.0
let area = pi *. radius *. radius   (* use *. rather than * *)
let circumference = 2.0 *. pi *. radius

(* Floating-point division *)
let half = 1.0 /. 2.0               (* use /. rather than / *)

(* Floating-point functions *)
let sqrt_2 = sqrt 2.0                (* square root *)
let log_e = log 2.718281828          (* natural logarithm; the argument approximates e *)
let power = 2.0 ** 10.0              (* exponentiation, => 1024.0 *)

(* Special floating-point values *)
let nan_value = nan                   (* NaN *)
let inf_value = infinity              (* positive infinity *)
let neg_inf = neg_infinity            (* negative infinity *)

⚠️ 注意：OCaml 中整数和浮点数使用不同的运算符，这是初学者最容易犯的错误：

操作	整数	浮点数
加法	`+`	`+.`
减法	`-`	`-.`
乘法	`*`	`*.`
除法	`/`	`/.`
取模	`mod`	`mod_float`
取负	`~-`	`~-.`

浮点运算陷阱

(* The classic floating-point precision problem *)
let result = 0.1 +. 0.2
(* => 0.30000000000000004, not 0.3 *)

(* Tolerance used when comparing floats *)
let epsilon = 1e-10

(* [float_equal a b] is true when the absolute difference between [a] and
   [b] is strictly smaller than [epsilon]. *)
let float_equal a b =
  let distance = abs_float (a -. b) in
  distance < epsilon

let _ = float_equal (0.1 +. 0.2) 0.3
(* => true *)

(* Do not compare floats directly with = *)
let _ = 0.1 +. 0.2 = 0.3
(* => false *)

💡 提示：金融计算中应使用定点整数或专用的十进制库，避免浮点精度问题。

布尔类型

(* Boolean literals *)
let is_valid = true
let has_error = false

(* Logical operators *)
let a = true && false        (* logical AND, short-circuit; => false *)
let b = true || false        (* logical OR, short-circuit; => true *)
let c = not true             (* logical NOT; => false *)

(* Comparison operators (polymorphic: they are not limited to one type) *)
let _ = 1 < 2                (* => true *)
let _ = "abc" < "abd"        (* => true, lexicographic order: 'c' < 'd' *)
let _ = 1 = 1                (* structural equality; => true *)
let _ = 1 == 1               (* physical equality; => true for immediate ints *)

(* Structural equality vs physical equality *)
let a = [1; 2; 3]
let b = [1; 2; 3]
let _ = (a = b)              (* => true, same contents *)
let _ = (a == b)             (* typically false in the toplevel (two separate
                                values), but == on immutable values is
                                implementation-dependent: a compiler may share
                                identical constants, so do not rely on it *)

⚠️ 注意：OCaml 中 = 是结构相等比较（比较值），== 是物理相等比较（比较内存地址）。通常应该使用 =。

字符类型

(* Character literals *)
let c = 'A'
let newline = '\n'
let escaped = '\\'           (* backslash *)
let quote = '\''             (* single quote *)

(* Character operations *)
let code = Char.code 'A'     (* => 65, the ASCII code *)
let char_of = Char.chr 97    (* => 'a' *)
let upper = Char.uppercase_ascii 'a'  (* => 'A' *)
let lower = Char.lowercase_ascii 'B'  (* => 'b' *)
(* Range check on character codes: true, because the code of '5' lies between
   the codes of '0' and '9'. Note that this is a constant bool, not a
   predicate function. *)
let is_digit = Char.code '0' <= Char.code '5' && Char.code '5' <= Char.code '9'

(* Converting a char to a string *)
let s = String.make 1 'x'   (* => "x" *)

⚠️ 注意：OCaml 的 char 类型仅支持 Latin-1 编码（0-255），不直接支持 Unicode。处理中文等多字节字符需要使用 string 或第三方库（如 Uutf）。

字符串类型

(* String literals *)
let greeting = "你好，OCaml！"
let empty = ""
let multiline = "第一行\n第二行\n第三行"

(* String concatenation *)
let full = greeting ^ " " ^ "欢迎！"

(* String length *)
let len = String.length greeting   (* number of bytes, not characters *)

(* String indexing *)
let first = String.get greeting 0  (* returns a char (a single byte), not a substring *)

(* Substring extraction: the offset and the length are counted in bytes.
   Assuming this file is UTF-8 encoded, the first character of [greeting]
   occupies 3 bytes, so 3 bytes are taken to get one whole character; taking
   only 2 would cut it in the middle and produce an invalid string. *)
let sub = String.sub greeting 0 3  (* => "你" *)

(* String search *)
let contains = String.contains greeting 'O'  (* => true *)

(* Iterating over a string *)
let () =
  String.iter (fun c -> print_char c; print_char ' ') "hello"
(* => h e l l o *)

(* Formatting (using the Printf module) *)
let msg = Printf.sprintf "Name: %s, Age: %d" "Alice" 30

💡 提示：OCaml 的字符串默认是不可变的（从 4.06 开始）。如需可变字符串，使用 Bytes 模块（详见第 10 篇）。

unit 类型

unit 类型只有一个值 ()，通常用于表示只有副作用的函数。

(* A unit-returning function, called purely for its side effect:
   prints a greeting for [name] on standard output. *)
let greet name =
  Printf.printf "Hello, %s!\n" name

(* unit as a parameter: takes () and reads one line from standard input.
   This definition shadows the standard read_line. *)
let read_line () =
  input_line stdin

(* unit as a return value. The body is a placeholder: [data] is not used. *)
let process_data data =
  (* ... process the data ... *)
  ()   (* explicitly return unit *)

(* unit in an if expression: prints one of two messages depending on
   whether [age] is at least 18. *)
let check_age age =
  if age >= 18 then
    print_endline "成年人"
  else
    print_endline "未成年"
(* Both branches have type unit because print_endline returns unit. *)

⚠️ 注意：unit 不等同于 void。unit 是一个真正的类型，有唯一的值 ()，可以作为参数和返回值使用。

类型推导

OCaml 编译器使用 Hindley-Milner 类型推导算法，大多数情况下无需标注类型：

(* The compiler infers these types automatically *)
let x = 42                    (* int *)
let y = 3.14                  (* float *)
let b = true                  (* bool *)
let s = "hello"               (* string *)

(* Inferred function types *)
let add a b = a + b           (* int -> int -> int *)
let f_to_c f = (f -. 32.0) *. 5.0 /. 9.0  (* float -> float *)

(* Polymorphic functions *)
let identity x = x            (* 'a -> 'a: from any type to itself *)
let compose f g x = f (g x)   (* ('b -> 'c) -> ('a -> 'b) -> 'a -> 'c; the toplevel may name the type variables differently *)

💡 提示：在 utop 中输入表达式后，- : type = value 中的 type 就是编译器推导出的类型。

let 绑定

let 是 OCaml 中最核心的绑定机制：

(* Basic let binding *)
let x = 10

(* let ... in expression: local bindings *)
let result =
  let x = 5 in
  let y = 3 in
  x + y
(* result = 8; outside this expression the top-level x is still 10 *)

(* Nested let bindings: each one may use the bindings before it *)
let computation =
  let a = 10 in
  let b = a * 2 in
  let c = b + 1 in
  a + b + c  (* => 10 + 20 + 21 = 51 *)

(* let ... in is an expression, not a statement.
   This new top-level x shadows the x = 10 defined earlier. *)
let x = (let y = 5 in y * y)  (* x = 25 *)

⚠️ 注意：let ... in 中的绑定是不可变的。一旦绑定，不能修改。这是函数式编程的核心理念。

类型注解

虽然 OCaml 能自动推导类型，但在某些场景下显式类型注解是必要的：

(* Annotating the parameters and the return type of a function *)
let add (a : int) (b : int) : int = a + b

(* Annotating a value *)
let x : int = 42
let s : string = "hello"

(* Making the result type explicit *)
let read_input () : string =
  input_line stdin

(* Disambiguation: the annotations make the int-to-float direction explicit *)
let float_of_int_and_back (x : int) : float =
  float_of_int x +. 0.5

(* Fully annotated definition: the annotations restrict max to int.
   Note that this annotates a definition; it is not a module signature. *)
let max (a : int) (b : int) : int =
  if a > b then a else b

💡 提示：建议在函数的参数和返回值上添加类型注解，这既是一种文档，也能帮助编译器给出更精确的错误信息。

if-then-else 表达式

在 OCaml 中，if-then-else 是表达式，不是语句，它会返回一个值：

(* Basic usage: returns the absolute value of [x].
   Since integer overflow wraps around, the result for min_int is min_int. *)
let abs_value x =
  if x >= 0 then x
  else -x

(* The value of an if expression is the value of the function:
   maps a numeric [score] to a letter grade, testing thresholds from the
   highest down. *)
let grade score =
  if score >= 90 then "A"
  else if score >= 80 then "B"
  else if score >= 70 then "C"
  else if score >= 60 then "D"
  else "F"

(* if used as an expression: returns the larger of [a] and [b] *)
let max a b = if a > b then a else b

(* When the else branch is omitted, else () is implied,
   so the then branch must have type unit. *)
let print_if_positive x =
  if x > 0 then print_int x
(* implied: else () *)

⚠️ 注意：当 if 没有 else 分支时，then 分支的类型必须是 unit。否则编译器会报错，因为缺少 else 分支无法产生一致的类型。

begin-end 块

begin ... end 用于在需要单个表达式的地方执行多个表达式：

(* Each branch of the if needs to run several expressions in sequence,
   so the sequence is grouped with begin ... end.
   Prints a label, then [x], then a newline. *)
let check_and_print x =
  if x > 0 then begin
    print_string "正数: ";
    print_int x;
    print_newline ()
  end else begin
    print_string "非正数: ";
    print_int x;
    print_newline ()
  end

(* Equivalent version that groups the sequences with parentheses *)
let check_and_print' x =
  if x > 0 then (
    print_string "正数: ";
    print_int x;
    print_newline ()
  ) else (
    print_string "非正数: ";
    print_int x;
    print_newline ()
  )

💡 提示：begin ... end 和 (...) 完全等价。社区风格更推荐使用括号，但 begin-end 在复杂的嵌套表达式中可读性更好。

类型转换

OCaml 是强类型语言，不同类型之间不会隐式转换，必须显式转换：

(* int -> float *)
let x = float_of_int 42         (* => 42.0 *)
let y = Int.to_float 42         (* same as above, module style *)

(* float -> int (truncates, does not round) *)
let a = int_of_float 3.9        (* => 3, not 4! *)
let b = Int.of_float 3.9        (* same as above *)

(* Conversions to string *)
let s1 = string_of_int 42       (* => "42" *)
let s2 = string_of_float 3.14   (* => "3.14" *)
let s3 = string_of_bool true    (* => "true" *)

(* Conversions from string; these raise an exception on malformed input *)
let n1 = int_of_string "42"     (* => 42 *)
let f1 = float_of_string "3.14" (* => 3.14 *)

(* char <-> int *)
let c = Char.chr 65             (* => 'A' *)
let n = Char.code 'A'           (* => 65 *)

(* string -> bool *)
let b1 = bool_of_string "true"  (* => true *)

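⚠️ 注意：int_of_float 是截断（truncate），不是四舍五入。如需四舍五入，先用 Float.round 取整（结果仍是 float），再转换为整数，例如 Float.to_int (Float.round 3.9) 得到 4。（Float.iround 系列函数来自 Jane Street 的 Base/Core 库，标准库中没有。）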

实用示例：温度转换器

(* temperature.ml — 温度转换器 *)

(* [celsius_to_fahrenheit c] converts [c] from degrees Celsius to degrees
   Fahrenheit: scale by 9/5, then add the 32-degree offset. *)
let celsius_to_fahrenheit (c : float) : float =
  let scaled = c *. 9.0 /. 5.0 in
  scaled +. 32.0

(* [fahrenheit_to_celsius f] converts [f] from degrees Fahrenheit to degrees
   Celsius: remove the 32-degree offset, then scale by 5/9. *)
let fahrenheit_to_celsius (f : float) : float =
  let above_freezing = f -. 32.0 in
  above_freezing *. 5.0 /. 9.0

(* [format_temperature value unit] renders [value] with two decimals,
   followed by a degree sign and the [unit] label. *)
let format_temperature (value : float) (unit : string) : string =
  let render = Printf.sprintf "%.2f°%s" in
  render value unit

(* Program entry point: converts body temperature from Celsius to Fahrenheit
   and the boiling point from Fahrenheit to Celsius, then prints one line
   for each. The conversions are pure, so they are all computed up front. *)
let () =
  let body_c = 37.0 in
  let body_f = celsius_to_fahrenheit body_c in
  let boil_f = 212.0 in
  let boil_c = fahrenheit_to_celsius boil_f in
  Printf.printf "体温: %s = %s\n"
    (format_temperature body_c "C")
    (format_temperature body_f "F");
  Printf.printf "沸点: %s = %s\n"
    (format_temperature boil_f "F")
    (format_temperature boil_c "C")

(* Output:
   体温: 37.00°C = 98.60°F
   沸点: 212.00°F = 100.00°C
*)

业务场景

数据验证：利用类型系统确保输入合法性
配置解析：使用类型转换函数将字符串配置转为具体类型
数值计算：整数用于精确计数，浮点数用于科学计算
日志格式化：使用 Printf.sprintf 构造结构化日志