OCaml 教程 / OCaml 文件操作
OCaml 文件操作
文件操作是实际编程中最常见的任务之一。本文全面讲解 OCaml 中的文件读写、目录操作、路径处理,以及实战中的配置文件解析和日志处理。
文件读写基础
open_in / open_out
(** [write_file filename content] creates (or truncates) [filename] and
    writes [content] to it in text mode.

    The channel is released on every exit path, so a failed write no longer
    leaks a file descriptor.
    @raise Sys_error if the file cannot be opened or written. *)
let write_file filename content =
  let oc = open_out filename in
  Fun.protect
    ~finally:(fun () -> close_out_noerr oc)
    (fun () ->
      output_string oc content;
      (* Flush explicitly so a write error surfaces here as [Sys_error]
         instead of being hidden by the error-ignoring close above. *)
      flush oc)
(** [read_file filename] returns the content of [filename], read line by
    line.

    Every line is re-terminated with ['\n'], so a non-empty result always
    ends with a newline even when the file's last line did not.
    The channel is closed on every exit path, not only at end of file.
    @raise Sys_error if the file cannot be opened or read. *)
let read_file filename =
  let ic = open_in filename in
  Fun.protect
    ~finally:(fun () -> close_in_noerr ic)
    (fun () ->
      let buf = Buffer.create 256 in
      let rec loop () =
        match input_line ic with
        | line ->
          Buffer.add_string buf line;
          Buffer.add_char buf '\n';
          loop ()
        | exception End_of_file -> Buffer.contents buf
      in
      loop ())
(* Demo: write a three-line file, read it back and print it. *)
let () =
  let path = "/tmp/test_ocaml.txt" in
  write_file path "Hello\nWorld\nOCaml\n";
  let text = read_file path in
  Printf.printf "Content:\n%s" text
文件写入方式
| 函数 | 类型 | 说明 |
|---|---|---|
| `open_out` | `string -> out_channel` | 以文本模式打开(覆盖) |
| `open_out_bin` | `string -> out_channel` | 以二进制模式打开 |
| `open_out_gen` | `open_flag list -> int -> string -> out_channel` | 通用打开 |
| `output_string` | `out_channel -> string -> unit` | 写入字符串 |
| `output_bytes` | `out_channel -> bytes -> unit` | 写入字节 |
| `output_char` | `out_channel -> char -> unit` | 写入字符 |
| `output_substring` | `out_channel -> string -> int -> int -> unit` | 写入子串 |
| `flush` | `out_channel -> unit` | 刷新缓冲区 |
| `close_out` | `out_channel -> unit` | 关闭通道 |
追加写入
(** [append_file filename content] appends [content] to [filename],
    creating the file with mode [0o644] when it does not exist.

    The channel is released on every exit path, so a failed write does not
    leak a file descriptor.
    @raise Sys_error if the file cannot be opened or written. *)
let append_file filename content =
  let oc =
    open_out_gen [ Open_wronly; Open_creat; Open_append ] 0o644 filename
  in
  Fun.protect
    ~finally:(fun () -> close_out_noerr oc)
    (fun () ->
      output_string oc content;
      (* Flush explicitly so a write error is reported as [Sys_error]
         rather than swallowed by the error-ignoring close. *)
      flush oc)
(* Demo: create a file with one line, append two more, print the result. *)
let () =
  let path = "/tmp/append_test.txt" in
  write_file path "Line 1\n";
  List.iter (append_file path) [ "Line 2\n"; "Line 3\n" ];
  print_string (read_file path)
通道关闭与异常安全
⚠️ 注意:如果在 `open_in` 和 `close_in` 之间发生异常,通道可能不会被关闭,导致资源泄漏。应使用 `Fun.protect`。
(* Deliberately unsafe counter-example: returns the first line of
   [filename], but leaks the channel when reading fails. *)
let unsafe_read filename =
  let chan = open_in filename in
  (* [input_line] may raise (for instance [End_of_file] on an empty file)... *)
  let first = input_line chan in
  (* ...in which case this close is never reached. *)
  close_in chan;
  first
(* Safe pattern: [with_open_in filename f] opens [filename] for reading,
   applies [f] to the channel and closes the channel whether [f] returns
   or raises. *)
let with_open_in filename f =
  let ic = open_in filename in
  let release () = close_in ic in
  let use () = f ic in
  Fun.protect ~finally:release use
(* [with_open_out filename f] opens (and truncates) [filename] for writing,
   applies [f] to the channel and closes the channel whether [f] returns
   or raises. *)
let with_open_out filename f =
  let oc = open_out filename in
  let release () = close_out oc in
  let use () = f oc in
  Fun.protect ~finally:release use
(* Using the guard: returns the first line of [filename]; the channel is
   closed by [with_open_in] even when [input_line] raises. *)
let safe_read_first_line filename = with_open_in filename input_line
(* Writes [content] to [filename], replacing any previous content; the
   channel is closed by [with_open_out]. *)
let safe_write filename content =
  let emit oc = output_string oc content in
  with_open_out filename emit
(** [with_open_in_full filename f] opens [filename], applies [f] to the
    channel and closes the channel afterwards.

    When [f] raises, the channel is closed with errors ignored and the
    exception from [f] is re-raised with its original backtrace, so a
    failure during cleanup never masks the real error.
    @raise Sys_error if the file cannot be opened, or if closing fails
    after [f] returned normally. *)
let with_open_in_full filename f =
  let ic = open_in filename in
  match f ic with
  | result ->
    close_in ic;
    result
  | exception exn ->
    (* Capture the backtrace before cleanup can overwrite it. *)
    let bt = Printexc.get_raw_backtrace () in
    close_in_noerr ic;
    Printexc.raise_with_backtrace exn bt
使用 Scanf 进行文件解析
Scanf 模块提供了结构化的输入解析。
(** [parse_numbers filename] reads lines of the form ["<int> <int>"] from
    [filename] and returns the pairs in file order.

    Scanning stops at end of input, or at the first line that does not
    match; in the latter case a message is printed on stderr and the pairs
    read so far are returned.

    NOTE(review): the format ends with ["\n"], so a final line without a
    trailing newline appears to be dropped - confirm against the Scanf
    documentation before relying on it. *)
let parse_numbers filename =
  with_open_in filename (fun ic ->
    let scanner = Scanf.Scanning.from_channel ic in
    (* The recursive call is outside the scanning continuation and outside
       any [try], so the loop is tail-recursive and does not stack one
       exception handler per input line. *)
    let rec loop acc =
      match Scanf.bscanf scanner "%d %d\n" (fun a b -> (a, b)) with
      | pair -> loop (pair :: acc)
      | exception End_of_file -> List.rev acc
      | exception Scanf.Scan_failure msg ->
        Printf.eprintf "Parse error: %s\n" msg;
        List.rev acc
    in
    loop [])
(* Usage example: write three pairs, parse them back and print each one. *)
let () =
  safe_write "/tmp/numbers.txt" "1 2\n3 4\n5 6\n";
  let print_pair (a, b) = Printf.printf "(%d, %d)\n" a b in
  let pairs = parse_numbers "/tmp/numbers.txt" in
  List.iter print_pair pairs
(** [parse_from_string s] extracts [(name, age)] from a string shaped like
    ["Name: Alice, Age: 30"].

    The name is read with [%[^,]] rather than [%s]: [%s] only stops at
    whitespace, so it would swallow the comma and make the following
    literal [","] fail to match.
    @raise Scanf.Scan_failure if [s] does not match the expected shape.
    @raise End_of_file if [s] ends before the pattern is complete. *)
let parse_from_string s =
  Scanf.sscanf s "Name: %[^,], Age: %d" (fun name age -> (name, age))
(* Demo: parse a fixed sample string and print the extracted fields. *)
let () =
  let describe (name, age) = Printf.printf "%s is %d years old\n" name age in
  describe (parse_from_string "Name: Alice, Age: 30")
目录操作
Sys 模块
(* Existence check: thin alias for [Sys.file_exists]. *)
let file_exists = Sys.file_exists
(* [is_directory path] is [true] only when [path] exists and is a
   directory. The existence check comes first because [Sys.is_directory]
   raises on a missing path. *)
let is_directory path =
  if Sys.file_exists path then Sys.is_directory path else false
(* Current working directory, captured once when this binding is
   evaluated. *)
let cwd = Sys.getcwd ()
(* [file_size path] is the [st_size] field reported by [Unix.stat] for
   [path]. *)
let file_size path =
  let stats = Unix.stat path in
  stats.Unix.st_size
(* [list_dir path] returns the entry names of directory [path], as given
   by [Sys.readdir], converted to a list. *)
let list_dir path = path |> Sys.readdir |> Array.to_list
(** [list_files_recursive path] returns the paths of all non-directory
    entries below directory [path], depth-first, following the order in
    which [Sys.readdir] yields entries.

    Results are accumulated in reverse and reversed once at the end, which
    avoids the quadratic cost of appending to the accumulator per entry.
    @raise Sys_error if a directory cannot be read. *)
let list_files_recursive path =
  let rec walk acc dir =
    Array.fold_left
      (fun acc entry ->
        let full_path = Filename.concat dir entry in
        if Sys.is_directory full_path then walk acc full_path
        else full_path :: acc)
      acc (Sys.readdir dir)
  in
  List.rev (walk [] path)
(* Demo: print the working directory and probe "/tmp". *)
let () =
  let probe = "/tmp" in
  Printf.printf "CWD: %s\n" (Sys.getcwd ());
  Printf.printf "File exists: %b\n" (file_exists probe);
  Printf.printf "Is dir: %b\n" (is_directory probe)
Sys 文件操作
| 函数 | 说明 |
|---|---|
| `Sys.file_exists` | 文件是否存在 |
| `Sys.is_directory` | 是否为目录 |
| `Sys.remove` | 删除文件 |
| `Sys.rename` | 重命名/移动 |
| `Sys.getcwd` | 获取当前目录 |
| `Sys.chdir` | 改变当前目录 |
| `Sys.readdir` | 列出目录内容 |
| `Sys.mkdir` | 创建目录(OCaml 4.12+) |
| `Sys.rmdir` | 删除目录(OCaml 4.12+) |
Unix 模块目录操作
(*
open Unix
let create_dir path perm = mkdir path perm
let remove_dir path = rmdir path
let get_stat path = stat path
*)
文件路径处理
Filename 模块提供跨平台的路径操作。
(* Join a directory and an entry name: "/home/user/documents". *)
let path = Filename.concat "/home/user" "documents"

(* Last component of a path: "test.ml". *)
let name = Filename.basename "/home/user/test.ml"

(* Everything but the last component: "/home/user". *)
let dir = Filename.dirname "/home/user/test.ml"

(* Extension, leading dot included: ".ml". *)
let ext = Filename.extension "test.ml"

(* File name with its extension removed: "test". *)
let base = Filename.remove_extension "test.ml"

(* Absolute-path check. [Filename] only provides [is_relative], so an
   absolute path is detected by negating it. *)
let is_abs = not (Filename.is_relative "/home/user") (* true *)
let is_rel = Filename.is_relative "test.ml" (* true *)

(* Temporary file. Note that [Filename.temp_file] does not merely pick a
   name: it creates an empty file, which stays on disk until removed. *)
let temp = Filename.temp_file "ocaml_" ".tmp"
(* Demo: print each path component computed by the bindings above it. *)
let () =
  let open Printf in
  printf "Path: %s\n" path;
  printf "Name: %s\n" name;
  printf "Dir: %s\n" dir;
  printf "Ext: %s\n" ext;
  printf "Base: %s\n" base;
  printf "Temp: %s\n" temp
临时文件处理
(** [with_temp_file prefix suffix f] creates a temporary file, applies [f]
    to its name and removes the file afterwards, whether [f] returns or
    raises.

    Removal is best-effort: only [Sys_error] (for example when [f] already
    deleted the file) is ignored, so unrelated exceptions are no longer
    silently swallowed.
    @raise Sys_error if the temporary file cannot be created. *)
let with_temp_file prefix suffix f =
  let filename = Filename.temp_file prefix suffix in
  Fun.protect
    ~finally:(fun () ->
      try Sys.remove filename with Sys_error _ -> ())
    (fun () -> f filename)
(* Demo: write to a temporary file, read it back, then confirm cleanup. *)
let () =
  let demo filename =
    safe_write filename "Temporary content\n";
    let content = read_file filename in
    Printf.printf "Temp content: %s" content
  in
  with_temp_file "test_" ".txt" demo;
  (* At this point [with_temp_file] has already removed the file. *)
  Printf.printf "Temp file cleaned up\n"
(** [with_temp_dir prefix f] creates a fresh temporary directory (mode
    [0o755]), applies [f] to its path and then removes the directory
    together with everything [f] left inside it, whether [f] returns or
    raises.

    Cleanup is best-effort: file-system errors raised while removing are
    ignored.
    @raise Sys_error or Unix.Unix_error if the directory cannot be
    created. *)
let with_temp_dir prefix f =
  (* Recursive delete. [lstat] is used so that a symbolic link to a
     directory is unlinked rather than followed. *)
  let rec remove_tree path =
    match (Unix.lstat path).Unix.st_kind with
    | Unix.S_DIR ->
      Array.iter
        (fun entry -> remove_tree (Filename.concat path entry))
        (Sys.readdir path);
      Unix.rmdir path
    | Unix.S_REG | Unix.S_LNK | Unix.S_CHR | Unix.S_BLK | Unix.S_FIFO
    | Unix.S_SOCK ->
      Sys.remove path
  in
  (* [temp_file] reserves a unique name by creating a file; replace that
     file with a directory of the same name. *)
  let dir = Filename.temp_file prefix "" in
  Sys.remove dir;
  Unix.mkdir dir 0o755;
  Fun.protect
    ~finally:(fun () ->
      try remove_tree dir with Sys_error _ | Unix.Unix_error _ -> ())
    (fun () -> f dir)
配置文件解析实战
(* Minimal [key = value] configuration files. *)
module Config = struct
  (* Bindings in file order. *)
  type t = (string * string) list

  (* [parse content] turns each [key = value] line of [content] into a
     binding. Blank lines, lines starting with '#' and lines without '='
     are skipped; keys and values are trimmed. Only the first '=' splits. *)
  let parse content : t =
    let is_ignorable line = line = "" || line.[0] = '#' in
    let binding_of line =
      String.index_opt line '='
      |> Option.map (fun eq ->
           let key = String.sub line 0 eq |> String.trim in
           let value =
             String.sub line (eq + 1) (String.length line - eq - 1)
             |> String.trim
           in
           (key, value))
    in
    content
    |> String.split_on_char '\n'
    |> List.filter_map (fun raw ->
         let line = String.trim raw in
         if is_ignorable line then None else binding_of line)

  (* [get config key] is the first value bound to [key], if any. *)
  let get config key = List.assoc_opt key config

  (* Like [get], falling back to [default] when [key] is unbound. *)
  let get_or ~default config key =
    match get config key with
    | Some value -> value
    | None -> default

  (* [load filename] reads and parses the file [filename]. *)
  let load filename = filename |> read_file |> parse

  (* [save filename config] writes one "key = value" line per binding,
     separated by newlines. *)
  let save filename config =
    config
    |> List.map (fun (k, v) -> Printf.sprintf "%s = %s" k v)
    |> String.concat "\n"
    |> safe_write filename
end
(* Demo: parse an inline configuration and print three settings, each
   with a fallback value. *)
let () =
  let config_content = {|
# Database configuration
host = localhost
port = 5432
database = myapp
user = admin
# App settings
debug = true
max_connections = 100
|} in
  let config = Config.parse config_content in
  let lookup ~default key = Config.get_or ~default config key in
  Printf.printf "Host: %s\n" (lookup ~default:"127.0.0.1" "host");
  Printf.printf "Port: %s\n" (lookup ~default:"3306" "port");
  Printf.printf "Debug: %s\n" (lookup ~default:"false" "debug")
日志文件处理实战
(** One parsed log line. *)
type log_entry = {
  timestamp : string;  (** Date and time joined by a single space. *)
  level : string;  (** Text between the second pair of brackets. *)
  message : string;  (** Remainder of the line. *)
}

(** [parse_log_line line] parses a line shaped like
    ["[2026-05-11 10:30:00] [INFO] message"], returning [None] when the
    line does not have that shape.

    The bracketed fields are read with [%[^\]]] instead of [%s]: [%s] only
    stops at whitespace, so it would swallow the closing [']'] and the
    literal [']'] in the format could then never match. *)
let parse_log_line line =
  match
    Scanf.sscanf line "[%s %[^]]] [%[^]]] %[^\n]"
      (fun date time level message ->
        { timestamp = date ^ " " ^ time; level; message })
  with
  | entry -> Some entry
  (* Only the scanner's own failures mean "not a log line". *)
  | exception (Scanf.Scan_failure _ | Failure _ | End_of_file) -> None
(* Keeps the entries whose [level] equals the given [level], in their
   original order. *)
let filter_logs_by_level level entries =
  let has_level entry = String.equal entry.level level in
  List.filter has_level entries
(* Counts entries per level and returns the [(level, count)] pairs in the
   order produced by [Hashtbl.fold]. *)
let count_by_level entries =
  let counts = Hashtbl.create 4 in
  let bump entry =
    let seen =
      match Hashtbl.find_opt counts entry.level with
      | Some n -> n
      | None -> 0
    in
    Hashtbl.replace counts entry.level (seen + 1)
  in
  List.iter bump entries;
  Hashtbl.fold (fun level count acc -> (level, count) :: acc) counts []
(* Reads [filename] line by line and returns, in file order, the entries
   for which [parse_log_line] succeeds; other lines are skipped. *)
let read_log_file filename =
  with_open_in filename (fun ic ->
    let rec collect rev_entries =
      match input_line ic with
      | exception End_of_file -> List.rev rev_entries
      | line ->
        (match parse_log_line line with
         | Some entry -> collect (entry :: rev_entries)
         | None -> collect rev_entries)
    in
    collect [])
(* Demo: write sample log lines to a temporary file, parse them back and
   print the total, the number of errors and the per-level counts. *)
let () =
  let log_content = {|
[2026-05-11 10:30:00] [INFO] Application started
[2026-05-11 10:30:01] [DEBUG] Loading configuration
[2026-05-11 10:30:02] [INFO] Configuration loaded
[2026-05-11 10:30:05] [WARN] Low memory warning
[2026-05-11 10:30:10] [ERROR] Database connection failed
[2026-05-11 10:30:11] [INFO] Retrying connection
[2026-05-11 10:30:15] [INFO] Connected to database
|} in
  let report filename =
    safe_write filename (String.trim log_content);
    let entries = read_log_file filename in
    Printf.printf "Total entries: %d\n" (List.length entries);
    let errors = filter_logs_by_level "ERROR" entries in
    Printf.printf "Errors: %d\n" (List.length errors);
    let print_count (level, count) = Printf.printf " %s: %d\n" level count in
    List.iter print_count (count_by_level entries)
  in
  with_temp_file "log_" ".log" report
JSON 文件读写
使用 Yojson 库(需要 opam install yojson):
(*
(* 需要安装 yojson: opam install yojson *)
open Yojson.Basic.Util
let read_json filename =
Yojson.Basic.from_file filename
let get_string_field json field =
json |> member field |> to_string
let get_int_field json field =
json |> member field |> to_int
let parse_users json =
json |> member "users" |> to_list |> List.map (fun user ->
let name = user |> member "name" |> to_string in
let age = user |> member "age" |> to_int in
let email = user |> member "email" |> to_string in
(name, age, email)
)
let json_content = {|
{
"users": [
{"name": "Alice", "age": 30, "email": "alice@example.com"},
{"name": "Bob", "age": 25, "email": "bob@example.com"}
]
}
|}
let () =
let json = Yojson.Basic.from_string json_content in
let users = parse_users json in
List.iter (fun (name, age, email) ->
Printf.printf "%s (%d) - %s\n" name age email
) users
*)
💡 提示:如果没有第三方 JSON 库,也可以使用简单的手写解析器或 `Scanf` 来处理结构化的 JSON 数据。但推荐使用 Yojson 这样成熟的库。