强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

OCaml 教程 / OCaml 文件操作

OCaml 文件操作

文件操作是实际编程中最常见的任务之一。本文全面讲解 OCaml 中的文件读写、目录操作、路径处理,以及实战中的配置文件解析和日志处理。

文件读写基础

open_in / open_out

(* Write [content] to [filename], creating the file or truncating an
   existing one (text mode).

   The channel is closed on every exit path.  On success [close_out] is used
   so that a failed final flush is still reported; if writing fails, the
   channel is closed quietly and the original exception is re-raised with
   its backtrace.

   Raises [Sys_error] if the file cannot be opened, written or flushed. *)
let write_file filename content =
  let oc = open_out filename in
  match output_string oc content with
  | () -> close_out oc
  | exception exn ->
    let bt = Printexc.get_raw_backtrace () in
    close_out_noerr oc;
    Printexc.raise_with_backtrace exn bt

(* Read [filename] line by line and return its contents as one string.

   Each line is appended followed by a newline character, so a non-empty
   result always ends with a newline even when the last line of the file
   had none.

   The channel is closed on every exit path, including when [input_line]
   fails with something other than [End_of_file].

   Raises [Sys_error] if the file cannot be opened or read. *)
let read_file filename =
  let ic = open_in filename in
  let buf = Buffer.create 256 in
  let rec loop () =
    match input_line ic with
    | line ->
      Buffer.add_string buf line;
      Buffer.add_char buf '\n';
      loop ()
    | exception End_of_file -> Buffer.contents buf
  in
  Fun.protect ~finally:(fun () -> close_in_noerr ic) loop

(* Demo: write three lines to a file under /tmp, read them back and print
   them after a "Content:" header. *)
let () =
  let path = "/tmp/test_ocaml.txt" in
  let () = write_file path "Hello\nWorld\nOCaml\n" in
  read_file path |> Printf.printf "Content:\n%s"

文件写入方式

函数 | 类型 | 说明
open_out | string -> out_channel | 以文本模式打开(覆盖)
open_out_bin | string -> out_channel | 以二进制模式打开
open_out_gen | open_flag list -> int -> string -> out_channel | 通用打开
output_string | out_channel -> string -> unit | 写入字符串
output_bytes | out_channel -> bytes -> unit | 写入字节
output_char | out_channel -> char -> unit | 写入字符
output_substring | out_channel -> string -> int -> int -> unit | 写入子串
flush | out_channel -> unit | 刷新缓冲区
close_out | out_channel -> unit | 关闭通道

追加写入

(* Append [content] to [filename].  The file is opened write-only in append
   mode and created with permissions 0o644 if it does not exist.

   The channel is closed on every exit path.  On success [close_out] is used
   so that a failed final flush is still reported; if writing fails, the
   channel is closed quietly and the original exception is re-raised with
   its backtrace.

   Raises [Sys_error] if the file cannot be opened, written or flushed. *)
let append_file filename content =
  let flags = [Open_wronly; Open_creat; Open_append] in
  let oc = open_out_gen flags 0o644 filename in
  match output_string oc content with
  | () -> close_out oc
  | exception exn ->
    let bt = Printexc.get_raw_backtrace () in
    close_out_noerr oc;
    Printexc.raise_with_backtrace exn bt

(* Demo: create a file with one line, append two more, then print the
   whole file. *)
let () =
  let path = "/tmp/append_test.txt" in
  write_file path "Line 1\n";
  List.iter (append_file path) ["Line 2\n"; "Line 3\n"];
  read_file path |> print_string

通道关闭与异常安全

⚠️ 注意:如果在 open_in 与 close_in 之间发生异常,通道可能不会被关闭,导致资源泄漏。应使用 Fun.protect 来保证通道一定被关闭。

(* Unsafe approach, kept as a counter-example: return the first line of
   [filename].  If [input_line] raises, the [close_in] below is never
   reached and the channel stays open. *)
let unsafe_read filename =
  let ic = open_in filename in
  (* may raise, e.g. End_of_file on an empty file *)
  let first = input_line ic in
  (* skipped when the line above raised *)
  let () = close_in ic in
  first

(* Safe approach: open [filename] for reading, apply [f] to the channel and
   return its result.  [Fun.protect] runs [close_in] whether [f] returns or
   raises. *)
let with_open_in filename f =
  let ic = open_in filename in
  let finally () = close_in ic in
  Fun.protect ~finally (fun () -> f ic)

(* Open [filename] for writing (truncating it), apply [f] to the channel and
   return its result.  [Fun.protect] runs [close_out] whether [f] returns or
   raises. *)
let with_open_out filename f =
  let oc = open_out filename in
  let finally () = close_out oc in
  Fun.protect ~finally (fun () -> f oc)

(* Using the wrapper: return the first line of [filename]; the channel is
   managed by [with_open_in].  [End_of_file] from [input_line] propagates
   when the file is empty. *)
let safe_read_first_line filename = with_open_in filename input_line

(* Write [content] to [filename] through [with_open_out], which takes care
   of closing the channel. *)
let safe_write filename content =
  let emit oc = output_string oc content in
  with_open_out filename emit

(* More complete wrapper that also deals with failures while closing.

   If [f] raises, the channel is closed with errors ignored and the
   exception from [f] is re-raised.  If [f] returns, the channel is closed
   normally (a close failure propagates) and the result is returned. *)
let with_open_in_full filename f =
  let ic = open_in filename in
  let result =
    try f ic
    with exn ->
      close_in_noerr ic;
      raise exn
  in
  close_in ic;
  result

使用 Scanf 进行文件解析

Scanf 模块提供了结构化的输入解析。

(* Parse [filename] as a sequence of lines, each holding two integers
   separated by whitespace, and return the pairs in file order.

   Parsing stops at end of input.  On malformed input a message is printed
   on stderr and the pairs read so far are returned.

   Each pair is read by its own [bscanf] call and the recursive call is made
   outside both the scanner continuation and the exception handler, so the
   loop runs in constant stack space.  The line terminator is consumed
   separately and only when input remains, so a last line without a
   trailing newline is still returned. *)
let parse_numbers filename =
  with_open_in filename (fun ic ->
    let scanner = Scanf.Scanning.from_channel ic in
    let read_pair () =
      let pair = Scanf.bscanf scanner "%d %d" (fun a b -> (a, b)) in
      (* Require the newline only when something follows the pair. *)
      if not (Scanf.Scanning.end_of_input scanner) then
        Scanf.bscanf scanner "\n" ();
      pair
    in
    let rec loop acc =
      match read_pair () with
      | pair -> loop (pair :: acc)
      | exception End_of_file -> List.rev acc
      | exception Scanf.Scan_failure msg ->
        Printf.eprintf "Parse error: %s\n" msg;
        List.rev acc
    in
    loop []
  )

(* Usage example: write three pairs to a file under /tmp, parse the file
   and print every pair that was returned. *)
let () =
  let path = "/tmp/numbers.txt" in
  safe_write path "1 2\n3 4\n5 6\n";
  parse_numbers path
  |> List.iter (fun (a, b) -> Printf.printf "(%d, %d)\n" a b)

(* Parse a string of the form "Name: NAME, Age: N" and return [(name, age)].

   The name uses the scanning indication [%s@,]: it reads up to the next
   comma and consumes it.  A bare [%s] followed by a comma in the format
   would read "Alice," including the comma and then fail to match the comma.

   Raises [Scanf.Scan_failure], [Failure] or [End_of_file] when [s] does not
   match the format. *)
let parse_from_string s =
  Scanf.sscanf s "Name: %s@, Age: %d" (fun name age -> (name, age))

(* Demo: parse one sample string and print the name and age it yields. *)
let () =
  parse_from_string "Name: Alice, Age: 30"
  |> fun (name, age) -> Printf.printf "%s is %d years old\n" name age

目录操作

Sys 模块

(* Existence check: [file_exists path] is [Sys.file_exists path]. *)
let file_exists = Sys.file_exists

(* Tell whether [path] is a directory.  The existence check comes first, so
   [Sys.is_directory] is never called on a path that [Sys.file_exists]
   reports as missing; such a path yields [false]. *)
let is_directory path =
  if Sys.file_exists path then Sys.is_directory path else false

(* Current working directory.  [Sys.getcwd] is called once, when this
   binding is evaluated; [cwd] is a plain value and is not recomputed. *)
let cwd = Sys.getcwd ()

(* Size of [path] as reported in the [st_size] field of [Unix.stat].
   Needs the unix library.  Raises [Unix.Unix_error] when [stat] fails. *)
let file_size path =
  let stats = Unix.stat path in
  stats.Unix.st_size

(* List the entry names of directory [path], as returned by [Sys.readdir],
   converted to a list.  Raises [Sys_error] if [path] cannot be read. *)
let list_dir path = Sys.readdir path |> Array.to_list

(* Recursively list every non-directory entry below [path], as full paths
   built with [Filename.concat].  Entries appear in depth-first order,
   following the order [Sys.readdir] returns at each level.

   Paths are consed onto one shared accumulator and reversed once at the
   end, instead of appending to the end of the list at every step, which
   made the cost quadratic in the number of files.

   Raises [Sys_error] if a directory cannot be read.
   NOTE(review): directories are detected with [Sys.is_directory]; how it
   treats symbolic links is not established here, so a link cycle may not
   terminate -- confirm before using on untrusted trees. *)
let list_files_recursive path =
  let rec walk acc dir =
    Array.fold_left (fun acc entry ->
      let full_path = Filename.concat dir entry in
      if Sys.is_directory full_path then walk acc full_path
      else full_path :: acc
    ) acc (Sys.readdir dir)
  in
  List.rev (walk [] path)

(* Demo: print the working directory and what the two predicates report
   for "/tmp". *)
let () =
  Sys.getcwd () |> Printf.printf "CWD: %s\n";
  file_exists "/tmp" |> Printf.printf "File exists: %b\n";
  is_directory "/tmp" |> Printf.printf "Is dir: %b\n"

Sys 文件操作

函数 | 说明
Sys.file_exists | 文件是否存在
Sys.is_directory | 是否为目录
Sys.remove | 删除文件
Sys.rename | 重命名/移动
Sys.getcwd | 获取当前目录
Sys.chdir | 改变当前目录
Sys.readdir | 列出目录内容
Sys.mkdir | 创建目录(OCaml 4.12+)
Sys.rmdir | 删除目录(OCaml 4.12+)

Unix 模块目录操作

(*
open Unix

let create_dir path perm = mkdir path perm
let remove_dir path = rmdir path
let get_stat path = stat path
*)

文件路径处理

Filename 模块提供跨平台的路径操作。

(* Joining a directory and an entry name *)
let path = Filename.(concat "/home/user" "documents")
(* "/home/user/documents" *)

(* Last component of a path *)
let name = Filename.(basename "/home/user/test.ml")
(* "test.ml" *)

(* Everything but the last component *)
let dir = Filename.(dirname "/home/user/test.ml")
(* "/home/user" *)

(* Extension, leading dot included *)
let ext = Filename.(extension "test.ml")
(* ".ml" *)

(* File name with the extension stripped *)
let base = Filename.(remove_extension "test.ml")
(* "test" *)

(* Absolute vs. relative paths.  [Filename] only offers [is_relative], so
   the test for an absolute path is its negation; binding [is_abs] to the
   un-negated call made it [false] for "/home/user". *)
let is_abs = not (Filename.is_relative "/home/user")   (* true *)
let is_rel = Filename.is_relative "test.ml"            (* true *)

(* Temporary file name.  [Filename.temp_file] is called once, when this
   binding is evaluated, with prefix "ocaml_" and suffix ".tmp".  Nothing in
   this snippet removes the file afterwards. *)
let temp = Filename.temp_file "ocaml_" ".tmp"

(* Demo: print each of the values computed above, one labelled line each. *)
let () =
  [ Printf.sprintf "Path: %s\n" path;
    Printf.sprintf "Name: %s\n" name;
    Printf.sprintf "Dir: %s\n" dir;
    Printf.sprintf "Ext: %s\n" ext;
    Printf.sprintf "Base: %s\n" base;
    Printf.sprintf "Temp: %s\n" temp ]
  |> List.iter print_string

临时文件处理

(* Create a temporary file with the given [prefix] and [suffix], pass its
   name to [f] and return the result of [f].  The file is removed
   afterwards whether [f] returns or raises.

   Removal is best effort: only [Sys_error] is ignored (for example when
   [f] already deleted or renamed the file), rather than every exception. *)
let with_temp_file prefix suffix f =
  let filename = Filename.temp_file prefix suffix in
  let cleanup () =
    try Sys.remove filename with Sys_error _ -> ()
  in
  Fun.protect ~finally:cleanup (fun () -> f filename)

(* Demo: write one line into a temporary file, read it back and print it,
   then report that the file has been cleaned up. *)
let () =
  let demo filename =
    safe_write filename "Temporary content\n";
    read_file filename |> Printf.printf "Temp content: %s"
  in
  with_temp_file "test_" ".txt" demo;
  (* with_temp_file has already removed the file at this point *)
  Printf.printf "Temp file cleaned up\n"

(* Create a temporary directory whose name starts with [prefix], pass its
   path to [f] and return the result of [f].  The directory and everything
   inside it are removed afterwards whether [f] returns or raises.

   A fresh name is reserved with [Filename.temp_file]; the placeholder file
   is deleted and a directory with mode 0o755 is created under that name.

   Cleanup walks the tree with [Unix.lstat], so a symbolic link is unlinked
   and never followed.  A bare [rmdir] would fail as soon as [f] created
   anything inside the directory and leave it behind.  Cleanup is best
   effort: [Unix.Unix_error] and [Sys_error] are ignored.

   Needs the unix library.  Raises [Sys_error] or [Unix.Unix_error] if the
   directory cannot be set up. *)
let with_temp_dir prefix f =
  let dir = Filename.temp_file prefix "" in
  Sys.remove dir;
  Unix.mkdir dir 0o755;
  let rec remove_tree path =
    match (Unix.lstat path).Unix.st_kind with
    | Unix.S_DIR ->
      Array.iter
        (fun entry -> remove_tree (Filename.concat path entry))
        (Sys.readdir path);
      Unix.rmdir path
    | Unix.S_REG | Unix.S_LNK | Unix.S_CHR | Unix.S_BLK
    | Unix.S_FIFO | Unix.S_SOCK ->
      Sys.remove path
  in
  Fun.protect
    ~finally:(fun () ->
      try remove_tree dir with Unix.Unix_error _ | Sys_error _ -> ())
    (fun () -> f dir)

配置文件解析实战

(* Minimal parser for key=value configuration files. *)
module Config = struct
  (* Settings as (key, value) pairs, in the order they appear. *)
  type t = (string * string) list

  (* Parse [content] one line at a time.  Blank lines, lines whose first
     non-blank character is '#', and lines without '=' are skipped.  A line
     is split at its first '='; key and value are both trimmed. *)
  let parse content : t =
    let entry_of_line raw =
      let line = String.trim raw in
      if line = "" || line.[0] = '#' then None
      else
        match String.index_opt line '=' with
        | None -> None
        | Some eq ->
          let key = String.sub line 0 eq |> String.trim in
          let rest = String.length line - eq - 1 in
          let value = String.sub line (eq + 1) rest |> String.trim in
          Some (key, value)
    in
    content |> String.split_on_char '\n' |> List.filter_map entry_of_line

  (* First value bound to [key], if any. *)
  let get config key = List.assoc_opt key config

  (* Like [get], falling back to [default] when [key] is not bound. *)
  let get_or ~default config key =
    match get config key with
    | Some value -> value
    | None -> default

  (* Read [filename] and parse its contents. *)
  let load filename = read_file filename |> parse

  (* Write [config] to [filename], one "key = value" line per pair, joined
     with newlines. *)
  let save filename config =
    config
    |> List.map (fun (k, v) -> Printf.sprintf "%s = %s" k v)
    |> String.concat "\n"
    |> safe_write filename
end

(* Demo: parse an inline configuration and print three of its settings,
   each with a fallback used when the key is not present. *)
let () =
  let config_content = {|
# Database configuration
host = localhost
port = 5432
database = myapp
user = admin

# App settings
debug = true
max_connections = 100
|} in
  let config = Config.parse config_content in
  let lookup ~default key = Config.get_or ~default config key in
  Printf.printf "Host: %s\n" (lookup ~default:"127.0.0.1" "host");
  Printf.printf "Port: %s\n" (lookup ~default:"3306" "port");
  Printf.printf "Debug: %s\n" (lookup ~default:"false" "debug")

日志文件处理实战

(* One parsed log line. *)
type log_entry = {
  timestamp : string;  (* date and time, joined by a single space *)
  level : string;      (* text between the second pair of brackets *)
  message : string;    (* remainder of the line *)
}

(* Parse a line of the form

     [2026-05-11 10:30:00] [INFO] message

   and return [Some entry], or [None] when the line does not match.

   The time and the level use the scanning indication [%s@]]: they read up
   to the next closing bracket and consume it.  A bare [%s] stops only at
   whitespace, so it would swallow the bracket and the literal bracket in
   the format could then never match, rejecting every line.

   Only the exceptions raised for input that does not match are turned into
   [None]. *)
let parse_log_line line =
  match
    Scanf.sscanf line "[%s %s@] [%s@] %[^\n]"
      (fun date time level message ->
        { timestamp = date ^ " " ^ time; level; message })
  with
  | entry -> Some entry
  | exception (Scanf.Scan_failure _ | Failure _ | End_of_file) -> None

(* Keep the entries whose [level] field equals [level], preserving order. *)
let filter_logs_by_level level entries =
  let has_level entry = entry.level = level in
  List.filter has_level entries

(* Count entries per level and return the counts as (level, count) pairs,
   in the order [Hashtbl.fold] produces them. *)
let count_by_level entries =
  let counts = Hashtbl.create 4 in
  let bump { level; _ } =
    let seen =
      match Hashtbl.find_opt counts level with
      | Some n -> n
      | None -> 0
    in
    Hashtbl.replace counts level (seen + 1)
  in
  List.iter bump entries;
  Hashtbl.fold (fun level n acc -> (level, n) :: acc) counts []

(* Read [filename] line by line and return, in file order, the entries for
   the lines that [parse_log_line] accepts; other lines are skipped. *)
let read_log_file filename =
  let collect ic =
    let rec go parsed =
      match input_line ic with
      | exception End_of_file -> List.rev parsed
      | line ->
        go
          (match parse_log_line line with
           | Some entry -> entry :: parsed
           | None -> parsed)
    in
    go []
  in
  with_open_in filename collect

(* Demo: write a sample log to a temporary file, read it back, then print
   the number of parsed entries, the number at level "ERROR" and the count
   for each level. *)
let () =
  let log_content = {|
[2026-05-11 10:30:00] [INFO] Application started
[2026-05-11 10:30:01] [DEBUG] Loading configuration
[2026-05-11 10:30:02] [INFO] Configuration loaded
[2026-05-11 10:30:05] [WARN] Low memory warning
[2026-05-11 10:30:10] [ERROR] Database connection failed
[2026-05-11 10:30:11] [INFO] Retrying connection
[2026-05-11 10:30:15] [INFO] Connected to database
|} in
  with_temp_file "log_" ".log" (fun filename ->
    safe_write filename (String.trim log_content);
    let entries = read_log_file filename in
    entries |> List.length |> Printf.printf "Total entries: %d\n";
    entries
    |> filter_logs_by_level "ERROR"
    |> List.length
    |> Printf.printf "Errors: %d\n";
    count_by_level entries
    |> List.iter (fun (level, count) ->
         Printf.printf "  %s: %d\n" level count)
  )

JSON 文件读写

使用 Yojson 库(需要 opam install yojson):

(*
(* 需要安装 yojson: opam install yojson *)

open Yojson.Basic.Util

let read_json filename =
  Yojson.Basic.from_file filename

let get_string_field json field =
  json |> member field |> to_string

let get_int_field json field =
  json |> member field |> to_int

let parse_users json =
  json |> member "users" |> to_list |> List.map (fun user ->
    let name = user |> member "name" |> to_string in
    let age = user |> member "age" |> to_int in
    let email = user |> member "email" |> to_string in
    (name, age, email)
  )

let json_content = {|
{
  "users": [
    {"name": "Alice", "age": 30, "email": "alice@example.com"},
    {"name": "Bob", "age": 25, "email": "bob@example.com"}
  ]
}
|}

let () =
  let json = Yojson.Basic.from_string json_content in
  let users = parse_users json in
  List.iter (fun (name, age, email) ->
    Printf.printf "%s (%d) - %s\n" name age email
  ) users
*)

💡 提示:如果没有第三方 JSON 库,也可以使用简单的手写解析器或 Scanf 来处理结构化的 JSON 数据。但推荐使用 Yojson 这样成熟的库。

扩展阅读