12 - 文件与 I/O
第 12 章:文件与 I/O
掌握文件读写、常用数据格式处理和 pathlib 路径操作。
12.1 基本文件操作
12.1.1 读写文本文件
# Write: mode "w" creates the file, or overwrites it if it already exists.
with open("data.txt", "w", encoding="utf-8") as f:
    f.write("第一行\n")
    f.write("第二行\n")

# Read the whole file into a single string.
with open("data.txt", "r", encoding="utf-8") as f:
    content = f.read()

# Read line by line; strip() drops the trailing newline before printing.
with open("data.txt", "r", encoding="utf-8") as f:
    for line in f:
        print(line.strip())

# Read every line into a list; each element keeps its trailing newline.
with open("data.txt", "r", encoding="utf-8") as f:
    lines = f.readlines()  # ['第一行\n', '第二行\n']

# Append: mode "a" adds to the end of the file instead of overwriting it.
with open("data.txt", "a", encoding="utf-8") as f:
    f.write("第三行\n")
12.1.2 文件模式
| 模式 | 说明 |
|---|---|
| `r` | 只读(默认) |
| `w` | 写入(覆盖) |
| `a` | 追加 |
| `x` | 创建(文件已存在则失败) |
| `b` | 二进制模式(如 `rb`, `wb`) |
| `+` | 读写模式(如 `r+`, `w+`) |
12.1.3 二进制文件
# Read a binary file: "rb" yields raw bytes with no text decoding.
# The file image.jpg must already exist next to the script.
with open("image.jpg", "rb") as f:
    data = f.read()
print(len(data))  # size in bytes

# Write the same bytes back out under a new name.
with open("copy.jpg", "wb") as f:
    f.write(data)
12.2 pathlib 路径操作
12.2.1 基本用法
from pathlib import Path
# Create paths.
# Separate names are used so the first example is not silently overwritten.
home_docs = Path("/home/user/documents")  # from a single string
p = Path("documents") / "file.txt"  # preferred: join segments with /

# Common attributes
print(p.name)  # file.txt
print(p.stem)  # file
print(p.suffix)  # .txt
print(p.parent)  # documents
print(p.resolve())  # absolute path

# Predicates: each returns a bool, so print it to actually see the answer.
print(p.exists())
print(p.is_file())
print(p.is_dir())
print(p.is_absolute())  # False: "documents/file.txt" is a relative path
12.2.2 读写操作
from pathlib import Path
p = Path("data.txt")
# write_text() and write_bytes() each replace the whole file, so the binary
# example uses its own file; otherwise it would clobber the text written to p.
blob = Path("data.bin")

# Write
p.write_text("Hello, World!", encoding="utf-8")
blob.write_bytes(b"\x00\x01\x02")

# Read
content = p.read_text(encoding="utf-8")  # 'Hello, World!'
data = blob.read_bytes()  # b'\x00\x01\x02'
12.2.3 目录操作
from pathlib import Path
# Create a directory; parents=True also creates missing ancestors and
# exist_ok=True makes the call a no-op when the directory is already there.
Path("new_dir").mkdir(parents=True, exist_ok=True)

# List the entries directly inside the current directory.
for p in Path(".").iterdir():
    print(p)

# glob: match by pattern in the current directory only.
for p in Path(".").glob("*.py"):
    print(p)

# rglob: same pattern, but searched recursively through subdirectories.
for p in Path(".").rglob("*.py"):
    print(p)

# Delete a file; missing_ok=True suppresses the error if it does not exist.
Path("data.txt").unlink(missing_ok=True)

# Delete an empty directory. rmdir() fails if the directory is missing or
# not empty, so create it first to keep this example runnable on its own.
empty_dir = Path("empty_dir")
empty_dir.mkdir(exist_ok=True)
empty_dir.rmdir()
12.3 CSV 文件
12.3.1 基本读写
import csv
# Write CSV. newline="" lets the csv module control line endings itself,
# which avoids blank lines between rows on some platforms.
with open("users.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["姓名", "年龄", "城市"])
    writer.writerow(["Alice", 30, "北京"])
    writer.writerow(["Bob", 25, "上海"])

# Read CSV. newline="" is needed here too, so that newlines embedded in
# quoted fields are handed to the csv module unchanged.
with open("users.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader)  # the first row holds the column titles
    for row in reader:
        print(row)  # ['Alice', '30', '北京'] (every value comes back as str)
12.3.2 DictReader / DictWriter
import csv
# Write rows given as dicts; fieldnames fixes the column order.
with open("users.csv", "w", newline="", encoding="utf-8") as f:
    fieldnames = ["name", "age", "city"]
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerow({"name": "Alice", "age": 30, "city": "北京"})

# Read rows back as dicts keyed by the header row.
# newline="" is passed when reading as well, as the csv module requires.
with open("users.csv", "r", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(f"{row['name']}, {row['age']}岁")  # Alice, 30岁
12.4 JSON 文件
import json
# Python object -> JSON string.
# ensure_ascii=False keeps non-ASCII text readable instead of \uXXXX escapes;
# indent=2 pretty-prints the output.
data = {"name": "Alice", "age": 30, "scores": [90, 85, 92]}
json_str = json.dumps(data, ensure_ascii=False, indent=2)

# JSON string -> Python object
parsed = json.loads(json_str)

# Write straight to a file
with open("data.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

# Read back from the file
with open("data.json", "r", encoding="utf-8") as f:
    loaded = json.load(f)
自定义序列化
import json
from datetime import datetime
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``datetime`` objects as ISO 8601 strings."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        ``datetime`` instances become their ``isoformat()`` string. Anything
        else is delegated to the base class, which raises ``TypeError``.
        """
        if isinstance(obj, datetime):
            return obj.isoformat()
        return super().default(obj)


data = {"created": datetime.now()}
print(json.dumps(data, cls=DateTimeEncoder, ensure_ascii=False))
12.5 YAML 文件
# pip install pyyaml
import yaml
# Read. safe_load() only builds plain Python types, so it is the right
# choice for files you do not fully control.
# The file config.yaml must already exist.
with open("config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Write. safe_dump() is the counterpart of safe_load(): it emits only standard
# YAML tags. allow_unicode=True keeps non-ASCII text unescaped.
data = {"database": {"host": "localhost", "port": 5432}}
with open("config.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(data, f, default_flow_style=False, allow_unicode=True)
12.6 XML 文件
import xml.etree.ElementTree as ET
# Parse XML. The file data.xml must already exist.
# NOTE: the standard library documents xml.etree as not secure against
# maliciously constructed data; only parse input you trust.
tree = ET.parse("data.xml")
root = tree.getroot()

# Iterate over the root's direct children.
for child in root:
    print(child.tag, child.attrib)

# Find every <item> element anywhere below the root.
for elem in root.iter("item"):
    print(elem.text)

# Build a new XML document from scratch.
root = ET.Element("data")
item = ET.SubElement(root, "item")
item.text = "Hello"
item.set("id", "1")
tree = ET.ElementTree(root)
tree.write("output.xml", xml_declaration=True, encoding="utf-8")
12.7 数据格式对比
| 格式 | 可读性 | 体积 | 适用场景 |
|---|---|---|---|
| CSV | ⭐⭐⭐ | 小 | 表格数据 |
| JSON | ⭐⭐⭐⭐ | 中 | API 交互、配置 |
| YAML | ⭐⭐⭐⭐⭐ | 中 | 配置文件 |
| XML | ⭐⭐ | 大 | 企业系统、SOAP |
| TOML | ⭐⭐⭐⭐⭐ | 中 | Python 项目配置 |
12.8 注意事项
🔴 注意:
- 始终指定 `encoding="utf-8"` 避免编码问题
- CSV 写入时使用 `newline=""` 避免多余空行
- `json.dumps()` 中设置 `ensure_ascii=False` 支持中文
- YAML 使用 `safe_load()` 避免安全风险
💡 提示:
- 优先使用 `pathlib` 而非 `os.path`
- 小文件使用 `read_text()` / `write_text()` 一步到位
- 大文件逐行读取避免内存溢出
- 临时文件使用 `tempfile` 模块
📌 业务场景:
import json
import csv
from pathlib import Path
from dataclasses import dataclass, asdict
@dataclass
class Employee:
    """One employee record; the three fields become the export columns."""

    name: str
    department: str
    salary: float
def export_to_formats(employees: list[Employee], base_path: Path) -> None:
    """Export employee records as both JSON and CSV.

    Args:
        employees: Dataclass instances to export; each one is converted to a
            dict with ``asdict``.
        base_path: Output directory. It is created, including missing
            parents, if it does not exist.

    Writes ``employees.json`` and ``employees.csv`` into ``base_path``
    (UTF-8), then prints a confirmation message.
    """
    base_path.mkdir(parents=True, exist_ok=True)
    data = [asdict(e) for e in employees]

    # JSON: ensure_ascii=False keeps non-ASCII text readable in the file.
    (base_path / "employees.json").write_text(
        json.dumps(data, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )

    # CSV: newline="" lets the csv module manage line endings itself.
    with open(base_path / "employees.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["name", "department", "salary"])
        writer.writeheader()
        writer.writerows(data)

    print(f"已导出到 {base_path}")
# Sample records, spelled out with keyword arguments so each field is labeled.
employees = [
    Employee(name="Alice", department="工程", salary=20000),
    Employee(name="Bob", department="设计", salary=18000),
]

# Export both formats into ./output (created on demand).
export_to_formats(employees, Path("output"))