第 20 章:性能优化
第 20 章:性能优化
“过早优化是万恶之源。” —— Donald Knuth
20.1 基准测试
20.1.1 Benchmark 模块
require "benchmark"
# 简单基准测试
Benchmark.bm do |x|
x.report("Array:") { 1_000_000.times { [1, 2, 3, 4, 5] } }
x.report("Frozen:") { 1_000_000.times { [1, 2, 3, 4, 5].freeze } }
end
# 结果示例
# user system total real
# Array: 0.320000 0.010000 0.330000 ( 0.332145)
# Frozen: 0.210000 0.000000 0.210000 ( 0.211234)
# 比较多个方案
Benchmark.bmbm do |x|
x.report("concat:") { s = ""; 1000.times { s << "a" } }
x.report("join:") { 1000.times.map { "a" }.join }
x.report("prepend:") { s = ""; 1000.times { s = "a" + s } }
end
20.1.2 benchmark-ips gem
# Gemfile
gem "benchmark-ips"
require "benchmark/ips"
Benchmark.ips do |x|
x.config(warmup: 2, time: 5)
x.report("String concat:") do
s = ""
100.times { |i| s << i.to_s }
end
x.report("String interpolation:") do
s = ""
100.times { |i| s = "#{s}#{i}" }
end
x.report("Array join:") do
Array.new(100) { |i| i.to_s }.join
end
x.compare!
end
# 输出示例
# Comparison:
# Array join: 52345.6 i/s
# String concat: 38765.4 i/s - 1.35x slower
# String interpolation: 12345.6 i/s - 4.24x slower
20.1.3 自定义 Profiler
# Minimal profiling helpers: wall-time of a block and change in process RSS.
class Profiler
  # Runs the block and prints how long it took, in milliseconds.
  #
  # @param label [String] prefix of the printed line
  # @return [Object] whatever the block returns
  def self.measure(label = "Block")
    # Monotonic clock, so the reading is not skewed by system clock changes.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    result = yield
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    puts "#{label}: #{(elapsed * 1000).round(3)}ms"
    result
  end

  # Runs the block and prints the difference in the process's resident set
  # size (as reported by `ps -o rss=`) sampled before and after it.
  #
  # NOTE(review): RSS is a coarse signal and may not shrink after a GC;
  # presumably intended as a rough indicator only.
  #
  # @param label [String] prefix of the printed line
  # @return [Object] whatever the block returns
  def self.memory(label = "Memory")
    before = rss_kb
    result = yield
    puts "#{label}: #{rss_kb - before}KB"
    result
  end

  # Current RSS of this process, read by shelling out to `ps`.
  def self.rss_kb
    `ps -o rss= -p #{Process.pid}`.to_i
  end
  private_class_method :rss_kb
end
Profiler.measure("Array creation") do
1_000_000.times { [1, 2, 3] }
end
Profiler.memory do
arr = Array.new(1_000_000) { |i| i }
end
20.2 内存分析
20.2.1 内置工具
# 查看对象数量
GC.start
puts ObjectSpace.count_objects
# 遍历对象
ObjectSpace.each_object(String) do |str|
puts str if str.length > 100
end
# GC 统计
puts GC.stat.inspect
# {:count=>12, :heap_allocated_pages=>142, :heap_free_slots=>3526, ...}
# 内存分配统计
require "objspace"
puts ObjectSpace.memsize_of("hello") # => 40
puts ObjectSpace.memsize_of([1, 2, 3]) # => 120
20.2.2 memory_profiler gem
# Gemfile
gem "memory_profiler"
require "memory_profiler"
report = MemoryProfiler.report do
1000.times do
"hello" + " world"
Array.new(100) { |i| i.to_s }
end
end
report.pretty_print
# 输出:
# Total allocated: 123456 bytes (5678 objects)
# Total retained: 0 bytes (0 objects)
#
# allocated memory by gem
# -----------------------------------
# 123456 other
#
# allocated memory by file
# -----------------------------------
# 123456 script.rb
#
# allocated memory by location
# -----------------------------------
# 61728 script.rb:5
# 61728 script.rb:6
20.2.3 内存优化技巧
# 1. 使用符号代替字符串作为标识符
# ❌
data = { "name" => "Alice", "age" => 25 }
# ✅
data = { name: "Alice", age: 25 }
# 2. 使用 freeze 冻结不变的字符串
GREETING = "Hello".freeze
# 或使用 frozen_string_literal: true
# 3. 使用 Array.new 而非 [] 创建大数组
# ❌ 可能不高效
arr = []
1000.times { |i| arr << i }
# ✅ 更高效
arr = Array.new(1000) { |i| i }
# 4. 预分配字符串
str = String.new(capacity: 1024)
100.times { |i| str << i.to_s }
# 5. 避免创建临时对象
# ❌
data.map { |s| s.upcase }.join(", ")
# ✅(注意:map! 会原地修改 data,仅适用于数组且之后不再需要原始内容的情况)
data.map!(&:upcase).join(", ")
# 6. 使用 StringIO 替代字符串拼接
require "stringio"
io = StringIO.new
1000.times { |i| io << i.to_s << "\n" }
result = io.string
20.3 代码优化
20.3.1 算法优化
# 1. 选择正确的数据结构
# 查找操作
# ❌ 数组查找 O(n)
array = (1..10000).to_a
array.include?(9999) # 慢
# ✅ 集合查找 O(1)
require "set"
set = Set.new(1..10000)
set.include?(9999) # 快
# 2. 缓存计算结果
# ❌ 重复计算
# Adds up the square roots of the integers 1..n, starting from 0.
# Nothing is remembered between calls, so every call redoes the full loop.
def expensive_calculation(n)
  total = 0
  1.upto(n) { |k| total += Math.sqrt(k) }
  total
end
# ✅ 缓存结果
# Same sum of square roots over 1..n, but each result is stored in the
# @cache hash keyed by n and returned from there on later calls.
def cached_calculation(n)
  @cache ||= {}
  return @cache[n] if @cache[n]

  @cache[n] = (1..n).inject(0) { |acc, k| acc + Math.sqrt(k) }
end
# 3. 使用懒加载
class User
  # Returns the posts whose user_id matches this user's id.
  # The lookup result is kept in @posts and reused on later calls.
  def posts
    return @posts if @posts

    @posts = Post.where(user_id: id)
  end
end
# 4. 批量操作
# ❌ N+1 查询
users.each do |user|
puts user.posts.count
end
# ✅ 预加载
users.includes(:posts).each do |user|
  # `size` reads the already-loaded association; `count` would issue a
  # SQL COUNT for every user and bring the N+1 pattern back.
  puts user.posts.size
end
20.3.2 Ruby 特定优化
# 1. 直接使用 select,而不是对取反后的条件使用 reject
# ❌
data.reject { |x| !x.active? }
# ✅
data.select { |x| x.active? }
# 2. 使用 any? / all? 而非手动循环
# ❌
result = false
items.each { |item| result = true if item.valid? }
# ✅
result = items.any?(&:valid?)
# 3. 使用 dig 安全访问嵌套结构
# ❌
data[:user] && data[:user][:profile] && data[:user][:profile][:name]
# ✅
data.dig(:user, :profile, :name)
# 4. 使用 tally 计数
# ❌
counts = Hash.new(0)
items.each { |item| counts[item] += 1 }
# ✅
counts = items.tally
# 5. 使用 sum 而非 reduce
# ❌
numbers.reduce(0, :+)
# ✅
numbers.sum
# 6. 使用 frozen_string_literal
# frozen_string_literal: true
20.4 JIT 编译
20.4.1 YJIT(Ruby 3.1+)
# 启用 YJIT
ruby --yjit script.rb
# 注意:Ruby 3.3 中 YJIT 仍非默认启用,需使用 --yjit,或在运行时调用 RubyVM::YJIT.enable(3.3+)
ruby --yjit --yjit-stats script.rb
# 查看 YJIT 统计
ruby --yjit-stats -e "puts 1 + 1"
# YJIT 友好的代码
# 1. 保持类型一致
# Returns the result of `a + b`.
def add(a, b)
a + b # YJIT optimizes integer addition
end
# 2. 避免动态定义方法
# ❌
define_method(:foo) { ... }
# ✅
def foo; ...; end
# 3. 减少元编程
20.4.2 MJIT(Ruby 2.6 – 3.2)
# MJIT 使用 C 编译器优化
ruby --mjit script.rb
# YJIT 性能更好,推荐使用;MJIT 已在 Ruby 3.3 中移除
20.5 GC 优化
20.5.1 GC 调优
# 环境变量调优(注意:Ruby 3.3 起 RUBY_GC_HEAP_INIT_SLOTS 已弃用,改用 RUBY_GC_HEAP_{0..4}_INIT_SLOTS)
RUBY_GC_HEAP_INIT_SLOTS=1000000 ruby script.rb
RUBY_GC_HEAP_GROWTH_FACTOR=1.1 ruby script.rb
RUBY_GC_MALLOC_LIMIT=100000000 ruby script.rb
20.5.2 GC 控制
# 手动 GC
GC.start
# 禁用 GC(谨慎使用)
GC.disable
# ... 代码 ...
GC.enable
# GC 统计
GC.stat(:count) # GC 次数
GC.stat(:total_freed_objects) # 释放的对象数
GC.stat(:heap_live_slots) # 活跃对象数
GC.stat(:heap_free_slots) # 空闲 slot 数
# 内存泄漏检测
# Leak check: prints how many heap slots are still live after the block
# compared with before it.
#
# A GC is forced before each sample so the difference reflects objects that
# remain reachable after the block, not garbage that was merely waiting to
# be collected (without this the delta can even go negative if a GC happens
# to run inside the block).
def check_memory
  GC.start
  before = GC.stat(:heap_live_slots)
  yield
  GC.start
  after = GC.stat(:heap_live_slots)
  puts "Objects: #{before} → #{after} (#{after - before} retained)"
end
check_memory do
10000.times { "hello" }
end
20.6 实际优化案例
20.6.1 N+1 查询优化
# ❌ N+1 查询(慢)
users = User.all
users.each do |user|
puts user.posts.map(&:title)
# 每次循环都查询一次 posts
end
# ✅ 预加载(快)
users = User.includes(:posts)
users.each do |user|
puts user.posts.map(&:title)
end
# ✅ 更精确的预加载
users = User.includes(:posts).where(posts: { published: true })
20.6.2 大文件处理
# ❌ 一次性读取大文件
content = File.read("huge_file.txt")
lines = content.split("\n")
# ✅ 逐行处理
File.foreach("huge_file.txt") do |line|
process_line(line.chomp)
end
# ✅ 分批处理
# Streams a file line by line and yields the lines in arrays of at most
# +batch_size+ entries, with trailing line terminators removed.
#
# @param file_path [String] path of the file to read
# @param batch_size [Integer] maximum number of lines per yielded batch
# @yieldparam batch [Array<String>] chomped lines of one batch
# @return [Enumerator, nil] an Enumerator over the batches when no block is
#   given, otherwise nil
# @raise [ArgumentError] if batch_size is not positive
def process_in_batches(file_path, batch_size = 1000)
  raise ArgumentError, "batch_size must be positive" unless batch_size.positive?
  return enum_for(__method__, file_path, batch_size) unless block_given?

  # File.foreach without a block is a lazy line enumerator, so only one
  # batch of lines is held in memory at a time.
  File.foreach(file_path).each_slice(batch_size) do |lines|
    yield lines.map(&:chomp)
  end
  nil
end
20.6.3 缓存优化
# In-memory key/value cache whose entries expire after a fixed TTL.
class Cache
  # @param ttl [Numeric] lifetime of an entry, in seconds
  def initialize(ttl: 300)
    @store = {}
    @ttl = ttl
  end

  # Returns the cached value for +key+ if it has not expired; otherwise runs
  # the block, stores its result with a fresh expiry, and returns it.
  #
  # @param key [Object] cache key
  # @yieldreturn [Object] the value to cache on a miss
  # @return [Object] the cached or freshly computed value
  def fetch(key)
    entry = @store[key]
    return entry[:value] if entry && entry[:expires_at] > monotonic_now

    value = yield
    # Expiry is measured from the moment the value has been computed.
    @store[key] = { value: value, expires_at: monotonic_now + @ttl }
    value
  end

  # Removes every entry.
  def clear
    @store.clear
  end

  private

  # Monotonic seconds: unlike Time.now, this is not affected by wall-clock
  # adjustments, so entries neither expire early nor outlive their TTL when
  # the system clock is changed.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
cache = Cache.new(ttl: 60)
# 第一次调用会计算,后续使用缓存
result = cache.fetch("expensive_data") do
expensive_computation
end
20.7 动手练习
- 基准测试比较
# 比较不同字符串拼接方式的性能
# 1. +
# 2. <<
# 3. interpolation
# 4. concat
- 优化慢代码
# 优化这段代码
# Exercise fixture: collects the squares of the even integers in 1..10000.
# Left as a plain accumulate-in-a-loop on purpose; optimizing it is the task.
def slow_code
  squares = []
  1.upto(10000) do |n|
    next unless n % 2 == 0

    squares.push(n * n)
  end
  squares
end
- 内存分析
# 使用 memory_profiler 分析以下代码的内存分配
# Exercise fixture: builds the strings "item_0" through "item_9999".
def process
  (0...10000).map { |idx| "item_#{idx}" }
end
20.8 本章小结
| 要点 | 说明 |
|---|---|
| Benchmark | 内置基准测试模块 |
| benchmark-ips | 更好的基准测试工具 |
| memory_profiler | 内存分析工具 |
| YJIT | Ruby 3.1+ 的 JIT 编译器 |
| GC 调优 | 通过环境变量调整 GC 行为 |
| 优化原则 | 先测量,再优化;算法 > 微优化 |
📖 扩展阅读
上一章:← 第 19 章:并发编程 下一章:第 21 章:Docker 部署 →