Ruby 入门指南 / 第 20 章：性能优化

第 20 章：性能优化

“过早优化是万恶之源。” —— Donald Knuth

20.1 基准测试

20.1.1 Benchmark 模块

require "benchmark"

# 简单基准测试
Benchmark.bm do |x|
  x.report("Array:") { 1_000_000.times { [1, 2, 3, 4, 5] } }
  x.report("Frozen:") { 1_000_000.times { [1, 2, 3, 4, 5].freeze } }
end

# 结果示例
#        user     system      total        real
# Array: 0.320000   0.010000   0.330000 (  0.332145)
# Frozen: 0.210000   0.000000   0.210000 (  0.211234)

# 比较多个方案
Benchmark.bmbm do |x|
  x.report("concat:") { s = ""; 1000.times { s << "a" } }
  x.report("join:")   { 1000.times.map { "a" }.join }
  x.report("prepend:") { s = ""; 1000.times { s = "a" + s } }
end

20.1.2 benchmark-ips gem

# Gemfile
gem "benchmark-ips"

require "benchmark/ips"

Benchmark.ips do |x|
  x.config(warmup: 2, time: 5)
  
  x.report("String concat:") do
    s = ""
    100.times { |i| s << i.to_s }
  end

  x.report("String interpolation:") do
    s = ""
    100.times { |i| s = "#{s}#{i}" }
  end

  x.report("Array join:") do
    Array.new(100) { |i| i.to_s }.join
  end

  x.compare!
end

# 输出示例
# Comparison:
#         Array join:    52345.6 i/s
#     String concat:    38765.4 i/s - 1.35x slower
# String interpolation: 12345.6 i/s - 4.24x slower

20.1.3 自定义 Profiler

# Small helpers for timing a block and for observing how the process's
# resident set size changes across a block.
class Profiler
  class << self
    # Runs the block, prints "<label>: <elapsed>ms" (milliseconds rounded to
    # three decimals, measured on the monotonic clock) and returns whatever
    # the block returned.
    def measure(label = "Block")
      started_at = monotonic_seconds
      value = yield
      millis = (monotonic_seconds - started_at) * 1000
      puts "#{label}: #{millis.round(3)}ms"
      value
    end

    # Runs the block, prints the difference in RSS reported by `ps` before
    # and after it, and returns whatever the block returned.
    def memory
      rss_before = resident_set_size
      value = yield
      rss_after = resident_set_size
      puts "Memory: #{rss_after - rss_before}KB"
      value
    end

    private

    # Current reading of the monotonic clock, in seconds.
    def monotonic_seconds
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    # RSS of the current process as printed by `ps`, parsed as an Integer.
    def resident_set_size
      `ps -o rss= -p #{Process.pid}`.to_i
    end
  end
end

Profiler.measure("Array creation") do
  1_000_000.times { [1, 2, 3] }
end

Profiler.memory do
  arr = Array.new(1_000_000) { |i| i }
end

20.2 内存分析

20.2.1 内置工具

# 查看对象数量
GC.start
puts ObjectSpace.count_objects

# 遍历对象
ObjectSpace.each_object(String) do |str|
  puts str if str.length > 100
end

# GC 统计
puts GC.stat.inspect
# {:count=>12, :heap_allocated_pages=>142, :heap_free_slots=>3526, ...}

# 内存分配统计
require "objspace"
puts ObjectSpace.memsize_of("hello")  # => 40
puts ObjectSpace.memsize_of([1, 2, 3])  # => 40（64 位 CRuby：小数组直接内嵌在对象槽中；具体数值因版本和平台而异）

20.2.2 memory_profiler gem

# Gemfile
gem "memory_profiler"

require "memory_profiler"

report = MemoryProfiler.report do
  1000.times do
    "hello" + " world"
    Array.new(100) { |i| i.to_s }
  end
end

report.pretty_print
# 输出：
# Total allocated: 123456 bytes (5678 objects)
# Total retained:  0 bytes (0 objects)
#
# allocated memory by gem
# -----------------------------------
#   123456  other
#
# allocated memory by file
# -----------------------------------
#   123456  script.rb
#
# allocated memory by location
# -----------------------------------
#    61728  script.rb:5
#    61728  script.rb:6

20.2.3 内存优化技巧

# 1. 使用符号代替字符串作为标识符
# ❌
data = { "name" => "Alice", "age" => 25 }
# ✅
data = { name: "Alice", age: 25 }

# 2. 使用 freeze 冻结不变的字符串
GREETING = "Hello".freeze
# 或使用 frozen_string_literal: true

# 3. 使用 Array.new 而非 [] 创建大数组
# ❌ 可能不高效
arr = []
1000.times { |i| arr << i }
# ✅ 更高效
arr = Array.new(1000) { |i| i }

# 4. 预分配字符串
str = String.new(capacity: 1024)
100.times { |i| str << i.to_s }

# 5. 避免创建临时对象
# ❌
data.map { |s| s.upcase }.join(", ")
# ✅
data.map!(&:upcase).join(", ")

# 6. 使用 StringIO 替代字符串拼接
require "stringio"
io = StringIO.new
1000.times { |i| io << i.to_s << "\n" }
result = io.string

20.3 代码优化

20.3.1 算法优化

# 1. 选择正确的数据结构
# 查找操作
# ❌ 数组查找 O(n)
array = (1..10000).to_a
array.include?(9999)  # 慢

# ✅ 集合查找 O(1)
require "set"
set = Set.new(1..10000)
set.include?(9999)  # 快

# 2. 缓存计算结果
# ❌ 重复计算
# Adds up the square roots of every integer from 1 through n, starting from
# the Integer 0 and accumulating left to right. Recomputed on every call.
def expensive_calculation(n)
  total = 0
  1.upto(n) { |k| total += Math.sqrt(k) }
  total
end

# ✅ 缓存结果
# Same sum of square roots from 1 through n, but each result is remembered in
# the @cache hash (keyed by n) so a repeated call with the same n skips the
# loop.
def cached_calculation(n)
  memo = (@cache ||= {})
  known = memo[n]
  return known if known

  memo[n] = (1..n).inject(0) { |acc, k| acc + Math.sqrt(k) }
end

# 3. 使用懒加载
class User
  # Looks the user's posts up on first access via Post.where(user_id: id) and
  # keeps the result in @posts so later calls reuse it.
  def posts
    return @posts if @posts

    @posts = Post.where(user_id: id)
  end
end

# 4. 批量操作
# ❌ N+1 查询
users.each do |user|
  puts user.posts.count
end

# ✅ 预加载
# `size` reads the length of the already-preloaded association. `count` would
# issue a separate COUNT query for every user even after `includes`, which
# brings the N+1 pattern straight back.
users.includes(:posts).each do |user|
  puts user.posts.size
end

20.3.2 Ruby 特定优化

# 1. 使用 select 而非 reject 的反面
# ❌
data.reject { |x| !x.active? }
# ✅
data.select { |x| x.active? }

# 2. 使用 any? / all? 而非手动循环
# ❌
result = false
items.each { |item| result = true if item.valid? }

# ✅
result = items.any?(&:valid?)

# 3. 使用 dig 安全访问嵌套结构
# ❌
data[:user] && data[:user][:profile] && data[:user][:profile][:name]
# ✅
data.dig(:user, :profile, :name)

# 4. 使用 tally 计数
# ❌
counts = Hash.new(0)
items.each { |item| counts[item] += 1 }
# ✅
counts = items.tally

# 5. 使用 sum 而非 reduce
# ❌
numbers.reduce(0, :+)
# ✅
numbers.sum

# 6. 使用 frozen_string_literal
# frozen_string_literal: true

20.4 JIT 编译

20.4.1 YJIT（Ruby 3.1+）

# 启用 YJIT
ruby --yjit script.rb

# 注意：YJIT 默认并不启用（Ruby 3.3 也一样）；Ruby 3.3+ 还可以在运行时调用 RubyVM::YJIT.enable 来启用
ruby --yjit --yjit-stats script.rb

# 查看 YJIT 统计
ruby --yjit-stats -e "puts 1 + 1"

# YJIT 友好的代码
# 1. 保持类型一致
def add(a, b)
  a + b  # YJIT 优化整数加法
end

# 2. 避免动态定义方法
# ❌
define_method(:foo) { ... }

# ✅
def foo; ...; end

# 3. 减少元编程

20.4.2 MJIT（Ruby 2.6–3.2，已在 Ruby 3.3 中移除）

# MJIT 使用 C 编译器优化
ruby --mjit script.rb

# MJIT 已在 Ruby 3.3 中移除；YJIT 性能更好，推荐使用

20.5 GC 优化

20.5.1 GC 调优

# 环境变量调优（注意：自 Ruby 3.3 起 RUBY_GC_HEAP_INIT_SLOTS 已弃用且不再生效，应改用 RUBY_GC_HEAP_{0,1,2,3,4}_INIT_SLOTS）
RUBY_GC_HEAP_INIT_SLOTS=1000000 ruby script.rb
RUBY_GC_HEAP_GROWTH_FACTOR=1.1 ruby script.rb
RUBY_GC_MALLOC_LIMIT=100000000 ruby script.rb

20.5.2 GC 控制

# 手动 GC
GC.start

# 禁用 GC（谨慎使用）
GC.disable
# ... 代码 ...
GC.enable

# GC 统计
GC.stat(:count)           # GC 次数
GC.stat(:total_freed_objects)  # 释放的对象数
GC.stat(:heap_live_slots)      # 活跃对象数
GC.stat(:heap_free_slots)      # 空闲 slot 数

# 内存泄漏检测
# Rough leak check: reports how the number of live heap slots changes across
# the block.
#
# A full GC is forced before each sample. Without it the difference mostly
# reflects whether the collector happened to run while the block executed,
# not what the block kept alive. The figure is therefore labelled "retained"
# rather than "allocated".
#
# Yields to the given block, prints one summary line, and returns the block's
# value.
def check_memory
  GC.start
  before = GC.stat(:heap_live_slots)
  result = yield
  GC.start
  after = GC.stat(:heap_live_slots)
  puts "Objects: #{before} → #{after} (#{after - before} retained)"
  result
end

check_memory do
  10000.times { "hello" }
end

20.6 实际优化案例

20.6.1 N+1 查询优化

# ❌ N+1 查询（慢）
users = User.all
users.each do |user|
  puts user.posts.map(&:title)
  # 每次循环都查询一次 posts
end

# ✅ 预加载（快）
users = User.includes(:posts)
users.each do |user|
  puts user.posts.map(&:title)
end

# ✅ 预加载并按关联表条件过滤（注意：只会返回拥有已发布文章的用户，且 user.posts 中只包含已发布的文章）
users = User.includes(:posts).where(posts: { published: true })

20.6.2 大文件处理

# ❌ 一次性读取大文件
content = File.read("huge_file.txt")
lines = content.split("\n")

# ✅ 逐行处理
File.foreach("huge_file.txt") do |line|
  process_line(line.chomp)
end

# ✅ 分批处理
# Streams the file line by line and hands the caller arrays of chomped lines.
#
# A batch is yielded as soon as it holds at least batch_size lines; whatever
# is left after the last line is yielded as a final, shorter batch. Every
# yielded batch is a fresh Array.
def process_in_batches(file_path, batch_size = 1000)
  pending = []
  File.foreach(file_path) do |raw_line|
    pending.push(raw_line.chomp)
    next if pending.size < batch_size

    yield pending
    pending = []
  end
  pending.empty? ? nil : yield(pending)
end

20.6.3 缓存优化

# In-memory key/value cache whose entries expire after a fixed time-to-live.
#
# Expiry is tracked on the monotonic clock rather than with Time.now, so
# adjusting the system clock can neither expire entries early nor keep stale
# ones alive.
class Cache
  # @param ttl [Numeric] lifetime of each entry, in seconds
  def initialize(ttl: 300)
    @store = {}
    @ttl = ttl
  end

  # Returns the stored value for key while its entry is still fresh.
  # Otherwise runs the block, stores its result with a new expiry, and
  # returns that result. nil and false results are cached like any other
  # value, because freshness is decided by the entry, not by the value.
  def fetch(key)
    entry = @store[key]
    return entry[:value] if entry && entry[:expires_at] > monotonic_now

    value = yield
    # Stamp the expiry after the block ran, so slow computations still get
    # the full ttl.
    @store[key] = { value: value, expires_at: monotonic_now + @ttl }
    value
  end

  # Drops every entry.
  def clear
    @store.clear
  end

  private

  # Seconds on the monotonic clock; only differences between readings matter.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end

cache = Cache.new(ttl: 60)

# 第一次调用会计算，后续使用缓存
result = cache.fetch("expensive_data") do
  expensive_computation
end

20.7 动手练习

基准测试比较

# 比较不同字符串拼接方式的性能
# 1. +
# 2. <<
# 3. interpolation
# 4. concat

优化慢代码

# 优化这段代码
# Exercise starting point: walks the integers 1 through 10000 and collects the
# square of each even one. Returns the squares as an Array, in ascending order.
def slow_code
  result = []
  (1..10000).each do |i|
    if i % 2 == 0
      result << i * i
    end
  end
  result
end

内存分析

# 使用 memory_profiler 分析以下代码的内存分配
# Builds and returns an Array of 10000 strings, "item_0" through "item_9999".
def process
  10000.times.map { |i| "item_#{i}" }
end

20.8 本章小结

要点	说明
Benchmark	内置基准测试模块
benchmark-ips	更好的基准测试工具
memory_profiler	内存分析工具
YJIT	Ruby 3.1+ 的 JIT 编译器
GC 调优	通过环境变量调整 GC 行为
优化原则	先测量，再优化；算法 > 微优化

📖 扩展阅读

上一章：← 第 19 章：并发编程 下一章：第 21 章：Docker 部署 →