Godot 4 GDScript 教程 / 26 - 性能优化与调试
26 - 性能优化与调试
性能优化是游戏开发中不可或缺的环节。Godot 4 引入了全新的渲染架构和性能分析工具。本文将系统性地介绍性能分析、优化策略和调试技巧。
性能监视器
内置 Profiler
Godot 编辑器内置了多个性能分析器:
| Profiler | 用途 |
|---|---|
| Performance | 整体性能指标(FPS、内存、对象数) |
| Visual Profiler | 渲染管线分析(DrawCall、着色器) |
| Script Profiler | GDScript 函数耗时分析 |
| Network Profiler | 网络带宽和 RPC 分析 |
运行时性能监控代码
# performance_monitor.gd
# On-screen overlay that periodically copies engine Performance monitors
# into a set of Labels (looked up by scene-unique name).
extends Control

# Divisor used to convert the byte counts returned by the monitors to MiB.
const BYTES_PER_MIB := 1048576.0

@onready var fps_label: Label = %FPSLabel
@onready var memory_label: Label = %MemoryLabel
@onready var objects_label: Label = %ObjectsLabel
@onready var draw_calls_label: Label = %DrawCallsLabel
@onready var physics_label: Label = %PhysicsLabel

# Seconds between two label refreshes.
var update_interval: float = 0.5
# Seconds elapsed since the last refresh.
var time_accumulator: float = 0.0


# Refreshes all labels once every `update_interval` seconds.
func _process(delta):
    time_accumulator += delta
    if time_accumulator < update_interval:
        return
    time_accumulator = 0.0

    # FPS
    fps_label.text = "FPS: %d" % Engine.get_frames_per_second()

    # Memory. Godot 4 has no Performance.MEMORY_VIDEO monitor; video memory
    # is reported by RENDER_VIDEO_MEM_USED.
    var static_mib = Performance.get_monitor(Performance.MEMORY_STATIC) / BYTES_PER_MIB
    var video_mib = Performance.get_monitor(Performance.RENDER_VIDEO_MEM_USED) / BYTES_PER_MIB
    memory_label.text = "内存: %.1f MB (显存: %.1f MB)" % [static_mib, video_mib]

    # Object counts
    var obj_count = Performance.get_monitor(Performance.OBJECT_COUNT)
    var node_count = Performance.get_monitor(Performance.OBJECT_NODE_COUNT)
    objects_label.text = "对象: %d (节点: %d)" % [obj_count, node_count]

    # Rendering. The second monitor counts primitives, not vertices, so the
    # label says "图元" (primitives).
    var draw_calls = Performance.get_monitor(Performance.RENDER_TOTAL_DRAW_CALLS_IN_FRAME)
    var primitives = Performance.get_monitor(Performance.RENDER_TOTAL_PRIMITIVES_IN_FRAME)
    draw_calls_label.text = "DrawCall: %d (图元: %d)" % [draw_calls, primitives]

    # Physics: TIME_PHYSICS_PROCESS is a duration in seconds, shown in ms.
    var physics_seconds = Performance.get_monitor(Performance.TIME_PHYSICS_PROCESS)
    physics_label.text = "物理帧: %.2f ms" % (physics_seconds * 1000)
Performance 常用指标
| 指标 | 说明 | 建议值 |
|---|---|---|
| `TIME_FPS` | 帧率 | ≥ 60 |
| `TIME_PROCESS` | 进程帧耗时 | ≤ 16.67ms |
| `TIME_PHYSICS_PROCESS` | 物理帧耗时 | ≤ 16.67ms |
| `MEMORY_STATIC` | 静态内存 | 视平台而定 |
| `RENDER_VIDEO_MEM_USED` | 显存 | 视平台而定 |
| `RENDER_TOTAL_DRAW_CALLS_IN_FRAME` | DrawCall 数量 | ≤ 100(移动端) |
| `RENDER_TOTAL_PRIMITIVES_IN_FRAME` | 图元数(点/线/三角形) | ≤ 100K(移动端) |
| `OBJECT_COUNT` | 对象总数 | 越少越好 |
帧率分析
# frame_analyzer.gd
# Keeps a sliding window of recent frame times and periodically prints
# average / best / worst frame time and the share of slow frames.
extends Node

# Number of most recent frames kept in the window.
const SAMPLE_SIZE = 120
# Seconds between two reports. Measured in time rather than frames so the
# interval stays the same at any frame rate.
const ANALYZE_INTERVAL_SEC = 2.0
# A frame longer than this many milliseconds counts as a spike.
const SPIKE_THRESHOLD_MS = 20.0

# Frame durations in milliseconds, oldest first.
var frame_times: Array[float] = []
# Seconds elapsed since the last report.
var _time_since_report: float = 0.0


# Records the frame time and triggers a report once the window is full and
# ANALYZE_INTERVAL_SEC has elapsed since the previous report.
func _process(delta):
    frame_times.append(delta * 1000.0)  # seconds -> milliseconds
    if frame_times.size() > SAMPLE_SIZE:
        frame_times.pop_front()

    _time_since_report += delta
    if _time_since_report >= ANALYZE_INTERVAL_SEC and frame_times.size() == SAMPLE_SIZE:
        _time_since_report = 0.0
        _analyze_frames()


# Prints statistics for the current window. Does nothing when it is empty.
func _analyze_frames():
    if frame_times.is_empty():
        return

    var total_ms = 0.0
    var max_time = 0.0
    # Start from +infinity so the minimum is correct however slow frames are.
    var min_time = INF
    var spikes = 0
    for t in frame_times:
        total_ms += t
        max_time = maxf(max_time, t)
        min_time = minf(min_time, t)
        if t > SPIKE_THRESHOLD_MS:
            spikes += 1

    var avg = total_ms / frame_times.size()
    var jank_rate = float(spikes) / frame_times.size() * 100.0
    print("=== 帧率分析 ===")
    print("平均: %.2f ms (%.0f FPS)" % [avg, _ms_to_fps(avg)])
    print("最小: %.2f ms (%.0f FPS)" % [min_time, _ms_to_fps(min_time)])
    print("最大: %.2f ms (%.0f FPS)" % [max_time, _ms_to_fps(max_time)])
    print("卡顿率: %.1f%%" % jank_rate)


# Converts a frame duration in milliseconds to frames per second.
# Returns 0.0 for non-positive durations instead of dividing by zero.
func _ms_to_fps(ms: float) -> float:
    return 1000.0 / ms if ms > 0.0 else 0.0
渲染优化
减少 DrawCall
| 技术 | 说明 | 效果 |
|---|---|---|
| 多维材质 | 合并使用相同材质的网格 | ⭐⭐⭐ |
| MultiMeshInstance3D | 大量相同物体的实例化渲染 | ⭐⭐⭐⭐⭐ |
| 图集 (Atlas) | 合并小纹理为大图集 | ⭐⭐⭐ |
| LOD | 距离相关细节层次 | ⭐⭐⭐⭐ |
| 遮挡剔除 | 不渲染被遮挡的物体 | ⭐⭐⭐⭐ |
MultiMeshInstance3D 大规模渲染
# grass_renderer.gd
# Fills this MultiMeshInstance3D with randomly placed, rotated and scaled
# copies of `grass_mesh`.
extends MultiMeshInstance3D

# Number of instances to generate.
@export var grass_count: int = 10000
# Size of the spawn rectangle on the XZ plane, centred on the local origin.
@export var spawn_area: Vector2 = Vector2(100, 100)
# Mesh drawn for every instance.
@export var grass_mesh: Mesh


func _ready():
    _generate_grass()


# Builds a new MultiMesh and assigns it to this node.
func _generate_grass():
    var grass := MultiMesh.new()
    # transform_format has to be chosen while instance_count is still 0;
    # setting instance_count is what allocates the per-instance buffer.
    grass.transform_format = MultiMesh.TRANSFORM_3D
    grass.mesh = grass_mesh
    grass.instance_count = maxi(grass_count, 0)

    var half_extent := spawn_area * 0.5
    for i in range(grass.instance_count):
        var x = randf_range(-half_extent.x, half_extent.x)
        var z = randf_range(-half_extent.y, half_extent.y)
        var y = _get_terrain_height(x, z)

        # Rotation and scale go into the basis only. Transform3D.rotated() and
        # scaled() would also rotate/scale the origin, moving the blade away
        # from the (x, y, z) position that was just sampled.
        var orientation := Basis(Vector3.UP, randf() * TAU)
        orientation = orientation.scaled(Vector3.ONE * randf_range(0.8, 1.2))
        grass.set_instance_transform(i, Transform3D(orientation, Vector3(x, y, z)))

    multimesh = grass


# Returns the terrain height at local (x, z).
func _get_terrain_height(x: float, z: float) -> float:
    # Placeholder: sample the real terrain here.
    return 0.0
💡 `MultiMeshInstance3D` 可以用单个 DrawCall 渲染数万个实例,是草地、树木、粒子等大量同质物体的最佳选择。
视锥剔除与可见性
# visibility_optimizer.gd
# Hides direct Node3D children that are farther than `cull_distance` from
# the viewport's 3D camera, and shows the ones that are closer.
extends Node3D

# Children at this distance or farther from the camera are hidden.
@export var cull_distance: float = 100.0

# Camera used as the reference point for the distance test.
var camera: Camera3D


func _ready():
    camera = get_viewport().get_camera_3d()


func _process(_delta):
    # The camera may not exist yet in _ready(), or may be freed later, so
    # re-acquire it whenever the cached reference is unusable.
    if not is_instance_valid(camera):
        camera = get_viewport().get_camera_3d()
        if camera == null:
            return

    var camera_position := camera.global_position
    # Compare squared distances to avoid one square root per child.
    var limit := maxf(cull_distance, 0.0)
    var limit_squared := limit * limit

    for child in get_children():
        var spatial := child as Node3D
        if spatial == null:
            continue
        var distance_squared := camera_position.distance_squared_to(spatial.global_position)
        spatial.visible = distance_squared < limit_squared
GDScript 优化技巧
性能对比表
| 操作 | 慢速写法 | 快速写法 | 提升 |
|---|---|---|---|
| 节点引用 | get_node("X") 每帧调用 | @onready var x = $X | 10x |
| 类型检查 | is 运算符 | 类型提示 | 2x |
| 字符串拼接 | + 运算符 | % 格式化 | 1.5x |
| 成员查找 | Array 的 in / has()(O(n)) | Dictionary 的 has()(O(1)) | 100x |
| 向量计算 | 逐分量操作 | Vector2/3 运算 | 3x |
| 循环内实例化 | new() 在循环内 | 预先创建 | 2x |
实际优化示例
# ❌ Slow: both node paths are resolved again on every single frame.
func _process(delta):
    var health_bar_node = get_node("HealthBar")
    health_bar_node.value = health
    var mana_bar_node = get_node("ManaBar")
    mana_bar_node.value = mana
# ✅ Fast: the nodes are looked up once, when this node becomes ready,
# and the cached references are reused every frame.
@onready var health_bar: ProgressBar = %HealthBar
@onready var mana_bar: ProgressBar = %ManaBar


# The frame delta is not needed here; the leading underscore marks the
# parameter as intentionally unused.
func _process(_delta):
    health_bar.value = health
    mana_bar.value = mana
# ❌ Slow: walks the whole "enemies" group and allocates a fresh array on
# every call.
func get_enemies():
    var group_members = get_tree().get_nodes_in_group("enemies")
    return group_members.filter(func(member): return member.is_alive())
# ✅ Fast: the filtered list is cached and only rebuilt after it has been
# marked dirty.
var alive_enemies: Array[Node] = []
var enemy_cache_dirty: bool = true


# Requests a rebuild of the cache on the next get_alive_enemies() call.
func mark_enemy_cache_dirty():
    enemy_cache_dirty = true


# Returns the cached list of living enemies, rebuilding it first if needed.
func get_alive_enemies() -> Array[Node]:
    if not enemy_cache_dirty:
        return alive_enemies

    alive_enemies.clear()
    for candidate in get_tree().get_nodes_in_group("enemies"):
        if candidate.is_alive():
            alive_enemies.append(candidate)
    enemy_cache_dirty = false
    return alive_enemies
# ❌ Slow: membership test on an Array scans the elements one by one.
var unlocked_items: Array[String] = []


func is_unlocked(item_id: String) -> bool:
    var found := unlocked_items.has(item_id)  # O(n)
    return found
# ✅ Fast: a Dictionary is used as a set, so the lookup is a hash lookup.
var unlocked_items: Dictionary = {}  # item_id -> true


func is_unlocked(item_id: String) -> bool:
    # `in` on a Dictionary tests its keys.
    return item_id in unlocked_items  # O(1)
对象池模式
# object_pool.gd
# Reusable pool of instances of one PackedScene. Instances are created as
# children of the pool, handed out with get_instance() and returned with
# release_instance() instead of being freed.
class_name ObjectPool
extends Node

# Scene instantiated for every pooled object.
@export var pooled_scene: PackedScene
# Number of instances created up front in _ready().
@export var initial_size: int = 20
# When true, an empty pool creates a new instance instead of returning null.
@export var auto_expand: bool = true

# Idle instances, ready to be handed out.
var available: Array[Node] = []
# Instances currently handed out.
var in_use: Array[Node] = []


func _ready():
    if pooled_scene == null:
        push_error("ObjectPool: pooled_scene is not assigned")
        return
    for i in range(initial_size):
        available.append(_create_instance())


# Instantiates one pooled object, parents it to the pool and parks it.
func _create_instance() -> Node:
    var instance = pooled_scene.instantiate()
    add_child(instance)
    # Deactivate AFTER add_child(): a node that overrides _process() or
    # _physics_process() has processing enabled automatically when it
    # becomes ready, which would undo an earlier set_process(false).
    _set_active(instance, false)
    return instance


# Enables or disables processing and, where supported, visibility.
func _set_active(instance: Node, active: bool) -> void:
    instance.set_process(active)
    instance.set_physics_process(active)
    # show()/hide() do not exist on plain Node, so only call them when the
    # pooled object actually provides them.
    var visibility_method := "show" if active else "hide"
    if instance.has_method(visibility_method):
        instance.call(visibility_method)


# Hands out an idle instance and calls its optional on_spawn() hook.
# Returns null when the pool is empty and cannot grow.
func get_instance() -> Node:
    if available.is_empty():
        if not auto_expand:
            push_warning("对象池已空")
            return null
        if pooled_scene == null:
            push_error("ObjectPool: pooled_scene is not assigned")
            return null
        available.append(_create_instance())

    var instance = available.pop_back()
    in_use.append(instance)
    _set_active(instance, true)
    if instance.has_method("on_spawn"):
        instance.on_spawn()
    return instance


# Returns an instance to the pool and calls its optional on_despawn() hook.
# Instances that are not currently handed out are ignored.
func release_instance(instance: Node):
    if not in_use.has(instance):
        return
    in_use.erase(instance)
    available.append(instance)
    _set_active(instance, false)
    if instance.has_method("on_despawn"):
        instance.on_despawn()


# Returns every handed-out instance to the pool.
func release_all():
    # Iterate over a copy because release_instance() mutates in_use.
    for instance in in_use.duplicate():
        release_instance(instance)


# Returns the current pool sizes under the keys "available", "in_use", "total".
func get_stats() -> Dictionary:
    return {
        "available": available.size(),
        "in_use": in_use.size(),
        "total": available.size() + in_use.size()
    }
使用示例
# bullet_manager.gd
# Spawns bullets from an ObjectPool child and returns them to it on hit.
extends Node

@onready var bullet_pool: ObjectPool = $BulletPool


# Takes a bullet from the pool and launches it from `origin` along
# `direction` at `speed`. Does nothing if the pool yields no bullet.
func fire_bullet(origin: Vector3, direction: Vector3, speed: float):
    var bullet = bullet_pool.get_instance()
    if not bullet:
        return
    bullet.global_position = origin
    bullet.velocity = direction * speed


# Hands the bullet that hit something back to the pool.
func _on_bullet_hit(bullet: Node):
    bullet_pool.release_instance(bullet)
LOD 系统
# lod_system.gd
# Distance-based level of detail: swaps the mesh of a MeshInstance3D child,
# switches to an optional Sprite3D billboard farther away, and hides
# everything beyond that.
extends Node3D

# Extra distance past `billboard_distance` after which nothing is shown.
const HIDE_MARGIN := 10.0

# Upper distance bound of each LOD level, nearest level first.
@export var lod_distances: Array[float] = [10.0, 25.0, 50.0]
# Mesh for each level; the last entry is also used beyond the last bound.
@export var lod_meshes: Array[Mesh] = []
# Distance past which the billboard replaces the mesh.
@export var billboard_distance: float = 80.0
@export var billboard_texture: Texture2D

@onready var mesh_instance: MeshInstance3D = $MeshInstance3D
# The billboard is optional (every use below is null-checked), so look it
# up without logging an error when the child does not exist.
@onready var billboard: Sprite3D = get_node_or_null("Billboard") as Sprite3D

var camera: Camera3D


func _ready():
    camera = get_viewport().get_camera_3d()
    if billboard:
        billboard.texture = billboard_texture
        billboard.visible = false


func _process(_delta):
    # Re-acquire the camera if it was missing in _ready() or freed since.
    if not is_instance_valid(camera):
        camera = get_viewport().get_camera_3d()
        if camera == null:
            return

    var distance = global_position.distance_to(camera.global_position)

    # Beyond the billboard range plus margin: show nothing.
    if distance > billboard_distance + HIDE_MARGIN:
        mesh_instance.visible = false
        _set_billboard_visible(false)
        return

    # Billboard range: the sprite replaces the mesh.
    if distance > billboard_distance:
        mesh_instance.visible = false
        _set_billboard_visible(true)
        return

    # Mesh range: pick the LOD level.
    _set_billboard_visible(false)
    mesh_instance.visible = true
    if lod_meshes.is_empty():
        return  # nothing to swap; keep the mesh already assigned

    # First level whose bound exceeds the distance; past all bounds the
    # level equals lod_distances.size() and is clamped to the last mesh.
    var level := lod_distances.size()
    for i in range(lod_distances.size()):
        if distance < lod_distances[i]:
            level = i
            break
    # Clamp so a lod_meshes array shorter than lod_distances cannot be
    # indexed out of range.
    mesh_instance.mesh = lod_meshes[mini(level, lod_meshes.size() - 1)]


# Shows or hides the billboard if one exists.
func _set_billboard_visible(shown: bool) -> void:
    if billboard:
        billboard.visible = shown
多线程优化
# threaded_pathfinding.gd
# Runs path queries on a worker thread. Requests are queued with
# request_path(); results are delivered to the supplied callback from
# _process(), i.e. on the main thread.
extends Node

var thread: Thread
var mutex: Mutex
var semaphore: Semaphore
# Pending requests: {"from", "to", "callback"}. Guarded by `mutex`.
var path_requests: Array[Dictionary] = []
# Finished results: {"path", "callback"}. Guarded by `mutex`.
var path_results: Array[Dictionary] = []
# Set under `mutex` to ask the worker to stop.
var exit_thread: bool = false

# Navigation map RID, resolved once in _ready() so the worker never has to
# go through the scene tree to find it.
var _navigation_map: RID


func _ready():
    # get_world_3d() is not available on a plain Node; go through the
    # viewport instead, and do it here on the main thread.
    var world := get_viewport().find_world_3d()
    if world:
        _navigation_map = world.navigation_map
    else:
        push_warning("threaded_pathfinding: no World3D found, paths will be empty")

    mutex = Mutex.new()
    semaphore = Semaphore.new()
    thread = Thread.new()
    thread.start(_pathfinding_thread)


func _exit_tree():
    if thread == null or not thread.is_started():
        return
    mutex.lock()
    exit_thread = true
    mutex.unlock()
    # Wake the worker so it can observe exit_thread and return.
    semaphore.post()
    thread.wait_to_finish()


# Queues a path query. `callback` is later called with a PackedVector3Array.
func request_path(from: Vector3, to: Vector3, callback: Callable):
    mutex.lock()
    path_requests.append({
        "from": from,
        "to": to,
        "callback": callback
    })
    mutex.unlock()
    semaphore.post()


# Worker loop: waits for a post, takes one request, computes it and stores
# the result. Returns once exit_thread is set.
func _pathfinding_thread():
    while true:
        semaphore.wait()

        mutex.lock()
        if exit_thread:
            mutex.unlock()
            return
        if path_requests.is_empty():
            mutex.unlock()
            continue
        var request = path_requests.pop_front()
        mutex.unlock()

        # The query itself runs without holding the lock.
        var path = _calculate_path(request.from, request.to)

        mutex.lock()
        path_results.append({
            "path": path,
            "callback": request.callback
        })
        mutex.unlock()


# Delivers finished results on the main thread.
func _process(_delta):
    # Swap the shared array out under the lock instead of copying it.
    mutex.lock()
    var results := path_results
    path_results = []
    mutex.unlock()

    for result in results:
        var callback: Callable = result.callback
        # The object behind the callback may have been freed meanwhile.
        if callback.is_valid():
            callback.call(result.path)


# Computes a path on the cached navigation map. Called from the worker
# thread, so it must not touch scene-tree nodes.
# NOTE(review): assumes NavigationServer3D.map_get_path() is safe to call
# off the main thread - confirm for the engine version in use.
func _calculate_path(from: Vector3, to: Vector3) -> PackedVector3Array:
    if not _navigation_map.is_valid():
        return PackedVector3Array()
    return NavigationServer3D.map_get_path(_navigation_map, from, to, true)
⚠️ 注意:GDScript 中使用线程时,不要在工作线程中访问场景树节点。所有节点操作必须在主线程中完成。
内存泄漏检测
# memory_debugger.gd
# Periodically prints object / node / orphan-node counts and warns when
# orphan nodes exist or the object count grew sharply since the last check.
extends Node

# Object-count growth between two checks that triggers a warning.
const GROWTH_WARNING_THRESHOLD := 100

# Snapshot of the previous check: keys "total", "nodes", "orphans".
var object_counts: Dictionary = {}
# Seconds between two checks.
var check_interval: float = 10.0
# Seconds elapsed since the last check.
var time_accumulator: float = 0.0


func _process(delta):
    time_accumulator += delta
    if time_accumulator < check_interval:
        return
    time_accumulator = 0.0

    var object_count = Performance.get_monitor(Performance.OBJECT_COUNT)
    var node_count = Performance.get_monitor(Performance.OBJECT_NODE_COUNT)
    var orphan_count = Performance.get_monitor(Performance.OBJECT_ORPHAN_NODE_COUNT)

    print("=== 内存检查 ===")
    print("总对象数: %d" % object_count)
    print("节点数: %d" % node_count)
    print("孤儿节点: %d" % orphan_count)

    # Orphan nodes (nodes outside the scene tree) are potential leaks.
    if orphan_count > 0:
        push_warning("检测到 %d 个孤儿节点!" % orphan_count)

    # Compare against the previous snapshot, if there is one.
    if object_counts.has("total"):
        var diff = object_count - object_counts.total
        if diff > GROWTH_WARNING_THRESHOLD:
            push_warning("对象数增长了 %d,可能存在内存泄漏" % diff)

    object_counts = {
        "total": object_count,
        "nodes": node_count,
        "orphans": orphan_count
    }
💡 使用 `Performance.OBJECT_ORPHAN_NODE_COUNT` 可以快速发现未被场景树管理的节点,这些通常是内存泄漏的根源。注意:部分内置监视器仅在调试构建中可用,在发布版导出中始终返回 0。
远程调试
# remote_debug.gd
# Debug-only overlay drawing. The script extends Node2D because
# queue_redraw(), _draw(), draw_circle() and draw_string() belong to
# CanvasItem and are not available on a plain Node.
extends Node2D


# Remote debugging of an exported build.
func _ready():
    # To connect to the editor's remote debugger, start the exported build
    # with the command-line argument: --remote-debug tcp://<editor IP>:6007
    pass


# Requests a redraw, but only in debug builds. This is a runtime check:
# GDScript has no conditional compilation.
func debug_draw():
    if OS.is_debug_build():
        queue_redraw()


func _draw():
    if not OS.is_debug_build():
        return
    # Debug overlay. The FPS text only changes when a redraw is requested,
    # e.g. through debug_draw().
    draw_circle(Vector2.ZERO, 50, Color.RED)
    draw_string(ThemeDB.fallback_font, Vector2(10, 30),
            "FPS: %d" % Engine.get_frames_per_second())
性能基准测试
# benchmark.gd
# Micro-benchmarks for a few common operations; each one prints its
# wall-clock time in milliseconds.
extends Node


# Runs all benchmarks in sequence.
func run_benchmark():
    print("=== 性能基准测试 ===")

    # Math
    _benchmark("数学运算", func():
        var result = 0.0
        for i in range(100000):
            result += sin(float(i)) * cos(float(i))
    )

    # Array operations
    _benchmark("数组操作", func():
        var arr = []
        for i in range(10000):
            arr.append(i)
        arr.sort()
        arr.reverse()
    )

    # Dictionary operations
    _benchmark("字典操作", func():
        var dict = {}
        for i in range(10000):
            dict["key_%d" % i] = i
        for i in range(10000):
            dict.has("key_%d" % i)
    )

    # Node creation / destruction
    _benchmark("节点创建/销毁", func():
        for i in range(1000):
            var node = Node.new()
            node.name = "test_%d" % i
            add_child(node)
            # free() destroys the node immediately, so destruction is part
            # of the measured time. queue_free() would defer it to the end
            # of the frame, outside the timed section.
            node.free()
    )


# Calls `callable` once and prints the elapsed time under `label`.
# The first parameter is not called `name` to avoid shadowing Node.name.
func _benchmark(label: String, callable: Callable):
    var start = Time.get_ticks_usec()
    callable.call()
    var elapsed = Time.get_ticks_usec() - start
    print("%s: %.2f ms" % [label, elapsed / 1000.0])