强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

Godot 4 GDScript 教程 / 26 - 性能优化与调试

26 - 性能优化与调试

性能优化是游戏开发中不可或缺的环节。Godot 4 引入了全新的渲染架构和性能分析工具。本文将系统性地介绍性能分析、优化策略和调试技巧。


性能监视器

内置 Profiler

Godot 编辑器内置了多个性能分析器:

| Profiler | 用途 |
| --- | --- |
| Performance | 整体性能指标(FPS、内存、对象数) |
| Visual Profiler | 渲染管线分析(DrawCall、着色器) |
| Script Profiler | GDScript 函数耗时分析 |
| Network Profiler | 网络带宽和 RPC 分析 |

运行时性能监控代码

# performance_monitor.gd
# On-screen overlay that periodically refreshes a set of labels with values
# read from Engine / Performance (FPS, memory, object counts, render counters,
# physics step time).
extends Control

# Divisor used to convert byte counters to mebibytes for display.
const BYTES_PER_MIB := 1048576.0

# Labels are resolved through scene-unique names (%Name).
@onready var fps_label: Label = %FPSLabel
@onready var memory_label: Label = %MemoryLabel
@onready var objects_label: Label = %ObjectsLabel
@onready var draw_calls_label: Label = %DrawCallsLabel
@onready var physics_label: Label = %PhysicsLabel

# Seconds between two label refreshes.
var update_interval: float = 0.5
# Seconds elapsed since the last refresh.
var time_accumulator: float = 0.0

# Accumulates frame time and rewrites all labels once per update_interval.
func _process(delta):
    # Throttle: skip the frame until enough time has accumulated.
    time_accumulator += delta
    if time_accumulator < update_interval:
        return
    time_accumulator = 0.0

    # FPS
    fps_label.text = "FPS: %d" % Engine.get_frames_per_second()

    # Memory. The Performance monitor enum has no MEMORY_VIDEO entry; video
    # memory is exposed as RENDER_VIDEO_MEM_USED.
    var mem_static = Performance.get_monitor(Performance.MEMORY_STATIC) / BYTES_PER_MIB
    var mem_video = Performance.get_monitor(Performance.RENDER_VIDEO_MEM_USED) / BYTES_PER_MIB
    memory_label.text = "内存: %.1f MB (显存: %.1f MB)" % [mem_static, mem_video]

    # Object counts
    var obj_count = Performance.get_monitor(Performance.OBJECT_COUNT)
    var node_count = Performance.get_monitor(Performance.OBJECT_NODE_COUNT)
    objects_label.text = "对象: %d (节点: %d)" % [obj_count, node_count]

    # Rendering. RENDER_TOTAL_PRIMITIVES_IN_FRAME counts primitives, not
    # vertices, so the label says "图元".
    var draw_calls = Performance.get_monitor(Performance.RENDER_TOTAL_DRAW_CALLS_IN_FRAME)
    var primitives = Performance.get_monitor(Performance.RENDER_TOTAL_PRIMITIVES_IN_FRAME)
    draw_calls_label.text = "DrawCall: %d (图元: %d)" % [draw_calls, primitives]

    # Physics. The monitor is scaled by 1000 so it can be displayed in ms.
    var physics_time = Performance.get_monitor(Performance.TIME_PHYSICS_PROCESS)
    physics_label.text = "物理帧: %.2f ms" % (physics_time * 1000)

Performance 常用指标

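| 指标 | 说明 | 建议值 |
| --- | --- | --- |
| TIME_FPS | 帧率 | ≥ 60 |
| TIME_PROCESS | 处理帧(_process)耗时 | ≤ 16.67ms |
| TIME_PHYSICS_PROCESS | 物理帧耗时 | ≤ 16.67ms |
| MEMORY_STATIC | 静态内存 | 视平台而定 |
| RENDER_VIDEO_MEM_USED | 显存 | 视平台而定 |
| RENDER_TOTAL_DRAW_CALLS_IN_FRAME | DrawCall 数量 | ≤ 100(移动端) |
| RENDER_TOTAL_PRIMITIVES_IN_FRAME | 图元数 | ≤ 100K(移动端) |
| OBJECT_COUNT | 对象总数 | 越少越好 |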

帧率分析

# frame_analyzer.gd
# Keeps a sliding window of the most recent frame times and periodically
# prints average / best / worst frame time and the share of slow frames.
extends Node

# Number of frames kept in the window; also the reporting period in frames.
const SAMPLE_SIZE = 120
# Frames longer than this many milliseconds are counted as spikes.
const SPIKE_THRESHOLD_MS = 20.0

# Frame durations in milliseconds, oldest first.
var frame_times: Array[float] = []

# Records the current frame time and triggers a report every SAMPLE_SIZE frames.
func _process(delta):
    frame_times.append(delta * 1000.0)  # seconds -> milliseconds

    if frame_times.size() > SAMPLE_SIZE:
        frame_times.pop_front()

    # Report once every SAMPLE_SIZE process frames (about 2 s at 60 FPS),
    # and only once the window is full.
    if frame_times.size() == SAMPLE_SIZE and Engine.get_process_frames() % SAMPLE_SIZE == 0:
        _analyze_frames()

# Prints statistics for the samples currently held in frame_times.
# Does nothing when there are no samples.
func _analyze_frames():
    if frame_times.is_empty():
        return  # avoids a 0/0 average and meaningless min/max

    var sum = 0.0
    var max_time = 0.0
    var min_time = INF  # any real sample is smaller than the sentinel
    var spikes = 0

    for t in frame_times:
        sum += t
        max_time = maxf(max_time, t)
        min_time = minf(min_time, t)
        if t > SPIKE_THRESHOLD_MS:
            spikes += 1

    var avg = sum / frame_times.size()
    var jank_rate = float(spikes) / frame_times.size() * 100.0

    print("=== 帧率分析 ===")
    print("平均: %.2f ms (%.0f FPS)" % [avg, 1000.0 / avg])
    print("最小: %.2f ms (%.0f FPS)" % [min_time, 1000.0 / min_time])
    print("最大: %.2f ms (%.0f FPS)" % [max_time, 1000.0 / max_time])
    print("卡顿率: %.1f%%" % jank_rate)

渲染优化

减少 DrawCall

| 技术 | 说明 | 效果 |
| --- | --- | --- |
| 多维材质 | 合并使用相同材质的网格 | ⭐⭐⭐ |
| MultiMeshInstance3D | 大量相同物体的实例化渲染 | ⭐⭐⭐⭐⭐ |
| 图集 (Atlas) | 合并小纹理为大图集 | ⭐⭐⭐ |
| LOD | 距离相关细节层次 | ⭐⭐⭐⭐ |
| 遮挡剔除 | 不渲染被遮挡的物体 | ⭐⭐⭐⭐ |

MultiMeshInstance3D 大规模渲染

# grass_renderer.gd
# Fills this MultiMeshInstance3D with randomly placed, rotated and scaled
# copies of grass_mesh inside a rectangle centred on the local origin.
extends MultiMeshInstance3D

# Number of instances to generate.
@export var grass_count: int = 10000
# Size of the spawn rectangle: x is the extent along X, y the extent along Z.
@export var spawn_area: Vector2 = Vector2(100, 100)
# Mesh drawn for every instance.
@export var grass_mesh: Mesh

func _ready():
    _generate_grass()

# Builds a new MultiMesh with grass_count instances and assigns it to this node.
func _generate_grass():
    var grass_multimesh := MultiMesh.new()
    # The transform format has to be chosen BEFORE instance_count: setting the
    # count allocates the instance buffers, and format changes made afterwards
    # are ignored.
    grass_multimesh.transform_format = MultiMesh.TRANSFORM_3D
    grass_multimesh.mesh = grass_mesh
    grass_multimesh.instance_count = grass_count

    for i in range(grass_count):
        var x = randf_range(-spawn_area.x / 2, spawn_area.x / 2)
        var z = randf_range(-spawn_area.y / 2, spawn_area.y / 2)
        var y = _get_terrain_height(x, z)

        var yaw = randf() * TAU
        var uniform_scale = randf_range(0.8, 1.2)

        # Rotation and scale go into the basis only. Calling rotated()/scaled()
        # on a transform whose origin is already set would also rotate/scale
        # the origin, moving the blade away from the (x, y, z) sampled above.
        var blade_basis := Basis(Vector3.UP, yaw).scaled(Vector3.ONE * uniform_scale)
        grass_multimesh.set_instance_transform(i, Transform3D(blade_basis, Vector3(x, y, z)))

    multimesh = grass_multimesh

# Returns the terrain height at (x, z). Placeholder: always 0.0.
func _get_terrain_height(x: float, z: float) -> float:
    return 0.0  # placeholder until a terrain lookup is wired in

💡 MultiMeshInstance3D 可以用单个 DrawCall 渲染数万个实例,是草地、树木、粒子等大量同质物体的最佳选择。

视锥剔除与可见性

# visibility_optimizer.gd
# Distance-based visibility toggle: every frame, each direct Node3D child is
# shown only while it is closer to the active camera than cull_distance.
extends Node3D

# Children at or beyond this distance from the camera are hidden.
@export var cull_distance: float = 100.0
# Camera used as the distance reference; (re)acquired from the viewport.
var camera: Camera3D

func _ready():
    camera = get_viewport().get_camera_3d()

func _process(_delta):
    # The camera cached in _ready may have been missing at that point or may
    # have been freed since, so re-acquire it before giving up.
    if not is_instance_valid(camera):
        camera = get_viewport().get_camera_3d()
        if camera == null:
            return

    # Read the camera position once instead of once per child.
    var camera_position := camera.global_position

    for child in get_children():
        if child is Node3D:
            var distance = camera_position.distance_to(child.global_position)
            child.visible = distance < cull_distance

GDScript 优化技巧

性能对比表

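| 操作 | 慢速写法 | 快速写法 | 提升 |
| --- | --- | --- | --- |
| 节点引用 | get_node("X") 每帧调用 | @onready var x = $X | 10x |
| 类型检查 | is 运算符 | 类型提示 | 2x |
| 字符串拼接 | + 运算符 | % 格式化 | 1.5x |
| 数组查找 | Array 上的 in / has()(O(n)) | Dictionary 的 has()(O(1)) | 100x |
| 向量计算 | 逐分量操作 | Vector2/3 运算 | 3x |
| 循环内实例化 | new() 在循环内 | 预先创建 | 2x |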

实际优化示例

# ❌ Slow: both node paths are resolved again on every single frame
func _process(delta):
    var health_bar_node = get_node("HealthBar")
    health_bar_node.value = health
    var mana_bar_node = get_node("ManaBar")
    mana_bar_node.value = mana

# ✅ Fast: resolve each node once when the script becomes ready and reuse it
@onready var health_bar: ProgressBar = get_node("%HealthBar")
@onready var mana_bar: ProgressBar = get_node("%ManaBar")

# Pushes the current stats into the cached bars; no node lookup happens here.
func _process(delta):
    health_bar.set_value(health)
    mana_bar.set_value(mana)
# ❌ Slow: allocates and fills a brand-new array on every call
func get_enemies():
    var living = []
    var group_members = get_tree().get_nodes_in_group("enemies")
    for idx in group_members.size():
        var candidate = group_members[idx]
        if not candidate.is_alive():
            continue
        living.append(candidate)
    return living

# ✅ Fast: keep the filtered list around and rebuild it only after invalidation
var alive_enemies: Array[Node] = []
# True while alive_enemies has to be rebuilt before it can be returned.
var enemy_cache_dirty: bool = true

# Flags the cached list as stale; the next get_alive_enemies() call rebuilds it.
func mark_enemy_cache_dirty():
    enemy_cache_dirty = true

# Returns the cached list of group members whose is_alive() is true,
# rebuilding it first when it has been marked dirty.
func get_alive_enemies() -> Array[Node]:
    if not enemy_cache_dirty:
        return alive_enemies

    alive_enemies.clear()
    var group_members = get_tree().get_nodes_in_group("enemies")
    for idx in group_members.size():
        var candidate = group_members[idx]
        if not candidate.is_alive():
            continue
        alive_enemies.append(candidate)

    enemy_cache_dirty = false
    return alive_enemies
# ❌ Slow: membership test against an array
var unlocked_items: Array[String] = []

# Returns true when item_id is stored in unlocked_items.
func is_unlocked(item_id: String) -> bool:
    var found: bool = unlocked_items.has(item_id)  # linear scan, O(n)
    return found

# ✅ Fast: membership test against a Dictionary used as a set
var unlocked_items: Dictionary = {}  # item_id -> true

# Returns true when item_id is a key of unlocked_items.
func is_unlocked(item_id: String) -> bool:
    var found: bool = item_id in unlocked_items  # hashed key lookup, O(1)
    return found

对象池模式

# object_pool.gd
# Reusable pool of instances of a single PackedScene. Instances are created as
# children of this node, handed out with get_instance() and returned with
# release_instance(). Idle instances have processing disabled and, when they
# are visual nodes, are hidden.
class_name ObjectPool
extends Node

# Scene instantiated for every pooled object.
@export var pooled_scene: PackedScene
# Number of instances created up front in _ready().
@export var initial_size: int = 20
# When true, an empty pool creates a new instance instead of returning null.
@export var auto_expand: bool = true

# Idle instances ready to be handed out.
var available: Array[Node] = []
# Instances currently handed out.
var in_use: Array[Node] = []

func _ready():
    if pooled_scene == null:
        push_error("ObjectPool: pooled_scene is not assigned")
        return
    for i in range(initial_size):
        available.append(_create_instance())

# Instantiates pooled_scene in the idle state (no processing, hidden) and adds
# it as a child. The caller decides which list it goes into.
func _create_instance() -> Node:
    var instance = pooled_scene.instantiate()
    instance.set_process(false)
    instance.set_physics_process(false)
    # Idle instances must not be rendered before they are first handed out.
    _set_instance_visible(instance, false)
    add_child(instance)
    return instance

# Shows or hides an instance. Plain Node has no visibility, so only CanvasItem
# and Node3D derived instances are touched.
func _set_instance_visible(instance: Node, shown: bool) -> void:
    if instance is CanvasItem or instance is Node3D:
        instance.set("visible", shown)

# Hands out an idle instance with processing enabled and visibility restored,
# calling its on_spawn() hook if it has one.
# Returns null when the pool is empty and cannot grow (auto_expand is false or
# pooled_scene is unassigned).
func get_instance() -> Node:
    if available.is_empty():
        if not auto_expand:
            push_warning("对象池已空")
            return null
        if pooled_scene == null:
            push_error("ObjectPool: pooled_scene is not assigned")
            return null
        available.append(_create_instance())

    var instance = available.pop_back()
    in_use.append(instance)

    instance.set_process(true)
    instance.set_physics_process(true)
    _set_instance_visible(instance, true)

    if instance.has_method("on_spawn"):
        instance.on_spawn()

    return instance

# Returns an instance to the pool: disables processing, hides it and calls its
# on_despawn() hook if present. Instances not currently handed out by this
# pool are ignored.
func release_instance(instance: Node):
    if not instance in in_use:
        return

    in_use.erase(instance)
    available.append(instance)

    instance.set_process(false)
    instance.set_physics_process(false)
    _set_instance_visible(instance, false)

    if instance.has_method("on_despawn"):
        instance.on_despawn()

# Releases every instance that is currently handed out.
func release_all():
    # Iterate over a copy: release_instance() mutates in_use.
    for instance in in_use.duplicate():
        release_instance(instance)

# Returns the counts of idle, handed-out and total instances.
func get_stats() -> Dictionary:
    return {
        "available": available.size(),
        "in_use": in_use.size(),
        "total": available.size() + in_use.size()
    }

使用示例

# bullet_manager.gd
# Spawns bullets from an ObjectPool child and hands them back when they hit.
extends Node

@onready var bullet_pool: ObjectPool = get_node("BulletPool")

# Takes a bullet from the pool, places it at origin and sets its velocity to
# direction * speed. Does nothing when the pool yields no bullet.
func fire_bullet(origin: Vector3, direction: Vector3, speed: float):
    var spawned = bullet_pool.get_instance()
    if not spawned:
        return
    spawned.global_position = origin
    var launch_velocity = direction * speed
    spawned.velocity = launch_velocity

# Hit handler: returns the bullet to the pool.
func _on_bullet_hit(bullet: Node):
    bullet_pool.release_instance(bullet)

LOD 系统

# lod_system.gd
# Distance-based level-of-detail switcher. Depending on the distance to the
# active camera it shows one of lod_meshes, an optional billboard sprite, or
# nothing at all.
extends Node3D

# Extra distance beyond billboard_distance after which nothing is shown.
const HIDE_MARGIN := 10.0

# Upper distance bound of each LOD level, nearest level first.
@export var lod_distances: Array[float] = [10.0, 25.0, 50.0]
# Mesh for each LOD level; the last entry is used past the last threshold.
@export var lod_meshes: Array[Mesh] = []
# Beyond this distance the mesh is replaced by the billboard.
@export var billboard_distance: float = 80.0
@export var billboard_texture: Texture2D

@onready var mesh_instance: MeshInstance3D = $MeshInstance3D
# The billboard child is optional (every use below is null-checked), so look
# it up without raising an error when it does not exist.
@onready var billboard: Sprite3D = get_node_or_null("Billboard") as Sprite3D

var camera: Camera3D

func _ready():
    camera = get_viewport().get_camera_3d()
    if billboard:
        billboard.texture = billboard_texture
        billboard.visible = false

# Applies mesh / billboard visibility in one place; the billboard may be absent.
func _set_visibility(show_mesh: bool, show_billboard: bool) -> void:
    mesh_instance.visible = show_mesh
    if billboard:
        billboard.visible = show_billboard

func _process(_delta):
    # Re-acquire the camera if it was missing in _ready or has been freed.
    if not is_instance_valid(camera):
        camera = get_viewport().get_camera_3d()
        if camera == null:
            return

    var distance = global_position.distance_to(camera.global_position)

    # Past the billboard range plus margin: hide everything.
    if distance > billboard_distance + HIDE_MARGIN:
        _set_visibility(false, false)
        return

    # Billboard range: sprite instead of mesh.
    if distance > billboard_distance:
        _set_visibility(false, true)
        return

    # Mesh range: pick the LOD level.
    _set_visibility(true, false)

    if lod_meshes.is_empty():
        return  # nothing to switch between; keep whatever mesh is assigned

    # Default to the lowest-detail mesh, then take the first level whose
    # threshold is not yet exceeded. The index is clamped so a lod_distances
    # array longer than lod_meshes cannot index out of range.
    var lowest_level := lod_meshes.size() - 1
    var level := lowest_level
    for i in range(lod_distances.size()):
        if distance < lod_distances[i]:
            level = mini(i, lowest_level)
            break

    mesh_instance.mesh = lod_meshes[level]

多线程优化

# threaded_pathfinding.gd
# Runs path queries on a single worker thread. Requests are queued from the
# main thread with request_path(); finished paths are delivered to the
# request's callback from _process(), i.e. on the main thread.
extends Node

var thread: Thread
# Guards path_requests, path_results and exit_thread.
var mutex: Mutex
# Posted once per queued request (and once more on shutdown) to wake the worker.
var semaphore: Semaphore
var path_requests: Array[Dictionary] = []
var path_results: Array[Dictionary] = []
var exit_thread: bool = false
# Navigation map RID, resolved on the main thread so that the worker never
# has to touch the scene tree.
var navigation_map: RID

func _ready():
    # This script extends Node, which has no get_world_3d(); go through the
    # viewport instead, and do it here rather than on the worker thread.
    var world := get_viewport().find_world_3d()
    if world:
        navigation_map = world.navigation_map
    else:
        push_warning("threaded_pathfinding: no World3D found; path requests will return empty paths")

    mutex = Mutex.new()
    semaphore = Semaphore.new()
    thread = Thread.new()
    thread.start(_pathfinding_thread)

func _exit_tree():
    # _exit_tree can run without a worker to join (e.g. the node left the
    # tree before, or is leaving it a second time).
    if thread == null or not thread.is_started():
        return

    mutex.lock()
    exit_thread = true
    mutex.unlock()
    semaphore.post()  # wake the worker so it can observe exit_thread
    thread.wait_to_finish()

# Queues a path query from `from` to `to`. `callback` is later called on the
# main thread with the resulting PackedVector3Array.
func request_path(from: Vector3, to: Vector3, callback: Callable):
    mutex.lock()
    path_requests.append({
        "from": from,
        "to": to,
        "callback": callback
    })
    mutex.unlock()
    semaphore.post()

# Worker loop: waits for a wake-up, takes one request, computes its path
# outside the lock and stores the result. Returns once exit_thread is set.
func _pathfinding_thread():
    while true:
        semaphore.wait()

        mutex.lock()
        if exit_thread:
            mutex.unlock()
            return

        if path_requests.is_empty():
            mutex.unlock()
            continue

        var request = path_requests.pop_front()
        mutex.unlock()

        # The computation runs without holding the mutex so the main thread
        # can keep queueing requests and collecting results.
        var path = _calculate_path(request.from, request.to)

        mutex.lock()
        path_results.append({
            "path": path,
            "callback": request.callback
        })
        mutex.unlock()

# Delivers finished results to their callbacks on the main thread.
func _process(_delta):
    mutex.lock()
    var results = path_results.duplicate()
    path_results.clear()
    mutex.unlock()

    for result in results:
        var callback: Callable = result.callback
        # The callback's target may have been freed while the query ran.
        if callback.is_valid():
            callback.call(result.path)

# Computes an optimized path on the cached navigation map. Called from the
# worker thread, so it must not access nodes or the scene tree.
# NOTE(review): confirm NavigationServer3D.map_get_path is safe to call from a
# worker thread on the engine version in use.
func _calculate_path(from: Vector3, to: Vector3) -> PackedVector3Array:
    if not navigation_map.is_valid():
        return PackedVector3Array()
    return NavigationServer3D.map_get_path(navigation_map, from, to, true)

⚠️ 注意:GDScript 中使用线程时,不要在工作线程中访问场景树节点。所有节点操作必须在主线程中完成。


内存泄漏检测

# memory_debugger.gd
# Periodically prints object / node / orphan-node counts and warns when
# orphan nodes exist or the object count grew sharply since the last check.
extends Node

# Object-count increase between two checks that triggers a warning.
const GROWTH_WARNING_THRESHOLD = 100

# Counts recorded by the previous check (keys: "total", "nodes", "orphans").
var object_counts: Dictionary = {}
# Seconds between two checks.
var check_interval: float = 10.0
# Seconds elapsed since the last check.
var time_accumulator: float = 0.0

func _process(delta):
    # Throttle: only run once per check_interval.
    time_accumulator += delta
    if time_accumulator < check_interval:
        return
    time_accumulator = 0.0

    var object_count = Performance.get_monitor(Performance.OBJECT_COUNT)
    var node_count = Performance.get_monitor(Performance.OBJECT_NODE_COUNT)
    var orphan_count = Performance.get_monitor(Performance.OBJECT_ORPHAN_NODE_COUNT)

    print("=== 内存检查 ===")
    print("总对象数: %d" % object_count)
    print("节点数: %d" % node_count)
    print("孤儿节点: %d" % orphan_count)

    # Orphan nodes are potential memory leaks.
    if orphan_count > 0:
        push_warning("检测到 %d 个孤儿节点!" % orphan_count)

    # Compare against the previous check; skipped on the very first run.
    if object_counts.has("total"):
        var diff = object_count - object_counts.total
        if diff > GROWTH_WARNING_THRESHOLD:
            push_warning("对象数增长了 %d,可能存在内存泄漏" % diff)

    # Remember this check's counts for the next comparison.
    object_counts = {
        "total": object_count,
        "nodes": node_count,
        "orphans": orphan_count
    }

💡 使用 Performance.OBJECT_ORPHAN_NODE_COUNT 可以快速发现未被场景树管理的节点,这些通常是内存泄漏的根源。


远程调试

# remote_debug.gd
# Debug-only overlay drawing. The script uses the CanvasItem drawing API
# (queue_redraw, _draw, draw_circle, draw_string), so it has to extend a
# CanvasItem-derived class; plain Node does not provide those methods.
extends Node2D

# Enabling remote debugging in an exported build
func _ready():
    # The connection to the editor's remote debugger is configured with a
    # command line argument, not from script:
    #   --remote-debug tcp://<editor-ip>:6007
    pass

# Requests a redraw, but only in debug builds. This is a runtime check
# (OS.is_debug_build()), not conditional compilation.
func debug_draw():
    if OS.is_debug_build():
        queue_redraw()

# Draws the debug overlay; draws nothing outside debug builds.
func _draw():
    if not OS.is_debug_build():
        return

    # Debug overlay: a marker circle and the current FPS.
    draw_circle(Vector2.ZERO, 50, Color.RED)
    draw_string(ThemeDB.fallback_font, Vector2(10, 30),
        "FPS: %d" % Engine.get_frames_per_second())

性能基准测试

# benchmark.gd
# Small set of micro-benchmarks; each one is timed and printed by _benchmark().
extends Node

# Runs every benchmark once and prints the elapsed time of each.
func run_benchmark():
    print("=== 性能基准测试 ===")

    # Math: 100k sin/cos evaluations.
    _benchmark("数学运算", func():
        var result = 0.0
        for i in range(100000):
            result += sin(float(i)) * cos(float(i))
    )

    # Arrays: append 10k ints, then sort and reverse.
    _benchmark("数组操作", func():
        var arr = []
        for i in range(10000):
            arr.append(i)
        arr.sort()
        arr.reverse()
    )

    # Dictionaries: 10k inserts followed by 10k lookups.
    _benchmark("字典操作", func():
        var dict = {}
        for i in range(10000):
            dict["key_%d" % i] = i
        for i in range(10000):
            dict.has("key_%d" % i)
    )

    # Nodes: create, attach and destroy 1000 nodes.
    _benchmark("节点创建/销毁", func():
        for i in range(1000):
            var node = Node.new()
            node.name = "test_%d" % i
            add_child(node)
            # free() destroys the node immediately. queue_free() would defer
            # the destruction past the measurement, so it would not be timed.
            node.free()
    )

# Calls `callable` once, prints "<label>: <elapsed> ms" and returns the
# elapsed time in milliseconds.
# The first parameter is named `label` rather than `name` so that it does not
# shadow Node.name.
func _benchmark(label: String, callable: Callable) -> float:
    var start = Time.get_ticks_usec()
    callable.call()
    var elapsed_ms = (Time.get_ticks_usec() - start) / 1000.0
    print("%s: %.2f ms" % [label, elapsed_ms])
    return elapsed_ms

💡 扩展阅读