第 14 章:性能优化
第 14 章:性能优化
14.1 性能优化总览
空间查询性能优化的核心思路:
优化层次
├── 1. 数据层优化
│ ├── 选择合适的几何类型
│ ├── 选择合适的 SRID(投影 vs 地理)
│ └── 简化几何精度
│
├── 2. 索引层优化
│ ├── 创建空间索引
│ ├── 使用部分索引
│ └── 选择合适的索引类型
│
├── 3. 查询层优化
│ ├── 使用空间索引操作符
│ ├── 两步过滤法
│ └── 避免函数包裹索引列
│
├── 4. 架构层优化
│ ├── 表分区
│ ├── 读写分离
│ └── 预计算
│
└── 5. 配置层优化
├── shared_buffers
├── work_mem
└── effective_cache_size
14.2 几何简化
ST_Simplify
-- 使用 Douglas-Peucker 算法简化几何
-- 原始多边形可能有数千个点
SELECT ST_NPoints(geom) AS original_points
FROM districts WHERE name = '北京市';
-- 简化(容差越大,简化越激进)
SELECT
ST_NPoints(ST_Simplify(geom, 0.001)) AS simplified_1,
ST_NPoints(ST_Simplify(geom, 0.005)) AS simplified_2,
ST_NPoints(ST_Simplify(geom, 0.01)) AS simplified_3
FROM districts WHERE name = '北京市';
-- 创建简化后的列用于快速查询
ALTER TABLE districts ADD COLUMN geom_simple GEOMETRY(MultiPolygon, 4326);
UPDATE districts
SET geom_simple = ST_Simplify(geom, 0.001);
CREATE INDEX idx_districts_geom_simple ON districts USING GIST(geom_simple);
-- 近似查询使用简化几何
SELECT name FROM districts
WHERE geom_simple && ST_SetSRID(ST_MakePoint(116.4, 39.9), 4326);
-- 精确查询使用原始几何
SELECT name FROM districts
WHERE geom_simple && ST_SetSRID(ST_MakePoint(116.4, 39.9), 4326)
AND ST_Contains(geom, ST_SetSRID(ST_MakePoint(116.4, 39.9), 4326));
ST_SimplifyPreserveTopology
-- 保持拓扑的简化(避免自相交)
SELECT ST_SimplifyPreserveTopology(geom, 0.001)
FROM districts;
精度降低
-- 降低坐标精度(减少小数位数)
-- 适用于初始数据精度远超实际需求的场景
UPDATE pois
SET geom = ST_GeomFromText(
ST_AsText(geom, 6), -- 保留 6 位小数(约 0.1 米精度)
4326
);
14.3 索引策略
空间索引类型选择
-- GiST(默认推荐)
CREATE INDEX idx_table_geom ON table USING GIST(geom);
-- SP-GiST(大量随机点)
CREATE INDEX idx_table_geom ON table USING SPGIST(geom);
-- BRIN(物理有序的大表)
CREATE INDEX idx_table_geom ON table USING BRIN(geom);
部分索引
-- 只对活跃数据建索引
CREATE INDEX idx_events_active_geom
ON events USING GIST(geom)
WHERE status = 'active' AND created_at > now() - INTERVAL '30 days';
-- 只对特定类型建索引
CREATE INDEX idx_poi_restaurant_geom
ON pois USING GIST(geom)
WHERE category = '餐厅';
多列索引策略
-- PostgreSQL 不支持 GiST + B-tree 混合索引
-- 替代方案:分步查询
-- 方案 1: 先 B-tree 过滤,再 GiST 过滤
SELECT * FROM stores
WHERE store_type = '旗舰' -- B-tree 索引
AND geom && target_geom; -- GiST 索引
CREATE INDEX idx_stores_type ON stores(store_type);
CREATE INDEX idx_stores_geom ON stores USING GIST(geom);
-- 方案 2: 使用 BRIN 索引(如果数据物理有序)
-- 适用于按时间插入的 GPS 轨迹数据
14.4 查询优化技巧
两步过滤法
-- ❌ 慢:直接使用复杂空间函数
SELECT name FROM stores
WHERE ST_Contains(
(SELECT geom FROM districts WHERE name = '朝阳区'),
stores.geom
);
-- ✅ 快:先用边界框过滤,再用精确函数
SELECT name FROM stores
WHERE geom && (SELECT geom FROM districts WHERE name = '朝阳区')
AND ST_Contains(
(SELECT geom FROM districts WHERE name = '朝阳区'),
geom
);
避免对索引列使用函数
-- ❌ 错误:函数包裹索引列,索引无法使用
SELECT name FROM stores
WHERE ST_AsText(geom) LIKE 'POINT(116%';
-- ✅ 正确:直接对几何列使用索引操作符
SELECT name FROM stores
WHERE geom && ST_MakeEnvelope(116, 39, 117, 40, 4326);
-- ❌ 错误:类型转换包裹索引列
SELECT name FROM stores
WHERE geom::geography && target::geography;
-- ✅ 正确:创建 Geography 列或使用 Geography 索引
ALTER TABLE stores ADD COLUMN geog geography(Point, 4326);
UPDATE stores SET geog = geom::geography;
CREATE INDEX idx_stores_geog ON stores USING GIST(geog);
使用 LATERAL JOIN 优化
-- ❌ 慢:相关子查询
SELECT d.name,
(SELECT s.name FROM stores s
ORDER BY s.geom <-> d.geom LIMIT 1) AS nearest_store
FROM districts d;
-- ✅ 快:LATERAL JOIN
SELECT d.name, s.name AS nearest_store
FROM districts d
CROSS JOIN LATERAL (
SELECT name FROM stores
ORDER BY geom <-> d.geom
LIMIT 1
) s;
KNN 查询优化
-- 使用 <-> 操作符进行 KNN 查询
-- 必须确保 ORDER BY ... LIMIT 才能利用索引
SELECT name, ST_Distance(geom::geography, target::geography) AS distance
FROM stores
ORDER BY geom <-> target -- 利用 GiST 索引排序
LIMIT 10;
-- 对比:不使用 LIMIT 时,<-> 不会利用索引
SELECT name, ST_Distance(geom::geography, target::geography) AS distance
FROM stores
WHERE ST_DWithin(geom::geography, target::geography, 5000) -- 先缩小范围
ORDER BY geom <-> target -- 再 KNN 排序
LIMIT 10;
14.5 查询计划分析
EXPLAIN ANALYZE 详解
EXPLAIN (ANALYZE, BUFFERS, VERBOSE, FORMAT TEXT)
SELECT name FROM stores
WHERE ST_DWithin(
geom::geography,
ST_SetSRID(ST_MakePoint(116.4074, 39.9042), 4326)::geography,
3000
);
关键指标解读
Index Scan using idx_stores_geom on public.stores
Output: name
Index Cond: (stores.geom && st_expand(...))
Filter: st_dwithin((stores.geom)::geography, ..., 3000)
Rows Removed by Filter: 5
Heap Fetches: 0
Shared Hit Blocks: 12
Shared Read Blocks: 0
Planning Time: 0.234 ms
Execution Time: 0.876 ms
| 指标 | 健康值 | 问题指示 |
|---|
| Index Scan | ✅ | Seq Scan = 未使用索引 |
| Rows Removed by Filter | < 100 | 高值 = 索引选择性差 |
| Heap Fetches | 0 | 高值 = 需要 VACUUM |
| Shared Hit | 高 | Shared Read = 缓存未命中 |
| Planning Time | < 5ms | 高值 = 查询太复杂 |
| Execution Time | 取决于场景 | 关注趋势变化 |
常见问题排查
-- 问题:Seq Scan 而非 Index Scan
-- 可能原因 1: 未创建索引
CREATE INDEX idx_geom ON table USING GIST(geom);
-- 可能原因 2: 统计信息过时
ANALYZE table_name;
-- 可能原因 3: 表太小,优化器认为全表扫描更快
-- 这是正常的,无需优化
-- 可能原因 4: 函数包裹了索引列
-- 参考上文"避免对索引列使用函数"
-- 问题:Index Scan 但很慢
-- 可能原因: 查询范围太大,返回行数过多
-- 解决: 缩小查询范围或增加 LIMIT
14.6 表分区
按范围分区(时间序列空间数据)
-- 创建分区父表
CREATE TABLE gps_tracks (
id BIGSERIAL,
device_id INTEGER,
recorded_at TIMESTAMPTZ NOT NULL,
speed NUMERIC(5,2),
geom GEOMETRY(Point, 4326)
) PARTITION BY RANGE (recorded_at);
-- 创建月度分区
CREATE TABLE gps_tracks_2025_01 PARTITION OF gps_tracks
FOR VALUES FROM ('2025-01-01') TO ('2025-02-01');
CREATE TABLE gps_tracks_2025_02 PARTITION OF gps_tracks
FOR VALUES FROM ('2025-02-01') TO ('2025-03-01');
-- ... 继续创建
-- 每个分区独立创建索引
CREATE INDEX idx_tracks_2025_01_geom ON gps_tracks_2025_01 USING GIST(geom);
CREATE INDEX idx_tracks_2025_02_geom ON gps_tracks_2025_02 USING GIST(geom);
-- 自动创建分区(使用 pg_partman 扩展)
CREATE EXTENSION IF NOT EXISTS partman;
SELECT partman.create_parent(
'public.gps_tracks',
'recorded_at',
'native',
'monthly'
);
按列表分区(按区域)
CREATE TABLE pois (
id SERIAL,
name TEXT,
region VARCHAR(20) NOT NULL,
geom GEOMETRY(Point, 4326)
) PARTITION BY LIST (region);
CREATE TABLE pois_east PARTITION OF pois FOR VALUES IN ('华东', '华南');
CREATE TABLE pois_north PARTITION OF pois FOR VALUES IN ('华北', '东北');
CREATE TABLE pois_central PARTITION OF pois FOR VALUES IN ('华中', '西南', '西北');
按空间分区
-- 使用 H3 或 S2 空间分区索引
-- 方案:在表中添加 H3 索引列
CREATE EXTENSION IF NOT EXISTS h3;
ALTER TABLE pois ADD COLUMN h3_index h3index;
UPDATE pois SET h3_index = h3_geo_to_h3(ST_Y(geom), ST_X(geom), 7);
CREATE INDEX idx_pois_h3 ON pois(h3_index);
-- 查询时先用 H3 索引过滤
SELECT * FROM pois
WHERE h3_index IN (
SELECT h3_geo_to_h3(39.9042, 116.4074, 7) -- 目标 H3 格
UNION
SELECT h3_k_ring(h3_geo_to_h3(39.9042, 116.4074, 7), 1) -- 相邻格
);
14.7 预计算
预计算距离矩阵
-- 预计算热点区域的距离矩阵
CREATE TABLE distance_matrix AS
SELECT
a.id AS from_id,
b.id AS to_id,
ROUND(ST_Distance(a.geom::geography, b.geom::geography)) AS distance_m
FROM hotspots a, hotspots b
WHERE a.id < b.id;
CREATE INDEX idx_dm_from ON distance_matrix(from_id);
CREATE INDEX idx_dm_to ON distance_matrix(to_id);
-- 查询时直接使用预计算结果
SELECT from_id, to_id, distance_m
FROM distance_matrix
WHERE from_id = 1
ORDER BY distance_m
LIMIT 5;
物化视图
-- 创建空间物化视图
CREATE MATERIALIZED VIEW mv_store_coverage AS
SELECT
store_id,
ST_Union(ST_Buffer(geom::geography, 3000)::geometry) AS coverage_3km
FROM stores
GROUP BY store_id;
CREATE UNIQUE INDEX idx_mv_store_coverage ON mv_store_coverage(store_id);
CREATE INDEX idx_mv_store_coverage_geom ON mv_store_coverage USING GIST(coverage_3km);
-- 定期刷新
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_store_coverage;
14.8 数据库配置优化
关键参数
-- 查看当前配置
SHOW shared_buffers; -- 建议: 总内存的 25%
SHOW effective_cache_size; -- 建议: 总内存的 50-75%
SHOW work_mem; -- 建议: 256MB-1GB (排序/哈希操作)
SHOW maintenance_work_mem; -- 建议: 1-2GB (VACUUM/CREATE INDEX)
SHOW random_page_cost; -- SSD 设为 1.1
SHOW effective_io_concurrency; -- SSD 设为 200
-- 空间查询优化建议
SET work_mem = '512MB'; -- 复杂空间连接需要更多内存
SET enable_seqscan = off; -- 仅调试时使用,强制使用索引
SET random_page_cost = 1.1; -- SSD 存储
统计信息优化
-- 增加几何列的统计精度
ALTER TABLE stores ALTER COLUMN geom SET STATISTICS 1000;
ANALYZE stores;
-- 查看统计信息
SELECT
attname,
n_distinct,
most_common_vals,
histogram_bounds
FROM pg_stats
WHERE tablename = 'stores' AND attname = 'geom';
14.9 并行查询
-- 启用并行查询
SET max_parallel_workers_per_gather = 4;
SET parallel_tuple_cost = 0.01;
SET parallel_setup_cost = 100;
-- 查看查询计划是否使用并行
EXPLAIN ANALYZE
SELECT count(*) FROM pois
WHERE geom && ST_MakeEnvelope(116, 39, 117, 40, 4326);
-- 并行创建索引
CREATE INDEX CONCURRENTLY idx_pois_geom ON pois USING GIST(geom);
14.10 监控与诊断
空间查询监控
-- 查看活跃的空间查询
SELECT pid, query, state, wait_event_type, now() - query_start AS duration
FROM pg_stat_activity
WHERE query ILIKE '%st_%'
AND state = 'active';
-- 终止长时间运行的空间查询
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE query ILIKE '%st_%'
AND now() - query_start > INTERVAL '5 minutes';
-- 索引使用统计
SELECT
schemaname, tablename, indexname,
idx_scan, idx_tup_read, idx_tup_fetch,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
JOIN pg_index USING (indexrelid)
WHERE idx_scan > 0
ORDER BY idx_tup_read DESC;
表膨胀检查
-- 检查表膨胀
SELECT
schemaname, tablename,
pg_size_pretty(pg_total_relation_size(schemaname || '.' || tablename)) AS total_size,
n_dead_tup,
n_live_tup,
ROUND(n_dead_tup::NUMERIC / GREATEST(n_live_tup, 1) * 100, 1) AS dead_pct
FROM pg_stat_user_tables
WHERE n_dead_tup > 10000
ORDER BY n_dead_tup DESC;
-- 重建膨胀的索引
REINDEX INDEX CONCURRENTLY idx_stores_geom;
14.11 性能基准测试
-- 创建基准测试函数
CREATE OR REPLACE FUNCTION benchmark_spatial_query(
query_text TEXT,
iterations INTEGER DEFAULT 10
) RETURNS TABLE(
avg_time NUMERIC,
min_time NUMERIC,
max_time NUMERIC,
stddev_time NUMERIC
) AS $$
DECLARE
times NUMERIC[];
start_time TIMESTAMPTZ;
elapsed NUMERIC;
BEGIN
FOR i IN 1..iterations LOOP
start_time := clock_timestamp();
EXECUTE query_text;
elapsed := EXTRACT(EPOCH FROM (clock_timestamp() - start_time)) * 1000;
times := array_append(times, elapsed);
END LOOP;
RETURN QUERY
SELECT
ROUND(avg(t), 2),
ROUND(min(t), 2),
ROUND(max(t), 2),
ROUND(stddev(t), 2)
FROM unnest(times) AS t;
END;
$$ LANGUAGE plpgsql;
-- 使用
SELECT * FROM benchmark_spatial_query(
$$SELECT count(*) FROM pois WHERE geom && ST_MakeEnvelope(116, 39, 117, 40, 4326)$$,
20
);
14.12 本章小结
| 优化层次 | 关键措施 | 预期提升 |
|---|
| 数据简化 | ST_Simplify, 降低精度 | 2-10x |
| 空间索引 | GiST/SP-GiST/BRIN | 10-100x |
| 两步过滤 | && 粗过滤 + 精确判断 | 2-5x |
| 表分区 | 按时间/区域分区 | 2-10x |
| 预计算 | 物化视图、距离矩阵 | 10-100x |
| 配置调优 | shared_buffers, work_mem | 1.5-3x |
扩展阅读