第 09 章:SQL 与图混合查询
第 09 章:SQL 与图混合查询
9.1 为什么需要混合查询?
AgensGraph 的核心优势之一是可以在同一个数据库中同时使用关系型数据和图数据。混合查询让我们能够:
典型混合场景:
关系型数据: 图数据:
┌──────────────┐ ┌──────────────┐
│ orders │ │ (Person) │
│ ├ id │ │ │ │
│ ├ user_id │ ◀─ 关联 ──▶ │ -[:KNOWS]─▶│
│ ├ amount │ │ (Company) │
│ └ status │ │ │
└──────────────┘ └──────────────┘
需求: "找出张三的社交圈子中,所有下过金额超过 1000 元订单的用户"
→ 需要同时使用图遍历和 SQL 查询
9.2 cypher() 函数:SQL 中嵌入 Cypher
9.2.1 基本语法
-- 在 SQL 中调用 Cypher 查询
SELECT *
FROM cypher('social_network', $$
MATCH (p:Person)-[:KNOWS]->(f:Person)
RETURN p.name AS person, f.name AS friend
$$) AS (person text, friend text);
9.2.2 参数说明
| 参数 | 说明 | 示例 |
|---|
| 第 1 参数 | 图名称 | 'social_network' |
| 第 2 参数 | Cypher 查询(用 $$ 包裹) | $$ MATCH (n) RETURN n $$ |
| 返回列 | 必须显式定义类型 | AS (col1 text, col2 int) |
9.2.3 结果类型映射
| Cypher 类型 | SQL 类型 | 说明 |
|---|
| Integer | int / bigint | 整数 |
| Float | float / real | 浮点数 |
| String | text / varchar | 字符串 |
| Boolean | boolean | 布尔值 |
| Vertex | vertex | 顶点类型 |
| Edge | edge | 边类型 |
| Path | path | 路径类型 |
9.3 混合查询实战
9.3.1 场景:图遍历结果与关系表 JOIN
-- 创建关系型订单表
CREATE TABLE IF NOT EXISTS orders (
order_id SERIAL PRIMARY KEY,
user_name TEXT NOT NULL,
amount DECIMAL(10,2) NOT NULL,
status TEXT DEFAULT 'pending',
created_at TIMESTAMP DEFAULT now()
);
-- 插入订单数据
INSERT INTO orders (user_name, amount, status) VALUES
('Alice', 1500.00, 'completed'),
('Bob', 800.00, 'completed'),
('Carol', 2500.00, 'completed'),
('Dave', 300.00, 'pending'),
('Alice', 450.00, 'completed');
-- 混合查询:Alice 的朋友中谁下过大额订单?
SELECT cf.friend, o.amount, o.status
FROM cypher('social_network', $$
MATCH (p:Person {name: 'Alice'})-[:KNOWS]->(f:Person)
RETURN f.name AS friend
$$) AS cf(friend text)
JOIN orders o ON cf.friend = o.user_name
WHERE o.amount > 1000;
9.3.2 场景:SQL 结果驱动图查询
-- 先用 SQL 找出 VIP 用户,再用 Cypher 查找他们的社交圈
SELECT vip.user_name, cf.social_circle
FROM (
SELECT user_name, SUM(amount) AS total_spent
FROM orders
WHERE status = 'completed'
GROUP BY user_name
HAVING SUM(amount) > 1000
) vip
JOIN LATERAL (
SELECT string_agg(friend, ', ') AS social_circle
FROM cypher('social_network', $$
MATCH (p:Person {name: $username})-[:KNOWS]->(f:Person)
RETURN f.name AS friend
$$) AS cf(friend text)
WHERE EXISTS (
SELECT 1 FROM cypher('social_network', $$
MATCH (p:Person {name: $username})
RETURN p
$$) AS c(v vertex)
)
) cf ON true;
9.3.3 场景:图查询结果写入关系表
-- 将图遍历结果持久化到关系表
CREATE TABLE IF NOT EXISTS social_analysis (
person TEXT,
friend_count INT,
analyzed_at TIMESTAMP DEFAULT now()
);
INSERT INTO social_analysis (person, friend_count)
SELECT person, friend_count
FROM cypher('social_network', $$
MATCH (p:Person)
OPTIONAL MATCH (p)-[:KNOWS]->(f:Person)
RETURN p.name AS person, count(f) AS friend_count
$$) AS (person text, friend_count int);
9.4 图视图(Graph View)
9.4.1 SQL 视图包装 Cypher 查询
-- 创建视图:简化常用查询
CREATE OR REPLACE VIEW v_person_friends AS
SELECT person, friend, since
FROM cypher('social_network', $$
MATCH (p:Person)-[r:KNOWS]->(f:Person)
RETURN p.name AS person, f.name AS friend, r.since AS since
$$) AS (person text, friend text, since int);
-- 使用视图(像普通表一样查询)
SELECT * FROM v_person_friends WHERE person = 'Alice';
-- 基于视图创建更复杂的查询
SELECT friend, count(*) AS mutual_friend_count
FROM v_person_friends
WHERE person IN (
SELECT friend FROM v_person_friends WHERE person = 'Alice'
)
AND friend <> 'Alice'
GROUP BY friend
ORDER BY mutual_friend_count DESC;
9.4.2 图分析物化视图
-- 创建物化视图(定期刷新)
CREATE MATERIALIZED VIEW mv_social_metrics AS
SELECT person, friend_count, avg_friend_age
FROM cypher('social_network', $$
MATCH (p:Person)
OPTIONAL MATCH (p)-[:KNOWS]->(f:Person)
RETURN p.name AS person,
count(f) AS friend_count,
avg(f.age) AS avg_friend_age
$$) AS (person text, friend_count int, avg_friend_age float);
-- 创建索引加速物化视图查询
CREATE INDEX ON mv_social_metrics(person);
CREATE INDEX ON mv_social_metrics(friend_count);
-- 定期刷新
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_social_metrics;
9.5 联合查询模式
9.5.1 UNION 混合查询
-- 合并来自图和关系表的数据
SELECT 'graph' AS source, person_name, score
FROM cypher('social_network', $$
MATCH (p:Person)
RETURN p.name AS person_name, p.age * 10 AS score
$$) AS (person_name text, score int)
UNION ALL
SELECT 'crm' AS source, customer_name, loyalty_score
FROM crm_customers
WHERE is_active = true;
9.5.2 CTE + 图查询
-- 使用 CTE 将复杂查询分解为步骤
WITH vip_users AS (
SELECT user_name, SUM(amount) AS total_spent
FROM orders
WHERE status = 'completed'
GROUP BY user_name
HAVING SUM(amount) > 1000
),
social_data AS (
SELECT person, friend, since
FROM cypher('social_network', $$
MATCH (p:Person)-[r:KNOWS]->(f:Person)
RETURN p.name AS person, f.name AS friend, r.since AS since
$$) AS (person text, friend text, since int)
)
SELECT
v.user_name,
v.total_spent,
s.friend,
s.since AS friends_since
FROM vip_users v
JOIN social_data s ON v.user_name = s.person
ORDER BY v.total_spent DESC;
9.6 图与关系数据互操作
9.6.1 关系数据导入图
-- 步骤 1: 从关系表创建顶点
-- 使用 SQL 生成 Cypher 语句批量执行
SELECT cypher('social_network', format($$
CREATE (:Customer {name: %L, email: %L, tier: %L})
$$, name, email, tier))
FROM crm_customers
WHERE is_active = true;
-- 步骤 2: 从关系数据创建边
-- 先从 SQL 获取共同字段,再用 Cypher 创建关系
-- 先创建约束确保 MERGE 有效
CREATE CONSTRAINT customer_name_unique
ON (c:Customer)
ASSERT c.name IS UNIQUE;
9.6.2 图数据导出到关系表
-- 导出社交关系到关系表
CREATE TABLE IF NOT EXISTS exported_relationships (
person TEXT,
friend TEXT,
since INT,
exported_at TIMESTAMP DEFAULT now()
);
INSERT INTO exported_relationships (person, friend, since)
SELECT person, friend, since
FROM cypher('social_network', $$
MATCH (p:Person)-[r:KNOWS]->(f:Person)
RETURN p.name AS person, f.name AS friend, r.since AS since
$$) AS (person text, friend text, since int);
9.7 性能优化
9.7.1 cypher() 函数性能注意事项
-- ❌ 低效:在循环中调用 cypher()
-- 每次调用都有解析开销
SELECT user_name,
(SELECT count(*) FROM cypher('network', $$
MATCH (p:Person {name: $$ || quote_literal(user_name) || $$})-[:KNOWS]->(f)
RETURN f
$$) AS c(cnt int))
FROM orders;
-- ✅ 高效:批量处理
WITH social AS (
SELECT person, count(friend) AS friend_count
FROM cypher('social_network', $$
MATCH (p:Person)-[:KNOWS]->(f:Person)
RETURN p.name AS person, f.name AS friend
$$) AS (person text, friend text)
GROUP BY person
)
SELECT o.user_name, o.amount, s.friend_count
FROM orders o
LEFT JOIN social s ON o.user_name = s.person;
9.7.2 优化建议总结
| 策略 | 说明 |
|---|
| 减少 cypher() 调用次数 | 批量获取图数据,避免循环调用 |
| 使用物化视图 | 缓存频繁使用的图查询结果 |
| 合理使用索引 | 为 JOIN 条件创建索引 |
| 过滤条件提前 | 在 cypher() 内部尽量过滤 |
| EXPLAIN 分析 | 分析混合查询的执行计划 |
9.8 业务场景:金融风控混合查询
场景描述
银行需要检测可疑交易:找出"交易链路中存在高风险关系"且"金额超过阈值"的交易。
-- 创建交易表(关系型)
CREATE TABLE transactions (
txn_id SERIAL PRIMARY KEY,
from_account TEXT,
to_account TEXT,
amount DECIMAL(15,2),
txn_time TIMESTAMP,
risk_flag TEXT
);
-- 创建账户关系图
SET graph_path = risk_network;
CREATE (:Account {id: 'A001', name: '账户1', risk_level: 'low'});
CREATE (:Account {id: 'A002', name: '账户2', risk_level: 'high'});
-- ... 更多账户数据
-- 风控混合查询:交易链路分析
WITH suspicious_accounts AS (
SELECT account_id
FROM cypher('risk_network', $$
MATCH (a:Account {risk_level: 'high'})-[:TRANSFERS_TO*1..3]->(target:Account)
RETURN DISTINCT target.id AS account_id
$$) AS (account_id text)
),
high_value_txns AS (
SELECT txn_id, from_account, to_account, amount, txn_time
FROM transactions
WHERE amount > 50000
AND txn_time > now() - interval '24 hours'
)
SELECT
h.txn_id,
h.from_account,
h.to_account,
h.amount,
h.txn_time,
CASE
WHEN s.account_id IS NOT NULL THEN 'HIGH_RISK'
ELSE 'NORMAL'
END AS risk_assessment
FROM high_value_txns h
LEFT JOIN suspicious_accounts s
ON h.from_account = s.account_id
OR h.to_account = s.account_id
ORDER BY h.amount DESC;
9.9 存储过程中的图操作
-- 在 PL/pgSQL 函数中使用图查询
CREATE OR REPLACE FUNCTION get_friend_recommendations(
p_person_name TEXT,
p_limit INT DEFAULT 10
)
RETURNS TABLE(recommended_name TEXT, mutual_friends BIGINT)
LANGUAGE plpgsql
AS $$
BEGIN
RETURN QUERY
SELECT candidate, mutual_count
FROM cypher('social_network', format($$
MATCH (p:Person {name: %L})-[:KNOWS]->(mutual)-[:KNOWS]->(candidate:Person)
WHERE NOT (p)-[:KNOWS]->(candidate)
AND candidate.name <> %L
RETURN candidate.name AS candidate, count(DISTINCT mutual) AS mutual_count
ORDER BY mutual_count DESC
LIMIT %s
$$, p_person_name, p_person_name, p_limit))
AS (candidate text, mutual_count bigint);
END;
$$;
-- 调用函数
SELECT * FROM get_friend_recommendations('Alice', 5);
9.10 本章小结
| 要点 | 说明 |
|---|
| cypher() 函数 | 在 SQL 中嵌入 Cypher 查询 |
| 结果类型 | 必须显式定义返回列和类型 |
| 混合 JOIN | 图查询结果可与关系表 JOIN |
| 视图包装 | 用 SQL 视图封装常用图查询 |
| 物化视图 | 缓存频繁使用的图查询结果 |
| 性能 | 批量处理,减少 cypher() 调用次数 |
9.11 练习
- 创建一个 SQL 视图,封装 “每个人的朋友数量” 的 Cypher 查询。
- 使用 CTE + cypher() 实现:找出所有 VIP 用户(消费总额 > 5000)的二度好友。
- 编写一个 PL/pgSQL 函数,输入用户名,返回该用户的社交网络分析报告。
9.12 扩展阅读