强曰为道

与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧.
文档目录

第 09 章:SQL 与图混合查询

第 09 章:SQL 与图混合查询

9.1 为什么需要混合查询?

AgensGraph 的核心优势之一是可以在同一个数据库中同时使用关系型数据和图数据。混合查询让我们能够:

典型混合场景:

  关系型数据:                        图数据:
  ┌──────────────┐                  ┌──────────────┐
  │ orders       │                  │ (Person)     │
  │ ├ id         │                  │   │          │
  │ ├ user_id    │  ◀─ 关联 ──▶    │   -[:KNOWS]─▶│
  │ ├ amount     │                  │ (Company)    │
  │ └ status     │                  │              │
  └──────────────┘                  └──────────────┘

  需求: "找出张三的社交圈子中,所有下过金额超过 1000 元订单的用户"
  → 需要同时使用图遍历和 SQL 查询

9.2 cypher() 函数:SQL 中嵌入 Cypher

9.2.1 基本语法

-- 在 SQL 中调用 Cypher 查询
SELECT *
FROM cypher('social_network', $$
  MATCH (p:Person)-[:KNOWS]->(f:Person)
  RETURN p.name AS person, f.name AS friend
$$) AS (person text, friend text);

9.2.2 参数说明

参数说明示例
第 1 参数图名称'social_network'
第 2 参数Cypher 查询(用 $$ 包裹)$$ MATCH (n) RETURN n $$
返回列必须显式定义类型AS (col1 text, col2 int)

9.2.3 结果类型映射

Cypher 类型SQL 类型说明
Integerint / bigint整数
Floatfloat / real浮点数
Stringtext / varchar字符串
Booleanboolean布尔值
Vertexvertex顶点类型
Edgeedge边类型
Pathpath路径类型

9.3 混合查询实战

9.3.1 场景:图遍历结果与关系表 JOIN

-- 创建关系型订单表
CREATE TABLE IF NOT EXISTS orders (
    order_id    SERIAL PRIMARY KEY,
    user_name   TEXT NOT NULL,
    amount      DECIMAL(10,2) NOT NULL,
    status      TEXT DEFAULT 'pending',
    created_at  TIMESTAMP DEFAULT now()
);

-- 插入订单数据
INSERT INTO orders (user_name, amount, status) VALUES
    ('Alice', 1500.00, 'completed'),
    ('Bob', 800.00, 'completed'),
    ('Carol', 2500.00, 'completed'),
    ('Dave', 300.00, 'pending'),
    ('Alice', 450.00, 'completed');
-- 混合查询:Alice 的朋友中谁下过大额订单?
SELECT cf.friend, o.amount, o.status
FROM cypher('social_network', $$
  MATCH (p:Person {name: 'Alice'})-[:KNOWS]->(f:Person)
  RETURN f.name AS friend
$$) AS cf(friend text)
JOIN orders o ON cf.friend = o.user_name
WHERE o.amount > 1000;

9.3.2 场景:SQL 结果驱动图查询

-- 先用 SQL 找出 VIP 用户,再用 Cypher 查找他们的社交圈
SELECT vip.user_name, cf.social_circle
FROM (
    SELECT user_name, SUM(amount) AS total_spent
    FROM orders
    WHERE status = 'completed'
    GROUP BY user_name
    HAVING SUM(amount) > 1000
) vip
JOIN LATERAL (
    SELECT string_agg(friend, ', ') AS social_circle
    FROM cypher('social_network', $$
      MATCH (p:Person {name: $username})-[:KNOWS]->(f:Person)
      RETURN f.name AS friend
    $$) AS cf(friend text)
    WHERE EXISTS (
        SELECT 1 FROM cypher('social_network', $$
          MATCH (p:Person {name: $username})
          RETURN p
        $$) AS c(v vertex)
    )
) cf ON true;

9.3.3 场景:图查询结果写入关系表

-- 将图遍历结果持久化到关系表
CREATE TABLE IF NOT EXISTS social_analysis (
    person      TEXT,
    friend_count INT,
    analyzed_at  TIMESTAMP DEFAULT now()
);

INSERT INTO social_analysis (person, friend_count)
SELECT person, friend_count
FROM cypher('social_network', $$
  MATCH (p:Person)
  OPTIONAL MATCH (p)-[:KNOWS]->(f:Person)
  RETURN p.name AS person, count(f) AS friend_count
$$) AS (person text, friend_count int);

9.4 图视图(Graph View)

9.4.1 SQL 视图包装 Cypher 查询

-- 创建视图:简化常用查询
CREATE OR REPLACE VIEW v_person_friends AS
SELECT person, friend, since
FROM cypher('social_network', $$
  MATCH (p:Person)-[r:KNOWS]->(f:Person)
  RETURN p.name AS person, f.name AS friend, r.since AS since
$$) AS (person text, friend text, since int);

-- 使用视图(像普通表一样查询)
SELECT * FROM v_person_friends WHERE person = 'Alice';

-- 基于视图创建更复杂的查询
SELECT friend, count(*) AS mutual_friend_count
FROM v_person_friends
WHERE person IN (
    SELECT friend FROM v_person_friends WHERE person = 'Alice'
)
AND friend <> 'Alice'
GROUP BY friend
ORDER BY mutual_friend_count DESC;

9.4.2 图分析物化视图

-- 创建物化视图(定期刷新)
CREATE MATERIALIZED VIEW mv_social_metrics AS
SELECT person, friend_count, avg_friend_age
FROM cypher('social_network', $$
  MATCH (p:Person)
  OPTIONAL MATCH (p)-[:KNOWS]->(f:Person)
  RETURN p.name AS person,
         count(f) AS friend_count,
         avg(f.age) AS avg_friend_age
$$) AS (person text, friend_count int, avg_friend_age float);

-- 创建索引加速物化视图查询
CREATE INDEX ON mv_social_metrics(person);
CREATE INDEX ON mv_social_metrics(friend_count);

-- 定期刷新
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_social_metrics;

9.5 联合查询模式

9.5.1 UNION 混合查询

-- 合并来自图和关系表的数据
SELECT 'graph' AS source, person_name, score
FROM cypher('social_network', $$
  MATCH (p:Person)
  RETURN p.name AS person_name, p.age * 10 AS score
$$) AS (person_name text, score int)

UNION ALL

SELECT 'crm' AS source, customer_name, loyalty_score
FROM crm_customers
WHERE is_active = true;

9.5.2 CTE + 图查询

-- 使用 CTE 将复杂查询分解为步骤
WITH vip_users AS (
    SELECT user_name, SUM(amount) AS total_spent
    FROM orders
    WHERE status = 'completed'
    GROUP BY user_name
    HAVING SUM(amount) > 1000
),
social_data AS (
    SELECT person, friend, since
    FROM cypher('social_network', $$
      MATCH (p:Person)-[r:KNOWS]->(f:Person)
      RETURN p.name AS person, f.name AS friend, r.since AS since
    $$) AS (person text, friend text, since int)
)
SELECT
    v.user_name,
    v.total_spent,
    s.friend,
    s.since AS friends_since
FROM vip_users v
JOIN social_data s ON v.user_name = s.person
ORDER BY v.total_spent DESC;

9.6 图与关系数据互操作

9.6.1 关系数据导入图

-- 步骤 1: 从关系表创建顶点
-- 使用 SQL 生成 Cypher 语句批量执行
SELECT cypher('social_network', format($$
  CREATE (:Customer {name: %L, email: %L, tier: %L})
$$, name, email, tier))
FROM crm_customers
WHERE is_active = true;
-- 步骤 2: 从关系数据创建边
-- 先从 SQL 获取共同字段,再用 Cypher 创建关系
-- 先创建约束确保 MERGE 有效
CREATE CONSTRAINT customer_name_unique
  ON (c:Customer)
  ASSERT c.name IS UNIQUE;

9.6.2 图数据导出到关系表

-- 导出社交关系到关系表
CREATE TABLE IF NOT EXISTS exported_relationships (
    person      TEXT,
    friend      TEXT,
    since       INT,
    exported_at TIMESTAMP DEFAULT now()
);

INSERT INTO exported_relationships (person, friend, since)
SELECT person, friend, since
FROM cypher('social_network', $$
  MATCH (p:Person)-[r:KNOWS]->(f:Person)
  RETURN p.name AS person, f.name AS friend, r.since AS since
$$) AS (person text, friend text, since int);

9.7 性能优化

9.7.1 cypher() 函数性能注意事项

-- ❌ 低效:在循环中调用 cypher()
-- 每次调用都有解析开销
SELECT user_name,
    (SELECT count(*) FROM cypher('network', $$
      MATCH (p:Person {name: $$ || quote_literal(user_name) || $$})-[:KNOWS]->(f)
      RETURN f
    $$) AS c(cnt int))
FROM orders;

-- ✅ 高效:批量处理
WITH social AS (
    SELECT person, count(friend) AS friend_count
    FROM cypher('social_network', $$
      MATCH (p:Person)-[:KNOWS]->(f:Person)
      RETURN p.name AS person, f.name AS friend
    $$) AS (person text, friend text)
    GROUP BY person
)
SELECT o.user_name, o.amount, s.friend_count
FROM orders o
LEFT JOIN social s ON o.user_name = s.person;

9.7.2 优化建议总结

策略说明
减少 cypher() 调用次数批量获取图数据,避免循环调用
使用物化视图缓存频繁使用的图查询结果
合理使用索引为 JOIN 条件创建索引
过滤条件提前在 cypher() 内部尽量过滤
EXPLAIN 分析分析混合查询的执行计划

9.8 业务场景:金融风控混合查询

场景描述

银行需要检测可疑交易:找出"交易链路中存在高风险关系"且"金额超过阈值"的交易。

-- 创建交易表(关系型)
CREATE TABLE transactions (
    txn_id      SERIAL PRIMARY KEY,
    from_account TEXT,
    to_account   TEXT,
    amount       DECIMAL(15,2),
    txn_time     TIMESTAMP,
    risk_flag    TEXT
);

-- 创建账户关系图
SET graph_path = risk_network;
CREATE (:Account {id: 'A001', name: '账户1', risk_level: 'low'});
CREATE (:Account {id: 'A002', name: '账户2', risk_level: 'high'});
-- ... 更多账户数据
-- 风控混合查询:交易链路分析
WITH suspicious_accounts AS (
    SELECT account_id
    FROM cypher('risk_network', $$
      MATCH (a:Account {risk_level: 'high'})-[:TRANSFERS_TO*1..3]->(target:Account)
      RETURN DISTINCT target.id AS account_id
    $$) AS (account_id text)
),
high_value_txns AS (
    SELECT txn_id, from_account, to_account, amount, txn_time
    FROM transactions
    WHERE amount > 50000
      AND txn_time > now() - interval '24 hours'
)
SELECT
    h.txn_id,
    h.from_account,
    h.to_account,
    h.amount,
    h.txn_time,
    CASE
        WHEN s.account_id IS NOT NULL THEN 'HIGH_RISK'
        ELSE 'NORMAL'
    END AS risk_assessment
FROM high_value_txns h
LEFT JOIN suspicious_accounts s
    ON h.from_account = s.account_id
    OR h.to_account = s.account_id
ORDER BY h.amount DESC;

9.9 存储过程中的图操作

-- 在 PL/pgSQL 函数中使用图查询
CREATE OR REPLACE FUNCTION get_friend_recommendations(
    p_person_name TEXT,
    p_limit INT DEFAULT 10
)
RETURNS TABLE(recommended_name TEXT, mutual_friends BIGINT)
LANGUAGE plpgsql
AS $$
BEGIN
    RETURN QUERY
    SELECT candidate, mutual_count
    FROM cypher('social_network', format($$
      MATCH (p:Person {name: %L})-[:KNOWS]->(mutual)-[:KNOWS]->(candidate:Person)
      WHERE NOT (p)-[:KNOWS]->(candidate)
        AND candidate.name <> %L
      RETURN candidate.name AS candidate, count(DISTINCT mutual) AS mutual_count
      ORDER BY mutual_count DESC
      LIMIT %s
    $$, p_person_name, p_person_name, p_limit))
    AS (candidate text, mutual_count bigint);
END;
$$;

-- 调用函数
SELECT * FROM get_friend_recommendations('Alice', 5);

9.10 本章小结

要点说明
cypher() 函数在 SQL 中嵌入 Cypher 查询
结果类型必须显式定义返回列和类型
混合 JOIN图查询结果可与关系表 JOIN
视图包装用 SQL 视图封装常用图查询
物化视图缓存频繁使用的图查询结果
性能批量处理,减少 cypher() 调用次数

9.11 练习

  1. 创建一个 SQL 视图,封装 “每个人的朋友数量” 的 Cypher 查询。
  2. 使用 CTE + cypher() 实现:找出所有 VIP 用户(消费总额 > 5000)的二度好友。
  3. 编写一个 PL/pgSQL 函数,输入用户名,返回该用户的社交网络分析报告。

9.12 扩展阅读