强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧.
文档目录

ClickHouse 教程 / SQL 基础(SELECT/INSERT)

SQL 基础(SELECT / INSERT)

本章介绍 ClickHouse 中最常用的 SQL 语法,包括查询、写入、表结构变更等操作。


1. 前置准备:创建示例表

-- 创建示例数据库
CREATE DATABASE IF NOT EXISTS tutorial;

-- 订单表
CREATE TABLE tutorial.orders
(
    order_id     UInt64,
    user_id      UInt64,
    product_id   UInt64,
    product_name String,
    category     LowCardinality(String),
    quantity     UInt32,
    price        Decimal128(2),
    order_date   Date,
    created_at   DateTime,
    status       LowCardinality(String),
    city         LowCardinality(String)
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(order_date)
ORDER BY (order_date, user_id, order_id);

-- 插入测试数据
INSERT INTO tutorial.orders VALUES
    (1001, 1, 101, 'iPhone 15',     'Electronics', 1, 5999.00, '2026-04-01', '2026-04-01 10:30:00', 'delivered', 'Beijing'),
    (1002, 2, 102, 'MacBook Pro',   'Electronics', 1, 12999.00, '2026-04-01', '2026-04-01 11:00:00', 'delivered', 'Shanghai'),
    (1003, 1, 103, 'AirPods Pro',   'Electronics', 2, 1899.00, '2026-04-02', '2026-04-02 09:15:00', 'shipped', 'Beijing'),
    (1004, 3, 201, '运动鞋',         'Sports',      1, 699.00, '2026-04-02', '2026-04-02 14:20:00', 'delivered', 'Guangzhou'),
    (1005, 2, 202, '瑜伽垫',         'Sports',      1, 199.00, '2026-04-03', '2026-04-03 08:45:00', 'pending', 'Shanghai'),
    (1006, 4, 101, 'iPhone 15',     'Electronics', 1, 5999.00, '2026-04-03', '2026-04-03 16:30:00', 'cancelled', 'Beijing'),
    (1007, 3, 301, 'Python编程书',   'Books',       3, 89.00, '2026-04-04', '2026-04-04 10:00:00', 'delivered', 'Guangzhou'),
    (1008, 5, 102, 'MacBook Pro',   'Electronics', 1, 12999.00, '2026-04-05', '2026-04-05 09:30:00', 'shipped', 'Shenzhen'),
    (1009, 1, 202, '瑜伽垫',         'Sports',      1, 199.00, '2026-05-01', '2026-05-01 11:00:00', 'delivered', 'Beijing'),
    (1010, 2, 302, 'ClickHouse实战', 'Books',       1, 79.00, '2026-05-01', '2026-05-01 15:45:00', 'pending', 'Shanghai');

2. SELECT 查询

2.1 基础查询

-- 选择所有列
SELECT * FROM tutorial.orders;

-- 选择特定列
SELECT order_id, product_name, price
FROM tutorial.orders;

-- 使用表达式
SELECT
    order_id,
    product_name,
    price,
    quantity,
    price * quantity AS total_amount,
    round(price * 0.9, 2) AS discounted_price
FROM tutorial.orders;

2.2 WHERE 过滤

-- 等值过滤
SELECT * FROM tutorial.orders WHERE status = 'delivered';

-- 范围过滤
SELECT * FROM tutorial.orders WHERE price >= 1000 AND price <= 10000;

-- IN 查询
SELECT * FROM tutorial.orders WHERE city IN ('Beijing', 'Shanghai');

-- LIKE 模糊匹配
SELECT * FROM tutorial.orders WHERE product_name LIKE '%Pro%';

-- 正则匹配
SELECT * FROM tutorial.orders WHERE match(product_name, '^(iPhone|MacBook)');

-- NULL 过滤
SELECT * FROM tutorial.orders WHERE status IS NOT NULL;

-- 复合条件
SELECT
    order_id,
    product_name,
    price,
    status
FROM tutorial.orders
WHERE category = 'Electronics'
  AND price > 2000
  AND status NOT IN ('cancelled')
  AND order_date >= '2026-04-01';

2.3 GROUP BY 聚合

-- 基础分组
SELECT
    category,
    count()                          AS order_count,
    sum(price * quantity)            AS total_revenue,
    avg(price)                       AS avg_price,
    min(price)                       AS min_price,
    max(price)                       AS max_price,
    uniq(user_id)                    AS unique_users,
    round(sum(price * quantity) / uniq(user_id), 2) AS arpu
FROM tutorial.orders
WHERE status != 'cancelled'
GROUP BY category
ORDER BY total_revenue DESC;

预期输出:

┌─category────┬─order_count─┬─total_revenue─┬─avg_price─┬─min_price─┬─max_price─┬─unique_users─┬───arpu─┐
│ Electronics │           5 │      41795.00 │  7299.20  │  1899.00  │ 12999.00  │            4 │ 10448.75 │
│ Sports      │           3 │       1097.00 │   365.67  │   199.00  │   699.00  │            3 │   365.67 │
│ Books       │           2 │        346.00 │    84.00  │    79.00  │    89.00  │            2 │   173.00 │
└─────────────┴─────────────┴───────────────┴───────────┴───────────┴───────────┴──────────────┴─────────┘

2.4 HAVING 过滤

-- 对聚合结果进行过滤
SELECT
    user_id,
    count()               AS order_count,
    sum(price * quantity)  AS total_spent
FROM tutorial.orders
WHERE status != 'cancelled'
GROUP BY user_id
HAVING total_spent > 1000
ORDER BY total_spent DESC;

2.5 ORDER BY 排序

-- 单列排序
SELECT * FROM tutorial.orders ORDER BY price DESC;

-- 多列排序
SELECT * FROM tutorial.orders ORDER BY order_date ASC, price DESC;

-- 按表达式排序
SELECT
    user_id,
    count() AS orders,
    sum(price * quantity) AS spent
FROM tutorial.orders
GROUP BY user_id
ORDER BY spent DESC;

-- NULLS LAST / NULLS FIRST
SELECT * FROM tutorial.orders ORDER BY city ASC NULLS LAST;

2.6 LIMIT 与 LIMIT BY

-- 标准 LIMIT
SELECT * FROM tutorial.orders ORDER BY price DESC LIMIT 5;

-- LIMIT n OFFSET m
SELECT * FROM tutorial.orders ORDER BY order_id LIMIT 3 OFFSET 2;

-- LIMIT BY:每个分组取前 N 行(ClickHouse 特有)
-- 每个 category 取价格最高的 2 个商品
SELECT
    category,
    product_name,
    price
FROM tutorial.orders
ORDER BY price DESC
LIMIT 2 BY category;

💡 提示LIMIT BY 是 ClickHouse 独有的语法,在"每个分类取 Top N"场景下非常实用,无需窗口函数。

2.7 DISTINCT 去重

-- 单列去重
SELECT DISTINCT city FROM tutorial.orders;

-- 多列去重
SELECT DISTINCT category, status FROM tutorial.orders;

-- count(DISTINCT ...) — 注意:ClickHouse 推荐使用 uniq() 函数
SELECT
    uniq(user_id)          AS unique_users,      -- 近似计算(快)
    uniqExact(user_id)     AS exact_unique_users, -- 精确计算(慢)
    count(DISTINCT city)   AS unique_cities       -- 精确计算
FROM tutorial.orders;

3. INSERT INTO 写入

3.1 VALUES 写入

-- 单行写入(不推荐,效率低)
INSERT INTO tutorial.orders VALUES
    (1011, 6, 101, 'iPhone 15', 'Electronics', 1, 5999.00, '2026-05-11', now(), 'pending', 'Beijing');

-- 批量写入(推荐)
INSERT INTO tutorial.orders VALUES
    (1012, 7, 201, '运动鞋',     'Sports',      2, 699.00,  '2026-05-11', now(), 'pending', 'Chengdu'),
    (1013, 8, 301, 'Python编程书', 'Books',       1, 89.00,   '2026-05-11', now(), 'pending', 'Wuhan'),
    (1014, 6, 102, 'MacBook Pro', 'Electronics', 1, 12999.00, '2026-05-11', now(), 'pending', 'Beijing');

3.2 SELECT 写入

-- 从查询结果写入
INSERT INTO tutorial.orders
SELECT
    order_id + 10000,
    user_id,
    product_id,
    product_name,
    category,
    quantity,
    price * 0.8,        -- 打 8 折
    order_date + INTERVAL 1 YEAR,
    created_at + INTERVAL 1 YEAR,
    status,
    city
FROM tutorial.orders
WHERE order_date >= '2026-04-01' AND order_date < '2026-05-01';

-- 创建汇总表并写入
CREATE TABLE tutorial.daily_summary
(
    summary_date Date,
    category String,
    total_orders UInt64,
    total_revenue Decimal128(2),
    avg_order_value Decimal128(2)
)
ENGINE = SummingMergeTree((total_orders, total_revenue))
ORDER BY (summary_date, category);

INSERT INTO tutorial.daily_summary
SELECT
    order_date AS summary_date,
    category,
    count() AS total_orders,
    sum(price * quantity) AS total_revenue,
    avg(price * quantity) AS avg_order_value
FROM tutorial.orders
WHERE status != 'cancelled'
GROUP BY order_date, category;

3.3 FORMAT 子句

-- JSONEachRow 格式
INSERT INTO tutorial.orders FORMAT JSONEachRow
{"order_id": 2001, "user_id": 9, "product_id": 101, "product_name": "iPhone 15", "category": "Electronics", "quantity": 1, "price": 5999.00, "order_date": "2026-05-11", "created_at": "2026-05-11 10:00:00", "status": "pending", "city": "Hangzhou"}
{"order_id": 2002, "user_id": 10, "product_id": 201, "product_name": "运动鞋", "category": "Sports", "quantity": 1, "price": 699.00, "order_date": "2026-05-11", "created_at": "2026-05-11 11:00:00", "status": "pending", "city": "Nanjing"};

-- CSV 格式
INSERT INTO tutorial.orders FORMAT CSV
2003,11,302,"ClickHouse实战","Books",1,79.00,"2026-05-11","2026-05-11 12:00:00","pending","Xian"

-- TabSeparated 格式(TSV)
INSERT INTO tutorial.orders FORMAT TabSeparated
2004	12	103	AirPods Pro	Electronics	1	1899.00	2026-05-11	2026-05-11 13:00:00	pending	Hangzhou

3.4 支持的输出格式

-- 导出为 JSON
SELECT * FROM tutorial.orders LIMIT 5 FORMAT JSON;

-- 导出为 CSV
SELECT * FROM tutorial.orders LIMIT 5 FORMAT CSVWithNames;

-- 导出为 Markdown 表格
SELECT * FROM tutorial.orders LIMIT 5 FORMAT Markdown;

-- 导出为 Parquet
SELECT * FROM tutorial.orders LIMIT 100 FORMAT Parquet;

-- 查看所有支持的格式
SELECT * FROM system.formats ORDER BY name;

4. ALTER TABLE 修改表结构

4.1 列操作

-- 添加列
ALTER TABLE tutorial.orders ADD COLUMN discount Float32 DEFAULT 1.0;
ALTER TABLE tutorial.orders ADD COLUMN remarks String AFTER status;

-- 修改列类型
ALTER TABLE tutorial.orders MODIFY COLUMN discount Decimal128(2);

-- 重命名列
ALTER TABLE tutorial.orders RENAME COLUMN remarks TO note;

-- 删除列
ALTER TABLE tutorial.orders DROP COLUMN note;

-- 添加列注释
ALTER TABLE tutorial.orders COMMENT COLUMN product_name '商品名称';

4.2 分区操作

-- 删除分区
ALTER TABLE tutorial.orders DROP PARTITION '202604';

-- 分离分区
ALTER TABLE tutorial.orders DETACH PARTITION '202605';

-- 附加分区
ALTER TABLE tutorial.orders ATTACH PARTITION '202605';

-- 冻结分区(备份用)
ALTER TABLE tutorial.orders FREEZE PARTITION '202605';

-- 移动分区到另一个表
ALTER TABLE tutorial.orders MOVE PARTITION '202604' TO TABLE tutorial.orders_archive;

4.3 数据修改

-- ⚠️ UPDATE 是异步操作(mutation),不会立即生效
ALTER TABLE tutorial.orders
UPDATE status = 'completed'
WHERE order_id = 1005;

-- ⚠️ DELETE 也是异步操作
ALTER TABLE tutorial.orders
DELETE WHERE status = 'cancelled';

-- 查看 mutation 进度
SELECT
    database,
    table,
    mutation_id,
    command,
    create_time,
    is_done,
    latest_fail_reason
FROM system.mutations
WHERE database = 'tutorial'
ORDER BY create_time DESC;

⚠️ 注意UPDATEDELETEmutation 操作,代价很高!它会重写整个数据部分。不要频繁执行,更不要用于高频更新场景。


5. CREATE / DROP / SHOW / DESCRIBE

5.1 创建表

-- 标准建表
CREATE TABLE tutorial.products
(
    product_id UInt64,
    name String,
    category LowCardinality(String),
    price Decimal128(2),
    stock UInt32,
    created_at DateTime DEFAULT now()
)
ENGINE = MergeTree()
ORDER BY product_id;

-- IF NOT EXISTS
CREATE TABLE IF NOT EXISTS tutorial.products (...);

-- AS 复制结构
CREATE TABLE tutorial.products_backup AS tutorial.products;

-- 从查询结果建表
CREATE TABLE tutorial.expensive_products
ENGINE = MergeTree()
ORDER BY product_id
AS
SELECT * FROM tutorial.products WHERE price > 5000;

5.2 删除表

DROP TABLE IF EXISTS tutorial.products_backup;

-- 清空表(ClickHouse 特有)
TRUNCATE TABLE tutorial.expensive_products;

5.3 查看表信息

-- 查看数据库
SHOW DATABASES;

-- 查看当前数据库的表
SHOW TABLES;

-- 查看所有表及引擎
SELECT name, engine, partition_key, sorting_key, total_rows
FROM system.tables
WHERE database = 'tutorial';

-- 查看表结构
DESCRIBE TABLE tutorial.orders;

-- 查看建表语句
SHOW CREATE TABLE tutorial.orders;

-- 查看表大小
SELECT
    table,
    formatReadableSize(sum(bytes_on_disk)) AS total_size,
    sum(rows) AS total_rows,
    count() AS part_count
FROM system.parts
WHERE database = 'tutorial' AND active = 1
GROUP BY table;

5.4 EXISTS 检查

-- 检查表是否存在
SELECT exists('tutorial.orders');

-- 检查数据库是否存在
SELECT exists('tutorial');

6. ClickHouse 特殊函数

6.1 materialize()

-- 将常量列物化为实际列(在 INSERT ... SELECT 中使用)
INSERT INTO tutorial.products (product_id, name, category, price, stock)
SELECT
    number,
    concat('Product_', toString(number)),
    'Default',
    100.00,
    10
FROM numbers(1000);

6.2 ignore()

-- 忽略某些列(在 INSERT 时跳过)
INSERT INTO tutorial.orders (order_id, user_id, product_id, product_name, category, quantity, price, order_date, created_at, status, city)
SELECT
    ignore()::UInt64,     -- 跳过原始 order_id
    user_id,
    product_id,
    product_name,
    ignore(),             -- 跳过 category
    quantity,
    price,
    order_date,
    created_at,
    status,
    city
FROM tutorial.orders;

6.3 常用内置函数

SELECT
    -- 类型转换
    toUInt32('123')                     AS int_val,
    toString(456)                       AS str_val,
    toDate('2026-05-11')               AS date_val,
    toDateTime('2026-05-11 10:30:00')  AS dt_val,

    -- 条件函数
    if(1 > 0, 'yes', 'no')              AS conditional,
    multiIf(
        1 > 100, 'high',
        1 > 10,  'medium',
        'low'
    )                                   AS multi_conditional,

    -- NULL 处理
    coalesce(NULL, NULL, 'default')     AS first_not_null,
    ifNull(NULL, 'default')             AS null_to_default,
    nullIf(0, 0)                        AS make_null,     -- 如果相等则返回 NULL

    -- 类型检查
    toTypeName(123)                     AS type_name,
    isFinite(1/0)                       AS is_finite,     -- false
    isNaN(0/0)                          AS is_nan;        -- true

7. 子查询与 CTE

-- 子查询
SELECT *
FROM tutorial.orders
WHERE user_id IN (
    SELECT user_id
    FROM tutorial.orders
    GROUP BY user_id
    HAVING sum(price * quantity) > 5000
);

-- CTE (WITH 子句)
WITH
    top_users AS (
        SELECT
            user_id,
            sum(price * quantity) AS total_spent
        FROM tutorial.orders
        WHERE status != 'cancelled'
        GROUP BY user_id
        ORDER BY total_spent DESC
        LIMIT 5
    ),
    user_orders AS (
        SELECT
            o.user_id,
            o.order_id,
            o.product_name,
            o.price * o.quantity AS order_amount
        FROM tutorial.orders AS o
        INNER JOIN top_users AS t ON o.user_id = t.user_id
    )
SELECT
    user_id,
    count() AS order_count,
    sum(order_amount) AS total_amount,
    groupArray(order_id) AS order_ids
FROM user_orders
GROUP BY user_id
ORDER BY total_amount DESC;

8. JOIN 操作

-- 创建关联表
CREATE TABLE tutorial.users
(
    user_id UInt64,
    username String,
    email String,
    level LowCardinality(String)
)
ENGINE = MergeTree()
ORDER BY user_id;

INSERT INTO tutorial.users VALUES
    (1, 'alice', '[email protected]', 'vip'),
    (2, 'bob',   '[email protected]', 'normal'),
    (3, 'carol', '[email protected]', 'vip'),
    (4, 'dave',  '[email protected]', 'normal'),
    (5, 'eve',   '[email protected]', 'vip');

-- INNER JOIN
SELECT
    o.order_id,
    u.username,
    o.product_name,
    o.price * o.quantity AS amount
FROM tutorial.orders AS o
INNER JOIN tutorial.users AS u ON o.user_id = u.user_id
WHERE o.status != 'cancelled';

-- LEFT JOIN(保留左表所有行)
SELECT
    u.username,
    count(o.order_id) AS order_count,
    sum(o.price * o.quantity) AS total_spent
FROM tutorial.users AS u
LEFT JOIN tutorial.orders AS o ON u.user_id = o.user_id
GROUP BY u.username
ORDER BY total_spent DESC;

-- ALL JOIN(右表全部加载到内存,适合小表关联大表)
SELECT
    o.order_id,
    u.username,
    o.product_name
FROM tutorial.orders AS o
ALL INNER JOIN tutorial.users AS u ON o.user_id = u.user_id;

-- ASOF JOIN(近似时间匹配)
-- 找到每个订单之前最近的用户状态变更
-- SELECT * FROM orders ASOF JOIN user_status ON orders.user_id = user_status.user_id AND orders.created_at >= user_status.changed_at;

⚠️ 注意:ClickHouse 的 JOIN 默认会将右表全部加载到内存(ALL JOIN)。大表关联大表时,请使用 JOIN 算法提示或 Distributed 表。


9. 实用查询模板

9.1 数据概况查询

-- 查看表的基本统计
SELECT
    count() AS total_rows,
    min(order_date) AS earliest_date,
    max(order_date) AS latest_date,
    uniq(user_id) AS unique_users,
    uniq(product_name) AS unique_products,
    round(sum(price * quantity), 2) AS total_revenue,
    round(avg(price * quantity), 2) AS avg_order_value,
    formatReadableSize(sum(bytes_on_disk)) AS data_size
FROM tutorial.orders
WHERE active = 1;

9.2 时间序列分析

-- 每日订单趋势
SELECT
    order_date,
    count() AS orders,
    sum(price * quantity) AS revenue,
    uniq(user_id) AS unique_users,
    sum(price * quantity) / uniq(user_id) AS arpu,
    countIf(status = 'delivered') / count() AS completion_rate
FROM tutorial.orders
GROUP BY order_date
ORDER BY order_date;

扩展阅读

  1. SELECT 语法
  2. INSERT 语法
  3. ALTER TABLE
  4. 支持的输出格式
  5. ClickHouse 函数大全