ClickHouse 教程 / SQL 基础(SELECT/INSERT)
SQL 基础(SELECT / INSERT)
本章介绍 ClickHouse 中最常用的 SQL 语法,包括查询、写入、表结构变更等操作。
1. 前置准备:创建示例表
-- 创建示例数据库
CREATE DATABASE IF NOT EXISTS tutorial;
-- 订单表
CREATE TABLE tutorial.orders
(
order_id UInt64,
user_id UInt64,
product_id UInt64,
product_name String,
category LowCardinality(String),
quantity UInt32,
price Decimal128(2),
order_date Date,
created_at DateTime,
status LowCardinality(String),
city LowCardinality(String)
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(order_date)
ORDER BY (order_date, user_id, order_id);
-- 插入测试数据
INSERT INTO tutorial.orders VALUES
(1001, 1, 101, 'iPhone 15', 'Electronics', 1, 5999.00, '2026-04-01', '2026-04-01 10:30:00', 'delivered', 'Beijing'),
(1002, 2, 102, 'MacBook Pro', 'Electronics', 1, 12999.00, '2026-04-01', '2026-04-01 11:00:00', 'delivered', 'Shanghai'),
(1003, 1, 103, 'AirPods Pro', 'Electronics', 2, 1899.00, '2026-04-02', '2026-04-02 09:15:00', 'shipped', 'Beijing'),
(1004, 3, 201, '运动鞋', 'Sports', 1, 699.00, '2026-04-02', '2026-04-02 14:20:00', 'delivered', 'Guangzhou'),
(1005, 2, 202, '瑜伽垫', 'Sports', 1, 199.00, '2026-04-03', '2026-04-03 08:45:00', 'pending', 'Shanghai'),
(1006, 4, 101, 'iPhone 15', 'Electronics', 1, 5999.00, '2026-04-03', '2026-04-03 16:30:00', 'cancelled', 'Beijing'),
(1007, 3, 301, 'Python编程书', 'Books', 3, 89.00, '2026-04-04', '2026-04-04 10:00:00', 'delivered', 'Guangzhou'),
(1008, 5, 102, 'MacBook Pro', 'Electronics', 1, 12999.00, '2026-04-05', '2026-04-05 09:30:00', 'shipped', 'Shenzhen'),
(1009, 1, 202, '瑜伽垫', 'Sports', 1, 199.00, '2026-05-01', '2026-05-01 11:00:00', 'delivered', 'Beijing'),
(1010, 2, 302, 'ClickHouse实战', 'Books', 1, 79.00, '2026-05-01', '2026-05-01 15:45:00', 'pending', 'Shanghai');
2. SELECT 查询
2.1 基础查询
-- 选择所有列
SELECT * FROM tutorial.orders;
-- 选择特定列
SELECT order_id, product_name, price
FROM tutorial.orders;
-- 使用表达式
SELECT
order_id,
product_name,
price,
quantity,
price * quantity AS total_amount,
round(price * 0.9, 2) AS discounted_price
FROM tutorial.orders;
2.2 WHERE 过滤
-- 等值过滤
SELECT * FROM tutorial.orders WHERE status = 'delivered';
-- 范围过滤
SELECT * FROM tutorial.orders WHERE price >= 1000 AND price <= 10000;
-- IN 查询
SELECT * FROM tutorial.orders WHERE city IN ('Beijing', 'Shanghai');
-- LIKE 模糊匹配
SELECT * FROM tutorial.orders WHERE product_name LIKE '%Pro%';
-- 正则匹配
SELECT * FROM tutorial.orders WHERE match(product_name, '^(iPhone|MacBook)');
-- NULL 过滤
SELECT * FROM tutorial.orders WHERE status IS NOT NULL;
-- 复合条件
SELECT
order_id,
product_name,
price,
status
FROM tutorial.orders
WHERE category = 'Electronics'
AND price > 2000
AND status NOT IN ('cancelled')
AND order_date >= '2026-04-01';
2.3 GROUP BY 聚合
-- 基础分组
SELECT
category,
count() AS order_count,
sum(price * quantity) AS total_revenue,
avg(price) AS avg_price,
min(price) AS min_price,
max(price) AS max_price,
uniq(user_id) AS unique_users,
round(sum(price * quantity) / uniq(user_id), 2) AS arpu
FROM tutorial.orders
WHERE status != 'cancelled'
GROUP BY category
ORDER BY total_revenue DESC;
预期输出:
┌─category────┬─order_count─┬─total_revenue─┬─avg_price─┬─min_price─┬─max_price─┬─unique_users─┬───arpu─┐
│ Electronics │ 5 │ 41795.00 │ 7299.20 │ 1899.00 │ 12999.00 │ 4 │ 10448.75 │
│ Sports │ 3 │ 1097.00 │ 365.67 │ 199.00 │ 699.00 │ 3 │ 365.67 │
│ Books │ 2 │ 346.00 │ 84.00 │ 79.00 │ 89.00 │ 2 │ 173.00 │
└─────────────┴─────────────┴───────────────┴───────────┴───────────┴───────────┴──────────────┴─────────┘
2.4 HAVING 过滤
-- 对聚合结果进行过滤
SELECT
user_id,
count() AS order_count,
sum(price * quantity) AS total_spent
FROM tutorial.orders
WHERE status != 'cancelled'
GROUP BY user_id
HAVING total_spent > 1000
ORDER BY total_spent DESC;
2.5 ORDER BY 排序
-- 单列排序
SELECT * FROM tutorial.orders ORDER BY price DESC;
-- 多列排序
SELECT * FROM tutorial.orders ORDER BY order_date ASC, price DESC;
-- 按表达式排序
SELECT
user_id,
count() AS orders,
sum(price * quantity) AS spent
FROM tutorial.orders
GROUP BY user_id
ORDER BY spent DESC;
-- NULLS LAST / NULLS FIRST
SELECT * FROM tutorial.orders ORDER BY city ASC NULLS LAST;
2.6 LIMIT 与 LIMIT BY
-- 标准 LIMIT
SELECT * FROM tutorial.orders ORDER BY price DESC LIMIT 5;
-- LIMIT n OFFSET m
SELECT * FROM tutorial.orders ORDER BY order_id LIMIT 3 OFFSET 2;
-- LIMIT BY:每个分组取前 N 行(ClickHouse 特有)
-- 每个 category 取价格最高的 2 个商品
SELECT
category,
product_name,
price
FROM tutorial.orders
ORDER BY price DESC
LIMIT 2 BY category;
💡 提示:LIMIT BY 是 ClickHouse 独有的语法,在"每个分类取 Top N"场景下非常实用,无需窗口函数。
2.7 DISTINCT 去重
-- 单列去重
SELECT DISTINCT city FROM tutorial.orders;
-- 多列去重
SELECT DISTINCT category, status FROM tutorial.orders;
-- count(DISTINCT ...) — 注意:ClickHouse 推荐使用 uniq() 函数
SELECT
uniq(user_id) AS unique_users, -- 近似计算(快)
uniqExact(user_id) AS exact_unique_users, -- 精确计算(慢)
count(DISTINCT city) AS unique_cities -- 精确计算
FROM tutorial.orders;
3. INSERT INTO 写入
3.1 VALUES 写入
-- 单行写入(不推荐,效率低)
INSERT INTO tutorial.orders VALUES
(1011, 6, 101, 'iPhone 15', 'Electronics', 1, 5999.00, '2026-05-11', now(), 'pending', 'Beijing');
-- 批量写入(推荐)
INSERT INTO tutorial.orders VALUES
(1012, 7, 201, '运动鞋', 'Sports', 2, 699.00, '2026-05-11', now(), 'pending', 'Chengdu'),
(1013, 8, 301, 'Python编程书', 'Books', 1, 89.00, '2026-05-11', now(), 'pending', 'Wuhan'),
(1014, 6, 102, 'MacBook Pro', 'Electronics', 1, 12999.00, '2026-05-11', now(), 'pending', 'Beijing');
3.2 SELECT 写入
-- 从查询结果写入
INSERT INTO tutorial.orders
SELECT
order_id + 10000,
user_id,
product_id,
product_name,
category,
quantity,
price * 0.8, -- 打 8 折
order_date + INTERVAL 1 YEAR,
created_at + INTERVAL 1 YEAR,
status,
city
FROM tutorial.orders
WHERE order_date >= '2026-04-01' AND order_date < '2026-05-01';
-- 创建汇总表并写入
CREATE TABLE tutorial.daily_summary
(
summary_date Date,
category String,
total_orders UInt64,
total_revenue Decimal128(2),
avg_order_value Decimal128(2)
)
ENGINE = SummingMergeTree((total_orders, total_revenue))
ORDER BY (summary_date, category);
INSERT INTO tutorial.daily_summary
SELECT
order_date AS summary_date,
category,
count() AS total_orders,
sum(price * quantity) AS total_revenue,
avg(price * quantity) AS avg_order_value
FROM tutorial.orders
WHERE status != 'cancelled'
GROUP BY order_date, category;
3.3 FORMAT 子句
-- JSONEachRow 格式
INSERT INTO tutorial.orders FORMAT JSONEachRow
{"order_id": 2001, "user_id": 9, "product_id": 101, "product_name": "iPhone 15", "category": "Electronics", "quantity": 1, "price": 5999.00, "order_date": "2026-05-11", "created_at": "2026-05-11 10:00:00", "status": "pending", "city": "Hangzhou"}
{"order_id": 2002, "user_id": 10, "product_id": 201, "product_name": "运动鞋", "category": "Sports", "quantity": 1, "price": 699.00, "order_date": "2026-05-11", "created_at": "2026-05-11 11:00:00", "status": "pending", "city": "Nanjing"};
-- CSV 格式
INSERT INTO tutorial.orders FORMAT CSV
2003,11,302,"ClickHouse实战","Books",1,79.00,"2026-05-11","2026-05-11 12:00:00","pending","Xian"
-- TabSeparated 格式(TSV)
INSERT INTO tutorial.orders FORMAT TabSeparated
2004 12 103 AirPods Pro Electronics 1 1899.00 2026-05-11 2026-05-11 13:00:00 pending Hangzhou
3.4 支持的输出格式
-- 导出为 JSON
SELECT * FROM tutorial.orders LIMIT 5 FORMAT JSON;
-- 导出为 CSV
SELECT * FROM tutorial.orders LIMIT 5 FORMAT CSVWithNames;
-- 导出为 Markdown 表格
SELECT * FROM tutorial.orders LIMIT 5 FORMAT Markdown;
-- 导出为 Parquet
SELECT * FROM tutorial.orders LIMIT 100 FORMAT Parquet;
-- 查看所有支持的格式
SELECT * FROM system.formats ORDER BY name;
4. ALTER TABLE 修改表结构
4.1 列操作
-- 添加列
ALTER TABLE tutorial.orders ADD COLUMN discount Float32 DEFAULT 1.0;
ALTER TABLE tutorial.orders ADD COLUMN remarks String AFTER status;
-- 修改列类型
ALTER TABLE tutorial.orders MODIFY COLUMN discount Decimal128(2);
-- 重命名列
ALTER TABLE tutorial.orders RENAME COLUMN remarks TO note;
-- 删除列
ALTER TABLE tutorial.orders DROP COLUMN note;
-- 添加列注释
ALTER TABLE tutorial.orders COMMENT COLUMN product_name '商品名称';
4.2 分区操作
-- 删除分区
ALTER TABLE tutorial.orders DROP PARTITION '202604';
-- 分离分区
ALTER TABLE tutorial.orders DETACH PARTITION '202605';
-- 附加分区
ALTER TABLE tutorial.orders ATTACH PARTITION '202605';
-- 冻结分区(备份用)
ALTER TABLE tutorial.orders FREEZE PARTITION '202605';
-- 移动分区到另一个表
ALTER TABLE tutorial.orders MOVE PARTITION '202604' TO TABLE tutorial.orders_archive;
4.3 数据修改
-- ⚠️ UPDATE 是异步操作(mutation),不会立即生效
ALTER TABLE tutorial.orders
UPDATE status = 'completed'
WHERE order_id = 1005;
-- ⚠️ DELETE 也是异步操作
ALTER TABLE tutorial.orders
DELETE WHERE status = 'cancelled';
-- 查看 mutation 进度
SELECT
database,
table,
mutation_id,
command,
create_time,
is_done,
latest_fail_reason
FROM system.mutations
WHERE database = 'tutorial'
ORDER BY create_time DESC;
⚠️ 注意:UPDATE 和 DELETE 是mutation 操作,代价很高!它会重写整个数据部分。不要频繁执行,更不要用于高频更新场景。
5. CREATE / DROP / SHOW / DESCRIBE
5.1 创建表
-- 标准建表
CREATE TABLE tutorial.products
(
product_id UInt64,
name String,
category LowCardinality(String),
price Decimal128(2),
stock UInt32,
created_at DateTime DEFAULT now()
)
ENGINE = MergeTree()
ORDER BY product_id;
-- IF NOT EXISTS
CREATE TABLE IF NOT EXISTS tutorial.products (...);
-- AS 复制结构
CREATE TABLE tutorial.products_backup AS tutorial.products;
-- 从查询结果建表
CREATE TABLE tutorial.expensive_products
ENGINE = MergeTree()
ORDER BY product_id
AS
SELECT * FROM tutorial.products WHERE price > 5000;
5.2 删除表
DROP TABLE IF EXISTS tutorial.products_backup;
-- 清空表(ClickHouse 特有)
TRUNCATE TABLE tutorial.expensive_products;
5.3 查看表信息
-- 查看数据库
SHOW DATABASES;
-- 查看当前数据库的表
SHOW TABLES;
-- 查看所有表及引擎
SELECT name, engine, partition_key, sorting_key, total_rows
FROM system.tables
WHERE database = 'tutorial';
-- 查看表结构
DESCRIBE TABLE tutorial.orders;
-- 查看建表语句
SHOW CREATE TABLE tutorial.orders;
-- 查看表大小
SELECT
table,
formatReadableSize(sum(bytes_on_disk)) AS total_size,
sum(rows) AS total_rows,
count() AS part_count
FROM system.parts
WHERE database = 'tutorial' AND active = 1
GROUP BY table;
5.4 EXISTS 检查
-- 检查表是否存在
SELECT exists('tutorial.orders');
-- 检查数据库是否存在
SELECT exists('tutorial');
6. ClickHouse 特殊函数
6.1 materialize()
-- 将常量列物化为实际列(在 INSERT ... SELECT 中使用)
INSERT INTO tutorial.products (product_id, name, category, price, stock)
SELECT
number,
concat('Product_', toString(number)),
'Default',
100.00,
10
FROM numbers(1000);
6.2 ignore()
-- 忽略某些列(在 INSERT 时跳过)
INSERT INTO tutorial.orders (order_id, user_id, product_id, product_name, category, quantity, price, order_date, created_at, status, city)
SELECT
ignore()::UInt64, -- 跳过原始 order_id
user_id,
product_id,
product_name,
ignore(), -- 跳过 category
quantity,
price,
order_date,
created_at,
status,
city
FROM tutorial.orders;
6.3 常用内置函数
SELECT
-- 类型转换
toUInt32('123') AS int_val,
toString(456) AS str_val,
toDate('2026-05-11') AS date_val,
toDateTime('2026-05-11 10:30:00') AS dt_val,
-- 条件函数
if(1 > 0, 'yes', 'no') AS conditional,
multiIf(
1 > 100, 'high',
1 > 10, 'medium',
'low'
) AS multi_conditional,
-- NULL 处理
coalesce(NULL, NULL, 'default') AS first_not_null,
ifNull(NULL, 'default') AS null_to_default,
nullIf(0, 0) AS make_null, -- 如果相等则返回 NULL
-- 类型检查
toTypeName(123) AS type_name,
isFinite(1/0) AS is_finite, -- false
isNaN(0/0) AS is_nan; -- true
7. 子查询与 CTE
-- 子查询
SELECT *
FROM tutorial.orders
WHERE user_id IN (
SELECT user_id
FROM tutorial.orders
GROUP BY user_id
HAVING sum(price * quantity) > 5000
);
-- CTE (WITH 子句)
WITH
top_users AS (
SELECT
user_id,
sum(price * quantity) AS total_spent
FROM tutorial.orders
WHERE status != 'cancelled'
GROUP BY user_id
ORDER BY total_spent DESC
LIMIT 5
),
user_orders AS (
SELECT
o.user_id,
o.order_id,
o.product_name,
o.price * o.quantity AS order_amount
FROM tutorial.orders AS o
INNER JOIN top_users AS t ON o.user_id = t.user_id
)
SELECT
user_id,
count() AS order_count,
sum(order_amount) AS total_amount,
groupArray(order_id) AS order_ids
FROM user_orders
GROUP BY user_id
ORDER BY total_amount DESC;
8. JOIN 操作
-- 创建关联表
CREATE TABLE tutorial.users
(
user_id UInt64,
username String,
email String,
level LowCardinality(String)
)
ENGINE = MergeTree()
ORDER BY user_id;
INSERT INTO tutorial.users VALUES
(1, 'alice', '[email protected]', 'vip'),
(2, 'bob', '[email protected]', 'normal'),
(3, 'carol', '[email protected]', 'vip'),
(4, 'dave', '[email protected]', 'normal'),
(5, 'eve', '[email protected]', 'vip');
-- INNER JOIN
SELECT
o.order_id,
u.username,
o.product_name,
o.price * o.quantity AS amount
FROM tutorial.orders AS o
INNER JOIN tutorial.users AS u ON o.user_id = u.user_id
WHERE o.status != 'cancelled';
-- LEFT JOIN(保留左表所有行)
SELECT
u.username,
count(o.order_id) AS order_count,
sum(o.price * o.quantity) AS total_spent
FROM tutorial.users AS u
LEFT JOIN tutorial.orders AS o ON u.user_id = o.user_id
GROUP BY u.username
ORDER BY total_spent DESC;
-- ALL JOIN(右表全部加载到内存,适合小表关联大表)
SELECT
o.order_id,
u.username,
o.product_name
FROM tutorial.orders AS o
ALL INNER JOIN tutorial.users AS u ON o.user_id = u.user_id;
-- ASOF JOIN(近似时间匹配)
-- 找到每个订单之前最近的用户状态变更
-- SELECT * FROM orders ASOF JOIN user_status ON orders.user_id = user_status.user_id AND orders.created_at >= user_status.changed_at;
⚠️ 注意:ClickHouse 的 JOIN 默认会将右表全部加载到内存(ALL JOIN)。大表关联大表时,请使用 JOIN 算法提示或 Distributed 表。
9. 实用查询模板
9.1 数据概况查询
-- 查看表的基本统计
SELECT
count() AS total_rows,
min(order_date) AS earliest_date,
max(order_date) AS latest_date,
uniq(user_id) AS unique_users,
uniq(product_name) AS unique_products,
round(sum(price * quantity), 2) AS total_revenue,
round(avg(price * quantity), 2) AS avg_order_value,
formatReadableSize(sum(bytes_on_disk)) AS data_size
FROM tutorial.orders
WHERE active = 1;
9.2 时间序列分析
-- 每日订单趋势
SELECT
order_date,
count() AS orders,
sum(price * quantity) AS revenue,
uniq(user_id) AS unique_users,
sum(price * quantity) / uniq(user_id) AS arpu,
countIf(status = 'delivered') / count() AS completion_rate
FROM tutorial.orders
GROUP BY order_date
ORDER BY order_date;