#General performance
#Track MOVE PARTITION execution time
Useful when diagnosing Clickhouse Keeper issues or observing DDL query degradation.
SELECT
toStartOfHour(event_time) AS hour,
round(avg(query_duration_ms)/1000,2) AS avg_execution_time_s,
round(quantile(.5)(query_duration_ms/1000),2) as p50,
round(quantile(.99)(query_duration_ms/1000),2) as p99,
round(quantile(.999)(query_duration_ms/1000),2) as p999,
round(min(query_duration_ms/1000),2) as min,
round(max(query_duration_ms/1000),2) as max,
count() AS query_count
FROM clusterAllReplicas('default', merge(system, 'query_log*'))
WHERE
(query LIKE '%MOVE PARTITION%')
AND type = 'QueryFinish'
AND event_date >= '2025-08-01'
GROUP BY hour
ORDER BY hour ASC;
#Most used functions
SELECT
f AS function,
count() AS hits
FROM system.query_log
ARRAY JOIN used_functions AS f
WHERE type = 'QueryFinish'
AND event_time >= now() - INTERVAL 7 DAY
AND notEmpty(used_functions)
GROUP BY f
ORDER BY hits DESC
LIMIT 50;
#Most used columns
SELECT
col AS column,
count() AS hits
FROM system.query_log
ARRAY JOIN columns AS col
WHERE type = 'QueryFinish'
AND query_kind = 'Select'
AND event_time >= now() - INTERVAL 7 DAY
AND notEmpty(columns)
GROUP BY col
ORDER BY hits DESC
LIMIT 50;
#Columns most commonly used in WHERE clauses
Useful when building or rebuilding schemas for performance. Update the table filter as needed.
WITH
any(query) AS q,
any(tables) AS _tables,
arrayJoin(extractAll(query, '\\b(?:PRE)?WHERE\\s+(.*?)\\s+(?:GROUP BY|ORDER BY|UNION|SETTINGS|FORMAT$)')) AS w,
any(columns) AS cols,
arrayFilter(x -> (position(w, extract(x, '\\.(`[^`]+`|[^\\.]+)')) > 0), columns) AS c,
arrayJoin(c) AS c2
SELECT
c2,
count()
FROM system.query_log
WHERE (event_time >= (now() - toIntervalDay(1)))
AND arrayExists(x -> (x LIKE '%my_table%'), tables)
AND (query ILIKE 'SELECT%')
GROUP BY c2
ORDER BY count() ASC
#A/B test two queries
Compare performance between two query executions by their query IDs.
WITH
query_id='8c050082-428e-4523-847a-caf29511d6ba' AS first,
query_id='618e0c55-e21d-4630-97e7-5f82e2475c32' AS second,
arrayConcat(mapKeys(ProfileEvents), ['query_duration_ms', 'read_rows', 'read_bytes', 'written_rows', 'written_bytes', 'result_rows', 'result_bytes', 'memory_usage', 'normalized_query_hash', 'peak_threads_usage', 'query_cache_usage']) AS metrics,
arrayConcat(mapValues(ProfileEvents), [query_duration_ms, read_rows, read_bytes, written_rows, written_bytes, result_rows, result_bytes, memory_usage, normalized_query_hash, peak_threads_usage, toUInt64(query_cache_usage)]) AS metrics_values
SELECT
metrics[i] AS metric,
anyIf(metrics_values[i], first) AS v1,
anyIf(metrics_values[i], second) AS v2,
formatReadableQuantity(v1 - v2)
FROM clusterAllReplicas(default, system.query_log)
ARRAY JOIN arrayEnumerate(metrics) AS i
WHERE (first OR second) AND (type = 2)
GROUP BY metric
HAVING v1 != v2
ORDER BY
(v2 - v1) / (v1 + v2) DESC,
v2 DESC,
metric ASC
#Queries consuming most CPU/RAM
The big one. Shows detailed resource consumption by query pattern.
SELECT
hostName() as host,
normalized_query_hash,
min(event_time),
max(event_time),
replace(substr(argMax(query, utime), 1, 80), '\n', ' ') AS query,
argMax(query_id, utime) AS sample_query_id,
count(),
sum(query_duration_ms) / 1000 AS QueriesDuration,
sum(ProfileEvents['RealTimeMicroseconds']) / 1000000 AS RealTime,
sum(ProfileEvents['UserTimeMicroseconds'] as utime) / 1000000 AS UserTime,
sum(ProfileEvents['SystemTimeMicroseconds']) / 1000000 AS SystemTime,
sum(ProfileEvents['DiskReadElapsedMicroseconds']) / 1000000 AS DiskReadTime,
sum(ProfileEvents['DiskWriteElapsedMicroseconds']) / 1000000 AS DiskWriteTime,
sum(ProfileEvents['NetworkSendElapsedMicroseconds']) / 1000000 AS NetworkSendTime,
sum(ProfileEvents['NetworkReceiveElapsedMicroseconds']) / 1000000 AS NetworkReceiveTime,
sum(ProfileEvents['ZooKeeperWaitMicroseconds']) / 1000000 AS ZooKeeperWaitTime,
sum(ProfileEvents['OSIOWaitMicroseconds']) / 1000000 AS OSIOWaitTime,
sum(ProfileEvents['OSCPUWaitMicroseconds']) / 1000000 AS OSCPUWaitTime,
sum(ProfileEvents['OSCPUVirtualTimeMicroseconds']) / 1000000 AS OSCPUVirtualTime,
formatReadableSize(sum(ProfileEvents['NetworkReceiveBytes'])) AS NetworkReceiveBytes,
formatReadableSize(sum(ProfileEvents['NetworkSendBytes'])) AS NetworkSendBytes,
sum(ProfileEvents['SelectedParts']) as SelectedParts,
sum(ProfileEvents['SelectedRanges']) as SelectedRanges,
sum(ProfileEvents['SelectedMarks']) as SelectedMarks,
sum(ProfileEvents['SelectedRows']) as SelectedRows,
sum(ProfileEvents['SelectedBytes']) as SelectedBytes,
sum(ProfileEvents['FileOpen']) as FileOpen,
sum(ProfileEvents['ZooKeeperTransactions']) as ZooKeeperTransactions,
formatReadableSize(sum(ProfileEvents['OSReadBytes'])) as OSReadBytesExcludePageCache,
formatReadableSize(sum(ProfileEvents['OSWriteBytes'])) as OSWriteBytesExcludePageCache,
formatReadableSize(sum(ProfileEvents['OSReadChars'])) as OSReadBytesIncludePageCache,
formatReadableSize(sum(ProfileEvents['OSWriteChars'])) as OSWriteCharsIncludePageCache,
formatReadableSize(quantile(0.97)(memory_usage)) as MemoryUsageQ97,
sum(read_rows) AS ReadRows,
formatReadableSize(sum(read_bytes)) AS ReadBytes,
sum(written_rows) AS WrittenRows,
formatReadableSize(sum(written_bytes)) AS WrittenBytes,
sum(result_rows) AS ResultRows,
formatReadableSize(sum(result_bytes)) AS ResultBytes
FROM clusterAllReplicas('default', system.query_log)
WHERE event_date >= today() AND type in (2,4)
GROUP BY
GROUPING SETS (
(normalized_query_hash, host),
(host),
())
ORDER BY OSCPUVirtualTime DESC
LIMIT 30
FORMAT Vertical;
#Storage diagnostics
#Table size and row counts
SELECT
name,
engine,
formatReadableQuantity(total_rows) AS rows,
formatReadableSize(total_bytes) AS size,
active_parts
FROM clusterAllReplicas(default, system.tables)
WHERE database = 'my_database' AND engine != 'MaterializedView'
#Check partition locations
See which disk each partition lives on.
SELECT table, partition, disk_name, count() AS num_parts
FROM system.parts
WHERE database = 'my_database' AND table = 'my_table' AND active
GROUP BY ALL
ORDER BY partition;
#Get size of parts by disk
SELECT database, table, disk_name, sum(bytes_on_disk) AS bytes
FROM system.parts
WHERE active
GROUP BY database, table, disk_name
ORDER BY database, table;
#Most parts per partition
Clickhouse won’t merge parts with combined size greater than 150 GB - check max_bytes_to_merge_at_max_space_in_pool setting.
SELECT
database,
table,
partition,
sum(rows) AS rows,
count() AS part_count
FROM system.parts
WHERE (active = 1) AND (table LIKE '%') AND (database LIKE '%')
GROUP BY database, table, partition
ORDER BY part_count DESC
LIMIT 20
#S3/R2 diagnostics
#Locate parts on remote storage
SELECT disk_name, local_path, remote_path, size
FROM system.remote_data_paths
ORDER BY local_path
LIMIT 100
#S3/R2 operation statistics
Useful for estimating external storage costs.
SELECT
sumIf(value, event = 'S3PutObject') AS S3PutObject,
sumIf(value, event = 'S3GetObject') AS S3GetObject,
sumIf(value, event = 'WriteBufferFromS3Bytes') AS WriteBufferFromS3Bytes,
sumIf(value, event = 'ReadBufferFromS3Bytes') AS ReadBufferFromS3Bytes
FROM system.events;
#Local cache
#Check if cache is populated
DESCRIBE FILESYSTEM CACHE 'r2_cache' FORMAT Vertical
#Cache statistics
SELECT * FROM system.metrics
WHERE metric ILIKE '%filesystemcache%'
ORDER BY metric;
SELECT
cache_name,
formatReadableSize(sum(size)) AS size
FROM system.filesystem_cache
GROUP BY ALL
#Replication
#Remove stuck tasks for empty partitions
Solves a common problem where tasks remain stuck in the replication queue for partitions that were already removed or never contained data.
Warning: Review the generated statements before executing.
SELECT 'ALTER TABLE ' || database || '.' || table || ' DROP PARTITION ID \''|| partition_id || '\';' FROM
(SELECT DISTINCT database, table, extract(new_part_name, '^[^_]+') AS partition_id
FROM clusterAllReplicas('{cluster}', system.replication_queue)) AS rq
LEFT JOIN
(SELECT database, table, partition_id, sum(rows) AS rows_count, count() AS part_count
FROM clusterAllReplicas('{cluster}', system.parts)
WHERE active
GROUP BY database, table, partition_id) AS p
USING (database, table, partition_id)
WHERE p.rows_count = 0 AND p.part_count = 0
FORMAT TSVRaw;
#Check for missing blocks (consistency check)
SELECT
database,
table,
partition_id,
ranges.1 AS previous_part,
ranges.2 AS next_part,
ranges.3 AS previous_block_number,
ranges.4 AS next_block_number,
range(toUInt64(previous_block_number + 1), toUInt64(next_block_number)) AS missing_block_numbers
FROM
(
WITH
arrayPopFront(groupArray(min_block_number) AS min) AS min_adj,
arrayPopBack(groupArray(max_block_number) AS max) AS max_adj,
arrayFilter((x, y, z) -> (y != (z + 1)), arrayZip(arrayPopBack(groupArray(name) AS name_arr), arrayPopFront(name_arr), max_adj, min_adj), min_adj, max_adj) AS missing_ranges
SELECT
database,
table,
partition_id,
missing_ranges
FROM
(
SELECT *
FROM system.parts
WHERE active AND (table = 'my_table') AND (partition_id = '202108') AND active
ORDER BY min_block_number ASC
)
GROUP BY database, table, partition_id
)
ARRAY JOIN missing_ranges AS ranges
#Troubleshooting
#Check cluster connectivity
SELECT count()
FROM clusterAllReplicas('{cluster}', cluster('{cluster}', system.one))
#Node errors
SELECT
hostName(),
*
FROM clusterAllReplicas('{cluster}', system.clusters)
WHERE errors_count > 0
#Compare query logs between two time intervals
WITH
toStartOfInterval(event_time, INTERVAL 5 MINUTE) = '2025-10-30 13:00:00' AS before,
toStartOfInterval(event_time, INTERVAL 5 MINUTE) = '2025-10-30 15:00:00' AS after
SELECT
normalized_query_hash,
anyIf(query, before) AS QueryBefore,
anyIf(query, after) AS QueryAfter,
countIf(before) AS CountBefore,
countIf(after) AS CountAfter,
sumIf(query_duration_ms, before) / 1000 AS QueriesDurationBefore,
sumIf(query_duration_ms, after) / 1000 AS QueriesDurationAfter,
sumIf(ProfileEvents.Values[indexOf(ProfileEvents.Names, 'OSCPUVirtualTimeMicroseconds')], before) / 1000000 AS OSCPUVirtualTimeBefore,
sumIf(ProfileEvents.Values[indexOf(ProfileEvents.Names, 'OSCPUVirtualTimeMicroseconds')], after) / 1000000 AS OSCPUVirtualTimeAfter,
sumIf(ProfileEvents.Values[indexOf(ProfileEvents.Names, 'SelectedRanges')], before) AS SelectedRangesBefore,
sumIf(ProfileEvents.Values[indexOf(ProfileEvents.Names, 'SelectedRanges')], after) AS SelectedRangesAfter,
sumIf(read_rows, before) AS ReadRowsBefore,
sumIf(read_rows, after) AS ReadRowsAfter,
formatReadableSize(sumIf(read_bytes, before)) AS ReadBytesBefore,
formatReadableSize(sumIf(read_bytes, after)) AS ReadBytesAfter
FROM system.query_log
WHERE (before OR after) AND type IN (2, 4)
GROUP BY normalized_query_hash
WITH TOTALS
ORDER BY SelectedRangesAfter - SelectedRangesBefore DESC
LIMIT 10
FORMAT Vertical
#Debug Clickhouse freezing/hanging
Get stack traces for every running thread. Useful when Clickhouse hangs for no apparent reason.
SELECT
arrayStringConcat(arrayMap(x -> concat('0x', lower(hex(x)), '\t', demangle(addressToSymbol(x))), trace), '\n') AS trace_functions,
count()
FROM system.stack_trace
GROUP BY trace_functions
ORDER BY count() DESC
SETTINGS allow_introspection_functions=1
FORMAT Vertical;