My presentation & code snippets from the Snowflake Dutch User Group Meeting

Yesterday I had the pleasure to present my topic ‘optimizing Snowflake from a cost and performance perspective’ at the Dutch User Group…

My presentation & code snippets from the Snowflake Dutch User Group Meeting
Photo by Melissa Scanu

Yesterday I had the pleasure to present my topic ‘optimizing Snowflake from a cost and performance perspective’ at the Dutch User Group. Around 25 people were present, lively discussions plus food & drinks. The session was organized by Daan Bakboord and Snowflake’s Jason Teetz at the Dutch Snowflake office.

Codesnippets

The result cache

This code snippet shows the result of running a query from the result cache and also the effect of using non deterministic SQL functions in your SQL

alter session set USE_CACHED_RESULT = FALSE; 
alter session set query_tag = 'resultcache'; 
use warehouse benchmark; 
alter warehouse benchmark set warehouse_size = XSMALL; 
 
select c.* 
from cost.altimate.store_sales_price sp 
join cost.altimate.customer c 
on sp.ss_customer_sk = c.c_customer_sk 
where c_birth_country like '%REP%' 
limit 5000000; 
 
-- 12 seconds 
 
alter session set USE_CACHED_RESULT = TRUE; 
 
select c.* 
from cost.altimate.store_sales_price sp 
join cost.altimate.customer c 
on sp.ss_customer_sk = c.c_customer_sk 
where c_birth_country like '%REP%' 
limit 5000000; 
 
--  99 ms 
 
-- adding non deterministic 
select c.* 
, current_timestamp()  
from cost.altimate.store_sales_price sp 
join cost.altimate.customer c 
on sp.ss_customer_sk = c.c_customer_sk 
where c_birth_country like '%REP%' 
limit 5000000; 
 
-- 7 seconds

Select *

Run this to find out what the difference is between selecting only 1 column and running a query with select *

alter session set USE_CACHED_RESULT = FALSE; 
alter session set query_tag = 'selectall'; 
use warehouse benchmark; 
alter warehouse benchmark set warehouse_size = XSMALL; 
 
select * 
from cost.altimate.store_sales_price sp 
join cost.altimate.customer c 
on sp.ss_customer_sk = c.c_customer_sk 
where c_birth_country like '%REP%' 
limit 5000000; 
 
-- 21 seconds: 
 
select ss_addr_sk 
from cost.altimate.store_sales_price sp 
join cost.altimate.customer c 
on sp.ss_customer_sk = c.c_customer_sk 
where c_birth_country like '%REP%' 
limit 5000000; 
 
-- 2.9 seconds:

Query Hash values

Find out what the difference is between the two hashes that are stored in the query history and see how they can help you to determine query patterns

alter session set query_tag = 'queryhash'; 
use warehouse benchmark; 
 
select ss_addr_sk, c_birth_country 
from cost.altimate.store_sales_price sp 
join cost.altimate.customer c 
on sp.ss_customer_sk = c.c_customer_sk 
where c_birth_country =  'KOREA, REPUBLIC OF' 
limit 5000000 
; 
 
 
select ss_addr_sk, c_birth_country 
from cost.altimate.store_sales_price sp 
join cost.altimate.customer c 
on sp.ss_customer_sk = c.c_customer_sk 
where c_birth_country = 'WALLIS AND FUTUNA' 
limit 5000000 
; 
 
-- identify patterns 
select query_parameterized_hash, query_hash, query_text 
from table(cost.information_schema.query_history()) 
where query_type = 'SELECT' 
order by start_time desc 
limit 10;

Warehouse clustering & spilling

Two queries to find out what the warehouse cluster utilization rate is and which queries are spilling data

alter session set query_tag = 'cluster'; 
use warehouse benchmark; 
 
select  timestamp::date datum, warehouse_name, max(cluster_number) max_clus, avg(cluster_number) avg_clus 
from snowflake.account_usage.warehouse_events_history  
where 1 = 1  
and cluster_number is not null 
and warehouse_name = 'BENCHMARK' 
group by all 
order by 1; 
 
-- spilling 
select bytes_spilled_to_local_storage, bytes_spilled_to_remote_storage, query_id 
from snowflake.account_usage.query_history 
where bytes_spilled_to_local_storage <> 0 or bytes_spilled_to_remote_storage <> 0;

Scale and Run

Run the same query with a XSMALL and MEDIUM warehouse and find out what the differences are

alter session set query_tag = 'scaleup'; 
use warehouse benchmark; 
alter warehouse benchmark set warehouse_size = XSMALL; 
alter session set USE_CACHED_RESULT = FALSE; 
 
select sp.* 
from cost.altimate.store_sales_price sp 
join cost.altimate.customer c 
on sp.ss_customer_sk = c.c_customer_sk 
where c_birth_country like '%REP%' 
limit 5000000; 
-- 20 seconds 
 
alter warehouse benchmark set warehouse_size = MEDIUM; 
 
select sp.* 
from cost.altimate.store_sales_price sp 
join cost.altimate.customer c 
on sp.ss_customer_sk = c.c_customer_sk 
where c_birth_country like '%REP%' 
limit 5000000; 
 
-- 5.7 seconds

Cost per query and query tag

Putting some things together with the cost per query and accumulate the costs per query tag

use warehouse benchmark; 
alter session set query_tag = 'querycost'; 
alter warehouse benchmark set warehouse_size = MEDIUM; 
 
-- cost for a query 
select query_tag, credits_attributed_compute  
from snowflake.account_usage.QUERY_ATTRIBUTION_HISTORY; 
 
-- duurste query 
select max(credits_attributed_compute)  
from snowflake.account_usage.QUERY_ATTRIBUTION_HISTORY; 
 
-- kosten per query tag 
select query_tag, sum(credits_attributed_compute) credits  
from snowflake.account_usage.QUERY_ATTRIBUTION_HISTORY  
group by all 
order by 2 desc;
Photo by Melissa Scanu