Skip to content

Commit

Permalink
updated-benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
data-sleek committed Dec 17, 2022
1 parent 148f286 commit 2146533
Show file tree
Hide file tree
Showing 13 changed files with 829 additions and 0 deletions.
92 changes: 92 additions & 0 deletions Aiven-Clickhouse/01-tpch_clickhouse.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
-- Create the TPC-H benchmark database and make it the session default.
-- IF NOT EXISTS makes the whole DDL script idempotent (safe to re-run).
CREATE DATABASE IF NOT EXISTS tpch;
USE tpch;

-- TPC-H customer table.
-- NOTE(review): the TPC-H spec uses DECIMAL(15,2) for c_acctbal; Float32 is
-- kept here for parity with the other benchmarked engines, but is inexact.
CREATE TABLE IF NOT EXISTS `customer` (
    `c_custkey`    UInt32,
    `c_name`       String,
    `c_address`    String,
    `c_nationkey`  UInt32,
    `c_phone`      String,
    `c_acctbal`    Float32,
    `c_mktsegment` String,
    `c_comment`    String
)
ENGINE = MergeTree
ORDER BY c_custkey;

-- TPC-H lineitem fact table (the largest table; drives most queries).
-- Fixes: stray empty `;` statement removed; `date` written as the canonical
-- ClickHouse `Date` type name; IF NOT EXISTS for idempotent re-runs.
CREATE TABLE IF NOT EXISTS `lineitem` (
    `l_orderkey`      UInt32,
    `l_partkey`       UInt32,
    `l_suppkey`       UInt32,
    `l_linenumber`    UInt32,
    `l_quantity`      Float32,
    `l_extendedprice` Float32,
    `l_discount`      Float32,
    `l_tax`           Float32,
    `l_returnflag`    String,
    `l_linestatus`    String,
    `l_shipdate`      Date,
    `l_commitdate`    Date,
    `l_receiptdate`   Date,
    `l_shipinstruct`  String,
    `l_shipmode`      String,
    `l_comment`       String
)
ENGINE = MergeTree
ORDER BY l_orderkey;
-- TPC-H nation table (25 rows; joins to region and customer/supplier).
-- Fix: stray empty `;` statement removed; IF NOT EXISTS for idempotency.
CREATE TABLE IF NOT EXISTS `nation` (
    `n_nationkey` UInt32,
    `n_name`      String,
    `n_regionkey` UInt32,
    `n_comment`   String
)
ENGINE = MergeTree
ORDER BY n_nationkey;
-- TPC-H orders table.
-- Fixes: `date` written as the canonical `Date` type name; IF NOT EXISTS
-- so the script can be re-run without error.
CREATE TABLE IF NOT EXISTS `orders` (
    `o_orderkey`      UInt32,
    `o_custkey`       UInt32,
    `o_orderstatus`   String,
    `o_totalprice`    Float32,
    `o_orderdate`     Date,
    `o_orderpriority` String,
    `o_clerk`         String,
    `o_shippriority`  UInt32,
    `o_comment`       String
)
ENGINE = MergeTree
ORDER BY o_orderkey;

-- TPC-H part table.
CREATE TABLE IF NOT EXISTS `part` (
    `p_partkey`     UInt32,
    `p_name`        String,
    `p_mfgr`        String,
    `p_brand`       String,
    `p_type`        String,
    `p_size`        UInt32,
    `p_container`   String,
    `p_retailprice` Float32,
    `p_comment`     String
)
ENGINE = MergeTree
ORDER BY p_partkey;

-- TPC-H partsupp table, recreated from scratch.
-- Fixes: DROP guarded with IF EXISTS so a fresh database does not error out;
-- `ENGINE = MergeTree` written with `=` to match every other table here.
DROP TABLE IF EXISTS partsupp;
CREATE TABLE `partsupp` (
    `ps_partkey`    UInt32,
    `ps_suppkey`    UInt32,
    `ps_availqty`   UInt32,
    `ps_supplycost` Float32,
    `ps_comment`    String
)
ENGINE = MergeTree
ORDER BY ps_partkey;





-- TPC-H region table (5 rows).
CREATE TABLE IF NOT EXISTS `region` (
    `r_regionkey` UInt32,
    `r_name`      String,
    `r_comment`   String
)
ENGINE = MergeTree
ORDER BY r_regionkey;

-- TPC-H supplier table.
CREATE TABLE IF NOT EXISTS `supplier` (
    `s_suppkey`   UInt32,
    `s_name`      String,
    `s_address`   String,
    `s_nationkey` UInt32,
    `s_phone`     String,
    `s_acctbal`   Float32,
    `s_comment`   String
)
ENGINE = MergeTree
ORDER BY s_suppkey;
17 changes: 17 additions & 0 deletions Aiven-Clickhouse/02-load-data.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Load the TPC-H sf_100 dataset from S3 into ClickHouse.
--
-- NOTE(review): the original file was a scratch pad of SingleStore-style
-- "LOAD DATA S3 ..." fragments plus the prose line "For each file :",
-- neither of which is valid ClickHouse SQL. Rewritten below using the
-- ClickHouse s3() table function. The source files are pipe-delimited
-- (the original noted FIELDS TERMINATED BY '|').
--
-- TODO(review): confirm the real bucket URL/region and whether credentials
-- are required before running; the paths below mirror the original prefixes.
CREATE DATABASE IF NOT EXISTS tpch;
USE tpch;

-- Source files use '|' as the field delimiter.
SET format_csv_delimiter = '|';

INSERT INTO lineitem SELECT * FROM s3('https://memsql-tpch-dataset.s3.amazonaws.com/sf_100/lineitem/*', 'CSV');
INSERT INTO customer SELECT * FROM s3('https://memsql-tpch-dataset.s3.amazonaws.com/sf_100/customer/*', 'CSV');
INSERT INTO nation   SELECT * FROM s3('https://memsql-tpch-dataset.s3.amazonaws.com/sf_100/nation/*',   'CSV');
INSERT INTO orders   SELECT * FROM s3('https://memsql-tpch-dataset.s3.amazonaws.com/sf_100/orders/*',   'CSV');
INSERT INTO part     SELECT * FROM s3('https://memsql-tpch-dataset.s3.amazonaws.com/sf_100/part/*',     'CSV');
INSERT INTO partsupp SELECT * FROM s3('https://memsql-tpch-dataset.s3.amazonaws.com/sf_100/partsupp/*', 'CSV');
INSERT INTO region   SELECT * FROM s3('https://memsql-tpch-dataset.s3.amazonaws.com/sf_100/region/*',   'CSV');
INSERT INTO supplier SELECT * FROM s3('https://memsql-tpch-dataset.s3.amazonaws.com/sf_100/supplier/*', 'CSV');

148 changes: 148 additions & 0 deletions Aiven-Clickhouse/03-thcp_queries.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
-- Query 1: total row count of the fact table.
SELECT count(*)
FROM lineitem;

-- Query 2: ship-date range of the fact table.
SELECT min(l_shipdate), max(l_shipdate)
FROM lineitem
LIMIT 10;

-- Query 3: pricing summary report over a three-year ship-date window,
-- grouped by return flag and line status.
SELECT
    l_returnflag,
    l_linestatus,
    SUM(l_quantity)                                       AS sum_qty,
    SUM(l_extendedprice)                                  AS sum_base_price,
    SUM(l_extendedprice * (1 - l_discount))               AS sum_disc_price,
    SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    AVG(l_quantity)                                       AS avg_qty,
    AVG(l_extendedprice)                                  AS avg_price,
    AVG(l_discount)                                       AS avg_disc,
    COUNT(*)                                              AS count_order
FROM lineitem
WHERE l_shipdate BETWEEN '1991-01-01' AND '1993-12-31'
GROUP BY
    l_returnflag,
    l_linestatus
ORDER BY
    l_returnflag,
    l_linestatus
LIMIT 10;
-- Query 4: all part + partsupp columns for a single part key.
-- Fixes: the statement was missing its terminating semicolon, so it ran
-- together with Query 5 in multi-statement execution; the single-row filter
-- is moved from the JOIN ON clause to WHERE (identical for an inner join,
-- but clearer about join condition vs. row filter).
SELECT *
FROM part AS p
INNER JOIN partsupp AS p2
    ON p.p_partkey = p2.ps_partkey
WHERE p.p_partkey = 1
LIMIT 10;
-- Query 5: availability and supplier name for one part, joining
-- part -> partsupp -> supplier (the part-key filter sits in the first
-- join condition, exactly as in the original).
SELECT
    pt.p_partkey,
    pt.p_name,
    ps.ps_availqty,
    su.s_name
FROM part AS pt
INNER JOIN partsupp AS ps
    ON pt.p_partkey = ps.ps_partkey AND pt.p_partkey = 1
INNER JOIN supplier AS su
    ON ps.ps_suppkey = su.s_suppkey
LIMIT 10;
-- Query 6: same part -> partsupp -> supplier join as Query 5, but over
-- all parts (no part-key filter).
SELECT
    pt.p_partkey,
    pt.p_name,
    ps.ps_availqty,
    su.s_name
FROM part AS pt
INNER JOIN partsupp AS ps
    ON pt.p_partkey = ps.ps_partkey
INNER JOIN supplier AS su
    ON ps.ps_suppkey = su.s_suppkey
LIMIT 10;
-- Query 7: total available quantity per (part, supplier) pair.
-- Fix: removed the committed commented-out filter fragment
-- ("-- AND p.p_partkey =1") left over from debugging.
SELECT
    p.p_partkey,
    p.p_name,
    s.s_name,
    SUM(p2.ps_availqty) AS total_available
FROM part AS p
INNER JOIN partsupp AS p2
    ON p.p_partkey = p2.ps_partkey
INNER JOIN supplier AS s
    ON p2.ps_suppkey = s.s_suppkey
GROUP BY
    p.p_partkey,
    p.p_name,
    s.s_name
LIMIT 10;
-- Query 8: per (part, supplier), available quantity vs. quantity ordered,
-- restricted to the first 100k part keys.
-- Fixes: the p_partkey range filter was buried in the lineitem JOIN ON
-- clause — moved to WHERE (identical result for inner joins, far clearer);
-- the crammed single-line ON/GROUP BY is reformatted.
SET max_memory_usage = 40000000000;
SELECT
    p.p_partkey,
    p.p_name,
    s.s_name,
    SUM(p2.ps_availqty) AS total_available,
    SUM(l.l_quantity)   AS total_qty_ordered
FROM part AS p
INNER JOIN partsupp AS p2
    ON p.p_partkey = p2.ps_partkey
INNER JOIN supplier AS s
    ON p2.ps_suppkey = s.s_suppkey
INNER JOIN lineitem AS l
    ON l.l_partkey = p2.ps_partkey AND l.l_suppkey = p2.ps_suppkey
WHERE p.p_partkey BETWEEN 1 AND 100000
GROUP BY
    p.p_partkey,
    p.p_name,
    s.s_name
ORDER BY total_available DESC
LIMIT 10;
-- Query 9 (TPC-H Q3): shipping-priority revenue for BUILDING-segment
-- customers, orders placed before / items shipped after 1995-03-15.
-- Fixes: implicit comma joins rewritten as explicit INNER JOINs (same
-- equijoin semantics, no silently-dropped join condition risk); date
-- literals built with ClickHouse's documented toDate() function.
SET max_memory_usage = 40000000000;
SELECT
    l_orderkey,
    SUM(l_extendedprice * (1 - l_discount)) AS revenue,
    o_orderdate,
    o_shippriority
FROM customer
INNER JOIN orders
    ON c_custkey = o_custkey
INNER JOIN lineitem
    ON l_orderkey = o_orderkey
WHERE c_mktsegment = 'BUILDING'
  AND o_orderdate < toDate('1995-03-15')
  AND l_shipdate > toDate('1995-03-15')
GROUP BY
    l_orderkey,
    o_orderdate,
    o_shippriority
ORDER BY
    revenue DESC,
    o_orderdate
LIMIT 10;
-- Query 10 (TPC-H Q10): top 20 customers by returned-item revenue in the
-- quarter starting 1993-10-01.
-- Fixes: implicit comma joins rewritten as explicit INNER JOINs; date
-- arithmetic uses toDate() and an unquoted INTERVAL literal, the
-- documented ClickHouse forms.
SET max_memory_usage = 40000000000;
SELECT
    c_custkey,
    c_name,
    SUM(l_extendedprice * (1 - l_discount)) AS revenue,
    c_acctbal,
    n_name,
    c_address,
    c_phone,
    c_comment
FROM customer
INNER JOIN orders
    ON c_custkey = o_custkey
INNER JOIN lineitem
    ON l_orderkey = o_orderkey
INNER JOIN nation
    ON c_nationkey = n_nationkey
WHERE o_orderdate >= toDate('1993-10-01')
  AND o_orderdate < toDate('1993-10-01') + INTERVAL 3 MONTH
  AND l_returnflag = 'R'
GROUP BY
    c_custkey,
    c_name,
    c_acctbal,
    c_phone,
    n_name,
    c_address,
    c_comment
ORDER BY revenue DESC
LIMIT 20;
-- Query 11 (TPC-H Q9): yearly profit per nation for parts whose name
-- contains 'green'.
-- Fixes: the six-way implicit comma join is rewritten as explicit INNER
-- JOINs anchored on lineitem (same equijoin conditions, one per ON);
-- the line-wrap-garbled inner SELECT is reformatted.
SET max_memory_usage = 45000000000;
SELECT
    nation,
    o_year,
    SUM(amount) AS sum_profit
FROM
(
    SELECT
        n_name                    AS nation,
        extract(year FROM o_orderdate) AS o_year,
        l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount
    FROM lineitem
    INNER JOIN supplier
        ON s_suppkey = l_suppkey
    INNER JOIN partsupp
        ON ps_suppkey = l_suppkey AND ps_partkey = l_partkey
    INNER JOIN part
        ON p_partkey = l_partkey
    INNER JOIN orders
        ON o_orderkey = l_orderkey
    INNER JOIN nation
        ON s_nationkey = n_nationkey
    WHERE p_name LIKE '%green%'
) AS profit
GROUP BY
    nation,
    o_year
ORDER BY
    nation,
    o_year DESC;
Binary file not shown.
27 changes: 27 additions & 0 deletions clickhouse/clickhouse_dataset_load.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
# Download the TPC-H .tbl files from S3 and bulk-load them into ClickHouse.
# (A 200 GB volume was created in DigitalOcean and attached to the instance.)
#
# Fixes: added shebang and strict-mode flags; mkdir -p so re-runs don't fail;
# the eight copy-pasted aws/clickhouse command pairs are replaced by loops
# over a single table list (the bucket placeholders were also inconsistent:
# xxxx vs xxx); the final verification step actually runs the counts instead
# of dropping into an interactive client.
set -euo pipefail

DATASET_DIR=/mnt/volume_sfo3_01/dataset
BUCKET="s3://xxx"   # TODO: fill in the real bucket name before running

mkdir -p "${DATASET_DIR}"
cd "${DATASET_DIR}"

tables=( customer lineitem nation orders part partsupp region supplier )

# Fetch each raw table file from S3.
for t in "${tables[@]}"; do
    aws s3 cp "${BUCKET}/${t}.tbl" ./
done

# Load each file; `time` records the per-table load duration as requested.
for t in "${tables[@]}"; do
    echo "Loading ${t}.tbl ..."
    time clickhouse-client --format_csv_delimiter="|" \
        --query="INSERT INTO tpch.${t} FORMAT CSV" < "${DATASET_DIR}/${t}.tbl"
done

# Verify: count rows in each table.
for t in "${tables[@]}"; do
    clickhouse-client --query="SELECT count(*) FROM tpch.${t}"
done
20 changes: 20 additions & 0 deletions clickhouse/install_clickhouse.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Install ClickHouse server + client and the AWS CLI on Ubuntu, then start
# the server.
#
# Fixes: "pass: xxxxx" was not a valid shell command (the script would abort
# there) — moved into a comment; "apt uninstall" is not an apt subcommand
# (the correct one is "remove") and running it unconditionally at the end
# would tear down what was just installed — kept as a commented-out teardown.
set -euo pipefail

# NOTE(review): apt-key is deprecated and repo.yandex.ru is the legacy repo
# (superseded by packages.clickhouse.com); kept as-is to reproduce the
# original benchmark environment.
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4
echo "deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" | sudo tee /etc/apt/sources.list.d/clickhouse.list

sudo apt update
sudo apt upgrade -y

sudo apt install clickhouse-server clickhouse-client -y
sudo apt-get install awscli -y

sudo service clickhouse-server start
sudo service clickhouse-server status

# Connect with the password chosen during clickhouse-server setup:
#   clickhouse-client --password xxxxx
#
# Teardown (run manually when finished benchmarking):
#   sudo apt remove clickhouse-server clickhouse-client -y
54 changes: 54 additions & 0 deletions clickhouse/load_tpch_dataset.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env bash
# Fetch gzipped TPC-H .tbl files from S3, truncate + reload each ClickHouse
# table, then verify by counting rows per table.
#
# Fixes: shebang/strict mode added; the ClickHouse password was hard-coded
# in every command — it now comes from $CH_PASSWORD (same default kept for
# backward compatibility, but SECURITY(review): rotate it and pass it via
# the environment or a clickhouse-client config file); the final loop logged
# "Clearing table" while actually counting rows; expansions are quoted.
set -euo pipefail

# log MESSAGE [MESSAGE2]: echo the arguments prefixed with a MM/DD HH:MM:SS
# timestamp. $2 is optional (defaulted so set -u doesn't trip on it).
log()
{ echo "$(date +'%m/%d %H:%M:%S') ${1} ${2:-}"
}

targetdir=/mnt/dataset
ch_password="${CH_PASSWORD:-Tibeun1111}"

tables=( customer lineitem nation orders part partsupp region supplier )

# Download and decompress each table file.
for t in "${tables[@]}"; do
    echo "Copying ${t}.tbl.gz ..."
    aws s3 cp "s3://datasleek-datasets/tpch100/${t}.tbl.gz" "${targetdir}/"
    echo "Unzip ${t}.tbl.gz ..."
    gunzip "${targetdir}/${t}.tbl.gz"
done

# Empty each target table so a reload doesn't double-count rows.
for t in "${tables[@]}"; do
    log "Clearing table ${t}"
    clickhouse-client --password "${ch_password}" --query="truncate table tpch.${t} "
    log "- - - -"
done

# Load each pipe-delimited .tbl file.
for t in "${tables[@]}"; do
    log "Loading ${t}.tbl..."
    clickhouse-client --password "${ch_password}" --format_csv_delimiter="|" \
        --query="INSERT INTO tpch.${t} FORMAT CSV" < "${targetdir}/${t}.tbl"
    log "Done Loading ${t}.tbl..."
done

# Verify: row count per table.
for t in "${tables[@]}"; do
    log "Counting rows in table ${t}"
    clickhouse-client --password "${ch_password}" --query="select count(*) from tpch.${t} "
    log "- - - -"
done



Loading

0 comments on commit 2146533

Please sign in to comment.