-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
148f286
commit 2146533
Showing
13 changed files
with
829 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
-- Create the benchmark database only when it is missing, so this script can be re-run. | ||
CREATE DATABASE IF NOT EXISTS tpch; | ||
USE tpch; | ||
|
||
-- Customer table; rows are sorted by c_custkey. | ||
CREATE TABLE customer | ||
( | ||
    c_custkey    UInt32, | ||
    c_name       String, | ||
    c_address    String, | ||
    c_nationkey  UInt32, | ||
    c_phone      String, | ||
    c_acctbal    Float32, | ||
    c_mktsegment String, | ||
    c_comment    String | ||
) | ||
ENGINE = MergeTree | ||
ORDER BY c_custkey; | ||
|
||
-- Line item table; rows are sorted by l_orderkey. | ||
-- NOTE(review): price/discount/tax columns are Float32; a Decimal type may suit monetary values better — confirm before changing. | ||
CREATE TABLE lineitem | ||
( | ||
    l_orderkey      UInt32, | ||
    l_partkey       UInt32, | ||
    l_suppkey       UInt32, | ||
    l_linenumber    UInt32, | ||
    l_quantity      Float32, | ||
    l_extendedprice Float32, | ||
    l_discount      Float32, | ||
    l_tax           Float32, | ||
    l_returnflag    String, | ||
    l_linestatus    String, | ||
    l_shipdate      Date, | ||
    l_commitdate    Date, | ||
    l_receiptdate   Date, | ||
    l_shipinstruct  String, | ||
    l_shipmode      String, | ||
    l_comment       String | ||
) | ||
ENGINE = MergeTree | ||
ORDER BY l_orderkey; | ||
-- Nation table; rows are sorted by n_nationkey. | ||
CREATE TABLE nation | ||
( | ||
    n_nationkey UInt32, | ||
    n_name      String, | ||
    n_regionkey UInt32, | ||
    n_comment   String | ||
) | ||
ENGINE = MergeTree | ||
ORDER BY n_nationkey; | ||
-- Orders table; rows are sorted by o_orderkey. | ||
CREATE TABLE orders | ||
( | ||
    o_orderkey      UInt32, | ||
    o_custkey       UInt32, | ||
    o_orderstatus   String, | ||
    o_totalprice    Float32, | ||
    o_orderdate     date, | ||
    o_orderpriority String, | ||
    o_clerk         String, | ||
    o_shippriority  UInt32, | ||
    o_comment       String | ||
) | ||
ENGINE = MergeTree | ||
ORDER BY o_orderkey; | ||
|
||
-- Part table; rows are sorted by p_partkey. | ||
CREATE TABLE part | ||
( | ||
    p_partkey     UInt32, | ||
    p_name        String, | ||
    p_mfgr        String, | ||
    p_brand       String, | ||
    p_type        String, | ||
    p_size        UInt32, | ||
    p_container   String, | ||
    p_retailprice Float32, | ||
    p_comment     String | ||
) | ||
ENGINE = MergeTree | ||
ORDER BY p_partkey; | ||
|
||
-- Part/supplier table; rows are sorted by ps_partkey. | ||
-- IF EXISTS keeps the drop from failing when the table has not been created yet. | ||
DROP TABLE IF EXISTS partsupp; | ||
CREATE TABLE partsupp | ||
( | ||
    ps_partkey    UInt32, | ||
    ps_suppkey    UInt32, | ||
    ps_availqty   UInt32, | ||
    ps_supplycost Float32, | ||
    ps_comment    String | ||
) | ||
ENGINE = MergeTree | ||
ORDER BY ps_partkey; | ||
|
||
|
||
|
||
|
||
|
||
-- Region table; rows are sorted by r_regionkey. | ||
CREATE TABLE region | ||
( | ||
    r_regionkey UInt32, | ||
    r_name      String, | ||
    r_comment   String | ||
) | ||
ENGINE = MergeTree | ||
ORDER BY r_regionkey; | ||
|
||
-- Supplier table; rows are sorted by s_suppkey. | ||
CREATE TABLE supplier | ||
( | ||
    s_suppkey   UInt32, | ||
    s_name      String, | ||
    s_address   String, | ||
    s_nationkey UInt32, | ||
    s_phone     String, | ||
    s_acctbal   Float32, | ||
    s_comment   String | ||
) | ||
ENGINE = MergeTree | ||
ORDER BY s_suppkey; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
-- Create the benchmark database only when it is missing, so this script can be re-run. | ||
CREATE DATABASE IF NOT EXISTS tpch; | ||
USE tpch; | ||
|
||
-- S3 bucket | ||
-- For each file, use these delimiters: | ||
--   FIELDS TERMINATED BY '|' | ||
--   LINES TERMINATED BY '|\n'; | ||
|
||
-- One S3 source path per table under memsql-tpch-dataset/sf_100/. | ||
-- NOTE(review): each line names only the source path; there is no target table and no statement terminator, | ||
-- so these look like fragments to be completed per table — confirm before running. | ||
LOAD DATA S3 'memsql-tpch-dataset/sf_100/lineitem/' | ||
LOAD DATA S3 'memsql-tpch-dataset/sf_100/customer/' | ||
LOAD DATA S3 'memsql-tpch-dataset/sf_100/nation/' | ||
LOAD DATA S3 'memsql-tpch-dataset/sf_100/orders/' | ||
LOAD DATA S3 'memsql-tpch-dataset/sf_100/part/' | ||
LOAD DATA S3 'memsql-tpch-dataset/sf_100/partsupp/' | ||
LOAD DATA S3 'memsql-tpch-dataset/sf_100/region/' | ||
LOAD DATA S3 'memsql-tpch-dataset/sf_100/supplier/' | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
-- Query 1: total number of rows in lineitem | ||
SELECT count(*) | ||
FROM lineitem; | ||
-- Query 2: earliest and latest ship date in lineitem | ||
SELECT | ||
    min(l_shipdate), | ||
    max(l_shipdate) | ||
FROM lineitem | ||
LIMIT 10; | ||
-- Query 3: quantity/price aggregates per (l_returnflag, l_linestatus) | ||
-- for line items shipped from 1991-01-01 through 1993-12-31 (both ends inclusive). | ||
SELECT | ||
    l_returnflag, | ||
    l_linestatus, | ||
    SUM(l_quantity) AS sum_qty, | ||
    SUM(l_extendedprice) AS sum_base_price, | ||
    SUM(l_extendedprice * (1 - l_discount)) AS sum_disc_price, | ||
    SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge, | ||
    AVG(l_quantity) AS avg_qty, | ||
    AVG(l_extendedprice) AS avg_price, | ||
    AVG(l_discount) AS avg_disc, | ||
    COUNT(*) AS count_order | ||
FROM lineitem | ||
WHERE l_shipdate BETWEEN '1991-01-01' AND '1993-12-31' | ||
GROUP BY | ||
    l_returnflag, | ||
    l_linestatus | ||
ORDER BY | ||
    l_returnflag, | ||
    l_linestatus | ||
LIMIT 10; | ||
-- Query 4: part joined to partsupp, restricted to part key 1 | ||
-- The terminating semicolon is required; without it this statement runs into Query 5. | ||
SELECT * | ||
FROM part AS p | ||
INNER JOIN partsupp AS p2 | ||
    ON p.p_partkey = p2.ps_partkey AND p.p_partkey = 1 | ||
LIMIT 10; | ||
-- Query 5: available quantity and supplier name for part key 1 | ||
SELECT | ||
    p.p_partkey, | ||
    p.p_name, | ||
    p2.ps_availqty, | ||
    s.s_name | ||
FROM part AS p | ||
INNER JOIN partsupp AS p2 | ||
    ON p.p_partkey = p2.ps_partkey AND p.p_partkey = 1 | ||
INNER JOIN supplier AS s | ||
    ON p2.ps_suppkey = s.s_suppkey | ||
LIMIT 10; | ||
-- Query 6: same three-table join as Query 5 but without the part key filter | ||
SELECT | ||
    p.p_partkey, | ||
    p.p_name, | ||
    p2.ps_availqty, | ||
    s.s_name | ||
FROM part AS p | ||
INNER JOIN partsupp AS p2 | ||
    ON p.p_partkey = p2.ps_partkey | ||
INNER JOIN supplier AS s | ||
    ON p2.ps_suppkey = s.s_suppkey | ||
LIMIT 10; | ||
-- Query 7: total available quantity per (part key, part name, supplier name) | ||
-- An optional restriction to part key 1 was left disabled in the first join condition. | ||
SELECT | ||
    p.p_partkey, | ||
    p.p_name, | ||
    s.s_name, | ||
    SUM(p2.ps_availqty) AS total_available | ||
FROM part AS p | ||
INNER JOIN partsupp AS p2 | ||
    ON p.p_partkey = p2.ps_partkey | ||
INNER JOIN supplier AS s | ||
    ON p2.ps_suppkey = s.s_suppkey | ||
GROUP BY | ||
    p.p_partkey, | ||
    p.p_name, | ||
    s.s_name | ||
LIMIT 10; | ||
-- QUERY 08: available and ordered quantity per (part, supplier) for part keys 1..100000, | ||
-- ten rows with the highest total_available. | ||
SET max_memory_usage = 40000000000; | ||
SELECT | ||
    p.p_partkey, | ||
    p.p_name, | ||
    s.s_name, | ||
    SUM(p2.ps_availqty) AS total_available, | ||
    SUM(l.l_quantity) AS total_qty_ordered | ||
FROM part AS p | ||
INNER JOIN partsupp AS p2 | ||
    ON p.p_partkey = p2.ps_partkey | ||
INNER JOIN supplier AS s | ||
    ON p2.ps_suppkey = s.s_suppkey | ||
INNER JOIN lineitem AS l | ||
    ON l.l_partkey = p2.ps_partkey | ||
    AND l.l_suppkey = p2.ps_suppkey | ||
    AND p.p_partkey BETWEEN 1 AND 100000 | ||
GROUP BY | ||
    p.p_partkey, | ||
    p.p_name, | ||
    s.s_name | ||
ORDER BY total_available DESC | ||
LIMIT 10; | ||
-- QUERY 09: revenue per order for 'BUILDING' customers, for orders placed before 1995-03-15 | ||
-- with line items shipped after 1995-03-15; ten highest-revenue rows. | ||
-- Join predicates are in explicit INNER JOIN ... ON clauses; WHERE holds only row filters. | ||
SET max_memory_usage = 40000000000; | ||
SELECT | ||
    l_orderkey, | ||
    SUM(l_extendedprice * (1 - l_discount)) AS revenue, | ||
    o_orderdate, | ||
    o_shippriority | ||
FROM customer | ||
INNER JOIN orders | ||
    ON customer.c_custkey = orders.o_custkey | ||
INNER JOIN lineitem | ||
    ON lineitem.l_orderkey = orders.o_orderkey | ||
WHERE c_mktsegment = 'BUILDING' | ||
    AND o_orderdate < date('1995-03-15') | ||
    AND l_shipdate > date('1995-03-15') | ||
GROUP BY | ||
    l_orderkey, | ||
    o_orderdate, | ||
    o_shippriority | ||
ORDER BY | ||
    revenue DESC, | ||
    o_orderdate | ||
LIMIT 10; | ||
-- QUERY 10: revenue from returned line items (l_returnflag = 'R') per customer, | ||
-- for orders placed in the three months starting 1993-10-01; twenty highest-revenue rows. | ||
-- Join predicates are in explicit INNER JOIN ... ON clauses; WHERE holds only row filters. | ||
SET max_memory_usage = 40000000000; | ||
SELECT | ||
    c_custkey, | ||
    c_name, | ||
    SUM(l_extendedprice * (1 - l_discount)) AS revenue, | ||
    c_acctbal, | ||
    n_name, | ||
    c_address, | ||
    c_phone, | ||
    c_comment | ||
FROM customer | ||
INNER JOIN orders | ||
    ON customer.c_custkey = orders.o_custkey | ||
INNER JOIN lineitem | ||
    ON lineitem.l_orderkey = orders.o_orderkey | ||
INNER JOIN nation | ||
    ON customer.c_nationkey = nation.n_nationkey | ||
WHERE o_orderdate >= date('1993-10-01') | ||
    AND o_orderdate < date('1993-10-01') + interval '3' month | ||
    AND l_returnflag = 'R' | ||
GROUP BY | ||
    c_custkey, | ||
    c_name, | ||
    c_acctbal, | ||
    c_phone, | ||
    n_name, | ||
    c_address, | ||
    c_comment | ||
ORDER BY revenue DESC | ||
LIMIT 20; | ||
-- QUERY 11: profit per nation and order year for parts whose name contains 'green'. | ||
-- NOTE(review): the inner query keeps its comma-separated table list; rewriting it with explicit | ||
-- INNER JOINs would require reordering the tables (part and supplier share no direct join predicate). | ||
SET max_memory_usage = 45000000000; | ||
SELECT | ||
    nation, | ||
    o_year, | ||
    SUM(amount) AS sum_profit | ||
FROM | ||
( | ||
    SELECT | ||
        n_name AS nation, | ||
        extract(year FROM o_orderdate) AS o_year, | ||
        l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount | ||
    FROM part, supplier, lineitem, partsupp, orders, nation | ||
    WHERE s_suppkey = l_suppkey | ||
        AND ps_suppkey = l_suppkey | ||
        AND ps_partkey = l_partkey | ||
        AND p_partkey = l_partkey | ||
        AND o_orderkey = l_orderkey | ||
        AND s_nationkey = n_nationkey | ||
        AND p_name LIKE '%green%' | ||
) AS profit | ||
GROUP BY | ||
    nation, | ||
    o_year | ||
ORDER BY | ||
    nation, | ||
    o_year DESC; |
Binary file added
BIN
+633 KB
clickhouse/How To Install and Use ClickHouse on Ubuntu 20.04 | DigitalOcean.pdf
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Created a 200 GB mount in DigitalOcean and attached it to the instance | ||
|
||
# Create the dataset directory on the mounted volume (no error if it already exists) | ||
# and enter it only when creation succeeded, so the downloads never land in the wrong directory. | ||
mkdir -p /mnt/volume_sfo3_01/dataset && cd /mnt/volume_sfo3_01/dataset | ||
|
||
# Copy each table's .tbl file from S3 into the current directory. | ||
# NOTE(review): bucket names are redacted placeholders (xxxx / xxx) — substitute the real bucket before running. | ||
aws s3 cp s3://xxxx/customer.tbl ./ | ||
aws s3 cp s3://xxx/lineitem.tbl ./ | ||
aws s3 cp s3://xxx/nation.tbl ./ | ||
aws s3 cp s3://xxx/orders.tbl ./ | ||
aws s3 cp s3://xxx/part.tbl ./ | ||
aws s3 cp s3://xxx/partsupp.tbl ./ | ||
aws s3 cp s3://xxx/region.tbl ./ | ||
aws s3 cp s3://xxx/supplier.tbl ./ | ||
|
||
# Please time the load for each !! | ||
clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO tpch.customer FORMAT CSV" < /mnt/volume_sfo3_01/dataset/customer.tbl | ||
clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO tpch.lineitem FORMAT CSV" < /mnt/volume_sfo3_01/dataset/lineitem.tbl | ||
clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO tpch.nation FORMAT CSV" < /mnt/volume_sfo3_01/dataset/nation.tbl | ||
clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO tpch.orders FORMAT CSV" < /mnt/volume_sfo3_01/dataset/orders.tbl | ||
clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO tpch.part FORMAT CSV" < /mnt/volume_sfo3_01/dataset/part.tbl | ||
clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO tpch.partsupp FORMAT CSV" < /mnt/volume_sfo3_01/dataset/partsupp.tbl | ||
clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO tpch.region FORMAT CSV" < /mnt/volume_sfo3_01/dataset/region.tbl | ||
clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO tpch.supplier FORMAT CSV" < /mnt/volume_sfo3_01/dataset/supplier.tbl | ||
|
||
# Do a count on each table to verify | ||
clickhouse-client |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Import signing key E0C56BD4 from keyserver.ubuntu.com. | ||
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 | ||
|
||
# Register the ClickHouse apt repository in its own sources.list.d file. | ||
# NOTE(review): repo.yandex.ru may no longer be the current package host — confirm against the official install docs. | ||
echo "deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" | sudo tee /etc/apt/sources.list.d/clickhouse.list | ||
|
||
# Refresh the package index so the new repository is picked up. | ||
sudo apt update | ||
|
||
# Upgrade already-installed packages without prompting. | ||
sudo apt upgrade -y | ||
|
||
# Install the ClickHouse server and client without prompting. | ||
sudo apt install clickhouse-server clickhouse-client -y | ||
|
||
# Install the AWS CLI without prompting. | ||
sudo apt-get install awscli -y | ||
|
||
# Start the ClickHouse server service. | ||
sudo service clickhouse-server start | ||
|
||
# Show the service status. | ||
sudo service clickhouse-server status | ||
|
||
# pass: xxxxx   (note only, not a command) | ||
clickhouse-client --password xxxxx | ||
|
||
# Teardown: run only when ClickHouse should be removed from the machine. | ||
# apt has no "uninstall" subcommand; "remove" is the valid one. | ||
sudo apt remove clickhouse-server clickhouse-client -y |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Print one line: a month/day hour:minute:second timestamp followed by the first two arguments. | ||
log() { | ||
    local stamp | ||
    stamp="$(date +'%m/%d %H:%M:%S')" | ||
    echo "${stamp} $1 $2" | ||
} | ||
|
||
targetdir=/mnt/dataset/ | ||
|
||
# loop using array | ||
# array=( customer lineitem nation orders part partsupp region supplier ) | ||
# for i in "${array[@]}" | ||
# do | ||
# echo "Compressing ${i}.tbl ..." | ||
# gzip /mnt/volume_sfo3_01/dataset/${i}.tbl | ||
# echo "Copying ${i}.tbl.gz ..." | ||
# aws s3 cp ${targetdir}/${i}.tbl.gz s3://datasleek-datasets/tpch100/${i}.tbl.gz | ||
# done | ||
|
||
# loop using array | ||
|
||
array=( customer lineitem nation orders part partsupp region supplier ) | ||
|
||
# Download each compressed table file from S3 into targetdir, then decompress it in place. | ||
# All expansions are quoted so a path containing spaces or glob characters is passed as one argument. | ||
for i in "${array[@]}" | ||
do | ||
    echo "Copying ${i}.tbl.gz ..." | ||
    aws s3 cp "s3://datasleek-datasets/tpch100/${i}.tbl.gz" "${targetdir}/" | ||
    echo "Unzip ${i}.tbl.gz ..." | ||
    gunzip "${targetdir}/${i}.tbl.gz" | ||
done | ||
|
||
|
||
for i in "${array[@]}" | ||
do | ||
log "Clearing table ${i}" | ||
clickhouse-client --password Tibeun1111 --query="truncate table tpch.${i} " | ||
log "- - - -" | ||
done | ||
|
||
|
||
for i in "${array[@]}" | ||
do | ||
log "Loading ${i}.tbl..." | ||
clickhouse-client --password Tibeun1111 --format_csv_delimiter="|" --query="INSERT INTO tpch.${i} FORMAT CSV" < ${targetdir}/${i}.tbl | ||
log "Done Loading ${i}.tbl..." | ||
done | ||
|
||
|
||
for i in "${array[@]}" | ||
do | ||
log "Clearing table ${i}" | ||
clickhouse-client --password Tibeun1111 --query="select count(*) from tpch.${i} " | ||
log "- - - -" | ||
done | ||
|
||
|
||
|
Oops, something went wrong.