diff --git a/02_activities/assignments/assignment2.sql b/02_activities/assignments/assignment2.sql index 5ad40748a..793d3d7eb 100644 --- a/02_activities/assignments/assignment2.sql +++ b/02_activities/assignments/assignment2.sql @@ -59,9 +59,6 @@ Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR w -/* 2. Filter the query to show any product_size value that contain a number with REGEXP. */ - - -- UNION /* 1. Using a UNION, write a query that displays the market dates with the highest and lowest total sales. diff --git a/04_this_cohort/live_code/module_2/CASE.sql b/04_this_cohort/live_code/module_2/CASE.sql new file mode 100644 index 000000000..bf4ca3063 --- /dev/null +++ b/04_this_cohort/live_code/module_2/CASE.sql @@ -0,0 +1,21 @@ +--CASE + +-- derive each vendor's market day from vendor_type; any other type falls through to 'Saturday' +SELECT * , +CASE WHEN vendor_type = 'Fresh Focused' THEN 'Wednesday' + WHEN vendor_type = 'Prepared Foods' THEN 'Thursday' + ELSE 'Saturday' +END as day_of_specialty +-- pie day: no ELSE, so every other vendor gets NULL +,CASE WHEN vendor_name = "Annie's Pies" -- double quotes only work here because SQLite falls back to a string literal when no such column exists; prefer 'Annie''s Pies'
+ THEN 'annie is the best' + END as annie_is_the_king +,CASE WHEN vendor_name LIKE '%pie%' +THEN 'Wednesday' +ELSE 'Friday' -- with the ELSE, rows that match no WHEN get 'Friday' instead of NULL +END as pie_day +-- contrived example: THEN can return another COLUMN rather than a string; with no ELSE, other vendor types get NULL +,CASE WHEN vendor_type = 'Fresh Focused' THEN vendor_owner_first_name + WHEN vendor_type = 'Prepared Foods' THEN vendor_owner_last_name +END as first_or_last +FROM vendor \ No newline at end of file diff --git a/04_this_cohort/live_code/module_2/DISTINCT.sql b/04_this_cohort/live_code/module_2/DISTINCT.sql new file mode 100644 index 000000000..57cc33332 --- /dev/null +++ b/04_this_cohort/live_code/module_2/DISTINCT.sql @@ -0,0 +1,32 @@ +-- DISTINCT + +--without DISTINCT: 4221 rows, customer_ids repeat +SELECT customer_id FROM customer_purchases; + +-- with DISTINCT: 26 rows, one per customer_id +SELECT DISTINCT customer_id FROM customer_purchases; + +--150 days the market was open +SELECT market_day +FROM market_date_info; + +-- DISTINCT shows the market is only open Wednesday and Saturday +SELECT DISTINCT market_day +FROM market_date_info; + +/* which vendor has sold products to a customer */ -- 3 rows +SELECT DISTINCT vendor_id +FROM customer_purchases; + +/* which vendor has sold products to a customer ... and which product was it? */ -- 8 rows +SELECT DISTINCT vendor_id, product_id +FROM customer_purchases; + +/* which vendor has sold products to a customer +... and which product was it? +... AND to whom was it sold*/ -- 200 rows +SELECT DISTINCT vendor_id, customer_id, product_id +FROM customer_purchases +ORDER BY customer_id ASC, product_id DESC + + diff --git a/04_this_cohort/live_code/module_2/INNER_JOIN.sql b/04_this_cohort/live_code/module_2/INNER_JOIN.sql new file mode 100644 index 000000000..e1641cccc --- /dev/null +++ b/04_this_cohort/live_code/module_2/INNER_JOIN.sql @@ -0,0 +1,29 @@ +-- INNER JOIN +-- INNER JOIN without an alias + +--get product names alongside customer_purchases ...
only products that a customer has purchased +SELECT +product_name, -- comes from the product table +vendor_id, -- this and the columns below come from customer_purchases +market_date, +customer_id, +customer_purchases.product_id -- qualified because product_id exists in both joined tables + + +FROM product +INNER JOIN customer_purchases + ON customer_purchases.product_id = product.product_id; + +-- which vendor has sold products to a customer AND which product was it AND to whom was it sold +SELECT DISTINCT vendor_id, +c.customer_id, +customer_first_name, +customer_last_name, -- the names come from the joined customer table +product_id +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id + + + + diff --git a/04_this_cohort/live_code/module_2/LEFT_JOIN.sql b/04_this_cohort/live_code/module_2/LEFT_JOIN.sql new file mode 100644 index 000000000..661b60334 --- /dev/null +++ b/04_this_cohort/live_code/module_2/LEFT_JOIN.sql @@ -0,0 +1,25 @@ +-- LEFT JOIN + +-- there are products that have been bought, but are there products that have not been bought?
+ +SELECT DISTINCT +p.product_id +,cp.product_id as [cp.product_id] +,product_name + +FROM product as p +LEFT JOIN customer_purchases as cp + ON p.product_id = cp.product_id + +WHERE cp.product_id IS NULL; -- only show product ids that have not been sold + +-- join direction matters +-- this shows ONLY products that have been sold...because there are no product ids in cp that AREN'T in product +SELECT DISTINCT +p.product_id +,cp.product_id as [cp.product_id] +,product_name + +FROM customer_purchases as cp +LEFT JOIN product as p + ON p.product_id = cp.product_id diff --git a/04_this_cohort/live_code/module_2/SELECT.sql b/04_this_cohort/live_code/module_2/SELECT.sql new file mode 100644 index 000000000..41611412a --- /dev/null +++ b/04_this_cohort/live_code/module_2/SELECT.sql @@ -0,0 +1,23 @@ +-- SELECT + +-- select everything from the customer TABLE +SELECT * +FROM customer; + +-- use sql as a calculator (no FROM needed) +SELECT 1+1, 10*5, pi(); + +--add static values as extra columns on every row +SELECT 2025 as this_year, 'August' as this_month, customer_id +FROM customer; + +-- add an order by and limit: first 10 customers by first name +SELECT * +FROM customer +ORDER BY customer_first_name +LIMIT 10; + +-- select multiple columns +SELECT customer_id, customer_first_name +FROM customer; + diff --git a/04_this_cohort/live_code/module_2/WHERE.sql b/04_this_cohort/live_code/module_2/WHERE.sql new file mode 100644 index 000000000..95ecaf0dd --- /dev/null +++ b/04_this_cohort/live_code/module_2/WHERE.sql @@ -0,0 +1,33 @@ +-- WHERE + +SELECT * FROM customer +WHERE customer_id = 1 +OR customer_id = 2; -- 1 or 2 +--AND customer_id = 2 -- returns nothing + +--IN +SELECT * FROM customer +WHERE customer_id IN (3,4,5) -- only customers 3,4,5 +OR customer_postal_code IN ('M4H','M1L'); -- customers in these postal codes + +--LIKE +-- all the peppers +SELECT * FROM product +WHERE product_name LIKE '%pepper%'; + +--customers with a last name starting with a +SELECT * FROM customer +WHERE customer_last_name LIKE 'a%'; + +--NULLS and Blanks +SELECT * FROM product
+WHERE product_size IS NULL +OR product_size = ''; -- two single quotes, "blanks", different from nulls + +-- BETWEEN is another option: inclusive range of customer_id 1 through 20 +SELECT * +FROM customer +WHERE customer_id BETWEEN 1 AND 20 + + + diff --git a/04_this_cohort/live_code/module_2/multiple_table_joins.sql b/04_this_cohort/live_code/module_2/multiple_table_joins.sql new file mode 100644 index 000000000..1de58e79e --- /dev/null +++ b/04_this_cohort/live_code/module_2/multiple_table_joins.sql @@ -0,0 +1,42 @@ +-- multiple_table_joins + +/* which vendor has sold products to a customer +... and which product was it? +... AND to whom was it sold*/ +SELECT DISTINCT +--customer_id, +customer_first_name, +customer_last_name, +--vendor_id, +vendor_name, +--product_id +product_name + +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id +INNER JOIN vendor as v + ON v.vendor_id = cp.vendor_id +INNER JOIN product as p + ON p.product_id = cp.product_id; + + +/* what if we add the dates they were purchased ?
*/ +SELECT DISTINCT +market_date, +--customer_id, +customer_first_name, +customer_last_name, +--vendor_id, +vendor_name, +--product_id +product_name + +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id +INNER JOIN vendor as v + ON v.vendor_id = cp.vendor_id +INNER JOIN product as p + ON p.product_id = cp.product_id + diff --git a/04_this_cohort/live_code/module_3/COUNT.sql b/04_this_cohort/live_code/module_3/COUNT.sql new file mode 100644 index 000000000..d7f21c488 --- /dev/null +++ b/04_this_cohort/live_code/module_3/COUNT.sql @@ -0,0 +1,27 @@ +-- count +-- count the number of products + +SELECT COUNT(product_id) as num_of_prods +FROM product; + +--how many products per product_qty_type +SELECT product_qty_type, +COUNT(product_id) as num_of_prods +FROM product +GROUP BY product_qty_type; + +--how many products per product_qty_type and per their product_size +SELECT +product_size, +product_qty_type, +COUNT(product_id) as num_of_prods + +FROM product + +GROUP BY product_size,product_qty_type; + +--count DISTINCT +--how many unique products were bought + +SELECT count(DISTINCT product_id) as bought_products +FROM customer_purchases diff --git a/04_this_cohort/live_code/module_3/CTE.sql b/04_this_cohort/live_code/module_3/CTE.sql new file mode 100644 index 000000000..08e275a17 --- /dev/null +++ b/04_this_cohort/live_code/module_3/CTE.sql @@ -0,0 +1,38 @@ +--CTE + +--calculate sales per vendor per day + +WITH vendor_daily_sales AS ( + SELECT + md.market_date, + market_day, + market_week, + market_year, + vendor_name, + SUM(quantity*cost_to_customer_per_qty) as sales + + + FROM customer_purchases cp + INNER JOIN market_date_info md -- get the market_day, market_week, market_year + ON cp.market_date = md.market_date + INNER JOIN vendor v + ON v.vendor_id = cp.vendor_id + + GROUP BY md.market_date, v.vendor_id + +) , -- if we want another CTE, add a comma, but not another WITH + +new_customer AS +( + SELECT * FROM customer +) -- last CTE: no comma here, a trailing comma before the main SELECT is a syntax error +
+-- re-aggregate the daily sales for each WEEK instead +SELECT +market_year, +market_week, +vendor_name, +SUM(sales) as weekly_sales + +FROM vendor_daily_sales +GROUP by market_year, market_week, vendor_name diff --git a/04_this_cohort/live_code/module_3/DATES.sql b/04_this_cohort/live_code/module_3/DATES.sql new file mode 100644 index 000000000..85af227b1 --- /dev/null +++ b/04_this_cohort/live_code/module_3/DATES.sql @@ -0,0 +1,27 @@ +-- dates + +-- now + +SELECT DISTINCT +DATE('now') as [now] +,DATETIME() as [right_now] + +--strftime +,strftime('%Y/%m','now') as this_year_month +,strftime('%Y-%m-%d', '2025-08-10', '+50 days') as the_future +,market_date +,strftime('%m-%d-%Y',market_date, '+50 days', '-1 year') as the_past + +--dateadd +--first and last date of the previous month +,DATE(market_date,'start of month','-1 day','start of month') as start_of_prev_month +,DATE(market_date,'start of month','-1 day') as end_of_prev_month + + +-- datediff "equiv" +,market_date +,julianday('now') - julianday(market_date) as now_md_dd-- number of days between now and each market_date +,(julianday('now') - julianday(market_date)) / 365.25 as now_md_dd_yrs -- number of YEARS between now and market_date +,(julianday('now') - julianday(market_date)) * 24 as now_md_dd_hours -- number of HOURS between now and market_date + +FROM market_date_info \ No newline at end of file diff --git a/04_this_cohort/live_code/module_3/HAVING.sql b/04_this_cohort/live_code/module_3/HAVING.sql new file mode 100644 index 000000000..7d1552655 --- /dev/null +++ b/04_this_cohort/live_code/module_3/HAVING.sql @@ -0,0 +1,25 @@ +--HAVING + +-- how much did a customer spend on each day +SELECT --fifth +market_date, +customer_id, +SUM(quantity*cost_to_customer_per_qty) as total_cost + +FROM customer_purchases -- first +WHERE customer_id BETWEEN 1 AND 5 -- filtering the non-aggregated values -- second + + +GROUP BY market_date, customer_id -- third +HAVING total_cost > 50; -- filtering the aggregated values -- fourth + +--
how many products were bought? + +SELECT +count(product_id) as number_of_products, +product_id + +FROM customer_purchases +WHERE product_id <= 8 +GROUP BY product_id +HAVING count(product_id) BETWEEN 300 AND 500 diff --git a/04_this_cohort/live_code/module_3/MIN_MAX.sql b/04_this_cohort/live_code/module_3/MIN_MAX.sql new file mode 100644 index 000000000..b72e39de1 --- /dev/null +++ b/04_this_cohort/live_code/module_3/MIN_MAX.sql @@ -0,0 +1,47 @@ +-- MIN & max + +-- what is the most expensive product + +SELECT +product_name, +max(original_price) as most_expensive -- product_name is a bare column taken from a row holding the max, so ties are not handled well + +FROM product p +INNER JOIN vendor_inventory vi + ON p.product_id = vi.product_id; + +--prove it: list every distinct price, highest first +SELECT DISTINCT +product_name, +original_price + +FROM product p +INNER JOIN vendor_inventory vi + ON p.product_id = vi.product_id +ORDER BY original_price DESC; + +--minimum price per each product_qty_type +SELECT +product_name, +product_qty_type, +min(original_price) as least_expensive + +FROM product p +INNER JOIN vendor_inventory vi + ON p.product_id = vi.product_id +GROUP BY product_qty_type + +order by product_qty_type ASC, original_price ASC; + +--prove it: list every distinct price per product_qty_type, lowest first +SELECT DISTINCT +product_name, +product_qty_type, +original_price + +FROM product p +INNER JOIN vendor_inventory vi + ON p.product_id = vi.product_id +ORDER BY product_qty_type, original_price + + diff --git a/04_this_cohort/live_code/module_3/SUM_AVG.sql b/04_this_cohort/live_code/module_3/SUM_AVG.sql new file mode 100644 index 000000000..52b0c6bde --- /dev/null +++ b/04_this_cohort/live_code/module_3/SUM_AVG.sql @@ -0,0 +1,26 @@ +-- SUM_AVG + +--how much did a customer spend each day + +SELECT +market_date, +customer_id, +SUM(quantity*cost_to_customer_per_qty) as total_cost + +FROM customer_purchases +--....JOIN +GROUP BY market_date, customer_id; + +-- how much does each customer spend on average per purchase row +SELECT +customer_first_name, +customer_last_name, +ROUND(AVG(quantity*cost_to_customer_per_qty),2) as total_cost + +FROM
customer_purchases as cp +INNER JOIN customer as c + ON cp.customer_id = c.customer_id +GROUP BY c.customer_id + + + diff --git a/04_this_cohort/live_code/module_3/arthmitic.sql b/04_this_cohort/live_code/module_3/arthmitic.sql new file mode 100644 index 000000000..b6e208389 --- /dev/null +++ b/04_this_cohort/live_code/module_3/arthmitic.sql @@ -0,0 +1,9 @@ +--arithmetic + +SELECT +10.0 / 3.0 as division, +cast(10.0 as INT) / cast(3.0 as int) as integer_division, + +power(2,2), +pi(), +ceiling(4.5) \ No newline at end of file diff --git a/04_this_cohort/live_code/module_3/subquery_joins.sql b/04_this_cohort/live_code/module_3/subquery_joins.sql new file mode 100644 index 000000000..2d8512eb0 --- /dev/null +++ b/04_this_cohort/live_code/module_3/subquery_joins.sql @@ -0,0 +1,25 @@ +-- subqueries : JOIN + +-- "what is the single item that has been bought in the greatest quantity" + +SELECT product_name +,max(quantity_purchased) + +FROM product p +INNER JOIN ( + SELECT product_id + ,COUNT(quantity) as quantity_purchased + --,case when type of the product is unit then sum else count + + FROM customer_purchases + GROUP BY product_id +) x ON p.product_id = x.product_id; + +-- simple subquery in a FROM statement "inflation" +SELECT DISTINCT product_id, inflation +FROM ( + SELECT product_id, cost_to_customer_per_qty, + CASE WHEN cost_to_customer_per_qty < '1.00' THEN cost_to_customer_per_qty*5 + ELSE cost_to_customer_per_qty END as inflation +FROM customer_purchases ) + diff --git a/04_this_cohort/live_code/module_3/subquery_where.sql b/04_this_cohort/live_code/module_3/subquery_where.sql new file mode 100644 index 000000000..cf1f4ba7b --- /dev/null +++ b/04_this_cohort/live_code/module_3/subquery_where.sql @@ -0,0 +1,43 @@ +--subqueries: WHERE + +-- how much did each customer spend at each vendor for each day at the market WHEN IT RAINS + +SELECT +market_date +,customer_id +,vendor_id +,SUM(quantity*cost_to_customer_per_qty) as total_cost + +FROM customer_purchases +
+--filter by rain_flag +-- "on what dates was it raining" +WHERE market_date IN + ( + SELECT market_date + FROM market_date_info + WHERE market_rain_flag = 1 + ) + +GROUP BY market_date +,customer_id +,vendor_id; + +-- what is the name of the vendor who sells pie +SELECT DISTINCT vendor_name + +FROM vendor v +INNER JOIN vendor_inventory vi + ON v.vendor_id = vi.vendor_id + +WHERE product_id IN ( + SELECT product_id + FROM product + WHERE product_name LIKE '%pie%' +) + + + + + + diff --git a/04_this_cohort/live_code/module_3/temp_table.sql b/04_this_cohort/live_code/module_3/temp_table.sql new file mode 100644 index 000000000..8f3c5c0cb --- /dev/null +++ b/04_this_cohort/live_code/module_3/temp_table.sql @@ -0,0 +1,25 @@ +-- temp tables + +-- if a table named new_vendor_inventory exists, delete it, otherwise do NOTHING +DROP TABLE IF EXISTS temp.new_vendor_inventory; + +--make the TABLE +CREATE TABLE temp.new_vendor_inventory AS + +-- definition of the TABLE +SELECT * +,original_price * 5 as inflation +FROM vendor_inventory; + + +-- build a second temp table from the first one + +DROP TABLE IF EXISTS temp.new_new_vendor_inventory; + + +CREATE TABLE temp.new_new_vendor_inventory AS +SELECT * , +inflation*2 as super_inflation +FROM temp.new_vendor_inventory; + +SELECT * FROM new_new_vendor_inventory \ No newline at end of file diff --git a/04_this_cohort/live_code/module_4/FULL_OUTER_JOIN_UNION.sql b/04_this_cohort/live_code/module_4/FULL_OUTER_JOIN_UNION.sql new file mode 100644 index 000000000..d2e9d7205 --- /dev/null +++ b/04_this_cohort/live_code/module_4/FULL_OUTER_JOIN_UNION.sql @@ -0,0 +1,40 @@ +--FULL OUTER JOIN WITH A UNION +-- two stores, determine which costumes they have in stock +DROP TABLE IF EXISTS temp.store1; +CREATE TEMP TABLE IF NOT EXISTS temp.store1 +( +costume TEXT, +quantity INT +); + +INSERT INTO temp.store1 +VALUES("tiger",6), + ("elephant",2), + ("princess", 4); + + +DROP TABLE IF EXISTS temp.store2; +CREATE TEMP TABLE IF NOT EXISTS temp.store2 +( +costume
TEXT, +quantity INT +); + +INSERT INTO temp.store2 +VALUES("tiger",2), + ("dancer",7), + ("superhero", 5); + + + +SELECT s1.costume, s1.quantity as store1_quantity, s2.quantity as store2_quantity, 'top query' as location +FROM store1 s1 +LEFT JOIN store2 s2 on s1.costume = s2.costume + +UNION ALL + +SELECT s2.costume, s1.quantity, s2.quantity, 'bottom query' +FROM store2 as s2 +LEFT JOIN store1 s1 on s1.costume = s2.costume +WHERE s1.costume IS NULL -- keep only store2 costumes with no store1 match, so matched rows are not repeated + \ No newline at end of file diff --git a/04_this_cohort/live_code/module_4/IFNULL_NULLIF.sql b/04_this_cohort/live_code/module_4/IFNULL_NULLIF.sql new file mode 100644 index 000000000..b4a608508 --- /dev/null +++ b/04_this_cohort/live_code/module_4/IFNULL_NULLIF.sql @@ -0,0 +1,24 @@ +--IFNULL and coalesce & NULLIF + +SELECT * +,IFNULL(product_size,'Unknown') + +--replace with another COLUMN +,IFNULL(product_size, product_qty_type) +,coalesce(product_size, product_qty_type) +,coalesce(product_size,product_qty_type,'missing') -- if the first value is null, then the second value, if that is null, then the third value (missing) + +,IFNULL(IFNULL(product_size, product_qty_type),'missing') -- same as above but with two ifnulls + +FROM product; + +SELECT * +,coalesce(product_size,'Unknown') -- blanks ('') are not NULL, so they are not replaced here +--nullif +,NULLIF(product_size,'') -- find the values in product_size that are "blanks" and set them to null +,coalesce(NULLIF(product_size,''),'Unknown') +,coalesce(NULLIF(TRIM(product_size),''),'Unknown') -- trim first so a whitespace-only value becomes a blank: ' ' = '' + +FROM product + +WHERE NULLIF(product_size,'') IS NULL -- capturing BOTH nulls and blanks at the same time!
\ No newline at end of file diff --git a/04_this_cohort/live_code/module_4/INTERSECT_EXCEPT.sql b/04_this_cohort/live_code/module_4/INTERSECT_EXCEPT.sql new file mode 100644 index 000000000..ccb85a945 --- /dev/null +++ b/04_this_cohort/live_code/module_4/INTERSECT_EXCEPT.sql @@ -0,0 +1,29 @@ +-- INTERSECT / EXCEPT + +-- products that have been sold (i.e. are in both customer_purchases and product) + +SELECT product_id +FROM customer_purchases +INTERSECT +SELECT product_id +FROM product; + +-- products that have NOT been sold (i.e. are NOT in customer_purchases even though they are in product) +SELECT product_name, x.product_id +FROM +( + SELECT product_id + FROM product + EXCEPT + SELECT product_id + FROM customer_purchases +) x +JOIN product p on x.product_id = p.product_id; + +-- sold products that are not in the product table ... not possible +-- expected result: NOTHING +SELECT product_id +FROM customer_purchases +EXCEPT +SELECT product_id +FROM product \ No newline at end of file diff --git a/04_this_cohort/live_code/module_4/NTILE.sql b/04_this_cohort/live_code/module_4/NTILE.sql new file mode 100644 index 000000000..275e164dc --- /dev/null +++ b/04_this_cohort/live_code/module_4/NTILE.sql @@ -0,0 +1,28 @@ +--ntile 4, 5, 100 + +--make quartiles, quintiles, percentiles + +SELECT * +--,NTILE(4) OVER (PARTITION BY vendor_name ORDER BY sales) as quartiles +--,NTILE(5) OVER (PARTITION BY vendor_name ORDER BY sales) as quantiles +--,NTILE(100) OVER (PARTITION BY vendor_name ORDER BY sales) as percentile + +,NTILE(4) OVER (PARTITION BY vendor_name,product_id ORDER BY sales) + +FROM ( + SELECT + md.market_date + ,market_day + ,market_week + ,vendor_name + ,product_id + ,sum(quantity*cost_to_customer_per_qty) as sales + + FROM market_date_info md + JOIN customer_purchases cp + ON md.market_date = cp.market_date + JOIN vendor v + ON cp.vendor_id = v.vendor_id + + GROUP By md.market_date, v.vendor_id -- NOTE(review): product_id is selected but not grouped, so sales is per vendor per day and product_id comes from an arbitrary row - confirm intent +) x \ No newline at end of file diff --git
a/04_this_cohort/live_code/module_4/ROW_NUMBER.sql b/04_this_cohort/live_code/module_4/ROW_NUMBER.sql new file mode 100644 index 000000000..ad7471d1d --- /dev/null +++ b/04_this_cohort/live_code/module_4/ROW_NUMBER.sql @@ -0,0 +1,39 @@ +--ROW_NUMBER +--which product has the highest price per vendor (ROW_NUMBER keeps exactly one row per vendor) + +--outer QUERY: keep the rank-1 rows and look up their product_name + +SELECT x.*,product_name + +FROM ( +--inner QUERY: number each vendor's inventory rows from highest to lowest price + SELECT + vendor_id, + market_date, + product_id, + original_price, + ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price DESC) as price_rank + + FROM vendor_inventory +) x +INNER JOIN product p + ON x.product_id = p.product_id + +WHERE price_rank = 1; + +--highest-cost single purchase row PER customer, across all market dates + +SELECT * +FROM ( + SELECT + customer_id + ,product_id + ,market_date + ,quantity + ,quantity*cost_to_customer_per_qty as cost + ,ROW_NUMBER() OVER(PARTITION BY customer_id ORDER BY quantity*cost_to_customer_per_qty DESC) as sales_rank + + FROM customer_purchases +) x +WHERE sales_rank = 1 +ORDER BY cost DESC \ No newline at end of file diff --git a/04_this_cohort/live_code/module_4/UNION_UNION_ALL.sql b/04_this_cohort/live_code/module_4/UNION_UNION_ALL.sql new file mode 100644 index 000000000..9ea09076f --- /dev/null +++ b/04_this_cohort/live_code/module_4/UNION_UNION_ALL.sql @@ -0,0 +1,31 @@ +--UNION/UNION ALL + +--most and least expensive product per vendor with a UNION + +SELECT vendor_id, product_id, original_price, rn_max as [row_number] +FROM +( + SELECT DISTINCT + vendor_id + ,product_id + ,original_price + ,row_number() OVER(PARTITION BY vendor_id ORDER BY original_price DESC) as rn_max + + FROM vendor_inventory +) +where rn_max = 1 + +UNION -- UNION returned 5 rows...UNION ALL returned 6 rows (vendor #4 duplicated) + +SELECT * +FROM +( + SELECT DISTINCT + vendor_id + ,product_id + ,original_price + ,ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price ASC) as rn_min + + FROM vendor_inventory +) +where rn_min = 1 \ No newline at end of file diff --git
a/04_this_cohort/live_code/module_4/budget_coalesece_NULLIF.sql b/04_this_cohort/live_code/module_4/budget_coalesece_NULLIF.sql new file mode 100644 index 000000000..51f215cba --- /dev/null +++ b/04_this_cohort/live_code/module_4/budget_coalesece_NULLIF.sql @@ -0,0 +1,24 @@ + +-- create a budget temp table +DROP TABLE IF EXISTS temp.budgets; + +-- column types are declared explicitly (this was asked): budget is text (TEXT, because STRING would get NUMERIC affinity in SQLite), current_year and previous_year are integers +CREATE TEMP TABLE IF NOT EXISTS temp.budgets (budget TEXT, current_year INT, previous_year INT); + + +--nothing is in budgets yet +INSERT INTO temp.budgets + +-- so put as row 1 +VALUES ('software',1000,1000) +--and row 2 +, ('candles',300,500); + +--average of current_year over all rows, counting unchanged budgets as 0 +--NULLIF: NULL when current_year equals previous_year, otherwise current_year +--COALESCE: turn that NULL into 0.00 +--AVG then averages those values across all rows +SELECT AVG(COALESCE(NULLIF(current_year, previous_year), 0.00)) +FROM budgets + +--result: (300 [current_year for candles] + 0 [software is unchanged]) / 2 [two rows] = 150.0 diff --git a/04_this_cohort/live_code/module_4/row_rank_dense.sql b/04_this_cohort/live_code/module_4/row_rank_dense.sql new file mode 100644 index 000000000..f65c563f7 --- /dev/null +++ b/04_this_cohort/live_code/module_4/row_rank_dense.sql @@ -0,0 +1,28 @@ +-- dense_rank, rank, row_number + +DROP TABLE IF EXISTS temp.row_rank_dense; + +CREATE TEMP TABLE IF NOT EXISTS temp.row_rank_dense +( +emp_id INT, +salary INT +); + +INSERT INTO temp.row_rank_dense +VALUES(1,200000), + (2,200000), + (3, 160000), + (4, 120000), + (5, 125000), + (6, 165000), + (7, 230000), + (8, 100000), + (9, 165000), + (10, 100000); + +SELECT * +,row_number() OVER(ORDER BY salary desc) as [row_number] +,rank() OVER(ORDER BY salary desc) as [rank] +,dense_rank() OVER(ORDER BY salary desc) as [dense_rank] + +FROM row_rank_dense diff --git a/04_this_cohort/live_code/module_4/string_manipulations.sql
b/04_this_cohort/live_code/module_4/string_manipulations.sql new file mode 100644 index 000000000..0af7c99a1 --- /dev/null +++ b/04_this_cohort/live_code/module_4/string_manipulations.sql @@ -0,0 +1,35 @@ +--string manipulations + +SELECT DISTINCT + +LTRIM(' THOMAS ROSENTHAL ') +,RTRIM(' THOMAS ROSENTHAL ') +,TRIM(' THOMAS ROSENTHAL ') + +,REPLACE('THOMAS ROSENTHAL', ' ', ' WILLIAM ') -- adds my middle name with spaces on both sides +,REPLACE('THOMAS ROSENTHAL','A','') +,REPLACE('THOMAS ROSENTHAL','a','') +--,REPLACE(customer_first_name,'a','') + +,'THOMAS + +ROSENTHAL' + +,replace('THOMAS + +ROSENTHAL', char(10), ' ') -- replaces every line break (char(10)) in this string with a space + +FROM customer; + +-- upper / lower + +SELECT DISTINCT +UPPER(customer_first_name) +,LOWER(customer_first_name) +,customer_first_name || ' ' || customer_last_name as customer_name +,UPPER(customer_first_name) || ' ' || UPPER(customer_last_name) as upper_full_name +, '' || 'thomas' + +FROM customer + +WHERE customer_first_name REGEXP '(a)$' -- keep only first names ending in a; the pattern has to be valid regex \ No newline at end of file diff --git a/04_this_cohort/live_code/module_5/CROSS_JOIN.sql b/04_this_cohort/live_code/module_5/CROSS_JOIN.sql new file mode 100644 index 000000000..919211e0e --- /dev/null +++ b/04_this_cohort/live_code/module_5/CROSS_JOIN.sql @@ -0,0 +1,14 @@ +-- CROSS JOIN +DROP TABLE IF EXISTS temp.sizes; +CREATE TEMP TABLE IF NOT EXISTS temp.sizes (size TEXT); + +INSERT INTO temp.sizes +VALUES('small'), +('medium'), +('large'); + +SELECT * FROM temp.sizes; + +SELECT product_name, size +FROM product +CROSS JOIN temp.sizes \ No newline at end of file diff --git a/04_this_cohort/live_code/module_5/INSERT_UPDATE_DELETE.sql b/04_this_cohort/live_code/module_5/INSERT_UPDATE_DELETE.sql new file mode 100644 index 000000000..df1836f43 --- /dev/null +++ b/04_this_cohort/live_code/module_5/INSERT_UPDATE_DELETE.sql @@ -0,0 +1,29 @@ +-- INSERT UPDATE DELETE + +-- 1) add a product to the
temp TABLE +-- 2) change the product_size for THAT product +-- 3) delete our product + +DROP TABLE IF EXISTS temp.product_expanded; +CREATE TEMP TABLE product_expanded AS + SELECT * FROM product; + +--SELECT * FROM product_expanded + +--INSERT +INSERT INTO product_expanded +VALUES(24, 'Almonds', '1 lbs', 1, 'lbs'); -- no column list, so the values must follow the column order copied from product + +--UPDATE +--change the product_size for almonds to 1/2 kg and its product_qty_type to kg +UPDATE product_expanded +SET product_size = '1/2 kg', product_qty_type = 'kg' +WHERE product_id = 24; + +--DELETE +DELETE FROM product_expanded +--SELECT * FROM product_expanded -- can help you determine you are looking at the right rows before delete +WHERE product_id = 24; + + +SELECT * FROM product_expanded \ No newline at end of file diff --git a/04_this_cohort/live_code/module_5/SELF_JOIN.sql b/04_this_cohort/live_code/module_5/SELF_JOIN.sql new file mode 100644 index 000000000..c97d7c8f8 --- /dev/null +++ b/04_this_cohort/live_code/module_5/SELF_JOIN.sql @@ -0,0 +1,21 @@ +-- SELF JOIN +DROP TABLE IF EXISTS temp.employees; +CREATE TEMP TABLE temp.employees +( +emp_id INT +,emp_name text +,mgr_id INT +); + +insert into temp.employees +Values(1,'Thomas',3) +,(2,'Niyaz', 4) +,(3,'Rohan', null) +,(4, 'Jennie',3); + +SELECT * FROM temp.employees; + +SELECT e.emp_name, m.emp_name as mgr_name +from temp.employees e +left join temp.employees m -- LEFT JOIN keeps Rohan, whose mgr_id is null, with a NULL mgr_name + on e.mgr_id = m.emp_id \ No newline at end of file diff --git a/04_this_cohort/live_code/module_5/dynamic_view.sql b/04_this_cohort/live_code/module_5/dynamic_view.sql new file mode 100644 index 000000000..c45d2dc9e --- /dev/null +++ b/04_this_cohort/live_code/module_5/dynamic_view.sql @@ -0,0 +1,33 @@ +-- DYNAMIC VIEW + +-- THIS ONLY WORKS IF YOU HAVE DONE THE PROPER STEPS FOR IMPORTING +-- 1) update new_customer_purchases to today +-- 2) add the union +-- 3) add the where statement +-- 4) update the market_date_info to include today +DROP VIEW IF EXISTS todays_vendor_daily_sales; +CREATE VIEW IF NOT EXISTS todays_vendor_daily_sales AS + + SELECT + md.market_date
+ ,market_day + ,market_week + ,market_year + ,vendor_name -- from vendor + ,SUM(quantity*cost_to_customer_per_qty) as sales + + + FROM market_date_info md +-- add the new customer_purchases info + INNER JOIN ( + SELECT * FROM customer_purchases + UNION + SELECT * FROM new_customer_purchases )cp + ON md.market_date = cp.market_date + INNER JOIN vendor v + ON cp.vendor_id = v.vendor_id + + WHERE md.market_date = DATE('now') + --WHERE md.market_date= DATE('now','localtime') -- use this instead if DATE('now') (UTC) does not match your local date + + GROUP BY cp.market_date, v.vendor_id diff --git a/04_this_cohort/live_code/module_5/update_statements_for_view.sql b/04_this_cohort/live_code/module_5/update_statements_for_view.sql new file mode 100644 index 000000000..217386753 --- /dev/null +++ b/04_this_cohort/live_code/module_5/update_statements_for_view.sql @@ -0,0 +1,12 @@ +--UPDATE the new_customer_purchases table to be "today" (no WHERE: every row is re-dated) + +UPDATE new_customer_purchases +SET market_date = '2025-08-13'; + +-- Add today's info to the market_date_info +INSERT INTO market_date_info +VALUES('2025-08-13','Wednesday','33','2025','8:00 AM','2:00 PM','nothing interesting','Summer','25','28',0,0); + +SELECT * FROM todays_vendor_daily_sales + + diff --git a/04_this_cohort/live_code/module_5/vendor_daily_sales_view.sql b/04_this_cohort/live_code/module_5/vendor_daily_sales_view.sql new file mode 100644 index 000000000..3d6186964 --- /dev/null +++ b/04_this_cohort/live_code/module_5/vendor_daily_sales_view.sql @@ -0,0 +1,22 @@ +-- VIEW +DROP VIEW IF EXISTS vendor_daily_sales; +CREATE VIEW IF NOT EXISTS vendor_daily_sales AS + + SELECT + md.market_date + ,market_day + ,market_week + ,market_year + ,vendor_name -- from vendor + ,SUM(quantity*cost_to_customer_per_qty) as sales + + + FROM market_date_info md + INNER JOIN customer_purchases cp + ON md.market_date = cp.market_date + INNER JOIN vendor v + ON cp.vendor_id = v.vendor_id + + GROUP BY cp.market_date, v.vendor_id; + +SELECT * FROM vendor_daily_sales \ No newline at end of file
diff --git a/04_this_cohort/live_code/module_5/view_in_another_query.sql b/04_this_cohort/live_code/module_5/view_in_another_query.sql new file mode 100644 index 000000000..30a3ee131 --- /dev/null +++ b/04_this_cohort/live_code/module_5/view_in_another_query.sql @@ -0,0 +1,15 @@ +--using a view in another QUERY + +-- sales by vendor per week, re-aggregated from the vendor_daily_sales view +SELECT +market_year +,market_week +,vendor_name +,SUM(sales) + +FROM vendor_daily_sales + +GROUP BY +market_year +,market_week +,vendor_name \ No newline at end of file diff --git a/04_this_cohort/live_code/module_6/1nf.sql b/04_this_cohort/live_code/module_6/1nf.sql new file mode 100644 index 000000000..b626911c5 --- /dev/null +++ b/04_this_cohort/live_code/module_6/1nf.sql @@ -0,0 +1,31 @@ +drop table if exists temp.skills; +create temp table if not exists temp.skills +( +name TEXT, +OS TEXT, +software TEXT, +supervisor TEXT +); + +insert into temp.skills +values("A","win","VSCode, MSSQL, RStudio", "Eric Yu"), + ("Thomas","mac", "Spyder, SQLite, RStudio", "Rohan Alexander"); + +--1nf: split the comma-separated software list into s1/s2/s3 (s2 and s3 keep the space after the comma) +drop table if exists temp.hold; +CREATE TABLE temp.hold AS +SELECT DISTINCT +name, +OS, +SUBSTR(software, 1, INSTR(software,',')-1) AS s1, +SUBSTR(software,INSTR(software,',')+1, INSTR(SUBSTR(software, INSTR(software, ',')+1),',')-1) as s2, +SUBSTR(software,INSTR(SUBSTR(software,INSTR(software,',')+1),',')+INSTR(software,',')+1) as s3, +supervisor + +FROM skills; + +SELECT name,OS,s1 as software, supervisor FROM hold +UNION +SELECT name,OS,s2 as software, supervisor FROM hold +UNION +SELECT name,OS,s3 as software, supervisor FROM hold diff --git a/04_this_cohort/live_code/module_6/2nf.sql b/04_this_cohort/live_code/module_6/2nf.sql new file mode 100644 index 000000000..234d98d9a --- /dev/null +++ b/04_this_cohort/live_code/module_6/2nf.sql @@ -0,0 +1,52 @@ +-- 2nf +drop table if exists temp.student; +drop table if exists temp.supervisor; +drop table if exists temp.student_software; + +create temp table if not exists temp.supervisor +( +id
INTEGER PRIMARY KEY AUTOINCREMENT, +name TEXT +); + +INSERT INTO temp.supervisor(name) +select distinct supervisor +from skills; + +create temp table if not exists temp.student +( +id INTEGER PRIMARY KEY AUTOINCREMENT, +name TEXT, +OS TEXT, +supervisor_id INTEGER, +CONSTRAINT "fk_supervisor_id" FOREIGN KEY ("supervisor_id") REFERENCES "supervisor" ("id") +); + +INSERT INTO student(name, OS, supervisor_id) +SELECT DISTINCT +h.name +,OS +,s.id AS supervisor_id + +FROM hold h +JOIN supervisor s + on h.supervisor = s.name; + +CREATE TABLE temp.student_software AS +SELECT id, software + +FROM student s +JOIN ( + SELECT name,OS,s1 as software, supervisor FROM hold + UNION + SELECT name,OS,s2 as software, supervisor FROM hold + UNION + SELECT name,OS,s3 as software, supervisor FROM hold +) u +ON s.name = u.name; + +--select * from student +--select * from supervisor +select * from student_software + + diff --git a/04_this_cohort/live_code/module_6/3nf.sql b/04_this_cohort/live_code/module_6/3nf.sql new file mode 100644 index 000000000..d5f037013 --- /dev/null +++ b/04_this_cohort/live_code/module_6/3nf.sql @@ -0,0 +1,37 @@ +--3nf +drop table if exists temp.OS; +drop table if exists temp.software; +create temp table if not exists temp.OS +( +OS_id INTEGER, +OS TEXT, +win_only TEXT +); + +insert into temp.OS +values(1,"win","TRUE"), + (2,"mac","FALSE"); + + +create temp table if not exists temp.software +( +software_id INTEGER PRIMARY KEY AUTOINCREMENT, +software TEXT, +win_only TEXT +); + +INSERT INTO temp.software(software, win_only) +SELECT DISTINCT software, win_only +FROM student_software s +CROSS JOIN ( + SELECT * FROM OS WHERE OS = 'mac' +); + +UPDATE software +SET win_only = 'TRUE' +WHERE software.software = ' MSSQL'; -- leading space is deliberate: the 1nf split keeps the space after each comma + +SELECT * FROM OS +--SELECT * FROM software + + diff --git
b/04_this_cohort/live_code/module_6/SQLite_and_python.ipynb @@ -0,0 +1,717 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "05e1dbf0", + "metadata": {}, + "source": [ + "# Connect to FarmersMarket.db" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f1d8cb62", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sqlite3\n", + "#set your location, slash direction will change for windows and mac\n", + "DB = '/Users/thomas/Documents/GitHub/02-intro_sql/05_src/sql/farmersmarket.db' \n", + "#establish your connection\n", + "conn = sqlite3.connect(DB, isolation_level=None,\n", + " detect_types=sqlite3.PARSE_COLNAMES)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "1204e343", + "metadata": {}, + "outputs": [], + "source": [ + "#run your query, use \"\\\" to allow line breaks\n", + "db_df = pd.read_sql_query(\"SELECT p.*,pc.product_category_name \\\n", + " FROM product p \\\n", + " JOIN product_category pc \\\n", + " ON p.product_category_id = pc.product_category_id\"\n", + " ,conn)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "5c7863ee-08cd-4095-b80a-61f82425bd2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
product_idproduct_nameproduct_sizeproduct_category_idproduct_qty_typeproduct_category_name
01Habanero Peppers - Organicmedium1lbsFresh Fruits & Vegetables
12Jalapeno Peppers - Organicsmall1lbsFresh Fruits & Vegetables
23Poblano Peppers - Organiclarge1unitFresh Fruits & Vegetables
34Banana Peppers - Jar8 oz3unitPackaged Prepared Food
45Whole Wheat Bread1.5 lbs3unitPackaged Prepared Food
56Cut Zinnias Bouquetmedium5unitPlants & Flowers
67Apple Pie10\"3unitPackaged Prepared Food
79Sweet Potatoesmedium1lbsFresh Fruits & Vegetables
810Eggs1 dozen6unitEggs & Meat (Fresh or Frozen)
911Pork Chops1 lb6lbsEggs & Meat (Fresh or Frozen)
1012Baby Salad Lettuce Mix - Bag1/2 lb1unitFresh Fruits & Vegetables
1113Baby Salad Lettuce Mix1 lb1lbsFresh Fruits & Vegetables
1214Red PotatoesNone1NoneFresh Fruits & Vegetables
1315Red Potatoes - Small1NoneFresh Fruits & Vegetables
1416Sweet CornEar1unitFresh Fruits & Vegetables
1517Carrotssold by weight1lbsFresh Fruits & Vegetables
1618Carrots - Organicbunch1unitFresh Fruits & Vegetables
1719Farmer's Market Resuable Shopping Bagmedium7unitNon-Edible Products
1820Homemade Beeswax Candles6\"7unitNon-Edible Products
1921Organic Cherry Tomatoespint1unitFresh Fruits & Vegetables
2022Roma Tomatoesmedium1lbsFresh Fruits & Vegetables
2123Maple Syrup - Jar8 oz2unitPackaged Pantry Goods
228Cherry Pie10\"3unitPackaged Prepared Food
\n", + "
" + ], + "text/plain": [ + " product_id product_name product_size \\\n", + "0 1 Habanero Peppers - Organic medium \n", + "1 2 Jalapeno Peppers - Organic small \n", + "2 3 Poblano Peppers - Organic large \n", + "3 4 Banana Peppers - Jar 8 oz \n", + "4 5 Whole Wheat Bread 1.5 lbs \n", + "5 6 Cut Zinnias Bouquet medium \n", + "6 7 Apple Pie 10\" \n", + "7 9 Sweet Potatoes medium \n", + "8 10 Eggs 1 dozen \n", + "9 11 Pork Chops 1 lb \n", + "10 12 Baby Salad Lettuce Mix - Bag 1/2 lb \n", + "11 13 Baby Salad Lettuce Mix 1 lb \n", + "12 14 Red Potatoes None \n", + "13 15 Red Potatoes - Small \n", + "14 16 Sweet Corn Ear \n", + "15 17 Carrots sold by weight \n", + "16 18 Carrots - Organic bunch \n", + "17 19 Farmer's Market Resuable Shopping Bag medium \n", + "18 20 Homemade Beeswax Candles 6\" \n", + "19 21 Organic Cherry Tomatoes pint \n", + "20 22 Roma Tomatoes medium \n", + "21 23 Maple Syrup - Jar 8 oz \n", + "22 8 Cherry Pie 10\" \n", + "\n", + " product_category_id product_qty_type product_category_name \n", + "0 1 lbs Fresh Fruits & Vegetables \n", + "1 1 lbs Fresh Fruits & Vegetables \n", + "2 1 unit Fresh Fruits & Vegetables \n", + "3 3 unit Packaged Prepared Food \n", + "4 3 unit Packaged Prepared Food \n", + "5 5 unit Plants & Flowers \n", + "6 3 unit Packaged Prepared Food \n", + "7 1 lbs Fresh Fruits & Vegetables \n", + "8 6 unit Eggs & Meat (Fresh or Frozen) \n", + "9 6 lbs Eggs & Meat (Fresh or Frozen) \n", + "10 1 unit Fresh Fruits & Vegetables \n", + "11 1 lbs Fresh Fruits & Vegetables \n", + "12 1 None Fresh Fruits & Vegetables \n", + "13 1 None Fresh Fruits & Vegetables \n", + "14 1 unit Fresh Fruits & Vegetables \n", + "15 1 lbs Fresh Fruits & Vegetables \n", + "16 1 unit Fresh Fruits & Vegetables \n", + "17 7 unit Non-Edible Products \n", + "18 7 unit Non-Edible Products \n", + "19 1 unit Fresh Fruits & Vegetables \n", + "20 1 lbs Fresh Fruits & Vegetables \n", + "21 2 unit Packaged Pantry Goods \n", + "22 3 unit Packaged Prepared Food " + ] + }, + 
"execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db_df" + ] + }, + { + "cell_type": "markdown", + "id": "8b7c36c0", + "metadata": {}, + "source": [ + "Export the query:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ee17555e", + "metadata": {}, + "outputs": [], + "source": [ + "#save\n", + "db_df.to_csv('database-py.CSV', index=False)" + ] + }, + { + "cell_type": "markdown", + "id": "ed14b573", + "metadata": {}, + "source": [ + "# Run a SQL query with pandasql" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ac82fb05", + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install pandasql" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "4f783bd4", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import pandasql as sql #this allows us to run SQLite queries!\n", + "p = \"https://raw.githubusercontent.com/allisonhorst/palmerpenguins/master/inst/extdata/penguins.csv\"\n", + "penguins = pd.read_csv(p) #create a dataframe\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "7892f454", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
0AdelieTorgersen39.118.7181.03750.0male2007
1AdelieTorgersen39.517.4186.03800.0female2007
2AdelieTorgersen40.318.0195.03250.0female2007
3AdelieTorgersenNaNNaNNaNNaNNaN2007
4AdelieTorgersen36.719.3193.03450.0female2007
...........................
339ChinstrapDream55.819.8207.04000.0male2009
340ChinstrapDream43.518.1202.03400.0female2009
341ChinstrapDream49.618.2193.03775.0male2009
342ChinstrapDream50.819.0210.04100.0male2009
343ChinstrapDream50.218.7198.03775.0female2009
\n", + "

344 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", + "0 Adelie Torgersen 39.1 18.7 181.0 \n", + "1 Adelie Torgersen 39.5 17.4 186.0 \n", + "2 Adelie Torgersen 40.3 18.0 195.0 \n", + "3 Adelie Torgersen NaN NaN NaN \n", + "4 Adelie Torgersen 36.7 19.3 193.0 \n", + ".. ... ... ... ... ... \n", + "339 Chinstrap Dream 55.8 19.8 207.0 \n", + "340 Chinstrap Dream 43.5 18.1 202.0 \n", + "341 Chinstrap Dream 49.6 18.2 193.0 \n", + "342 Chinstrap Dream 50.8 19.0 210.0 \n", + "343 Chinstrap Dream 50.2 18.7 198.0 \n", + "\n", + " body_mass_g sex year \n", + "0 3750.0 male 2007 \n", + "1 3800.0 female 2007 \n", + "2 3250.0 female 2007 \n", + "3 NaN NaN 2007 \n", + "4 3450.0 female 2007 \n", + ".. ... ... ... \n", + "339 4000.0 male 2009 \n", + "340 3400.0 female 2009 \n", + "341 3775.0 male 2009 \n", + "342 4100.0 male 2009 \n", + "343 3775.0 female 2009 \n", + "\n", + "[344 rows x 8 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "penguins" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "8036d336", + "metadata": {}, + "outputs": [], + "source": [ + "yrly_penguins = sql.sqldf('''SELECT DISTINCT year, COUNT(*) AS count, \n", + " SUM(COUNT(*)) OVER (ORDER BY year) AS running_total\n", + " FROM penguins\n", + " GROUP BY year''') #run a SQLite query with sqldf()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "80fd4dd6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearcountrunning_total
02007110110
12008114224
22009120344
\n", + "
" + ], + "text/plain": [ + " year count running_total\n", + "0 2007 110 110\n", + "1 2008 114 224\n", + "2 2009 120 344" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "yrly_penguins" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cd3de3f-fb4f-46ac-ad42-23971226e5d0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}
diff --git a/04_this_cohort/live_code/module_6/denormalized.sql b/04_this_cohort/live_code/module_6/denormalized.sql
new file mode 100644
index 000000000..da4208587
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/denormalized.sql
@@ -0,0 +1,15 @@
+-- normal forms creation
+-- Denormalized starting table: all software for a person sits in one comma-separated column.
+drop table if exists temp.skills;
+create temp table if not exists temp.skills
+(
+name TEXT,
+OS TEXT,
+software TEXT,
+supervisor TEXT
+);
+
+-- string literals use single quotes; double quotes denote identifiers
+insert into temp.skills(name, OS, software, supervisor)
+values('A','win','VSCode, MSSQL, RStudio', 'Eric Yu'),
+      ('Thomas','mac', 'Spyder, SQLite, RStudio', 'Rohan Alexander');
diff --git a/04_this_cohort/live_code/module_6/penguins_in_python_sql.sql b/04_this_cohort/live_code/module_6/penguins_in_python_sql.sql
new file mode 100644
index 000000000..2326c1d29
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/penguins_in_python_sql.sql
@@ -0,0 +1,11 @@
+select * from penguins;
+
+-- how many penguins were identified each year, with a running total
+-- (GROUP BY already yields one row per year, so DISTINCT is not needed)
+SELECT year
+,COUNT(*) AS count
+,SUM(COUNT(*)) OVER (ORDER BY year) AS running_total
+
+FROM penguins
+GROUP BY year
+ORDER BY year;
\ No newline at end of file