diff --git a/02_activities/assignments/Assignment2.md b/02_activities/assignments/Assignment2.md
index 05244623b..29787ddbb 100644
--- a/02_activities/assignments/Assignment2.md
+++ b/02_activities/assignments/Assignment2.md
@@ -54,9 +54,9 @@ The store wants to keep customer addresses. Propose two architectures for the CU
 **HINT:** search type 1 vs type 2 slowly changing dimensions.
 
 ```
-Your answer...
+Architecture 1 (Type 1) overwrites the old address. It is simpler to build but does not retain old addresses, so it suits situations where historical data is not important.
+
+Architecture 2 (Type 2) can hold more than one address per customer, so old addresses are retained alongside the current one. It is helpful for analysis because it keeps historical records.
 ```
 
 ***
 
 ## Section 2:
diff --git a/02_activities/assignments/assignment2.sql b/02_activities/assignments/assignment2.sql
index 5ad40748a..ac32c781c 100644
--- a/02_activities/assignments/assignment2.sql
+++ b/02_activities/assignments/assignment2.sql
@@ -20,6 +20,15 @@ The `||` values concatenate the columns into strings.
 Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed.
 All the other rows will remain the same.) */
 
+-- A NULL operand makes the whole `||` concatenation NULL, so NULL parts are
+-- replaced before concatenating: blank for a missing size, 'unit' for a
+-- missing quantity type.
+SELECT
+    COALESCE(product_name, '') || ', '
+    || COALESCE(product_size, '') || ' ('
+    || COALESCE(product_qty_type, 'unit') || ')' AS product_description
+FROM product;
 
 
 --Windowed Functions
@@ -31,18 +40,34 @@ You can either display all rows in the customer_purchases table, with the counte
 each new market date for each customer, or select only the unique market dates per customer
 (without purchase details) and number those visits.
 HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK().
*/
-
+-- All purchase rows are shown. DENSE_RANK gives every row on the same
+-- market_date the same number, so the counter only changes on a new visit
+-- (ROW_NUMBER would number each purchase row instead).
+SELECT
+    customer_id,
+    market_date,
+    DENSE_RANK() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number
+FROM customer_purchases;
 
 
 /* 2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1,
 then write another query that uses this one as a subquery (or temp table) and filters the results to
 only the customer’s most recent visit. */
-
+-- Reversed numbering over the unique visits: the most recent market_date
+-- per customer is labeled 1.
+SELECT
+    customer_id,
+    market_date,
+    DENSE_RANK() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS visit_number
+FROM (
+    SELECT DISTINCT customer_id, market_date
+    FROM customer_purchases
+) AS unique_visits;
+
+-- The reversed query used as a subquery, keeping only each customer's
+-- most recent visit.
+SELECT
+    customer_id,
+    market_date,
+    visit_number
+FROM (
+    SELECT
+        customer_id,
+        market_date,
+        DENSE_RANK() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS visit_number
+    FROM (
+        SELECT DISTINCT customer_id, market_date
+        FROM customer_purchases
+    ) AS unique_visits
+) AS ranked_visits
+WHERE visit_number = 1;
 
 
 /* 3. Using a COUNT() window function, include a value along with each row of the
 customer_purchases table that indicates how many different times that customer has purchased that product_id. */
-
+-- COUNT(*) over the (customer_id, product_id) partition repeats, on every
+-- purchase row, the number of purchase rows for that customer and product.
+SELECT
+    customer_id,
+    product_id,
+    COUNT(*) OVER (PARTITION BY customer_id, product_id) AS purchase_count,
+    market_date
+FROM customer_purchases;
 
 
 -- String manipulations
@@ -56,11 +81,19 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for
 | Habanero Peppers - Organic | Organic |
 
 Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */
-
+-- Text after the first hyphen, trimmed; only hyphenated names are returned.
+SELECT
+    product_name,
+    TRIM(SUBSTR(product_name, INSTR(product_name, '-') + 1)) AS description
+FROM product
+WHERE INSTR(product_name, '-') > 0;
 
 
 /* 2. Filter the query to show any product_size value that contain a number with REGEXP. */
-
+SELECT
+    product_name,
+    product_size
+FROM product
+WHERE product_size REGEXP '[0-9]';
 
 
 
 -- UNION
@@ -73,7 +106,27 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling
 3) Query the second temp table twice, once for the best day, once for the worst day, with a UNION binding them.
*/
-
+-- Worst day: market_date with the lowest total sales (rank 1 ascending).
+SELECT market_date, daily_sales
+FROM (
+    SELECT
+        market_date,
+        SUM(quantity * cost_to_customer_per_qty) AS daily_sales,
+        RANK() OVER (ORDER BY SUM(quantity * cost_to_customer_per_qty)) AS sales_rank
+    FROM customer_purchases
+    GROUP BY market_date
+) AS lowest_first
+WHERE sales_rank = 1
+
+UNION
+
+-- Best day: market_date with the highest total sales (rank 1 descending).
+SELECT market_date, daily_sales
+FROM (
+    SELECT
+        market_date,
+        SUM(quantity * cost_to_customer_per_qty) AS daily_sales,
+        RANK() OVER (ORDER BY SUM(quantity * cost_to_customer_per_qty) DESC) AS sales_rank
+    FROM customer_purchases
+    GROUP BY market_date
+) AS highest_first
+WHERE sales_rank = 1;
 
 
 
 /* SECTION 3 */
@@ -89,7 +142,28 @@ Think a bit about the row counts: how many distinct vendors, product names are t
 How many customers are there (y).
 Before your final group by you should have the product of those two queries (x*y). */
 
-
+-- One row per vendor/product (x) is cross joined with every customer (y),
+-- giving x*y rows before the GROUP BY; each row is worth 5 units at the
+-- vendor's price.
+-- NOTE(review): assumes vendor_inventory exposes the list price as
+-- original_price -- confirm against the schema.
+SELECT
+    v.vendor_name,
+    p.product_name,
+    SUM(5 * vp.original_price) AS revenue
+FROM (
+    SELECT DISTINCT vendor_id, product_id, original_price
+    FROM vendor_inventory
+) AS vp
+INNER JOIN vendor AS v
+    ON vp.vendor_id = v.vendor_id
+INNER JOIN product AS p
+    ON vp.product_id = p.product_id
+CROSS JOIN (
+    SELECT customer_id
+    FROM customer
+) AS c
+GROUP BY
+    v.vendor_name,
+    p.product_name
+ORDER BY
+    v.vendor_name,
+    p.product_name;
 
 
 -- INSERT
 /*1. Create a new table "product_units".
@@ -97,19 +171,28 @@ This table will contain only products where the `product_qty_type = 'unit'`.
 It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`.
 Name the timestamp column `snapshot_timestamp`. */
-
+-- Dropped first so the script can be re-run.
+DROP TABLE IF EXISTS product_units;
+CREATE TEMP TABLE product_units AS
+SELECT
+    *,
+    CURRENT_TIMESTAMP AS snapshot_timestamp
+FROM
+    product
+WHERE
+    product_qty_type = 'unit';
 
 
 /*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp).
 This can be any product you desire (e.g.
add another record for Apple Pie). */
-
+INSERT INTO product_units (product_id, product_name, product_size, product_category_id, product_qty_type, snapshot_timestamp)
+VALUES (27, 'Apple Pie', '10"', 3, 'unit', CURRENT_TIMESTAMP);
 
 
 
 -- DELETE
 /* 1. Delete the older record for the whatever product you added.
 
 HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/
-
-
+-- Keep only the newest 'Apple Pie' row and delete any older ones.
+-- CURRENT_TIMESTAMP has one-second resolution, so rowid (insertion order)
+-- breaks ties when the snapshot and the insert ran within the same second.
+DELETE FROM product_units
+WHERE product_name = 'Apple Pie'
+    AND rowid <> (
+        SELECT rowid
+        FROM product_units
+        WHERE product_name = 'Apple Pie'
+        ORDER BY snapshot_timestamp DESC, rowid DESC
+        LIMIT 1
+    );
 
 
 -- UPDATE
 /* 1.We want to add the current_quantity to the product_units table.
@@ -129,5 +212,15 @@ Finally, make sure you have a WHERE statement to update the right row,
 When you have all of these components, you can run the update statement. */
 
 
-
+-- The terminating semicolon is required: without it the ALTER and the
+-- UPDATE below are parsed as a single, invalid statement.
+ALTER TABLE product_units
+ADD COLUMN current_quantity INT;
+
+-- Every row is updated on purpose. The correlated subquery picks, for each
+-- product, the quantity from its most recent market_date in vendor_inventory;
+-- products with no inventory rows get 0.
+UPDATE product_units
+SET current_quantity = COALESCE((
+    SELECT vi.quantity
+    FROM vendor_inventory vi
+    WHERE vi.product_id = product_units.product_id
+    ORDER BY vi.market_date DESC
+    LIMIT 1
+), 0);