2. 지표추출
* 전체 주문 건수 select count(distinct order_id) as F from instacart_식품배송.orders; * 구매자 수 select count(distinct user_id) as BU from instacart_식품배송.orders; * 상품별 주문건 수 select * from instacart_식품배송.order_products__prior as A left join instacart_식품배송.products as B on A.product_id=B.product_id; select B.product_name, count(distinct A.order_id) as F from instacart_식품배송.order_products__prior as A left join instacart_식품배송.products as B on A.product_id=B.product_id group by 1; * 장바구니에 가장 먼저 넣는 상품 10개 select * from instacart_식품배송.order_products__prior; select product_id, case when add_to_cart_order=1 then 1 else 0 end as F_1st from instacart_식품배송.order_products__prior; 제일 먼저 담긴 장바구니 상품 개수 확인 select product_id, sum(case when add_to_cart_order=1 then 1 else 0 end) as F_1st from instacart_식품배송.order_products__prior group by 1; F_1st 로 순위 매기기 select *, row_number() over(order by F_1st DESC) as RNK from (select product_id, sum(case when add_to_cart_order=1 then 1 else 0 end) as F_1st from instacart_식품배송.order_products__prior group by 1) A; 상위 10개 select * from(select *, row_number() over(order by F_1st DESC) as RNK from (select product_id, sum(case when add_to_cart_order=1 then 1 else 0 end) as F_1st from instacart_식품배송.order_products__prior group by 1) A) base where rnk between 1 and 10; * 시간대별 주문 건수 select order_hour_of_day, count(distinct order_id) as F from instacart_식품배송.orders group by 1 order by 1; * 첫 구매후 다음 구매까지 걸린 평균 일수 select days_since_prior_order from instacart_식품배송.orders where order_number=2; select avg(days_since_prior_order) as avg_recency from instacart_식품배송.orders where order_number=2; * 주문 건당 평균 구매 상품 수 select count(product_id)/count(distinct order_id) as UPT from instacart_식품배송.order_products__prior; * 인당 평균 주문 건수 select count(distinct order_id)/count(distinct user_id) as AVG_F from instacart_식품배송.orders; * 재구매율이 가장 높은 상품 10개 상품별 재구매율 계산 = 재 구매 수 / 전체 구매수 select product_id, sum(case when reordered =1 then 1 else 0 end)/count(*) as RET_ratio from instacart_식품배송.order_products__prior group by 1; 재구매율로 순위 열 생성하기 select *, row_number() over(order by RET_ratio desc) as RNK from (select product_id, sum(case when reordered=1 then 1 else 0 end)/count(*) as RET_ratio from instacart_식품배송.order_products__prior group by 1) A; TOP 10 재구매율 상품 추출 select * from (select *, row_number() over(order by RET_ratio desc) RNK from (select product_id, sum(case when reordered=1 then 1 else 0 end)/count(*) as RET_ratio from instacart_식품배송.order_products__prior group by 1) A) A where RNK between 1 and 10;
3. 구매자 분석
* 10분위 분석 전체를 10분위로 나누어 각 분위수에 해당하는 집단의 성질을 나타내는 방법 주문 건수에 따른 Rank 생성 select *, row_number() over(order by F desc) as RNK from (select user_id, count(distinct order_id) as F from instacart_식품배송.orders group by 1) A; select *, case when RNK between 1 and 316 then 'Quantile_1' when RNK between 317 and 632 then 'Quantile_2' when RNK between 633 and 948 then 'Quantile_3' when RNK between 949 and 1264 then 'Quantile_4' when RNK between 1265 and 1580 then 'Quantile_5' when RNK between 1581 and 1895 then 'Quantile_6' when RNK between 1896 and 2211 then 'Quantile_7' when RNK between 2212 and 2527 then 'Quantile_8' when RNK between 2528 and 2843 then 'Quantile_9' when RNK between 2844 and 3159 then 'Quantile_10' end as quantile from (select *, row_number() over(order by F desc) as RNK from (select user_id, count(distinct order_id) as F from instacart_식품배송.orders group by 1) A) A; 임시 테이블 생성 create temporary table instacart_식품배송.user_quantile as select *, case when RNK <= 316 then 'Quantile_1' when RNK <= 632 then 'Quantile_2' when RNK <= 948 then 'Quantile_3' when RNK <= 1264 then 'Quantile_4' when RNK <= 1580 then 'Quantile_5' when RNK <= 1895 then 'Quantile_6' when RNK <= 2211 then 'Quantile_7' when RNK <= 2527 then 'Quantile_8' when RNK <= 2843 then 'Quantile_9' when RNK <= 3159 then 'Quantile_10' end as quantile from (select *, row_number() over(order by F desc) as RNK from (select user_id, count(distinct order_id) as F from instacart_식품배송.orders group by 1) A) A; 각 분위 수별 전체 주문 건수의 합 select quantile, sum(F) as F from instacart_식품배송.user_quantile group by 1; 총 주문 건수 select sum(F) as F from instacart_식품배송.user_quantile; 비율 확인 select quantile, sum(F)/3220 as F from instacart_식품배송.user_quantile group by 1;
4. 상품분석
* 재비중 높은 상품 = 상품별 재구매 비중(%), 주문건수 계산 select product_id, sum(reordered)/sum(1) as reorder_rate, count(distinct order_id) as F from instacart_식품배송.order_products__prior group by product_id order by reorder_rate desc; * 주문 건수 10개 이하 제외 select A.product_id, sum(reordered)/sum(1) as reorder_rate, count(distinct order_id) as F from instacart_식품배송.order_products__prior as A left join instacart_식품배송.products as B on A.product_id=B.product_id group by product_id having count(distinct order_id) > 10; * 재구매율이 높은 상품 조회 select A.product_id, B.product_name, sum(reordered)/sum(1) as reorder_rate, count(distinct order_id) as F from instacart_식품배송.order_products__prior as A left join instacart_식품배송.products as B on A.product_id=B.product_id group by product_id, B.product_name having count(distinct order_id) > 10
1장 데이터베이스와 SQL
2장 SQL문법
3장 데이터 추가, 삭제, 갱신, 데이터 정합성
4장 자동차 매출 데이터를 이용한 리포트 작성
5장 상품 리뷰 데이터를 이용한 리포트 작성
6장 식품 배송 데이터 분석
7장 UK Commerce 데이터를 이용한 리포트 작성
8장 타이타닉 호 데이터 분석
9장 R, Python 연동