1. 데이터 생성
https://www.kaggle.com/datasets/nicapotato/womens-ecommerce-clothing-reviews
SELECT * FROM MYDATA_상품리뷰.DATASET2
2. Division 별 평균 평점 계산
* Division Name 그룹핑 > 점수 평균 SELECT `Division Name`, AVG(RATING) FROM MYDATA_상품리뷰.DATASET2 group by 1 ORDER BY 2 DESC; * DEPARTMENT별 평균 점수 SELECT `Department Name`, AVG(RATING) AVG_RATE FROM MYDATA_상품리뷰.DATASET2 group by 1 ORDER BY 2 DESC; * Trend(Department name)의 평점 3점 이하 리뷰 SELECT * from MYDATA_상품리뷰.DATASET2 where `Department Name`='trend' and rating <=3; * 나이 그룹생성 CASE WHEN SELECT CASE WHEN AGE BETWEEN 0 AND 9 THEN '00' WHEN AGE BETWEEN 10 AND 19 THEN '10' WHEN AGE BETWEEN 20 AND 29 THEN '20' WHEN AGE BETWEEN 30 AND 39 THEN '30' WHEN AGE BETWEEN 40 AND 49 THEN '40' WHEN AGE BETWEEN 50 AND 59 THEN '50' WHEN AGE BETWEEN 60 AND 69 THEN '60' WHEN AGE BETWEEN 70 AND 79 THEN '70' WHEN AGE BETWEEN 80 AND 89 THEN '80' WHEN AGE BETWEEN 90 AND 99 THEN '90' END AGEBAND, AGE FROM MYDATA_상품리뷰.DATASET2 WHERE `Department Name`='trend' and rating <=3; * 나이 그룹생성 FLOOR SELECT FLOOR(AGE/10)*10 AGEBAND, AGE FROM MYDATA_상품리뷰.DATASET2 WHERE `Department Name`='trend' and rating <=3; * 나이 그룹별 TREND의 평점 3점이하 리뷰의 연령 분포 SELECT FLOOR(AGE/10)*10 AGEBAND, COUNT(*) COUNT FROM MYDATA_상품리뷰.DATASET2 WHERE `Department Name`='trend' and rating <=3 GROUP BY 1 ORDER BY 2 DESC; * DEPARTMENT별 연령별 리뷰 수 SELECT FLOOR(AGE/10)*10 AGEBAND, COUNT(*) COUNT FROM MYDATA_상품리뷰.DATASET2 WHERE `Department Name`='trend' GROUP BY 1 ORDER BY 2 DESC; * 50대 3점 이하 TREND 리뷰 SELECT * FROM MYDATA_상품리뷰.DATASET2 WHERE `Department Name`='trend' and rating <=3 AND AGE BETWEEN 50 AND 59 LIMIT 10;
3. 평점이 낮은 상품의 주요 Complain
* Department Name, Clothing Name 평균 평점 계산 SELECT `Department Name`,`Clothing ID`, avg(rating) as avg_rate from mydata_상품리뷰.dataset2 group by 1,2; * Department별 순위 생성 SELECT *, ROW_NUMBER() OVER(PARTITION BY `DEPARTMENT NAME` ORDER BY AVG_RATE) AS RNK FROM (SELECT `DEPARTMENT NAME`, `CLOTHING ID`, AVG(RATING) AS AVG_RATE FROM mydata_상품리뷰.DATASET2 GROUP BY 1,2 ) A; * 1~10위 데이터 조회 SELECT * FROM (SELECT *, ROW_NUMBER() OVER(PARTITION BY `DEPARTMENT NAME` ORDER BY AVG_RATE) AS RNK FROM (SELECT `DEPARTMENT NAME`, `CLOTHING ID`, AVG(RATING) AS AVG_RATE FROM MYDATA_상품리뷰.DATASET2 GROUP BY 1,2) A) A WHERE RNK <= 10; * Department별 평균 평점이 낮은 10개 상품 CREATE TEMPORARY TABLE MYDATA_상품리뷰.STAT AS SELECT * FROM ( SELECT *, ROW_NUMBER() OVER(PARTITION BY `DEPARTMENT NAME` ORDER BY AVG_RATE) AS RNK FROM ( SELECT `DEPARTMENT NAME`, `CLOTHING ID`, AVG(RATING) AS AVG_RATE FROM MYDATA_상품리뷰.DATASET2 GROUP BY 1,2 ) A ) A WHERE RNK <= 10; SELECT * FROM mydata_상품리뷰.STAT ; SELECT 'CLOTHING ID' FROM mydata_상품리뷰.STAT WHERE 'DEPARTMENT NAME'='BOTTOMS'; * 리뷰확인 SELECT * FROM mydata_상품리뷰.DATASET2 WHERE 'CLOTHING ID' IN (SELECT 'CLOTHING ID' FROM mydata_상품리뷰.STAT WHERE 'DEPARTMENT NAME'='BOTTOMS') ORDER BY 'CLOTHING ID';
4. 연령별 Worst Department
- 연령, Department별 가장 낮은 점수 계산
- 생성한 점수를 기반으로 Rank 생성
- Rank 값이 1인 데이터 조회
* DEPARTMENT 별, 연령별, RATE 평균 조회 SELECT `Department Name`, FLOOR(AGE/10)*10 AS AGEBAND, AVG(RATING) AS AVG_RATING FROM mydata_상품리뷰.dataset2 GROUP BY 1,2; * RANK 생성, 가장 낮은 점수가 1위가 되도록 오름차순 정렬 SELECT *, ROW_NUMBER() OVER(PARTITION BY AGEBAND ORDER BY AVG_RATING) AS RNK FROM ( SELECT `Department Name`, FLOOR(AGE/10)*10 AS AGEBAND, AVG(RATING) AS AVG_RATING FROM MYDATA_상품리뷰.DATASET2 GROUP BY 1,2) A; * RANK가 1인 값만 조회 SELECT * FROM (SELECT *, ROW_NUMBER() OVER(PARTITION BY AGEBAND ORDER BY AVG_RATING) AS RNK FROM (SELECT `Department Name`, FLOOR(AGE/10)*10 AS AGEBAND, AVG(RATING) AS AVG_RATING FROM MYDATA_상품리뷰.DATASET2 GROUP BY 1,2) A)A WHERE RNK=1;
5. SIZE Complain
* 사이즈가 포함된 리뷰
SELECT `Review Text`,
CASE WHEN `Review Text` LIKE '%SIZE%' THEN 1 ELSE 0 END AS SIZE_YN
FROM MYDATA_상품리뷰.DATASET2;
* 사이즈가 포함된 리뷰의 수
SELECT SUM(CASE WHEN `Review Text` LIKE '%SIZE%' THEN 1 ELSE 0 END) AS N_SIZE,
COUNT(*) AS N_TOTAL
FROM mydata_상품리뷰.dataset2;
RESULT> 사이즈 관련된 리뷰 개수 6133 (총 20603개 중)
N_SIZE = 6133 / N_TOTAL = 20603
* 사이즈별 리뷰 개수
Large, Loose, Small, Tight
SELECT SUM(CASE WHEN `Review Text` LIKE '%SIZE%' THEN 1 ELSE 0 END) AS N_SIZE,
SUM(CASE WHEN `Review Text` LIKE '%large%' THEN 1 ELSE 0 END) AS N_large,
SUM(CASE WHEN `Review Text` LIKE '%loose%' THEN 1 ELSE 0 END) AS N_loose,
SUM(CASE WHEN `Review Text` LIKE '%small%' THEN 1 ELSE 0 END) AS N_small,
SUM(CASE WHEN `Review Text` LIKE '%tight%' THEN 1 ELSE 0 END) AS N_tight,
sum(1) n_total from mydata_상품리뷰.dataset2;
* Department Name 별 리뷰 개수
SELECT `Department Name`,
SUM(CASE WHEN `Review Text` LIKE '%SIZE%' THEN 1 ELSE 0 END) AS N_SIZE,
SUM(CASE WHEN `Review Text` LIKE '%large%' THEN 1 ELSE 0 END) AS N_large,
SUM(CASE WHEN `Review Text` LIKE '%loose%' THEN 1 ELSE 0 END) AS N_loose,
SUM(CASE WHEN `Review Text` LIKE '%small%' THEN 1 ELSE 0 END) AS N_small,
SUM(CASE WHEN `Review Text` LIKE '%tight%' THEN 1 ELSE 0 END) AS N_tight,
sum(1) n_total from mydata_상품리뷰.dataset2
group by 1;
* 연령, Department Name 별 리뷰 개수
SELECT floor(age/10)*10 as ageband, `Department Name`,
SUM(CASE WHEN `Review Text` LIKE '%SIZE%' THEN 1 ELSE 0 END) AS N_SIZE,
SUM(CASE WHEN `Review Text` LIKE '%large%' THEN 1 ELSE 0 END) AS N_large,
SUM(CASE WHEN `Review Text` LIKE '%loose%' THEN 1 ELSE 0 END) AS N_loose,
SUM(CASE WHEN `Review Text` LIKE '%small%' THEN 1 ELSE 0 END) AS N_small,
SUM(CASE WHEN `Review Text` LIKE '%tight%' THEN 1 ELSE 0 END) AS N_tight,
sum(1) n_total from mydata_상품리뷰.dataset2
group by 1,2
order by 1,2;
* 연령, Department Name 별 리뷰 개수 비중 구하기
SELECT floor(age/10)*10 as ageband, `Department Name`,
SUM(CASE WHEN `Review Text` LIKE '%SIZE%' THEN 1 ELSE 0 END)/sum(1) AS N_SIZE,
SUM(CASE WHEN `Review Text` LIKE '%large%' THEN 1 ELSE 0 END)/sum(1) AS N_large,
SUM(CASE WHEN `Review Text` LIKE '%loose%' THEN 1 ELSE 0 END)/sum(1) AS N_loose,
SUM(CASE WHEN `Review Text` LIKE '%small%' THEN 1 ELSE 0 END)/sum(1) AS N_small,
SUM(CASE WHEN `Review Text` LIKE '%tight%' THEN 1 ELSE 0 END)/sum(1) AS N_tight
from mydata_상품리뷰.dataset2
group by 1,2
order by 1,2;
1장 데이터베이스와 SQL
2장 SQL문법
3장 데이터 추가, 삭제, 갱신, 데이터 정합성
4장 자동차 매출 데이터를 이용한 리포트 작성
5장 상품 리뷰 데이터를 이용한 리포트 작성
6장 식품 배송 데이터 분석
7장 UK Commerce 데이터를 이용한 리포트 작성
8장 타이타닉 호 데이터 분석
9장 R, Python 연동