-- remove_duplicates.sql
-- 1. Remove duplicates
-- Step 1a: list the surplus copies in layoffs_staging_01.
-- Rows are grouped by all nine listed columns; within each group the rows are
-- numbered 1, 2, 3, ... so anything numbered above 1 is an extra copy.
WITH numbered_layoffs AS (
    SELECT
        src.*,
        ROW_NUMBER() OVER same_row AS row_num
    FROM layoffs_staging_01 AS src
    WINDOW same_row AS (
        PARTITION BY
            src.company,
            src.location,
            src.industry,
            src.total_laid_off,
            src.percentage_laid_off,
            src.`date`,
            src.stage,
            src.country,
            src.funds_raised_millions
    )
)
SELECT numbered_layoffs.*
FROM numbered_layoffs
WHERE numbered_layoffs.row_num > 1;
-- Spot-check a single company by eye: show every layoffs_staging_01 row
-- recorded for 'Casper'.
SELECT src.*
FROM layoffs_staging_01 AS src
WHERE src.company = 'Casper';
-- Second staging table: the nine layoff columns plus row_num, which holds the
-- ROW_NUMBER() value assigned when the table is loaded.
CREATE TABLE `layoffs_staging_02` (
    `company`               TEXT,
    `location`              TEXT,
    `industry`              TEXT,
    `total_laid_off`        INT DEFAULT NULL,
    `percentage_laid_off`   TEXT,
    `date`                  TEXT,
    `stage`                 TEXT,
    `country`               TEXT,
    `funds_raised_millions` INT DEFAULT NULL,
    `row_num`               INT
)
ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4
COLLATE = utf8mb4_0900_ai_ci;
-- Inspect the current contents of layoffs_staging_02.
SELECT stg.*
FROM layoffs_staging_02 AS stg;
-- Insert data into the staging table.
-- Copies every row of layoffs_staging_01 into layoffs_staging_02 and tags it
-- with row_num, its position within the group of rows that match on all nine
-- source columns. One copy in each group gets row_num = 1; every additional
-- copy gets 2, 3, ...
-- Both column lists are spelled out so values are mapped by name and the load
-- does not depend on the physical column order of either table.
INSERT INTO layoffs_staging_02 (
    company,
    location,
    industry,
    total_laid_off,
    percentage_laid_off,
    `date`,
    stage,
    country,
    funds_raised_millions,
    row_num
)
SELECT
    company,
    location,
    industry,
    total_laid_off,
    percentage_laid_off,
    `date`,
    stage,
    country,
    funds_raised_millions,
    ROW_NUMBER() OVER (
        PARTITION BY
            company,
            location,
            industry,
            total_laid_off,
            percentage_laid_off,
            `date`,
            stage,
            country,
            funds_raised_millions
    ) AS row_num
FROM layoffs_staging_01;
-- Preview the loaded data: show up to ten rows of layoffs_staging_02.
SELECT stg.*
FROM layoffs_staging_02 AS stg
LIMIT 10;
-- List the rows flagged as surplus copies (row_num above 1) before deleting.
SELECT stg.*
FROM layoffs_staging_02 AS stg
WHERE stg.row_num > 1;
-- Remove duplicates: delete every row whose row_num is above 1, leaving only
-- the rows numbered 1.
DELETE FROM layoffs_staging_02
WHERE layoffs_staging_02.row_num > 1;