-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquarterback_code
81 lines (75 loc) · 2.04 KB
/
quarterback_code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
-- inserting data into quarteback from csv
COPY quarterback
FROM
'C:\Users\bhamm\OneDrive\Documents\FFdb_raw\mySQL\QB.csv'
DELIMITER ','
CSV HEADER;
-- counting week, player (should have 173), games, and season rows
SELECT
COUNT(DISTINCT week),
COUNT(DISTINCT player),
COUNT(DISTINCT games),
COUNT(DISTINCT season)
FROM
quarterback;
-- filtering out and deleting quarterbacks with no pass_att or carries in 2023
-- players without 2023 stats add no value to the database
-- first I first count the number of players being dropped before dropping them
SELECT
player
FROM
quarterback
WHERE
season = 2023
GROUP BY
player
HAVING
SUM(pass_att) = 0
AND SUM(carries) = 0;
-- the results of the above query shows 37 players that needed dropped
-- dropping players
DELETE FROM quarterback
WHERE player IN (
SELECT
player
FROM
quarterback
WHERE
season = 2023
GROUP BY
player
HAVING
SUM(pass_att) = 0
AND SUM(carries) = 0);
-- running the query below returns 136 players which is 37 less than the 104 we started with
SELECT
COUNT(DISTINCT player)
FROM
quarterback;
-- trimming all whitespaces from text fields
SELECT
trim(player),
trim(team_code),
trim(position)
FROM
quarterback;
-- checking for duplicates by cross referencing games played per season
-- creating a pivot table using the FILTER clause to display games played per season
-- max games played per season from 2023-2021 = 17 and from 2020-2018 = 16
-- players with more games played than that suggest errors in the data
SELECT
DISTINCT player,
COALESCE(SUM(games) FILTER(WHERE season = 2023),0) AS "2023",
COALESCE(SUM(games) FILTER(WHERE season = 2022),0) AS "2022",
COALESCE(SUM(games) FILTER(WHERE season = 2021),0) AS "2021",
COALESCE(SUM(games) FILTER(WHERE season = 2020),0) AS "2020",
COALESCE(SUM(games) FILTER(WHERE season = 2019),0) AS "2019",
COALESCE(SUM(games) FILTER(WHERE season = 2018),0) AS "2018"
FROM
quarterback
GROUP BY
player
ORDER BY
"2023" DESC,
player;
-- All players have the correct max total number of games