-
Notifications
You must be signed in to change notification settings - Fork 0
/
part1.R
118 lines (93 loc) · 3.96 KB
/
part1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# load + copy the data---------------------------------------------------
library(tidyverse)
library(tidyselect)
library(plyr)
library(stargazer)
# Read the raw input file; column types are left to read_csv()'s guesser.
dfa <- readr::read_csv(file = "psam_pusa.csv")
# Second binding to the freshly loaded table, kept as a fallback copy.
dfa_copy <- dfa
# selecting variables -----------------------------------------------------
# Narrow the raw table to the wage column and the candidate covariates.
dfa_subset <- select(
  dfa,
  WAGP, AGEP, SEX, COW, RAC1P,
  ENG, CIT, SCHL, NATIVITY, MIGSP
)
# dropping NAs, filtering WAGP, SCHL, COW ---------------------------------
# Restrict the sample to complete rows with positive wages and to the
# education (SCHL) and class-of-worker (COW) codes analysed below.
dfa_subset <- dfa_subset %>%
  # NOTE(review): drop_na() with no arguments removes a row when ANY selected
  # column is NA, including MIGSP, CIT and RAC1P, which the regression further
  # down never uses -- confirm this sample restriction is intended.
  drop_na() %>%
  # Strictly positive wages only, so that log(WAGP) is finite.
  filter(WAGP > 0) %>%
  # Compare the codes as integers with %in% instead of an unanchored regex:
  # a pattern such as '01|16|...' matches substrings and silently fails for
  # code 1 when the column was parsed as a number rather than as "01".
  filter(as.integer(SCHL) %in% c(1L, 16L, 19L, 21L, 22L, 23L, 24L)) %>%
  filter(as.integer(COW) %in% c(1L, 2L, 3L, 4L, 7L))
# creating variables and labelling the factors ----------------------------
# Adds the log-wage outcome and converts the categorical codes to factors.
# Each labelled factor spells out its code -> label mapping explicitly
# instead of overwriting levels() by position, so a code that is missing
# from the data (or sorts differently) can no longer shift every label.
# Codes outside the listed levels become NA rather than being mislabelled.
# NOTE(review): NATIVITY, SEX and ENG are assumed to be coded 1..n in the
# order of the labels below -- verify against the data dictionary in use.
# dplyr::mutate is qualified because plyr is attached after tidyverse at the
# top of the file, so a bare mutate() would resolve to plyr::mutate().
dfa_subset <- dfa_subset %>%
  dplyr::mutate(
    lwages = log(WAGP),
    NATIVITY = factor(
      as.integer(NATIVITY),
      levels = c(1L, 2L),
      labels = c("Native", "Foreign")
    ),
    CIT = factor(CIT),
    COW = factor(
      as.integer(COW),
      levels = c(1L, 2L, 3L, 4L, 7L),
      labels = c("for_profit", "non_profit", "local_gvt", "state_gvt",
                 "self_emp")
    ),
    SEX = factor(
      as.integer(SEX),
      levels = c(1L, 2L),
      labels = c("male", "female")
    ),
    RAC1P = factor(RAC1P),
    ENG = factor(
      as.integer(ENG),
      levels = c(1L, 2L, 3L, 4L),
      labels = c("very_well", "well", "not_well", "not_at_all")
    ),
    SCHL = factor(
      as.integer(SCHL),
      levels = c(1L, 16L, 19L, 21L, 22L, 23L, 24L),
      labels = c("no_educ", "high_school", "some_college", "bachelors",
                 "masters", "professional", "phd")
    )
  )
# running the regression --------------------------------------------------
# OLS of log wages on nativity, age, sex, class of worker, schooling and
# English ability.
immi.wages <- lm(
  formula = lwages ~ NATIVITY + AGEP + SEX + COW + SCHL + ENG,
  data = dfa_subset
)
summary(immi.wages)
# Plain-text regression table with the intercept listed first.
stargazer(
  immi.wages,
  type = "text",
  style = "default",
  title = "Table 1: Results",
  object.names = TRUE,
  no.space = FALSE,
  intercept.bottom = FALSE
)
# summary tables ----------------------------------------------------------
# Wage summaries per class of worker x nativity cell, sorted from highest to
# lowest. Every verb is qualified with dplyr:: because plyr is attached after
# tidyverse at the top of the file and masks arrange()/desc()/summarise().
# The result is ungrouped explicitly so no leftover grouping leaks into
# later steps.

# Mean wage per cell.
cow_nativity_meanwages <- dfa_subset %>%
  dplyr::group_by(COW, NATIVITY) %>%
  dplyr::summarise(mean_wages = mean(WAGP)) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(dplyr::desc(mean_wages))

# Median wage per cell.
cow_nativity_medwages <- dfa_subset %>%
  dplyr::group_by(COW, NATIVITY) %>%
  dplyr::summarise(median_wages = median(WAGP)) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(dplyr::desc(median_wages))

# Mean of log wages per cell.
cow_nativity_avglogwages <- dfa_subset %>%
  dplyr::group_by(COW, NATIVITY) %>%
  dplyr::summarise(avg_log_wages = mean(lwages)) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(dplyr::desc(avg_log_wages))
# Wage summaries per education level x nativity cell, sorted from highest to
# lowest. Verbs are qualified with dplyr:: because plyr is attached after
# tidyverse at the top of the file and masks arrange()/desc()/summarise().
# The result is ungrouped explicitly so no leftover grouping leaks into
# later steps.

# Mean wage per cell.
educ_nativity_meanwages <- dfa_subset %>%
  dplyr::group_by(SCHL, NATIVITY) %>%
  dplyr::summarise(mean_wages = mean(WAGP)) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(dplyr::desc(mean_wages))

# Mean of log wages per cell.
educ_nativity_avglogwages <- dfa_subset %>%
  dplyr::group_by(SCHL, NATIVITY) %>%
  dplyr::summarise(avg_log_wages = mean(lwages)) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(dplyr::desc(avg_log_wages))
# plotting wages by class of worker and education, separated by nativity ----
# Mean wage per class of worker, one panel per nativity group.
ggplot(
  data = cow_nativity_meanwages,
  mapping = aes(x = COW, y = mean_wages)
) +
  geom_col() +
  facet_wrap(~NATIVITY) +
  labs(x = "Class of Worker", y = "Mean of Wages")

# Median wage per class of worker, one panel per nativity group.
ggplot(
  data = cow_nativity_medwages,
  mapping = aes(x = COW, y = median_wages)
) +
  geom_col() +
  facet_wrap(~NATIVITY) +
  labs(x = "Class of Worker", y = "Median of Wages")
# Base plot object: average log wage per class of worker.
cowlognat <- ggplot(
  data = cow_nativity_avglogwages,
  mapping = aes(x = COW, y = avg_log_wages)
)
# Gold bars with a gray outline, one panel per nativity group.
cowlognat +
  geom_col(colour = "gray", fill = "#bc912c") +
  facet_wrap(~NATIVITY) +
  labs(x = "Class of Worker", y = "Log of Wages")
# Mean wage per education level, one panel per nativity group.
# The colour is a fixed parameter of geom_col(), not an aes() mapping: a
# constant string inside aes() is treated as a one-level variable, which
# draws the default palette colour and adds a legend titled "#bc912c".
# Fill/outline match the other gold bar charts in this file.
ggplot(educ_nativity_meanwages, aes(SCHL, mean_wages)) +
  geom_col(fill = "#bc912c", color = "gray") +
  facet_wrap(~NATIVITY) +
  xlab("Highest Level of Education Attained") +
  ylab("Mean of Wages")
# Average log wage per education level, one panel per nativity group.
# The colour is set as a fixed geom parameter rather than inside aes(),
# where a constant string would be mapped as a one-level variable (default
# palette colour plus a spurious legend) instead of being used as a colour.
# Fill/outline match the other gold bar charts in this file.
ggplot(educ_nativity_avglogwages, aes(SCHL, avg_log_wages)) +
  geom_col(fill = "#bc912c", color = "gray") +
  facet_wrap(~NATIVITY) +
  xlab("Highest Level of Education Attained") +
  ylab("Average Logarithm of Wages")
# Base plot object: average log wage per education level.
# The former `colour = "#bc912c"` entry in aes() is removed: it had no effect
# on the layer below (which sets colour as a fixed parameter) but would have
# been inherited as a one-level mapping by any other layer added to this
# object.
testcolorplot <- ggplot(educ_nativity_avglogwages, aes(SCHL, avg_log_wages))
# Gold bars with a gray outline, one panel per nativity group.
testcolorplot +
  geom_col(fill = "#bc912c", color = "gray") +
  facet_wrap(~NATIVITY) +
  xlab("Highest Level of Education Attained") +
  ylab("Average Logarithm of Wages")