Skip to content

Commit

Permalink
add inequality plot
Browse files Browse the repository at this point in the history
  • Loading branch information
j-jayes committed Dec 12, 2023
1 parent d7d4d8c commit 3ee43ae
Show file tree
Hide file tree
Showing 11 changed files with 412 additions and 152 deletions.
Binary file modified .DS_Store
Binary file not shown.
48 changes: 28 additions & 20 deletions do_files/02-first_stage-test.do
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@ set more off
*---------------------------------------------------*

* Set local parameter for data trimming
* Every summarize/reg below keeps only observations with distance_to_line below this value.
* NOTE(review): the unit of distance_to_line is not visible here (presumably km) - confirm.
local distance_cutoff = 250
local distance_cutoff = 300

* Setting the working directory
* All data and results paths below are relative to this project root.
* NOTE(review): these are machine-specific absolute paths; only the one matching the current machine should be active.
cd "C:\Users\User\Documents\Recon\paper-3-analysis"
cd "/Users/jonathanjayes/Documents/PhD/paper-3-analysis/"
* cd "C:\Users\User\Documents\Recon\paper-3-analysis"

* Import parish-level power station data from Excel
* firstrow reads variable names from the first spreadsheet row; clear replaces the data in memory.
import excel "data/first-stage/02_firststage-data.xlsx", sheet("Sheet1") firstrow clear
import excel "data/first-stage/04_firststage-data.xlsx", sheet("Sheet1") firstrow clear

* Define the results directory for storing output
* The esttab calls below write their .tex tables under this directory.
local results_dir "results/first-stage/"
Expand All @@ -29,11 +30,18 @@ local results_dir "results/first-stage/"
* Variable labels; esttab's label option prints these as the row names in the exported tables.
label var treated "Treated parishes"
label var area "Parish area in square kilometers"
label var population_1900 "Parish population in 1900"
label var latitude "Latitude"
label var longitude "Longitude"

* Create log-transformed outcomes for power-related variables
gen log_total_power = ln(total_power)
gen log_total_power_transmitted = ln(total_power_transmitted)
gen log_total_power_generated = ln(total_power_generated)
* ln() of zero is missing in Stata, so the + 1 shift keeps zero-valued observations from being dropped.
gen log_total_power = ln(total_power + 1)
gen log_total_power_transmitted = ln(total_power_transmitted + 1)
gen log_total_power_generated = ln(total_power_generated + 1)

* Create log-transformed outcomes for connection-count variables (same + 1 shift as above)
gen log_total_connections = ln(total_connections + 1)
gen log_num_connections_transmitted = ln(num_connections_transmitted + 1)
gen log_num_connections_generated = ln(num_connections_generated + 1)

*---------------------------------------------------*
* Regression Analyses for Total Power (in levels)
Expand All @@ -42,21 +50,21 @@ gen log_total_power_generated = ln(total_power_generated)
* Regression on total power
* The dependent-variable mean is taken over the distance-trimmed sample and reported in the table footer.
* NOTE(review): that sample can differ from e(sample) if any regressor is missing - confirm.
quietly summarize total_power if distance_to_line < `distance_cutoff'
local mean1 = round(r(mean), 0.01)
reg total_power treated area population_1900 if distance_to_line < `distance_cutoff'
* vce(robust): the exported table footnote reports robust standard errors, so estimate them.
reg total_power treated area population_1900 latitude longitude if distance_to_line < `distance_cutoff', vce(robust)
eststo Model1
estadd scalar mean_depvar = `mean1'

* Regression on total power transmitted
quietly summarize total_power_transmitted if distance_to_line < `distance_cutoff'
local mean2 = round(r(mean), 0.01)
reg total_power_transmitted treated area population_1900 if distance_to_line < `distance_cutoff'
* vce(robust): keeps the standard errors consistent with the table footnote.
reg total_power_transmitted treated area population_1900 latitude longitude if distance_to_line < `distance_cutoff', vce(robust)
eststo Model2
estadd scalar mean_depvar = `mean2'

* Regression on total power generated
quietly summarize total_power_generated if distance_to_line < `distance_cutoff'
local mean3 = round(r(mean), 0.01)
reg total_power_generated treated area population_1900 if distance_to_line < `distance_cutoff'
* vce(robust): keeps the standard errors consistent with the table footnote.
reg total_power_generated treated area population_1900 latitude longitude if distance_to_line < `distance_cutoff', vce(robust)
eststo Model3
estadd scalar mean_depvar = `mean3'

Expand All @@ -77,28 +85,28 @@ eststo clear
* Regression on log-transformed total power
* The dependent-variable mean is taken over the distance-trimmed sample and reported in the table footer.
* NOTE(review): that sample can differ from e(sample) if any regressor is missing - confirm.
quietly summarize log_total_power if distance_to_line < `distance_cutoff'
local mean1 = round(r(mean), 0.01)
reg log_total_power treated area population_1900 if distance_to_line < `distance_cutoff'
* vce(robust): the table footnote written by esttab reports robust standard errors, so estimate them.
reg log_total_power treated area population_1900 latitude longitude if distance_to_line < `distance_cutoff', vce(robust)
eststo Model1
estadd scalar mean_depvar = `mean1'

* Regression on log-transformed total power transmitted
quietly summarize log_total_power_transmitted if distance_to_line < `distance_cutoff'
local mean2 = round(r(mean), 0.01)
reg log_total_power_transmitted treated area population_1900 if distance_to_line < `distance_cutoff'
* vce(robust): keeps the standard errors consistent with the table footnote.
reg log_total_power_transmitted treated area population_1900 latitude longitude if distance_to_line < `distance_cutoff', vce(robust)
eststo Model2
estadd scalar mean_depvar = `mean2'

* Regression on log-transformed total power generated
quietly summarize log_total_power_generated if distance_to_line < `distance_cutoff'
local mean3 = round(r(mean), 0.01)
reg log_total_power_generated treated area population_1900 if distance_to_line < `distance_cutoff'
* vce(robust): keeps the standard errors consistent with the table footnote.
reg log_total_power_generated treated area population_1900 latitude longitude if distance_to_line < `distance_cutoff', vce(robust)
eststo Model3
estadd scalar mean_depvar = `mean3'

* Tabulate results for log-transformed power variables
* Writes Model1-Model3 to a LaTeX table in the results directory; replace overwrites an existing file
* and label prints variable labels instead of variable names.
* Footer rows: R-squared, N, F and the mean_depvar scalar attached to each model with estadd.
* Cells: coefficient with significance stars (3 decimals), standard error in parentheses (2 decimals).
* NOTE(review): the footnote is only accurate if the stored models were estimated with
* heteroskedasticity-robust standard errors (e.g. vce(robust)) - verify against the reg commands.
esttab Model1 Model2 Model3 using `results_dir'/022-first_stage_power_log.tex, label replace ///
stats(r2 N F mean_depvar, fmt(2 0 3 2) labels("R-squared" "Observations" "F-stat" "Mean Dependent Var")) ///
mlabels("Total Power" "Total Power Transmitted" "Total Power Generated") ///
mlabels("log(Total Power)" "log(Total Power Transmitted)" "log(Total Power Generated)") ///
cells(b(star fmt(3)) se(par fmt(2))) ///
addnotes("Robust standard errors in parentheses")

Expand All @@ -107,30 +115,30 @@ esttab Model1 Model2 Model3 using `results_dir'/022-first_stage_power_log.tex, l
*---------------------------------------------------*

* Regression on log-transformed total connections (ln(x + 1))
* The dependent-variable mean is taken over the distance-trimmed sample and reported in the table footer.
* NOTE(review): that sample can differ from e(sample) if any regressor is missing - confirm.
quietly summarize total_connections if distance_to_line < `distance_cutoff'
quietly summarize log_total_connections if distance_to_line < `distance_cutoff'
local mean1 = round(r(mean), 0.01)
reg total_connections treated area population_1900 if distance_to_line < `distance_cutoff'
* vce(robust): the table footnote written by esttab reports robust standard errors, so estimate them.
reg log_total_connections treated area population_1900 latitude longitude if distance_to_line < `distance_cutoff', vce(robust)
eststo Model1
estadd scalar mean_depvar = `mean1'

* Regression on log-transformed number of connections transmitted (ln(x + 1))
quietly summarize num_connections_transmitted if distance_to_line < `distance_cutoff'
quietly summarize log_num_connections_transmitted if distance_to_line < `distance_cutoff'
local mean2 = round(r(mean), 0.01)
reg num_connections_transmitted treated area population_1900 if distance_to_line < `distance_cutoff'
* vce(robust): keeps the standard errors consistent with the table footnote.
reg log_num_connections_transmitted treated area population_1900 latitude longitude if distance_to_line < `distance_cutoff', vce(robust)
eststo Model2
estadd scalar mean_depvar = `mean2'

* Regression on log-transformed number of connections generated (ln(x + 1))
quietly summarize num_connections_generated if distance_to_line < `distance_cutoff'
quietly summarize log_num_connections_generated if distance_to_line < `distance_cutoff'
local mean3 = round(r(mean), 0.01)
reg num_connections_generated treated area population_1900 if distance_to_line < `distance_cutoff'
* vce(robust): keeps the standard errors consistent with the table footnote.
reg log_num_connections_generated treated area population_1900 latitude longitude if distance_to_line < `distance_cutoff', vce(robust)
eststo Model3
estadd scalar mean_depvar = `mean3'

* Tabulate results for connections variables
* Writes Model1-Model3 to a LaTeX table in the results directory; replace overwrites an existing file
* and label prints variable labels instead of variable names.
* Footer rows: R-squared, N, F and the mean_depvar scalar attached to each model with estadd.
* Cells: coefficient with significance stars (3 decimals), standard error in parentheses (2 decimals).
* NOTE(review): the footnote is only accurate if the stored models were estimated with
* heteroskedasticity-robust standard errors (e.g. vce(robust)) - verify against the reg commands.
esttab Model1 Model2 Model3 using `results_dir'/023-first_stage_connections.tex, label replace ///
stats(r2 N F mean_depvar, fmt(2 0 3 2) labels("R-squared" "Observations" "F-stat" "Mean Dependent Var")) ///
mlabels("Total connections" "N. transformers" "N. generators (water, steam, diesel)") ///
mlabels("log(Total connections)" "log(N. transformers)" "log(N. generators) (water, steam, diesel)") ///
cells(b(star fmt(3)) se(par fmt(2))) ///
addnotes("Robust standard errors in parentheses")

34 changes: 34 additions & 0 deletions figure-code/inequliaty-dot-plot
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Dot plot of income quintile shares for treated vs. control individuals.
#
# The plot needs one row per (quintile, group) with the columns `decile_share`,
# `treated` and a per-group label column, so the wide table of shares is
# reshaped to that layout before plotting.
library(tibble)
library(dplyr)   # %>%, transmute(), bind_rows()
library(ggplot2)
library(ggtext)  # element_markdown() renders the HTML-styled title

# Quintile shares in percent, one column per group.
# NOTE(review): figure-code/inequliaty-dot-plot.R assigns the series whose top
# quintile holds 41% to the *treated* group, the reverse of this table, and its
# values differ slightly -- confirm which assignment is correct.
shares <- tibble(
  pctile = c(1, 2, 3, 4, 5),
  Treated_Share_of_Income = c(6, 9, 13, 21, 51),
  Control_Share_of_Income = c(6, 12, 17, 24, 41)
)

# Long format. Shares become proportions because percent_format() multiplies
# by 100. Each group gets its own label column (NA for the other group after
# bind_rows) so the two geom_text() layers can nudge labels in opposite
# directions.
data <- bind_rows(
  shares %>%
    transmute(
      pctile,
      decile_share = Treated_Share_of_Income / 100,
      treated = "Treated",
      decile_share_lab_treated = scales::percent(decile_share, accuracy = 1)
    ),
  shares %>%
    transmute(
      pctile,
      decile_share = Control_Share_of_Income / 100,
      treated = "Control",
      decile_share_lab_control = scales::percent(decile_share, accuracy = 1)
    )
)

# NOTE(review): the font families 'roboto' and 'ibm' are not registered in this
# script (the sibling .R script registers them via showtext) -- confirm they
# are available before rendering.
data %>%
  ggplot(aes(y = pctile, x = decile_share, colour = treated)) +
  geom_point(cex = 8) +
  geom_line() +
  # na.rm = TRUE: rows of the other group carry NA labels by construction.
  geom_text(aes(label = decile_share_lab_control), hjust = 1.3, size = 6, na.rm = TRUE) +
  geom_text(aes(label = decile_share_lab_treated), hjust = -.5, size = 6, na.rm = TRUE) +
  labs(
    x = "Share of total income in parish",
    y = "Percentile of income distribution",
    title = "Income quintile shares for
<b><span style = 'color:#9C6114;'>treated</span></b> and
<b><span style = 'color:#000080;'>control</span></b> individuals"
  ) +
  # Quintile index 1..5 -> 20%..100% (scale = 200 would print 200%..1000%).
  scale_y_continuous(labels = scales::percent_format(scale = 20)) +
  scale_x_continuous(labels = scales::percent_format()) +
  # Named values pin each colour to its group, matching the colours in the title.
  scale_colour_manual(values = c(Control = "#000080", Treated = "#9C6114")) +
  theme(
    # panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),

    plot.title = element_markdown(size = 32, family = 'roboto'),
    plot.title.position = "plot",
    legend.position = "none",
    text = element_text(family = 'ibm', size = 18)
  )
58 changes: 58 additions & 0 deletions figure-code/inequliaty-dot-plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
library(tidyverse)
library(ggtext)

library(showtext)
## Loading Google fonts (https://fonts.google.com/)
font_add_google("IBM Plex Mono", "ibm")
font_add_google("Roboto", "roboto")

showtext_opts(dpi = 300)
showtext_auto(enable = TRUE)
theme_set(theme_light())

# Quintile income shares by group. Each group carries its own label column so
# that, after bind_rows(), the two geom_text() layers can place control labels
# to the left of the points and treated labels to the right.
treated_data <- tibble(
  pctile = c(1, 2, 3, 4, 5),
  decile_share = c(0.062, 0.12, 0.17, 0.24, 0.41),
  treated = "Treated"
) %>%
  mutate(decile_share_lab_treated = scales::percent(decile_share, accuracy = 1))

control_data <- tibble(
  pctile = c(1, 2, 3, 4, 5),
  decile_share = c(0.058, 0.082, 0.13, 0.2, 0.53),
  treated = "Control"
) %>%
  mutate(decile_share_lab_control = scales::percent(decile_share, accuracy = 1))

# Stacking leaves each label column NA for the other group's rows.
data <- bind_rows(treated_data, control_data)

data %>%
  ggplot(aes(y = pctile, x = decile_share, colour = treated)) +
  geom_point(cex = 8) +
  geom_line() +
  # na.rm = TRUE: the NA labels are intentional, so drop them without the
  # "Removed rows containing missing values" warning on each layer.
  geom_text(aes(label = decile_share_lab_control), hjust = 1.3, size = 6, colour = "black", na.rm = TRUE) +
  geom_text(aes(label = decile_share_lab_treated), hjust = -.5, size = 6, colour = "black", na.rm = TRUE) +
  labs(
    x = "Share of total income in parish",
    y = "Percentile of income distribution",
    colour = "Individual's birth parish"
    # title = "Income quintile shares for
    # <b><span style = 'color:#9C6114;'>treated</span></b> and
    # <b><span style = 'color:#000080;'>control</span></b> individuals"
  ) +
  # Quintile index 1..5 is shown as 20%..100%.
  scale_y_continuous(labels = scales::percent_format(scale = 20)) +
  # Anchor the share axis at zero; the upper limit is left to the data.
  scale_x_continuous(labels = scales::percent_format(), limits = c(0, NA)) +
  scale_colour_brewer(palette = "Dark2") +
  theme(
    # panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_markdown(size = 32, family = "roboto"),
    plot.title.position = "plot",
    legend.position = "bottom",
    text = element_text(family = "ibm", size = 18)
  )

# ggsave() writes the most recently created plot; dpi matches showtext_opts(dpi = 300).
ggsave(filename = here::here("figures/07-inequality-plot.jpeg"), device = "jpeg", width = 290, height = 231, units = "mm", dpi = 300)
Binary file modified figures/07-inequality-plot.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
54 changes: 29 additions & 25 deletions results/first-stage/021-first_stage_power.tex
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
{
\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}
\begin{tabular}{l*{3}{c}}
\hline\hline
&\multicolumn{1}{c}{(1)} &\multicolumn{1}{c}{(2)} &\multicolumn{1}{c}{(3)} \\
& Total Power &Total Power Transmitted &Total Power Generated \\
& b/se & b/se & b/se \\
\hline
Treated parishes & 43401.306\sym{***}& 43591.801\sym{***}& -190.495 \\
& (3903.05) & (3893.67) & (280.71) \\
Parish area in square kilometers& 0.000\sym{***}& 0.000\sym{***}& 0.000 \\
& (0.00) & (0.00) & (0.00) \\
Parish population in 1900& 1.899\sym{***}& 1.887\sym{***}& 0.012 \\
& (0.33) & (0.33) & (0.02) \\
Constant & -7919.342\sym{***}& -8113.159\sym{***}& 193.817 \\
& (1669.99) & (1665.98) & (120.11) \\
\hline
R-squared & 0.10 & 0.10 & 0.00 \\
Observations & 1813 & 1813 & 1813 \\
F-stat & 69.772 & 70.253 & 0.251 \\
Mean Dependent Var & 5983.27 & 5781.34 & 201.93 \\
\hline\hline
\multicolumn{4}{l}{\footnotesize Robust standard errors in parentheses}\\
\end{tabular}
}
{
\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}
\begin{tabular}{l*{3}{c}}
\hline\hline
&\multicolumn{1}{c}{(1)} &\multicolumn{1}{c}{(2)} &\multicolumn{1}{c}{(3)} \\
& Total Power &Total Power Transmitted &Total Power Generated \\
& b/se & b/se & b/se \\
\hline
Treated parishes & 45836.996\sym{***}& 45996.880\sym{***}& -159.884 \\
& (4192.85) & (4181.79) & (315.24) \\
Parish area in square kilometers& 0.000\sym{***}& 0.000\sym{***}& 0.000 \\
& (0.00) & (0.00) & (0.00) \\
Parish population in 1900& 1.940\sym{***}& 1.921\sym{***}& 0.018 \\
& (0.34) & (0.34) & (0.03) \\
Latitude & -2745.209\sym{*} & -2599.468\sym{*} & -145.741 \\
& (1214.19) & (1210.99) & (91.29) \\
Longitude & 1306.473 & 1280.837 & 25.636 \\
& (962.17) & (959.63) & (72.34) \\
Constant & 132245.430\sym{*} & 123900.541\sym{*} & 8344.889 \\
& (63061.21) & (62894.91) & (4741.20) \\
\hline
R-squared & 0.10 & 0.10 & 0.00 \\
Observations & 1684 & 1684 & 1684 \\
F-stat & 38.909 & 39.184 & 0.853 \\
Mean Dependent Var & 6477.95 & 6209.20 & 268.74 \\
\hline\hline
\multicolumn{4}{l}{\footnotesize Robust standard errors in parentheses}\\
\end{tabular}
}
54 changes: 29 additions & 25 deletions results/first-stage/022-first_stage_power_log.tex
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
{
\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}
\begin{tabular}{l*{3}{c}}
\hline\hline
&\multicolumn{1}{c}{(1)} &\multicolumn{1}{c}{(2)} &\multicolumn{1}{c}{(3)} \\
& Total Power &Total Power Transmitted &Total Power Generated \\
& b/se & b/se & b/se \\
\hline
Treated parishes & 2.425\sym{***}& 2.633\sym{***}& 0.989 \\
& (0.28) & (0.29) & (0.70) \\
Parish area in square kilometers& 0.000\sym{*} & 0.000\sym{**} & -0.000 \\
& (0.00) & (0.00) & (0.00) \\
Parish population in 1900& 0.000\sym{***}& 0.000\sym{***}& 0.000 \\
& (0.00) & (0.00) & (0.00) \\
Constant & 4.426\sym{***}& 4.228\sym{***}& 5.541\sym{***}\\
& (0.13) & (0.14) & (0.34) \\
\hline
R-squared & 0.25 & 0.27 & 0.07 \\
Observations & 531 & 496 & 96 \\
F-stat & 58.804 & 59.820 & 2.272 \\
Mean Dependent Var & 5.57 & 5.40 & 5.65 \\
\hline\hline
\multicolumn{4}{l}{\footnotesize Robust standard errors in parentheses}\\
\end{tabular}
}
{
\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}
\begin{tabular}{l*{3}{c}}
\hline\hline
&\multicolumn{1}{c}{(1)} &\multicolumn{1}{c}{(2)} &\multicolumn{1}{c}{(3)} \\
&log(Total Power) &log(Total Power Transmitted) &log(Total Power Generated) \\
& b/se & b/se & b/se \\
\hline
Treated parishes & 0.677\sym{***}& 0.699\sym{***}& -0.069 \\
& (0.20) & (0.18) & (0.11) \\
Parish area in square kilometers& 0.000\sym{***}& 0.000\sym{***}& -0.000 \\
& (0.00) & (0.00) & (0.00) \\
Parish population in 1900& 0.000\sym{***}& 0.000\sym{***}& 0.000\sym{***}\\
& (0.00) & (0.00) & (0.00) \\
Latitude & 0.197\sym{***}& 0.232\sym{***}& 0.037 \\
& (0.06) & (0.05) & (0.03) \\
Longitude & -0.070 & -0.087\sym{*} & 0.013 \\
& (0.05) & (0.04) & (0.02) \\
Constant & -9.677\sym{**} & -11.655\sym{***}& -2.124 \\
& (2.95) & (2.76) & (1.62) \\
\hline
R-squared & 0.17 & 0.19 & 0.03 \\
Observations & 1684 & 1684 & 1684 \\
F-stat & 68.552 & 78.333 & 10.151 \\
Mean Dependent Var & 1.62 & 1.45 & 0.35 \\
\hline\hline
\multicolumn{4}{l}{\footnotesize Robust standard errors in parentheses}\\
\end{tabular}
}
Loading

0 comments on commit 3ee43ae

Please sign in to comment.