Data source: WRI Global Power Plant Database — https://github.com/wri/global-power-plant-database/
Thanks to Timo for the nice introduction and demonstration of Power BI. Power BI is freely available for personal use: https://www.microsoft.com/en-US/download/details.aspx?id=58494
library(tidyverse)
# Download the Global Power Plant Database CSV straight from the WRI GitHub
# repository; `power_plants` is the table every later step works on.
power_plants <- readr::read_csv(
  "https://raw.githubusercontent.com/wri/global-power-plant-database/master/output_database/global_power_plant_database.csv"
)

# Compact structural overview: one line per column with its type and the
# first few values.
str(power_plants)
# Count the missing values in every column, reshape to one row per column
# (`name` = column name, `value` = number of NAs) and keep only the columns
# with fewer than 1000 missing entries.
#
# `across(everything(), ...)` replaces the superseded `summarise_all()`, and
# pivoting `everything()` avoids hard-coding the first and last column names,
# which errors as soon as the upstream CSV adds, renames or reorders columns.
power_plants %>%
  summarise(across(everything(), ~ sum(is.na(.x)))) %>%
  pivot_longer(everything()) %>%
  filter(value < 1000)
# Density of plant capacity (`capacity_mw`), drawn on a log10 x axis.
ggplot(power_plants, aes(x = capacity_mw)) +
  geom_density() +
  scale_x_log10()
# Plant(s) with the largest capacity; every row tied for the maximum is kept.
# `na.rm = TRUE` keeps a single missing capacity from turning the maximum into
# NA, which would make the filter silently return zero rows.
power_plants %>%
  filter(capacity_mw == max(capacity_mw, na.rm = TRUE))

# The ten countries with the most plants (row counts, sorted descending).
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
power_plants %>%
  count(country_long, sort = TRUE) %>%
  head(10)

# The twenty countries with the highest summed capacity.
power_plants %>%
  group_by(country_long) %>%
  summarize(capacity = sum(capacity_mw)) %>%
  arrange(desc(capacity)) %>%
  head(20)
# Distinct values of `primary_fuel`, in order of first appearance.
power_plants %>%
  pull(primary_fuel) %>%
  unique()
# Summed capacity and number of plants for every country / primary-fuel
# combination, showing the six combinations with the highest capacity.
#
# `.groups = "drop"` makes the result explicitly ungrouped instead of relying
# on summarize()'s default of leaving it grouped by `country_long` (and
# emitting a message about it).
power_plants %>%
  group_by(country_long, primary_fuel) %>%
  summarize(
    capacity = sum(capacity_mw),
    number_powerplants = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(capacity)) %>%
  head()
# `repr.plot.*` options: resolution and width used when plots are rendered
# (applies to every plot drawn after this point).
options(
  repr.plot.res = 250,
  repr.plot.width = 14
)

# Stacked horizontal bars of summed capacity per country, coloured by fuel.
# Countries are lumped to the 9 most frequent (by number of plants) and fuels
# to the 8 largest by capacity; everything else becomes "Other". Countries are
# ordered on the y axis by their total capacity.
power_plants %>%
  mutate(
    country_long = fct_lump(country_long, n = 9),
    primary_fuel = fct_lump(primary_fuel, n = 8, w = capacity_mw)
  ) %>%
  group_by(country_long, primary_fuel) %>%
  summarize(
    capacity = sum(capacity_mw),
    number_powerplants = n()
  ) %>%
  arrange(desc(capacity)) %>%
  ungroup() %>%
  mutate(
    country_long = fct_reorder(country_long, capacity, .fun = sum)
  ) %>%
  ggplot(aes(x = capacity, y = country_long, fill = primary_fuel)) +
  geom_col() +
  scale_fill_viridis_d(option = "F") +
  theme_minimal() +
  labs(x = "Capacity", y = "Country")
# Optional: append `+ facet_wrap(~primary_fuel, scales = "free_x")` to get one
# panel per fuel.
# Horizontal bars of summed capacity per primary fuel, ordered by total
# capacity and coloured by fuel (reversed viridis "F" palette).
#
# Changes from the previous version:
# - the y axis shows `primary_fuel`, so it is labelled "Primary fuel" instead
#   of the copy-pasted "Country";
# - the country lumping/grouping is gone: the bars are aggregated per fuel, so
#   splitting by country only produced stacked segments of identical colour
#   with the same total length.
power_plants %>%
  group_by(primary_fuel) %>%
  summarize(capacity = sum(capacity_mw)) %>%
  mutate(primary_fuel = fct_reorder(primary_fuel, capacity, .fun = sum)) %>%
  ggplot(aes(x = capacity, y = primary_fuel, fill = primary_fuel)) +
  geom_col() +
  scale_fill_viridis_d(option = "F", direction = -1) +
  theme_minimal() +
  labs(x = "Capacity", y = "Primary fuel")
# Wide table of plant counts: one row per country, one column per primary
# fuel, with 0 where a country has no plant of that fuel. Only the first six
# rows are shown.
#
# `count()` replaces group_by() + summarize(n()) and returns an ungrouped
# tibble, so no implicit grouping is carried through pivot_wider(); the no-op
# `identity` pipe stage is dropped.
power_plants %>%
  count(country_long, primary_fuel, name = "number_powerplants") %>%
  pivot_wider(
    names_from = primary_fuel,
    values_from = number_powerplants,
    values_fill = 0L
  ) %>%
  head()