library(tidyverse)
── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ── ✔ ggplot2 3.3.6 ✔ purrr 0.3.4 ✔ tibble 3.1.7 ✔ dplyr 1.0.9 ✔ tidyr 1.2.0 ✔ stringr 1.4.0 ✔ readr 2.1.2 ✔ forcats 0.5.1 ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::filter() masks stats::filter() ✖ dplyr::lag() masks stats::lag()
# Load the Tallo CSV into a tibble; readr guesses the column types.
data <- read_csv(file = "Tallo.csv")
Rows: 498838 Columns: 13 ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── Delimiter: "," chr (7): tree_id, division, family, genus, species, height_outlier, crown_ra... dbl (6): latitude, longitude, stem_diameter_cm, height_m, crown_radius_m, re... ℹ Use `spec()` to retrieve the full column specification for this data. ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build a name vector: the first two and the last entry are fixed, the four
# names in between are put into a random order by sample().
c(
  "Simon",
  "Niklas",
  sample(c("Mike", "Robin", "Max", "Markus")),
  "Andreas"
)
# Number of rows per species, most frequent first (NA species included).
data %>%
  count(species, sort = TRUE)
species | n |
---|---|
<chr> | <int> |
NA | 61230 |
Quercus ilex | 22760 |
Pinus sylvestris | 18221 |
Pinus halepensis | 14003 |
Pinus pinaster | 11766 |
Quercus infectoria | 10964 |
Fagus sylvatica | 9470 |
Pinus nigra | 7365 |
Picea abies | 6723 |
Acer saccharum | 5744 |
Pseudotsuga menziesii | 5158 |
Quercus libani | 5119 |
Quercus faginea | 4246 |
Quercus pubescens | 4185 |
Quercus pyrenaica | 4129 |
Acer rubrum | 3545 |
Quercus suber | 3256 |
Pinus pinea | 3165 |
Pinus wallichiana | 2969 |
Abies balsamea | 2947 |
Picea mariana | 2647 |
Quercus brantii | 2619 |
Picea smithiana | 2576 |
Callitris columellaris | 2539 |
Eucalyptus globulus | 2469 |
Quercus robur | 2448 |
Carpinus betulus | 2298 |
Gymnanthes lucida | 2260 |
Litsea leefeana | 1896 |
Pinus radiata | 1878 |
⋮ | ⋮ |
Viburnum odoratissimum | 1 |
Vismia latifolia | 1 |
Vitex axillariflora | 1 |
Vitex ciliata | 1 |
Vitex doniana | 1 |
Vitex queenslandica | 1 |
Voacanga thouarsii | 1 |
Vochysia guatemalensis | 1 |
Wendlandia guangdongensis | 1 |
Wikstroemia chui | 1 |
Wikstroemia indica | 1 |
Wikstroemia nutans | 1 |
Wisteria sinensis | 1 |
Xanthophyllum amoenum | 1 |
Xanthophyllum ellipticum | 1 |
Xanthophyllum heterophyllum | 1 |
Xanthophyllum macrophyllum | 1 |
Xylia xylocarpa | 1 |
Xylopia amazonica | 1 |
Xylopia rubescens | 1 |
Xylopia staudtii | 1 |
Xylopia villosa | 1 |
Zanha africana | 1 |
Zanha golungensis | 1 |
Zanthoxylum acuminatum | 1 |
Zanthoxylum brachyacanthum | 1 |
Zanthoxylum ovalifolium | 1 |
Zanthoxylum riedelianum | 1 |
Ziziphus calophylla | 1 |
Ziziphus reticulata | 1 |
# Keep rows with a known species that occurs at least 5000 times, and drop
# the family Sapindaceae. add_count() leaves the per-species count in `n`.
filtered_data <- data %>%
  filter(!is.na(species)) %>%
  add_count(species) %>%
  filter(n >= 5000, family != "Sapindaceae")

# Scatter of the record coordinates, coloured by family.
ggplot(filtered_data, aes(x = longitude, y = latitude, colour = family)) +
  geom_point()
# Set the repr plot width and resolution options used for rendered figures.
options(
  repr.plot.width = 12,
  repr.plot.res = 150
)

# Make theme_light() the default ggplot2 theme for the plots that follow.
theme_set(theme_light())
# Scatter of the record coordinates, coloured by species.
ggplot(filtered_data, aes(x = longitude, y = latitude, colour = species)) +
  geom_point()

# Stem diameter against height, coloured by species.
ggplot(
  filtered_data,
  aes(x = stem_diameter_cm, y = height_m, colour = species)
) +
  geom_point()
Warning message: “Removed 826 rows containing missing values (geom_point).”
# Stem diameter against height, restricted to three species.
filtered_data %>%
  filter(
    species %in% c("Fagus sylvatica", "Quercus ilex", "Picea abies")
  ) %>%
  ggplot(aes(x = stem_diameter_cm, y = height_m, colour = species)) +
  geom_point()
Warning message: “Removed 567 rows containing missing values (geom_point).”
# Subset used by the remaining cells: three species taken from filtered_data.
filtered_species <- filter(
  filtered_data,
  species %in% c("Fagus sylvatica", "Quercus ilex", "Pinus pinaster")
)

# Record coordinates of the three species.
ggplot(filtered_species, aes(x = longitude, y = latitude, colour = species)) +
  geom_point()

# Stem diameter against height for the three species.
ggplot(
  filtered_species,
  aes(x = stem_diameter_cm, y = height_m, colour = species)
) +
  geom_point()
Warning message: “Removed 534 rows containing missing values (geom_point).”
# Stem diameter against height, coloured by the crown_radius_outlier column.
ggplot(
  filtered_species,
  aes(x = stem_diameter_cm, y = height_m, colour = crown_radius_outlier)
) +
  geom_point()
Warning message: “Removed 534 rows containing missing values (geom_point).”
# Crown radius against height, coloured by species.
ggplot(
  filtered_species,
  aes(x = crown_radius_m, y = height_m, colour = species)
) +
  geom_point()
Warning message: “Removed 4722 rows containing missing values (geom_point).”
# Diameter-vs-height scatter with points coloured by crown_radius_outlier.
filtered_species %>%
  ggplot(
    aes(x = stem_diameter_cm, y = height_m, colour = crown_radius_outlier)
  ) +
  geom_point()
Warning message: “Removed 534 rows containing missing values (geom_point).”
# Per-species mean, standard deviation and variance of stem diameter.
# The summary columns get explicit names so they can be referenced without
# backticks, and na.rm = TRUE (as in the height summary) keeps a single
# missing diameter from turning a species' statistics into NA.
hans <- filtered_species %>%
  group_by(species) %>%
  summarize(
    mean_stem_diameter_cm = mean(stem_diameter_cm, na.rm = TRUE),
    sd_stem_diameter_cm = sd(stem_diameter_cm, na.rm = TRUE),
    var_stem_diameter_cm = var(stem_diameter_cm, na.rm = TRUE)
  )
# Density of stem diameter, one curve per species.
ggplot(filtered_species, aes(x = stem_diameter_cm, colour = species)) +
  geom_density()
# Per-species mean, standard deviation and variance of height, ignoring
# missing heights. TRUE is spelled out because T is an ordinary variable that
# can be reassigned, and the summary columns are named explicitly instead of
# inheriting the call text as their name.
dieter <- filtered_species %>%
  group_by(species) %>%
  summarize(
    mean_height_m = mean(height_m, na.rm = TRUE),
    sd_height_m = sd(height_m, na.rm = TRUE),
    var_height_m = var(height_m, na.rm = TRUE)
  )
dieter
species | mean_height_m | sd_height_m | var_height_m |
---|---|---|---|
<chr> | <dbl> | <dbl> | <dbl> |
Fagus sylvatica | 19.675126 | 8.038170 | 64.612176 |
Pinus pinaster | 10.939198 | 4.668945 | 21.799052 |
Quercus ilex | 6.327681 | 2.469029 | 6.096105 |
# Density of height, one curve per species.
ggplot(filtered_species, aes(x = height_m, colour = species)) +
  geom_density()
Warning message: “Removed 534 rows containing non-finite values (stat_density).”
# Density of crown radius, one curve per species.
ggplot(filtered_species, aes(x = crown_radius_m, colour = species)) +
  geom_density()
Warning message: “Removed 4188 rows containing non-finite values (stat_density).”
# Reshape the columns from latitude through crown_radius_m into long format
# (one row per record and variable), then draw per-species density curves in
# one panel per variable. Each panel gets its own x and y scales.
filtered_species %>%
  pivot_longer(
    cols = latitude:crown_radius_m,
    names_to = "variable",
    values_to = "value"
  ) %>%
  ggplot(aes(value, color = species)) +
  geom_density() +
  # Spell out `scales`; `scale` only worked through partial argument matching.
  facet_wrap(~variable, scales = "free")
Warning message: “Removed 4722 rows containing non-finite values (stat_density).”
# Number of rows per species in the three-species subset.
count(filtered_species, species)
species | n |
---|---|
<chr> | <int> |
Fagus sylvatica | 9470 |
Pinus pinaster | 11766 |
Quercus ilex | 22760 |