library(tidyverse)
── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ── ✔ ggplot2 3.3.6 ✔ purrr 0.3.4 ✔ tibble 3.1.7 ✔ dplyr 1.0.9 ✔ tidyr 1.2.0 ✔ stringr 1.4.0 ✔ readr 2.1.2 ✔ forcats 0.5.1 ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::filter() masks stats::filter() ✖ dplyr::lag() masks stats::lag()
# Load the tree table from the working directory; readr guesses the column
# types and reports the resulting specification on import.
data <- read_csv(file = "Tallo.csv")
Rows: 498838 Columns: 13 ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── Delimiter: "," chr (7): tree_id, division, family, genus, species, height_outlier, crown_ra... dbl (6): latitude, longitude, stem_diameter_cm, height_m, crown_radius_m, re... ℹ Use `spec()` to retrieve the full column specification for this data. ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Simon and Niklas stay first and Andreas stays last; the four names in
# between are inserted in a freshly shuffled order.
append(
  c("Simon", "Niklas", "Andreas"),
  sample(c("Mike", "Robin", "Max", "Markus")),
  after = 2
)
# Rows per species, most frequent first (rows without a species form their
# own NA group).
data %>%
  count(species, sort = TRUE)
| species | n |
|---|---|
| <chr> | <int> |
| NA | 61230 |
| Quercus ilex | 22760 |
| Pinus sylvestris | 18221 |
| Pinus halepensis | 14003 |
| Pinus pinaster | 11766 |
| Quercus infectoria | 10964 |
| Fagus sylvatica | 9470 |
| Pinus nigra | 7365 |
| Picea abies | 6723 |
| Acer saccharum | 5744 |
| Pseudotsuga menziesii | 5158 |
| Quercus libani | 5119 |
| Quercus faginea | 4246 |
| Quercus pubescens | 4185 |
| Quercus pyrenaica | 4129 |
| Acer rubrum | 3545 |
| Quercus suber | 3256 |
| Pinus pinea | 3165 |
| Pinus wallichiana | 2969 |
| Abies balsamea | 2947 |
| Picea mariana | 2647 |
| Quercus brantii | 2619 |
| Picea smithiana | 2576 |
| Callitris columellaris | 2539 |
| Eucalyptus globulus | 2469 |
| Quercus robur | 2448 |
| Carpinus betulus | 2298 |
| Gymnanthes lucida | 2260 |
| Litsea leefeana | 1896 |
| Pinus radiata | 1878 |
| ⋮ | ⋮ |
| Viburnum odoratissimum | 1 |
| Vismia latifolia | 1 |
| Vitex axillariflora | 1 |
| Vitex ciliata | 1 |
| Vitex doniana | 1 |
| Vitex queenslandica | 1 |
| Voacanga thouarsii | 1 |
| Vochysia guatemalensis | 1 |
| Wendlandia guangdongensis | 1 |
| Wikstroemia chui | 1 |
| Wikstroemia indica | 1 |
| Wikstroemia nutans | 1 |
| Wisteria sinensis | 1 |
| Xanthophyllum amoenum | 1 |
| Xanthophyllum ellipticum | 1 |
| Xanthophyllum heterophyllum | 1 |
| Xanthophyllum macrophyllum | 1 |
| Xylia xylocarpa | 1 |
| Xylopia amazonica | 1 |
| Xylopia rubescens | 1 |
| Xylopia staudtii | 1 |
| Xylopia villosa | 1 |
| Zanha africana | 1 |
| Zanha golungensis | 1 |
| Zanthoxylum acuminatum | 1 |
| Zanthoxylum brachyacanthum | 1 |
| Zanthoxylum ovalifolium | 1 |
| Zanthoxylum riedelianum | 1 |
| Ziziphus calophylla | 1 |
| Ziziphus reticulata | 1 |
# Keep rows with a known species that occurs at least 5000 times, excluding
# the family Sapindaceae. add_count() leaves the per-species row count in `n`.
# filter() also drops rows where a condition evaluates to NA (for example a
# missing family).
filtered_data <- data %>%
  filter(!is.na(species)) %>%
  add_count(species) %>%
  filter(n >= 5000, family != "Sapindaceae")
# Tree locations, coloured by family.
ggplot(filtered_data, aes(x = longitude, y = latitude, colour = family)) +
  geom_point()

# repr.* plot width/resolution options and a light default theme. They are
# set here, after the first plot, so they only affect the plots that follow.
options(repr.plot.width = 12, repr.plot.res = 150)
theme_set(theme_light())

# Tree locations, coloured by species.
ggplot(filtered_data, aes(x = longitude, y = latitude, colour = species)) +
  geom_point()

# Height against stem diameter, coloured by species.
ggplot(
  filtered_data,
  aes(x = stem_diameter_cm, y = height_m, colour = species)
) +
  geom_point()
Warning message: “Removed 826 rows containing missing values (geom_point).”
# Height against stem diameter, restricted to three hand-picked species.
filtered_data %>%
  filter(
    species %in% c("Fagus sylvatica", "Quercus ilex", "Picea abies")
  ) %>%
  ggplot(aes(x = stem_diameter_cm, y = height_m, colour = species)) +
  geom_point()
Warning message: “Removed 567 rows containing missing values (geom_point).”
# Subset used by the analyses below: three selected species.
filtered_species <- filtered_data %>%
  filter(
    species %in% c("Fagus sylvatica", "Quercus ilex", "Pinus pinaster")
  )

# Where the selected species were recorded.
ggplot(filtered_species, aes(x = longitude, y = latitude, colour = species)) +
  geom_point()

# Height against stem diameter for the selected species.
ggplot(
  filtered_species,
  aes(x = stem_diameter_cm, y = height_m, colour = species)
) +
  geom_point()
Warning message: “Removed 534 rows containing missing values (geom_point).”
# Height against stem diameter, coloured by the crown_radius_outlier column.
ggplot(
  filtered_species,
  aes(x = stem_diameter_cm, y = height_m, colour = crown_radius_outlier)
) +
  geom_point()
Warning message: “Removed 534 rows containing missing values (geom_point).”
# Height against crown radius, coloured by species.
ggplot(
  filtered_species,
  aes(x = crown_radius_m, y = height_m, colour = species)
) +
  geom_point()
Warning message: “Removed 4722 rows containing missing values (geom_point).”
# Height against stem diameter, coloured by crown_radius_outlier.
# NOTE(review): this draws the same plot as an earlier cell in this notebook;
# confirm whether the repeat is intentional.
filtered_species %>%
  ggplot(
    aes(x = stem_diameter_cm, y = height_m, colour = crown_radius_outlier)
  ) +
  geom_point()
Warning message: “Removed 534 rows containing missing values (geom_point).”
# Mean, standard deviation and variance of stem diameter for each species.
# The summary columns keep the names dplyr derives from the expressions.
hans <- filtered_species %>%
  group_by(species) %>%
  summarize(
    mean(stem_diameter_cm),
    sd(stem_diameter_cm),
    var(stem_diameter_cm)
  )

# Density of stem diameter, one curve per species.
ggplot(filtered_species, aes(x = stem_diameter_cm, colour = species)) +
  geom_density()
# Mean, standard deviation and variance of tree height for each species,
# ignoring missing heights.
#
# `na.rm = TRUE` replaces the `T` shorthand: `T` is an ordinary variable that
# can be reassigned, which would silently change the result. The column names
# are pinned to the labels the original unnamed expressions produced, so the
# shape of `dieter` is unchanged for anything that reads it.
dieter <- filtered_species %>%
  group_by(species) %>%
  summarize(
    `mean(height_m, na.rm = T)` = mean(height_m, na.rm = TRUE),
    `sd(height_m, na.rm = T)` = sd(height_m, na.rm = TRUE),
    `var(height_m, na.rm = T)` = var(height_m, na.rm = TRUE)
  )

# Show the summary table.
dieter
| species | mean(height_m, na.rm = T) | sd(height_m, na.rm = T) | var(height_m, na.rm = T) |
|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> |
| Fagus sylvatica | 19.675126 | 8.038170 | 64.612176 |
| Pinus pinaster | 10.939198 | 4.668945 | 21.799052 |
| Quercus ilex | 6.327681 | 2.469029 | 6.096105 |
# Density of tree height, one curve per species.
ggplot(filtered_species, aes(x = height_m, colour = species)) +
  geom_density()
Warning message: “Removed 534 rows containing non-finite values (stat_density).”
# Density of crown radius, one curve per species.
ggplot(filtered_species, aes(x = crown_radius_m, colour = species)) +
  geom_density()
Warning message: “Removed 4188 rows containing non-finite values (stat_density).”
# Per-species density of every column from latitude through crown_radius_m,
# one facet per column. The columns are stacked into variable/value pairs so a
# single ggplot call can draw all of them; each facet gets its own free axes.
#
# `scales` is spelled out in full (the previous `scale =` only worked through
# partial argument matching), and the long-format columns carry descriptive
# names, so the x-axis reads "value".
filtered_species %>%
  pivot_longer(
    cols = latitude:crown_radius_m,
    names_to = "variable",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value, colour = species)) +
  geom_density() +
  facet_wrap(~variable, scales = "free")
Warning message: “Removed 4722 rows containing non-finite values (stat_density).”
# Number of rows available for each of the selected species.
count(filtered_species, species)
| species | n |
|---|---|
| <chr> | <int> |
| Fagus sylvatica | 9470 |
| Pinus pinaster | 11766 |
| Quercus ilex | 22760 |