ENV["COLUMNS"] = 1000; # print more columns of tables
using Random
Random.seed!(42)
"Andi Kerstin Chris Caro Jana" |> split |> shuffle |> x -> join(x," → ")
Realisierte_Erzeugung_201501010000_202305301300_Stunde.csv
as a table into julia :heavy_check_mark:Time
containing the combined info from Datum
and Anfang
using Dates
using CSV
using DataFrames
using DataFramesMeta
using Chain
using Statistics
using CairoMakie
df = CSV.read("../data_dojo_22/Realisierte_Erzeugung_201501010000_202305301300_Stunde.csv",
DataFrame,
delim=";",
decimal=',',
groupmark='.',
missingstring="-",
dateformat=Dict("Datum"=>"dd.mm.yyyy"),
types=Dict("Datum"=>Date)
);
clean_colname(x) = replace(split(x, " [")[1], " " => "_")
clean_names = clean_colname.(names(df));
rename!(df, clean_names)
Row | Datum | Anfang | Ende | Biomasse | Wasserkraft | Wind_Offshore | Wind_Onshore | Photovoltaik | Sonstige_Erneuerbare | Kernenergie | Braunkohle | Steinkohle | Erdgas | Pumpspeicher | Sonstige_Konventionelle |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | Time | Time | Float64? | Float64? | Float64 | Float64 | Float64 | Float64? | Float64 | Float64? | Float64? | Float64? | Float64? | Float64? | |
1 | 2015-01-01 | 00:00:00 | 01:00:00 | 4024.25 | 1158.25 | 516.5 | 8128.0 | 0.0 | 133.0 | 10710.5 | 15687.2 | 3219.75 | 1226.25 | 1525.75 | 4909.25 |
2 | 2015-01-01 | 01:00:00 | 02:00:00 | 3982.75 | 1188.0 | 516.25 | 8297.5 | 0.0 | 122.5 | 11086.2 | 15321.8 | 2351.25 | 870.75 | 1079.25 | 4932.75 |
3 | 2015-01-01 | 02:00:00 | 03:00:00 | 4019.5 | 1139.25 | 514.0 | 8540.0 | 0.0 | 93.0 | 11026.2 | 14817.5 | 2227.0 | 809.5 | 787.0 | 5041.75 |
4 | 2015-01-01 | 03:00:00 | 04:00:00 | 4040.75 | 1122.5 | 517.75 | 8552.0 | 0.0 | 86.5 | 11027.8 | 14075.0 | 2339.75 | 821.0 | 287.75 | 5084.0 |
5 | 2015-01-01 | 04:00:00 | 05:00:00 | 4037.75 | 1112.0 | 519.75 | 8643.5 | 0.0 | 86.5 | 10962.2 | 14115.0 | 2461.5 | 831.25 | 346.75 | 5070.75 |
6 | 2015-01-01 | 05:00:00 | 06:00:00 | 4028.25 | 1107.75 | 520.0 | 8711.75 | 0.0 | 86.75 | 10696.0 | 13474.2 | 2217.75 | 851.0 | 765.5 | 5096.75 |
7 | 2015-01-01 | 06:00:00 | 07:00:00 | 4013.25 | 1111.75 | 521.5 | 9167.25 | 0.0 | 87.0 | 10299.5 | 12403.8 | 2373.25 | 868.25 | 414.5 | 5153.0 |
8 | 2015-01-01 | 07:00:00 | 08:00:00 | 4012.75 | 1113.75 | 520.25 | 9811.0 | 0.0 | 87.0 | 10035.2 | 12062.5 | 2491.0 | 876.0 | 582.5 | 5161.0 |
9 | 2015-01-01 | 08:00:00 | 09:00:00 | 3999.75 | 1107.5 | 525.25 | 9683.0 | 53.0 | 87.0 | 10245.8 | 12405.0 | 2530.25 | 888.25 | 750.5 | 5393.5 |
10 | 2015-01-01 | 09:00:00 | 10:00:00 | 4016.25 | 1121.0 | 527.0 | 9501.75 | 773.25 | 85.75 | 10060.2 | 12798.8 | 2386.25 | 891.5 | 387.0 | 5884.0 |
11 | 2015-01-01 | 10:00:00 | 11:00:00 | 4007.75 | 1122.0 | 525.0 | 10025.0 | 2116.75 | 82.0 | 10082.2 | 12728.5 | 2557.0 | 888.25 | 176.25 | 6064.0 |
12 | 2015-01-01 | 11:00:00 | 12:00:00 | 4011.75 | 1146.25 | 527.25 | 10862.5 | 3364.25 | 82.0 | 9949.0 | 12451.0 | 2657.75 | 876.5 | 884.5 | 5914.75 |
13 | 2015-01-01 | 12:00:00 | 13:00:00 | 4014.0 | 1139.75 | 527.75 | 11575.2 | 4198.25 | 82.0 | 10119.0 | 12464.5 | 2937.75 | 874.25 | 705.0 | 5778.25 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
73705 | 2023-05-30 | 01:00:00 | 02:00:00 | 4581.5 | 2033.75 | 3891.75 | 11488.5 | 0.0 | 119.5 | 0.0 | 3523.5 | 1288.5 | 3984.75 | 769.25 | 1146.0 |
73706 | 2023-05-30 | 02:00:00 | 03:00:00 | 4551.0 | 1963.5 | 3725.5 | 10141.0 | 0.0 | 119.0 | 0.0 | 3484.25 | 1263.75 | 3592.0 | 604.25 | 1141.75 |
73707 | 2023-05-30 | 03:00:00 | 04:00:00 | 4503.25 | 2090.5 | 3635.25 | 9170.25 | 0.0 | 119.0 | 0.0 | 3490.0 | 1272.5 | 4030.5 | 578.75 | 1132.5 |
73708 | 2023-05-30 | 04:00:00 | 05:00:00 | 4454.25 | 2105.25 | 3635.75 | 8735.75 | 2.25 | 120.0 | 0.0 | 3678.25 | 1275.0 | 4895.5 | 433.25 | 1136.0 |
73709 | 2023-05-30 | 05:00:00 | 06:00:00 | 4406.75 | 2134.75 | 2940.0 | 8419.75 | 407.75 | 120.25 | 0.0 | 4087.5 | 1289.25 | 5723.75 | 404.25 | 1142.25 |
73710 | 2023-05-30 | 06:00:00 | 07:00:00 | 4413.25 | 2227.5 | 2272.5 | 7531.0 | 2888.75 | 128.25 | 0.0 | 5010.75 | 1362.0 | 6525.25 | 2803.75 | 1144.5 |
73711 | 2023-05-30 | 07:00:00 | 08:00:00 | 4468.0 | 2276.5 | 1916.25 | 5507.25 | 8343.25 | 137.0 | 0.0 | 5275.5 | 1173.75 | 6905.75 | 5712.75 | 1156.75 |
73712 | 2023-05-30 | 08:00:00 | 09:00:00 | 4503.5 | 2152.5 | 1643.25 | 3647.75 | 15893.2 | 138.0 | 0.0 | 5181.75 | 1196.25 | 6946.5 | 5079.5 | 1132.0 |
73713 | 2023-05-30 | 09:00:00 | 10:00:00 | 4544.5 | 2156.25 | 1285.0 | 2900.5 | 23612.5 | 135.0 | 0.0 | 4707.5 | 1114.75 | 6384.5 | 2122.75 | 1115.0 |
73714 | 2023-05-30 | 10:00:00 | 11:00:00 | 4568.25 | 2085.25 | 1241.25 | 2729.5 | 30341.8 | 121.25 | 0.0 | 3978.5 | 1095.5 | 4802.5 | 1279.0 | 1096.25 |
73715 | 2023-05-30 | 11:00:00 | 12:00:00 | 4618.75 | 2068.25 | 1053.25 | 3115.0 | 35196.2 | 119.0 | 0.0 | 3759.75 | 1151.5 | 3762.25 | 774.0 | 1096.5 |
73716 | 2023-05-30 | 12:00:00 | 13:00:00 | 4612.25 | 1967.25 | 781.25 | 3496.25 | 37030.0 | 119.0 | 0.0 | 3765.0 | 1296.75 | 3240.0 | 196.75 | 1099.75 |
Time
columndatetime_vec = Dates.DateTime.(df.Datum, df.Anfang);
datetime_data = insertcols!(df,1, :Datetime => datetime_vec );
datetime_clean = select(datetime_data, Not([:Datum, :Anfang, :Ende]));
train = filter(:Datetime => <(Dates.Date(2021)), datetime_clean);
val = filter(:Datetime => d -> Dates.year(d)==2021, datetime_clean);
test = filter(:Datetime => >(Dates.Date(2021)), datetime_clean);
size(train), size(val), size(test)
((52608, 13), (8760, 13), (21107, 13))
data_y = @chain train begin
@transform(:Year = Dates.year.(:Datetime))
groupby(:Year)
@combine(:Biomasse = sum(:Biomasse), :Wasserkraft = sum(:Wasserkraft), :Wind_Offshore = sum(:Wind_Offshore), :Wind_Onshore = sum(:Wind_Onshore), :Photovoltaik = sum(:Photovoltaik), :Sonstige_Erneuerbare = sum(:Sonstige_Erneuerbare), :Kernenergie = sum(:Kernenergie), :Braunkohle = sum(:Braunkohle) ,:Steinkohle = sum(:Steinkohle), :Erdgas = sum(:Erdgas), :Pumpspeicher = sum(:Pumpspeicher), :Sonstige_Konventionelle = sum(:Sonstige_Konventionelle))
end
Row | Year | Biomasse | Wasserkraft | Wind_Offshore | Wind_Onshore | Photovoltaik | Sonstige_Erneuerbare | Kernenergie | Braunkohle | Steinkohle | Erdgas | Pumpspeicher | Sonstige_Konventionelle |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Int64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | |
1 | 2015 | 3.48479e7 | 1.37193e7 | 8.13438e6 | 6.90421e7 | 3.48794e7 | 9.4563e5 | 8.43635e7 | 1.33798e8 | 8.11387e7 | 1.52259e7 | 9.93722e6 | 5.29686e7 |
2 | 2016 | 3.97455e7 | 1.80759e7 | 1.20931e7 | 6.52747e7 | 3.45389e7 | 1.40346e6 | 8.02425e7 | 1.30376e8 | 8.089e7 | 2.28888e7 | 8.58761e6 | 4.73436e7 |
3 | 2017 | 4.03284e7 | 1.55901e7 | 1.74142e7 | 8.51901e7 | 3.58832e7 | 1.75439e6 | 7.2214e7 | 1.29287e8 | 6.60055e7 | 2.55804e7 | 9.29078e6 | 4.87431e7 |
4 | 2018 | 4.00905e7 | 1.52577e7 | 1.90684e7 | 8.92706e7 | 4.12343e7 | 1.53113e6 | 7.18417e7 | 1.28362e8 | 7.15453e7 | 4.28782e7 | 9.13598e6 | 1.2838e7 |
5 | 2019 | 4.03876e7 | 1.58305e7 | 2.43828e7 | 9.98789e7 | 4.19148e7 | 1.46438e6 | 7.10422e7 | 1.02729e8 | 4.78153e7 | 5.46197e7 | 9.02058e6 | 1.27306e7 |
6 | 2020 | 4.08223e7 | 1.55834e7 | 2.68832e7 | 1.03104e8 | 4.58216e7 | 1.60774e6 | 6.09239e7 | 8.3374e7 | 3.4872e7 | 5.9126e7 | 1.15287e7 | 1.29299e7 |
fig1 =lines(data_y.Year, data_y.Biomasse, label = "Biomasse")
for i in names(data_y)[3:end]
lines!(fig1.axis, data_y.Year, data_y[!,i], label =i)
end
axislegend(fig1.axis, position = :lb)
fig1
Bonus:
See the results in the R notebook
Bonus:
photo_test = test.Photovoltaik
21107-element Vector{Float64}: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 194.0 1363.75 2986.5 4237.5 4826.75 4234.25 ⋮ 0.0 0.0 0.0 2.25 407.75 2888.75 8343.25 15893.25 23612.5 30341.75 35196.25 37030.0
first(train)
Row | Datetime | Biomasse | Wasserkraft | Wind_Offshore | Wind_Onshore | Photovoltaik | Sonstige_Erneuerbare | Kernenergie | Braunkohle | Steinkohle | Erdgas | Pumpspeicher | Sonstige_Konventionelle |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
DateTime | Float64? | Float64? | Float64 | Float64 | Float64 | Float64? | Float64 | Float64? | Float64? | Float64? | Float64? | Float64? | |
1 | 2015-01-01T00:00:00 | 4024.25 | 1158.25 | 516.5 | 8128.0 | 0.0 | 133.0 | 10710.5 | 15687.2 | 3219.75 | 1226.25 | 1525.75 | 4909.25 |
data = select(train, :Datetime, :Photovoltaik);
md=monthday.(data.Datetime)
h=hour.(data.Datetime)
insertcols!(data, 3, :monthday => md)
insertcols!(data, 4, :hour => h)
Row | Datetime | Photovoltaik | monthday | hour |
---|---|---|---|---|
DateTime | Float64 | Tuple… | Int64 | |
1 | 2015-01-01T00:00:00 | 0.0 | (1, 1) | 0 |
2 | 2015-01-01T01:00:00 | 0.0 | (1, 1) | 1 |
3 | 2015-01-01T02:00:00 | 0.0 | (1, 1) | 2 |
4 | 2015-01-01T03:00:00 | 0.0 | (1, 1) | 3 |
5 | 2015-01-01T04:00:00 | 0.0 | (1, 1) | 4 |
6 | 2015-01-01T05:00:00 | 0.0 | (1, 1) | 5 |
7 | 2015-01-01T06:00:00 | 0.0 | (1, 1) | 6 |
8 | 2015-01-01T07:00:00 | 0.0 | (1, 1) | 7 |
9 | 2015-01-01T08:00:00 | 53.0 | (1, 1) | 8 |
10 | 2015-01-01T09:00:00 | 773.25 | (1, 1) | 9 |
11 | 2015-01-01T10:00:00 | 2116.75 | (1, 1) | 10 |
12 | 2015-01-01T11:00:00 | 3364.25 | (1, 1) | 11 |
13 | 2015-01-01T12:00:00 | 4198.25 | (1, 1) | 12 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
52597 | 2020-12-31T12:00:00 | 8398.0 | (12, 31) | 12 |
52598 | 2020-12-31T13:00:00 | 7127.5 | (12, 31) | 13 |
52599 | 2020-12-31T14:00:00 | 4330.0 | (12, 31) | 14 |
52600 | 2020-12-31T15:00:00 | 1372.5 | (12, 31) | 15 |
52601 | 2020-12-31T16:00:00 | 52.25 | (12, 31) | 16 |
52602 | 2020-12-31T17:00:00 | 0.0 | (12, 31) | 17 |
52603 | 2020-12-31T18:00:00 | 0.0 | (12, 31) | 18 |
52604 | 2020-12-31T19:00:00 | 0.0 | (12, 31) | 19 |
52605 | 2020-12-31T20:00:00 | 0.0 | (12, 31) | 20 |
52606 | 2020-12-31T21:00:00 | 0.0 | (12, 31) | 21 |
52607 | 2020-12-31T22:00:00 | 0.0 | (12, 31) | 22 |
52608 | 2020-12-31T23:00:00 | 0.0 | (12, 31) | 23 |
nicename = groupby(data, [:monthday,:hour])
GroupedDataFrame with 8784 groups based on keys: monthday, hour
Row | Datetime | Photovoltaik | monthday | hour |
---|---|---|---|---|
DateTime | Float64 | Tuple… | Int64 | |
1 | 2015-01-01T00:00:00 | 0.0 | (1, 1) | 0 |
2 | 2016-01-01T00:00:00 | 0.0 | (1, 1) | 0 |
3 | 2017-01-01T00:00:00 | 0.0 | (1, 1) | 0 |
4 | 2018-01-01T00:00:00 | 0.0 | (1, 1) | 0 |
5 | 2019-01-01T00:00:00 | 0.0 | (1, 1) | 0 |
6 | 2020-01-01T00:00:00 | 0.0 | (1, 1) | 0 |
⋮
Row | Datetime | Photovoltaik | monthday | hour |
---|---|---|---|---|
DateTime | Float64 | Tuple… | Int64 | |
1 | 2016-03-29T02:00:00 | 0.0 | (3, 29) | 2 |
2 | 2017-03-29T02:00:00 | 0.0 | (3, 29) | 2 |
3 | 2018-03-29T02:00:00 | 0.0 | (3, 29) | 2 |
4 | 2019-03-29T02:00:00 | 0.0 | (3, 29) | 2 |
mean_nicename = @combine(nicename, :photo = mean(:Photovoltaik));
mean_nicename = filter(df-> df.monthday != (2,29), mean_nicename)
Row | monthday | hour | photo |
---|---|---|---|
Tuple… | Int64 | Float64 | |
1 | (1, 1) | 0 | 0.0 |
2 | (1, 1) | 1 | 0.0 |
3 | (1, 1) | 2 | 0.0 |
4 | (1, 1) | 3 | 0.0 |
5 | (1, 1) | 4 | 0.0 |
6 | (1, 1) | 5 | 0.0 |
7 | (1, 1) | 6 | 0.0 |
8 | (1, 1) | 7 | 0.125 |
9 | (1, 1) | 8 | 182.792 |
10 | (1, 1) | 9 | 1512.79 |
11 | (1, 1) | 10 | 3582.58 |
12 | (1, 1) | 11 | 5287.5 |
13 | (1, 1) | 12 | 5991.92 |
⋮ | ⋮ | ⋮ | ⋮ |
8749 | (12, 31) | 13 | 4837.54 |
8750 | (12, 31) | 14 | 3085.29 |
8751 | (12, 31) | 15 | 1080.5 |
8752 | (12, 31) | 16 | 54.7083 |
8753 | (12, 31) | 17 | 0.0 |
8754 | (12, 31) | 18 | 0.0 |
8755 | (12, 31) | 19 | 0.0 |
8756 | (12, 31) | 20 | 0.0 |
8757 | (12, 31) | 21 | 0.0 |
8758 | (12, 31) | 22 | 0.0 |
8759 | (12, 31) | 23 | 0.0 |
8760 | (3, 29) | 2 | 0.0 |
first(mean_nicename)
Row | monthday | hour | photo |
---|---|---|---|
Tuple… | Int64 | Float64 | |
1 | (1, 1) | 0 | 0.0 |
first(val)
Row | Datetime | Biomasse | Wasserkraft | Wind_Offshore | Wind_Onshore | Photovoltaik | Sonstige_Erneuerbare | Kernenergie | Braunkohle | Steinkohle | Erdgas | Pumpspeicher | Sonstige_Konventionelle |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
DateTime | Float64? | Float64? | Float64 | Float64 | Float64 | Float64? | Float64 | Float64? | Float64? | Float64? | Float64? | Float64? | |
1 | 2021-01-01T00:00:00 | 4597.25 | 1205.0 | 383.0 | 3934.5 | 0.0 | 227.0 | 8144.75 | 11608.5 | 3443.75 | 5649.25 | 347.25 | 1638.25 |
fig = Figure()
ax = Axis(fig[1,1])
scatter!(ax,val.Photovoltaik, color = "red", label = "val", markersize = 4)
scatter!(ax, mean_nicename.photo, color = "blue", label = "predict",markersize = 4)
axislegend(ax)
fig
using Metrics
mse_own(y_pred, y_true) = sum((y_true .- y_pred).^2) / length(y_true)
mse_own (generic function with 1 method)
mse_own(mean_nicename.photo,val.Photovoltaik )
1.1335880219992626e7
mse(mean_nicename.photo,val.Photovoltaik )
1.1335880219992626e7
mae(mean_nicename.photo,val.Photovoltaik)
1633.652064307458
QC: do values in prediction and validation set match?
plot(mean_nicename.hour .== hour.(val.Datetime))
val_data = select(val, :Datetime, :Photovoltaik);
insertcols!(val_data, 3, :monthday => monthday.(val.Datetime))
insertcols!(val_data, 4, :hour => hour.(val.Datetime))
insertcols!(val_data, 5, :predmd => mean_nicename.monthday)
insertcols!(val_data, 5, :predh => mean_nicename.hour)
val_data
Row | Datetime | Photovoltaik | monthday | hour | predh | predmd |
---|---|---|---|---|---|---|
DateTime | Float64 | Tuple… | Int64 | Int64 | Tuple… | |
1 | 2021-01-01T00:00:00 | 0.0 | (1, 1) | 0 | 0 | (1, 1) |
2 | 2021-01-01T01:00:00 | 0.0 | (1, 1) | 1 | 1 | (1, 1) |
3 | 2021-01-01T02:00:00 | 0.0 | (1, 1) | 2 | 2 | (1, 1) |
4 | 2021-01-01T03:00:00 | 0.0 | (1, 1) | 3 | 3 | (1, 1) |
5 | 2021-01-01T04:00:00 | 0.0 | (1, 1) | 4 | 4 | (1, 1) |
6 | 2021-01-01T05:00:00 | 0.0 | (1, 1) | 5 | 5 | (1, 1) |
7 | 2021-01-01T06:00:00 | 0.0 | (1, 1) | 6 | 6 | (1, 1) |
8 | 2021-01-01T07:00:00 | 0.0 | (1, 1) | 7 | 7 | (1, 1) |
9 | 2021-01-01T08:00:00 | 194.0 | (1, 1) | 8 | 8 | (1, 1) |
10 | 2021-01-01T09:00:00 | 1363.75 | (1, 1) | 9 | 9 | (1, 1) |
11 | 2021-01-01T10:00:00 | 2986.5 | (1, 1) | 10 | 10 | (1, 1) |
12 | 2021-01-01T11:00:00 | 4237.5 | (1, 1) | 11 | 11 | (1, 1) |
13 | 2021-01-01T12:00:00 | 4826.75 | (1, 1) | 12 | 12 | (1, 1) |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
8749 | 2021-12-31T12:00:00 | 10006.8 | (12, 31) | 12 | 13 | (12, 31) |
8750 | 2021-12-31T13:00:00 | 7878.75 | (12, 31) | 13 | 14 | (12, 31) |
8751 | 2021-12-31T14:00:00 | 4845.0 | (12, 31) | 14 | 15 | (12, 31) |
8752 | 2021-12-31T15:00:00 | 1990.75 | (12, 31) | 15 | 16 | (12, 31) |
8753 | 2021-12-31T16:00:00 | 154.25 | (12, 31) | 16 | 17 | (12, 31) |
8754 | 2021-12-31T17:00:00 | 0.0 | (12, 31) | 17 | 18 | (12, 31) |
8755 | 2021-12-31T18:00:00 | 0.0 | (12, 31) | 18 | 19 | (12, 31) |
8756 | 2021-12-31T19:00:00 | 0.0 | (12, 31) | 19 | 20 | (12, 31) |
8757 | 2021-12-31T20:00:00 | 0.0 | (12, 31) | 20 | 21 | (12, 31) |
8758 | 2021-12-31T21:00:00 | 0.0 | (12, 31) | 21 | 22 | (12, 31) |
8759 | 2021-12-31T22:00:00 | 0.0 | (12, 31) | 22 | 23 | (12, 31) |
8760 | 2021-12-31T23:00:00 | 0.0 | (12, 31) | 23 | 2 | (3, 29) |