[`scikit-learn` documentation on neural network models (supervised)](https://scikit-learn.org/stable/modules/neural_networks_supervised.html)
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
# Seed NumPy's global RNG so the shuffled order below is reproducible.
np.random.seed(42)

names = [
    "Robin",
    "Max",
    "Sonia",
    "Konstantin",
    "Andi",
    "Kerstin",
    "Sabine",
    "Caro",
    "Jana",
    "Sascha",
    "Dana",
    "Chaitanya",
]

# Shuffle in place, then render the resulting order as a single chain.
np.random.shuffle(names)
" => ".join(names)
'Dana => Sascha => Robin => Jana => Kerstin => Sonia => Max => Chaitanya => Andi => Caro => Konstantin => Sabine'
# 12 matches the number of names in the list above; presumably 80 is the
# available time split per person — TODO confirm.
80/12
6.666666666666667
# Load the tab-separated training and test tables from GitHub.
data = pd.read_csv(
    "https://github.com/ddojo/ddojo.github.io/raw/main/sessions/14_trees/train.tsv",
    sep="\t",
)
test = pd.read_csv(
    "https://github.com/ddojo/ddojo.github.io/raw/main/sessions/14_trees/test.tsv",
    sep="\t",
)

# --- Variant 1: all rows, missing values kept ---
y = data.species
X = data.drop("species", axis=1)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

tree_id = test.tree_id
X_test = test.drop("tree_id", axis=1)

# Placeholder prediction table: one row per test tree, species filled in later.
pred = pd.DataFrame({"tree_id": tree_id, "species": "unknown"})

# --- Variant 2: only rows without any missing value ---
data_complete = data.dropna()
test_complete = test.dropna()

y_complete = data_complete.species
X_complete = data_complete.drop("species", axis=1)
X_train_complete, X_val_complete, y_train_complete, y_val_complete = train_test_split(
    X_complete, y_complete, random_state=42
)

tree_id_complete = test_complete.tree_id
X_test_complete = test_complete.drop("tree_id", axis=1)

# Same placeholder table, restricted to the complete test rows (keeps their
# original index labels).
pred_complete = pd.DataFrame({"tree_id": tree_id_complete, "species": "unknown"})

X_test_complete
latitude | longitude | stem_diameter_cm | height_m | crown_radius_m | |
---|---|---|---|---|---|
0 | 46.01 | 11.44 | 5.0 | 14.2 | 3.10 |
1 | 46.01 | 11.44 | 5.0 | 5.4 | 1.80 |
2 | 46.03 | 11.43 | 5.0 | 4.9 | 1.75 |
3 | 46.05 | 10.99 | 5.0 | 6.5 | 1.55 |
4 | 46.05 | 10.99 | 5.0 | 4.9 | 1.90 |
... | ... | ... | ... | ... | ... |
8892 | 46.24 | 7.26 | 21.0 | 2.6 | 3.00 |
8893 | 47.39 | 7.36 | 32.0 | 21.6 | 4.00 |
8894 | 46.09 | 8.99 | 36.0 | 3.0 | 3.50 |
8895 | 46.74 | 6.96 | 37.0 | 32.3 | 4.50 |
8896 | 47.32 | 7.26 | 38.0 | 28.2 | 4.00 |
7963 rows × 5 columns
from sklearn.neural_network import MLPClassifier
# First attempt: a tiny network (hidden layers of 5 and 2 units) fitted on the
# unscaled complete-case features, capped at 15 iterations.
clf = MLPClassifier(
    solver="adam",
    alpha=1e-5,
    hidden_layer_sizes=(5, 2),
    random_state=1,
    max_iter=15,
)
clf.fit(X_train_complete, y_train_complete)
MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2),random_state=1,
solver='lbfgs')
/usr/local/lib/python3.8/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (15) reached and the optimization hasn't converged yet. warnings.warn(
MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2), random_state=1, solver='lbfgs')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2), random_state=1, solver='lbfgs')
# Predict species for the test rows without missing values, using the
# classifier fitted on the unscaled features.
# NOTE(review): `predict` is not read again in the visible part of this file.
predict = clf.predict(X_test_complete)
from sklearn.preprocessing import StandardScaler
# Standardize the features. The scaler is fitted on the training split only,
# so no information from the validation or test rows enters the fit.
scaler = StandardScaler().fit(X_train_complete)

# Apply the identical (training-derived) transformation to every split.
X_train_scale, X_val_scale, X_test_scale = (
    scaler.transform(part)
    for part in (X_train_complete, X_val_complete, X_test_complete)
)
X_train_scale
array([[ 0.3111399 , 0.6479698 , 0.31078895, -0.36213006, 0.86597811], [ 0.17541879, -0.17084478, -0.88682956, -0.68421516, -1.19200251], [-0.35326228, -0.4264638 , 0.67370365, -0.28160878, 0.86597811], ..., [ 1.92306482, 2.77123044, -1.08038406, -0.168879 , -0.19731188], [-1.35507346, -0.02167165, -0.29406888, -0.68421516, -0.33451058], [-0.40723509, -0.41693578, 0.52248919, -0.44265133, 1.0374765 ]])
# Second attempt: two hidden layers of 10 units each, fitted on the
# standardized features with a small initial learning rate and tolerance and
# up to 1500 iterations.
clf = MLPClassifier(
    solver="adam",
    alpha=1e-5,
    learning_rate_init=1e-5,
    tol=1e-5,
    hidden_layer_sizes=(10, 10),
    random_state=1,
    max_iter=1500,
)
clf.fit(X_train_scale, y_train_complete)
/usr/local/lib/python3.8/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (1500) reached and the optimization hasn't converged yet. warnings.warn(
MLPClassifier(alpha=1e-05, hidden_layer_sizes=(10, 10), learning_rate_init=1e-05, max_iter=1500, random_state=1, tol=1e-05)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
MLPClassifier(alpha=1e-05, hidden_layer_sizes=(10, 10), learning_rate_init=1e-05, max_iter=1500, random_state=1, tol=1e-05)
# Score the scaled-feature classifier on the held-out validation split.
clf.score(X_val_scale, y_val_complete)
0.9021461420541645
# Small grid search over the sizes of the two hidden layers. Each entry of
# `result` is [first_layer_size, second_layer_size, validation_score].
result = []
for x_layers in range(6, 7):
    for y_layers in range(2, 4):
        # Same optimizer settings for every candidate; only the layer sizes vary.
        clf_1 = MLPClassifier(
            solver="adam",
            alpha=1e-5,
            hidden_layer_sizes=(x_layers, y_layers),
            random_state=1,
            max_iter=1500,
        )
        clf_1.fit(X_train_scale, y_train_complete)
        # Evaluate on the validation split, never on the training data.
        hans = clf_1.score(X_val_scale, y_val_complete)
        result.append([x_layers, y_layers, hans])
/usr/local/lib/python3.8/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:693: UserWarning: Training interrupted by user. warnings.warn("Training interrupted by user.") /usr/local/lib/python3.8/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:693: UserWarning: Training interrupted by user. warnings.warn("Training interrupted by user.") /usr/local/lib/python3.8/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:693: UserWarning: Training interrupted by user. warnings.warn("Training interrupted by user.") /usr/local/lib/python3.8/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:693: UserWarning: Training interrupted by user. warnings.warn("Training interrupted by user.") /usr/local/lib/python3.8/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:693: UserWarning: Training interrupted by user. warnings.warn("Training interrupted by user.") /usr/local/lib/python3.8/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:693: UserWarning: Training interrupted by user. warnings.warn("Training interrupted by user.") /usr/local/lib/python3.8/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:693: UserWarning: Training interrupted by user. warnings.warn("Training interrupted by user.") /usr/local/lib/python3.8/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:693: UserWarning: Training interrupted by user. warnings.warn("Training interrupted by user.")
# Show the collected [x_layers, y_layers, validation score] rows.
result
# X_test_scale was built from X_test_complete, i.e. only the test rows that
# survive dropna(), so clf.predict() returns fewer values than `pred` has rows.
# Assigning the array to the whole column raises
# "ValueError: Length of values ... does not match length of index".
# Assign by index label instead: predicted rows get their species, and rows
# with missing values keep the "unknown" placeholder.
pred.loc[X_test_complete.index, "species"] = clf.predict(X_test_scale)
pred.to_csv("clf_prediction.tsv", sep="\t")
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) /tmp/ipykernel_1872/2466453748.py in <cell line: 1>() ----> 1 pred["species"] = clf.predict(X_test_scale) 2 pred.to_csv("clf_prediction.tsv", sep="\t") /usr/local/lib/python3.8/dist-packages/pandas/core/frame.py in __setitem__(self, key, value) 3978 else: 3979 # set column -> 3980 self._set_item(key, value) 3981 3982 def _setitem_slice(self, key: slice, value): /usr/local/lib/python3.8/dist-packages/pandas/core/frame.py in _set_item(self, key, value) 4172 ensure homogeneity. 4173 """ -> 4174 value = self._sanitize_column(value) 4175 4176 if ( /usr/local/lib/python3.8/dist-packages/pandas/core/frame.py in _sanitize_column(self, value) 4913 4914 if is_list_like(value): -> 4915 com.require_length_match(value, self.index) 4916 return sanitize_array(value, self.index, copy=True, allow_2d=True) 4917 /usr/local/lib/python3.8/dist-packages/pandas/core/common.py in require_length_match(data, index) 569 """ 570 if len(data) != len(index): --> 571 raise ValueError( 572 "Length of values " 573 f"({len(data)}) " ValueError: Length of values (7963) does not match length of index (8897)
# Show the prediction table (tree_id and species columns).
pred
tree_id | species | |
---|---|---|
0 | T_75102 | unknown |
1 | T_75103 | unknown |
2 | T_75118 | unknown |
3 | T_75152 | unknown |
4 | T_75161 | unknown |
... | ... | ... |
8892 | T_497140 | unknown |
8893 | T_497324 | unknown |
8894 | T_497361 | unknown |
8895 | T_497401 | unknown |
8896 | T_497446 | unknown |
8897 rows × 2 columns
Alternatively, write predictions only for the test rows without missing values:
# pred_complete and X_test_scale are both derived from test.dropna(), so there
# is exactly one prediction per row and the column assignment lines up.
complete_species = clf.predict(X_test_scale)
pred_complete["species"] = complete_species

# Written to the same path as the pred.to_csv call earlier in the file.
pred_complete.to_csv("clf_prediction.tsv", sep="\t")