import rasterio as rio
import geopandas as gpd
from pathlib import Path
import rasterio.plot as rioplot
import matplotlib.pyplot as plt
Tabular data workflow
Point-based
# Locations of the example inputs: the raster mosaic and the point
# observations both live under the workflow example directory.
path_to_data = Path('workflow_examples/')
raster_data = path_to_data/'s2_2018_lataseno.tif'
point_data = path_to_data/'points_clc.geojson'
The example data here are a Sentinel-2 mosaic from 2018 with 9 bands, and scattered point observations from the same area. The target class for each point is the Corine Land Cover class at the corresponding location.
# Two side-by-side panels: the raster on the left, the labelled points on
# the right.
fig, axs = plt.subplots(1,2, dpi=100, figsize=(10,4))

# Show bands 3, 2 and 1 of the raster (presumably red, green and blue --
# confirm against the mosaic's band order).
with rio.open(raster_data) as src:
    rioplot.show((src, (3,2,1)), adjust=True, ax=axs[0])

train_gdf = gpd.read_file(point_data)
# Cast the target column to a categorical dtype before plotting
# (presumably so the legend lists discrete classes).
train_gdf['corine'] = train_gdf.corine.astype('category')
train_gdf.plot(column='corine', ax=axs[1], cmap='tab20', markersize=1, legend=True,
               legend_kwds={'loc':'right', 'bbox_to_anchor':(1.2,0.5)})
plt.suptitle('Example area')
plt.tight_layout()
plt.show()
Create the dataset
CLI
# Positional arguments, in order: point file, raster file, target column,
# output directory.
geo2ml_sample_points \
    example_data/workflow_examples/points_clc.geojson \
    example_data/workflow_examples/s2_2018_lataseno.tif \
    corine \
    example_data/workflow_examples/points/ \
    --out_prefix example
Python
from geo2ml.scripts.data import sample_points
# Directory that receives the sampled dataset.
outpath = path_to_data/'points'

# Sample the raster at the point locations, using 'corine' as the target
# column; output files are prefixed with 'example'.
sample_points(point_data, raster_data, 'corine', outpath, out_prefix='example')
Training a random forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import pandas as pd
# Load the CSV written by the sampling step.
df = pd.read_csv(outpath/'example__s2_2018_lataseno__points_clc__corine.csv')

# Target is the 'corine' column; every other column is used as a feature.
y = df['corine']
X = df.drop(columns='corine')

# Default train/test split and default random forest hyperparameters.
X_train, X_test, y_train, y_test = train_test_split(X, y)
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
RandomForestClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier()
# Predict on the held-out split and print per-class metrics.
# zero_division=0 reports 0 for classes that have no predicted samples.
y_pred = rf.predict(X_test)
print(classification_report(y_test, y_pred, zero_division=0))
precision recall f1-score support
23 0.55 0.77 0.64 81
24 0.00 0.00 0.00 7
25 0.00 0.00 0.00 7
28 0.00 0.00 0.00 2
32 0.42 0.36 0.39 36
33 0.00 0.00 0.00 2
34 0.00 0.00 0.00 2
35 0.00 0.00 0.00 3
39 0.00 0.00 0.00 1
40 0.00 0.00 0.00 2
42 0.00 0.00 0.00 1
43 0.70 0.72 0.71 75
47 1.00 0.50 0.67 2
48 0.83 0.83 0.83 6
accuracy 0.59 227
macro avg 0.25 0.23 0.23 227
weighted avg 0.53 0.59 0.55 227
Polygon-based
The example data here are RGB UAV data from Evo, Hämeenlinna, and the target polygons are tree canopies. The target class is the species (spruce, pine, birch, aspen) or standing deadwood, found in the column `label`. As the extracted features, we use the min, max, mean, std and median of the red, green and blue channels within the canopies.
# Locations of the polygon example inputs: the UAV raster and the canopy
# polygons.
uav_data = path_to_data/'example_area.tif'
canopy_data = path_to_data/'canopies.geojson'
# Two side-by-side panels: the UAV raster on the left, the canopy polygons
# coloured by label on the right.
fig, axs = plt.subplots(1,2, dpi=100, figsize=(5,7))

# Show bands 1, 2 and 3 of the UAV raster.
with rio.open(uav_data) as src:
    rioplot.show((src, (1,2,3)), adjust=True, ax=axs[0])

train_gdf = gpd.read_file(canopy_data)
train_gdf.plot(column='label', ax=axs[1], cmap='tab20')
plt.suptitle('Example area')
plt.tight_layout()
plt.show()
CLI
# Positional arguments, in order: polygon file, raster file, target column,
# output directory; the flags select which statistics to extract.
geo2ml_sample_polygons \
    example_data/workflow_examples/canopies.geojson \
    example_data/workflow_examples/example_area.tif \
    label \
    example_data/workflow_examples/polygons/ \
    --out_prefix example --max --mean --std --median --min
Python
from geo2ml.scripts.data import sample_polygons
# Directory that receives the sampled dataset.
outpath = path_to_data/'polygons'

# Extract per-polygon statistics from the raster, using 'label' as the
# target column. min, max, mean, std and median are enabled; count, sum
# and categorical are disabled.
sample_polygons(canopy_data, uav_data, 'label', outpath, out_prefix='example',
                min=True, max=True, mean=True, std=True, median=True,
                count=False, sum=False, categorical=False)
/home/mayrajeo/miniconda3/envs/point-eo-dev/lib/python3.11/site-packages/rasterstats/io.py:328: NodataWarning: Setting nodata to -999; specify nodata explicitly
warnings.warn(
Dataset structure
Train model
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Load the CSV written by the polygon sampling step.
df = pd.read_csv(outpath/'example__example_area__canopies__label.csv')

# Encode the 'label' column as integers; the encoder is kept so the class
# names can be recovered for reporting.
le = LabelEncoder()
y = le.fit_transform(df['label'])
X = df.drop(columns='label')

# Default train/test split and default random forest hyperparameters.
X_train, X_test, y_train, y_test = train_test_split(X, y)
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
RandomForestClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier()
# Predict on the held-out split and print per-class metrics, labelling the
# rows with the original class names from the encoder.
y_pred = rf.predict(X_test)
print(classification_report(y_test, y_pred, zero_division=0, target_names=le.classes_))
precision recall f1-score support
Birch 0.67 0.59 0.63 326
European aspen 0.85 0.50 0.63 102
Norway spruce 0.83 0.93 0.87 777
Scots pine 0.84 0.78 0.81 273
Standing deadwood 0.96 0.96 0.96 74
accuracy 0.81 1552
macro avg 0.83 0.75 0.78 1552
weighted avg 0.80 0.81 0.80 1552
# Build the confusion matrix over the classes the forest was trained on and
# display it with the original class names from the encoder.
cm = confusion_matrix(y_test, y_pred, labels=rf.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=le.classes_)
disp.plot()
plt.show()