Dataset description and generation

Author

Janne Mäyrä

Published

December 22, 2022

Code

from pathlib import Path
import os, sys
import geopandas as gpd
import pandas as pd
import math
import numpy as np
from shapely.geometry import Point
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

from drone_detector.utils import *
from drone_detector.processing.tiling import *

import warnings
warnings.filterwarnings("ignore")
sys.path.append('..')
from src.tree_functions import *

Code

from tqdm.auto import tqdm
tqdm.pandas()

Code

# visualization functions

from shapely.geometry import LineString, box, Point, Polygon
from shapely.geometry.polygon import orient
from shapely.ops import nearest_points

def get_three_point_diam_lines(geom):
    d = get_len(geom)
    mrr = geom.minimum_rotated_rectangle
    mrr = orient(mrr)
    mrr_x, mrr_y = mrr.exterior.coords.xy
    a_ix = list(mrr_y).index(min(mrr_y))
    b_ix = a_ix + 1 if a_ix < 3 else 0
    c_ix = b_ix + 1 if b_ix < 3 else 0
    d_ix = c_ix + 1 if c_ix < 3 else 0 
    ab = LineString([Point(mrr_x[a_ix], mrr_y[a_ix]), Point(mrr_x[b_ix], mrr_y[b_ix])])
    ad = LineString([Point(mrr_x[a_ix], mrr_y[a_ix]), Point(mrr_x[d_ix], mrr_y[d_ix])])
    if ab.length > ad.length:
        maxline = ab
        max_ort = LineString([Point(mrr_x[d_ix], mrr_y[d_ix]), Point(mrr_x[c_ix], mrr_y[c_ix])])
    else:
        maxline = ad
        max_ort = LineString([Point(mrr_x[b_ix], mrr_y[b_ix]), Point(mrr_x[c_ix], mrr_y[c_ix])]) 
    bot_a, bot_b = maxline.boundary
    top_a, top_b = max_ort.boundary
    if top_a.y == top_b.y: # horizontal
        line_10 = LineString([Point(min(top_a.x, top_b.x) + 0.10*d, top_a.y), 
                              Point(min(bot_a.x, bot_b.x) + 0.10*d, bot_a.y)])
        line_50 = LineString([Point(min(top_a.x, top_b.x) + 0.5*d, top_a.y), 
                              Point(min(bot_a.x, bot_b.x) + 0.5*d, bot_a.y)])
        line_90 = LineString([Point(max(top_a.x, top_b.x) - 0.10*d, top_b.y),
                              Point(max(bot_a.x, bot_b.x) - 0.10*d, bot_b.y)])
    elif top_a.x == top_b.x: # vertical
        line_10 = LineString([Point(top_a.x, min(top_a.y,  top_b.y) + 0.10*d), 
                              Point(bot_a.x, min(bot_a.y,  bot_b.y) + 0.10*d)])
        line_50 = LineString([Point(top_a.x, min(top_a.y,  top_b.y) + 0.5*d), 
                              Point(bot_a.x, min(bot_a.y,  bot_b.y) + 0.5*d)])
        line_90 = LineString([Point(top_b.x, max(top_a.y,  top_b.y) - 0.10*d), 
                              Point(bot_b.x, max(bot_a.y,  bot_b.y) - 0.10*d)])
    else: # we do geometry, solve <x,y> = <x_1,y_1> + t<1,m>
        k = (bot_b.y - bot_a.y) / (bot_b.x - bot_a.x)
        mul = 1 if k > 0 else -1
        top_x_10 = top_a.x + mul * np.sqrt(((0.10*d)**2)/(1+k**2))
        top_x_50 = top_a.x + mul * np.sqrt(((0.5*d)**2)/(1+k**2))
        top_x_90 = top_a.x + mul * np.sqrt(((0.90*d)**2)/(1+k**2))
        top_y_10 = top_a.y + np.sqrt(((0.10*d*k)**2)/(1+k**2))
        top_y_50 = top_a.y + np.sqrt(((0.5*d*k)**2)/(1+k**2))
        top_y_90 = top_a.y + np.sqrt(((0.90*d*k)**2)/(1+k**2))
        bot_x_10 = bot_a.x + mul * np.sqrt(((0.10*d)**2)/(1+k**2))
        bot_x_50 = bot_a.x + mul * np.sqrt(((0.5*d)**2)/(1+k**2))
        bot_x_90 = bot_a.x + mul * np.sqrt(((0.90*d)**2)/(1+k**2))
        bot_y_10 = bot_a.y + np.sqrt(((0.10*d*k)**2)/(1+k**2))
        bot_y_50 = bot_a.y + np.sqrt(((0.5*d*k)**2)/(1+k**2))
        bot_y_90 = bot_a.y + np.sqrt(((0.90*d*k)**2)/(1+k**2))
        line_10 = LineString([Point(top_x_10, top_y_10), Point(bot_x_10, bot_y_10)])
        line_50 = LineString([Point(top_x_50, top_y_50), Point(bot_x_50, bot_y_50)])
        line_90 = LineString([Point(top_x_90, top_y_90), Point(bot_x_90, bot_y_90)])
    return line_10, line_50, line_90

def get_iline(geom):
    p1, p2 = nearest_points(geom.minimum_rotated_rectangle.exterior, geom.representative_point())
    minx, miny, maxx, maxy = geom.bounds
    line = LineString([p1,p2])
    bbox = box(*geom.bounds)
    a,b = line.boundary
    if a.x == b.x:
        extended_line = LineString([(a.x, miny), (a.x, maxy)])
    elif a.y == b.y:
        extended_line = LineString([(minx, a.y), (maxx, a.y)])
    else:
        k = (b.y - a.y) / (b.x - a.x)
        m = a.y - k * a.x
        y0 = k * minx + m
        y1 = k * maxx + m
        x0 = (miny - m) / k
        x1 = (maxy - m) / k
        points_on_boundary_lines = [Point(minx, y0), Point(maxx, y1), 
                                    Point(x0, miny), Point(x1, maxy)]
        points_sorted_by_distance = sorted(points_on_boundary_lines, key=bbox.distance)
        extended_line = LineString(points_sorted_by_distance[:2])
    return extended_line.intersection(geom)

1 Materials and methods

1.1 Study areas

The first study area is located in the eastern side of Hiidenportti National Park, Sotkamo, Eastern-Finland, and the field data contains areas both inside and outside the conserved areas (Hiidenportti National Park in the west and Teerisuo - Lososuo area in the east). The most common trees in Hiidenportti area are Scots pine and Norway spruce, with different deciduous trees (e.g. Silver birch, Downy birch and European aspen). The UAV data from this study area consist of nine partially overlapping RGB orthoimages which cover approximately 10 km² area, with spatial resolution ranging between 3.9cm and 4.4cm. These scenes cover both conserved and managed forests, as well as some logging openings. The UAV data were collected on 16. and 17.7.2019 using DJI Phantom 4 RTK UAV equipped with 20 megapixel CMOS sensor.

Our other study area is located in Sudenpesänkangas, Evo, Southern-Finland. The data was acquired on 11.7.2018, using eBee Plus RTK fixed-wing platform, equipped with a 20 megapixel global shutter S.O.D.A camera with a field of view of 64\(^{\circ}\). The UAV data from Evo consist of a single RGB orthomosaic which has a spatial resolution of 4.9cm. The area covers both managed and conserved areas, and the canopy is mostly dominated by Scots pine and Norway spruce, with a mixture of the two birch species. Other species (e.g. aspen and larch) are rather scarce in dominant canopy layer. Data from Evo was used for testing the methods in different geographical location

In addition to UAV data, we also have high-resolution LiDAR data from both of our study areas. The point density for Hiidenportti LiDAR data was approximately 15 points/m\(^2\), and 11 points/m\(^2\) for Evo LiDAR data. These LiDAR data from Hiidenportti were the same as used in Heinaro et al (2021), while the LiDAR data from Evo were used in both Viinikka et al (2020 and Mäyrä et al (2021). We used these data to create canopy height models (CHMs) with 0.5m spatial resoution for both study areas, which were then used to compute the canopy densities for the field plots.

1.2 Virtual deadwood data

Having accurate and good quality ground reference data is not only important for training the models, but also for evaluating the obtained results. Even though we had extensive field data from both of our study areas, using only these data would not provide enough training data for our methods. For both of our study areas, we constructed rectangular virtual plots (scenes) in such way that all usable field plots were covered. Because the sizes of the UAV mosaics were different between the study areas, these plots were constructed slightly differently. For Hiidenportti dataset, we constructed 33 rectangular scenes in such way that all usable field plots were covered and each plot had at least 45 meter distance to the border of the scene. As the total area of Sudenpesänkangas UAV-mosaic is larger than Hiidenportti, we constructed the scenes for Evo area as 100x100 meter non-overlapping squares in such way that each virtual plot contained only one circular field plot.

We manually annotated all visible deadwood from the scenes to use our reference data. Both deadwood types were annotated as a separate classes. The standing deadwood instances were annotated so that the full canopy were inside the polygon, but for fallen deadwood we annotated the trunks and left the branches and twigs unannotated. We annotated even the shortest deadwood instances visible, as it was possible that in reality they belonged of a larger trunk, but were partially obscured by canopy.

1.2.1 Hiidenportti dataset

Out of the 178 circular field plots with 9m radius present in the Hiidenportti area, our UAV data covered 75 in such way that the image quality was sufficient for our study. Out of these 75, we omitted four plots as they had been cut down between the time of field work and UAV flights, bringing the total number of field plots to 71. 23 of these field plots contained accurately measured and located deadwood data, collected by Heinaro et al (2021). In Evo area, the UAV data covered 79 circular field plots with 9 meter radius, from which 71 had suitable image quality for our use cases.

Code

full_data = gpd.read_file('../../data/raw/hiidenportti/virtual_plots/all_deadwood_hiidenportti.geojson')
conservation_areas = gpd.read_file('../data/common/LsAlueValtio.shp')
cons_hp = conservation_areas[conservation_areas.geometry.intersects(box(*full_data.total_bounds))]

full_data['conservation'] = full_data.progress_apply(lambda row: 1 if row.geometry.intersects(cons_hp.geometry.unary_union)
                                                                 else 0, axis=1)
full_data.to_file('../../data/raw/hiidenportti/virtual_plots/all_deadwood_hiidenportti.geojson')

In total, the final deadwood data contained 7396 annotated fallen deadwood and 1083 standing deadwood instances. The total numbers of deadwood instances present in a single virtual plot varied between 47 and 1051, the number of fallen deadwood between 31 and 990, and for the number of standing deadwood between 1 and 159. Relatively, standing deadwood was much more common in conserved areas than in managed forests, as 59% (639 instances) of standing deadwood were located in conservation areas, compared to 30.6% (2260 instances) of fallen deadwood.

Code

pd.crosstab(full_data.conservation, full_data.layer, margins=True)

layer	groundwood	uprightwood	All
conservation
0	5909	683	6592
1	1487	400	1887
All	7396	1083	8479

In addition to the total number of deadwood instances, these data tells us also the total area covered in m² as well as pixels for each deadwood type, both for the polygons and their bounding boxes. Based on the annotated polygons, we estimated the trunk length of the fallen deadwood and the maximum canopy diameter of the standing deadwood to be the longer side of the minimum rotated rectangle for the corresponding polygon. Because the spatial resolution of our data is around 4cm, we did not estimate fallen deadwood diameter or volume, as even an error of one pixel effects these calculations too much.

Overall, the total area covered by fallen deadwood was 3459.81m² and 3565.37m² by standing deadwood. The mean area covered by a single fallen deadwood polygon was around 0.47m² and 3.29m² for standing deadwood.

Code

full_data['area'] = full_data.geometry.area
full_data['pixel_area'] = full_data.area / 0.04**2
full_data_dis = full_data.dissolve(by='layer')
full_data_dis.area

layer
groundwood     3459.805448
uprightwood    3565.369704
dtype: float64

Code

pd.pivot_table(full_data, index=['layer'], values='area', aggfunc=['min', 'max', 'mean', 'median'], margins=True)

	min	max	mean	median
	area	area	area	area
layer
groundwood	0.024711	5.789380	0.469012	0.348866
uprightwood	0.103820	24.334151	3.292461	2.416391
All	0.024711	24.334151	0.829644	0.395097

<seaborn.axisgrid.FacetGrid>

Based on the annotated polygons, we also estimated the trunk length of the fallen deadwood and the maximum canopy diameter of the standing deadwood to be the longest side of the minimum rotated rectangle for the corresponding polygon. For diameter and volume estimation for the fallen deadwood, we constructed three lines that are perpendicular with the minimum rotated rectangle of the polygon that intersect in the polygon at the 10%, 50% and 90% of the length. This process is visualized below. Based on these lines, we used the mean length of these intersecting lines to estimate the polygon diameter. For the volume estimation, we assumed that the fallen deadwood polygons are constructed from two truncated right circular cones and used the following equations to estimate the volume:

\[ V_{1} = \frac{\pi h(r_{10}^2 + r_{10}r_{50} + r_{50}^2)}{3} \\ V_{2} = \frac{\pi h(r_{90}^2 + r_{90}r_{50} + r_{50}^2)}{3} \\ V = V_1 + V_2 \]

where r\(_{x}\) are the radii at the length percentile of x, and h is half the estimated polygon length.

The average fallen deadwood trunk length was around 2.48 meters, with the shortest annotated tree being 0.21m and longest 15.38m. On average, both deadwood types were larger in the conservation areas compared to the managed forests. It is worth mentioning that 1655 of the annotated fallen deadwood were shorter than 1.3m (the minimum length for the tree to be measured in the Finnish National Forest Inventory), but due to the possibility of them being partially obscured by the canopy, they were included in the deadwood data.

Code

full_data['tree_length'] = full_data.apply(lambda row: get_len(row.geometry), axis=1)
len(full_data[(full_data.layer == 'groundwood') & (full_data.tree_length < 1.3)])

Statistics for estimated tree length and canopy diameter.

Code

pd.pivot_table(full_data, index=['layer'], 
               values=['tree_length'], aggfunc=['min', 'max', 'mean', 'median', 'count'], margins=True)

	min	max	mean	median	count
	tree_length	tree_length	tree_length	tree_length	tree_length
layer
groundwood	0.206041	15.383351	2.404708	2.002028	7396
uprightwood	0.385542	8.313442	2.323786	2.154042	1083
All	0.206041	15.383351	2.394372	2.024371	8479

Code

pd.pivot_table(full_data, index=['conservation', 'layer'], 
               values=['tree_length'], aggfunc=['min', 'max', 'mean', 'median', 'count'], margins=True)

		min	max	mean	median	count
		tree_length	tree_length	tree_length	tree_length	tree_length
conservation	layer
0	groundwood	0.206041	15.383351	2.284923	1.915742	5909
0	uprightwood	0.385542	6.977358	2.248640	2.075352	683
1	groundwood	0.346741	12.954119	2.880703	2.388131	1487
1	uprightwood	0.633733	8.313442	2.452099	2.294426	400
All		0.206041	15.383351	2.394372	2.024371	8479

Statistics for estimated diameter for groundwood.

Code

full_data['diam'] = full_data.geometry.apply(lambda row: np.mean(get_three_point_diams(row))) * 1000
pd.pivot_table(full_data[full_data.layer=='groundwood'], index=['conservation'], 
               values=['diam'], aggfunc=['min', 'max', 'mean', 'median', 'count'], margins=True)

	min	max	mean	median	count
	diam	diam	diam	diam	diam
conservation
0	55.336382	626.065954	180.397765	172.583156	5909
1	68.693180	677.660650	208.541880	190.436852	1487
All	55.336382	677.660650	186.056270	175.094096	7396

Look at the largest polygons.

Code

# echo: false

fig, axs = plt.subplots(1,2, figsize=(10,4))
for ix, ax in enumerate(axs.flatten()):
    full_data[(full_data.layer == 'groundwood')&(full_data.diam>650)].iloc[ix:ix+1].exterior.plot(ax=ax, color="#e41a1c")
    temp = full_data[(full_data.layer == 'groundwood')&(full_data.diam>650)].iloc[ix:ix+1].copy()
    geom = temp.iloc[0].geometry
    temp['geometry'] = temp.apply(lambda row: row.geometry.minimum_rotated_rectangle, axis=1)
    temp.exterior.plot(ax=ax, color='#377eb8', linestyle='--')
    temp['geometry'] = temp.apply(lambda row: LineString([(row.geometry.exterior.xy[0][0], row.geometry.exterior.xy[1][0]),
                                                         (row.geometry.exterior.xy[0][1], row.geometry.exterior.xy[1][1])]),
                                  axis=1)
    temp.plot(ax=ax, color='#377eb8')
    temp2 = full_data[(full_data.layer == 'groundwood')&(full_data.diam>650)].iloc[ix:ix+1].copy()
    lines = get_three_point_diam_lines(temp2.iloc[0].geometry)
    for l in lines:
        temp2['geometry'] = l
        temp2.plot(ax=ax, edgecolor='#4daf4a', linestyle='--')
    for l in lines:
        temp2['geometry'] = l.intersection(geom)
        temp2.plot(ax=ax, edgecolor='#984ea3')
plt.show()

Estimations for volume of fallen deadwood.

Code

full_data['v'] = full_data.geometry.apply(cut_cone_volume)

pd.pivot_table(full_data[full_data.layer=='groundwood'], index=['conservation'], 
               values=['v'], aggfunc=['min', 'max', 'mean', 'median', 'count', 'sum'], margins=True)

	min	max	mean	median	count	sum
	v	v	v	v	v	v
conservation
0	0.001237	2.399251	0.072439	0.046403	5909	428.041718
1	0.002850	2.130253	0.128083	0.070668	1487	190.459568
All	0.001237	2.399251	0.083626	0.050058	7396	618.501287

Finally, get estimations of total volume of groundwood m³/ha.

Code

field_data_path = Path('../data/hiidenportti')
virtual_plot_grid = gpd.read_file(field_data_path/'envelopes_with_trees.geojson')
tot_vplot_area = virtual_plot_grid.area.sum()
cons_areas = gpd.clip(cons_hp, virtual_plot_grid)
tot_cons_area = cons_areas.area.sum()
tot_man_area = tot_vplot_area - tot_cons_area
tot_man_ha = tot_man_area / 10000
tot_cons_ha = tot_cons_area / 10000

est_v_man = full_data[(full_data.layer=='groundwood')&(full_data.conservation==0)].v.sum()/tot_man_ha
est_v_cons = full_data[(full_data.layer=='groundwood')&(full_data.conservation==1)].v.sum()/tot_cons_ha
est_v_tot = full_data[(full_data.layer=='groundwood')].v.sum()/(tot_vplot_area/10000)

print(f'Estimated groundwood volume in managed forests: {est_v_man:.2f} ha/m³')
print(f'Estimated groundwood volume in conserved forests: {est_v_cons:.2f} ha/m³')
print(f'Estimated groundwood volume in both types: {est_v_tot:.2f} ha/m³')

Estimated groundwood volume in managed forests: 10.76 ha/m³
Estimated groundwood volume in conserved forests: 11.59 ha/m³
Estimated groundwood volume in both types: 11.00 ha/m³

1.2.2 Evo dataset

We constructed the scenes from Evo area as 100x100 meter non-overlapping squares in such way that each virtual plot contained only one circular field plot. This dataset contained 3915 fallen deadwood and 1419 standing deadwood instances, and the total number of deadwood instances in a single field plot varied between 3 and 570, the number of fallen deadwood between 1 and 570, and the number of standing deadwood between 1 and 162.

Code

evo_data = gpd.read_file('../../data/raw/sudenpesankangas/virtual_plots/sudenpesankangas_deadwood.geojson')
cons_evo = conservation_areas[conservation_areas.geometry.intersects(box(*evo_data.total_bounds))]
evo_data['conservation'] = evo_data.progress_apply(lambda row: 1 if row.geometry.intersects(cons_evo.geometry.unary_union)
                                                               else 0, axis=1)
evo_data.to_file('../../data/raw/sudenpesankangas/virtual_plots/sudenpesankangas_deadwood.geojson')

Code

pd.crosstab(evo_data.conservation, evo_data.label, margins=True)

label	groundwood	uprightwood	All
conservation
0	2369	386	2755
1	1546	1033	2579
All	3915	1419	5334

The total area covered by fallen deadwood was 3117.1m² and the total canopy area for standing deadwood was 7196.4m². On average, a single deadwood instance covered more area compared to Hiidenportti dataset, as the average area for fallen deadwood in Evo was 0.81m² and 5.08m² for standing deadwood.

Code

evo_data['area'] = evo_data.geometry.area
evo_data['pixel_area'] = evo_data.area / 0.0485**2
evo_data_dis = evo_data.dissolve(by='label')
evo_data_dis.area

label
groundwood     3117.087445
uprightwood    7196.352919
dtype: float64

Code

pd.pivot_table(evo_data, index=['conservation', 'label'], values='area', 
               aggfunc=['min', 'max', 'mean', 'median'], margins=True)

		min	max	mean	median
		area	area	area	area
conservation	label
0	groundwood	0.064145	8.456464	0.646843	0.489316
0	uprightwood	0.364184	41.339628	3.142462	2.017817
1	groundwood	0.092866	8.173691	1.049027	0.735591
1	uprightwood	0.319265	35.944833	5.811285	4.102591
All		0.064145	41.339628	1.944172	0.758211

<seaborn.axisgrid.FacetGrid>

Similar observations were found from the average tree length and canopy diameter, as the average trunk length of the fallen trees was around 1 meter more in Evo, and canopy diameter of standing trees was around 60cm larger. However, this is likely due to the vast majority of the annotated trees in Evo dataset being in conserved areas.

Code

evo_data['tree_length'] = evo_data.apply(lambda row: get_len(row.geometry), axis=1)
pd.pivot_table(evo_data, index=['label'], 
               values=['tree_length'], aggfunc=['min', 'max', 'mean', 'median', 'count'], margins=True)

	min	max	mean	median	count
	tree_length	tree_length	tree_length	tree_length	tree_length
label
groundwood	0.575351	24.479043	3.429642	2.790052	3915
uprightwood	0.674329	8.319080	2.958530	2.745163	1419
All	0.575351	24.479043	3.304312	2.779696	5334

Code

pd.pivot_table(evo_data, index=['conservation','label'], 
               values=['tree_length'], aggfunc=['min', 'max', 'mean', 'median', 'count'], margins=True)

		min	max	mean	median	count
		tree_length	tree_length	tree_length	tree_length	tree_length
conservation	label
0	groundwood	0.605847	24.479043	3.113435	2.609120	2369
0	uprightwood	0.674329	8.319080	2.310101	2.080163	386
1	groundwood	0.575351	22.625422	3.914179	3.145306	1546
1	uprightwood	0.779996	7.974969	3.200828	2.981814	1033
All		0.575351	24.479043	3.304312	2.779696	5334

In addition of annotated fallen trees being longer in Evo than Hiidenportti, they were also thicker, as the average diameter around 20mm more in both forest types, and only 7 annotations had diameter less than 100mm.

Code

evo_data['diam'] = evo_data.geometry.apply(lambda row: np.mean(get_three_point_diams(row))) * 1000
pd.pivot_table(evo_data[evo_data.label=='groundwood'], index=['conservation'], 
               values=['diam'], aggfunc=['min', 'max', 'mean', 'median', 'count'], margins=True)

	min	max	mean	median	count
	diam	diam	diam	diam	diam
conservation
0	85.977991	760.781151	200.474161	188.603269	2369
1	101.096058	709.170305	254.250350	233.417789	1546
All	85.977991	760.781151	221.709917	203.275219	3915

Look at the largest polygons.

Code

evo_data['v'] = evo_data.geometry.apply(cut_cone_volume)
pd.pivot_table(evo_data[evo_data.label=='groundwood'], index=['conservation'], 
               values=['v'], aggfunc=['min', 'max', 'mean', 'median', 'count', 'sum'], margins=True)

	min	max	mean	median	count	sum
	v	v	v	v	v	v
conservation
0	0.004749	2.765285	0.125592	0.076470	2369	297.527245
1	0.011879	4.192773	0.262005	0.141118	1546	405.059473
All	0.004749	4.192773	0.179460	0.092882	3915	702.586718

The estimated total volume of fallen deadwood was 702.59 m³, of which 297.52m³ was in managed forests and 405.06m³ in conservation areas, corresponding to 7.08m³/ha of deadwood in managed forests and 13.97 m³/ha in conserved forests.

Code

evo_fd_path = Path('../data/sudenpesankangas/')
evo_grid = gpd.read_file(evo_fd_path/'vplots.geojson')
evo_grid = evo_grid.to_crs('epsg:3067')
evo_tot_vplot_area = evo_grid.area.sum()
evo_cons_area = cons_evo.overlay(evo_grid).area.sum()
evo_man_area = evo_tot_vplot_area - evo_cons_area
evo_man_ha = evo_man_area / 10000
evo_cons_ha = evo_cons_area / 10000
evo_tot_ha = evo_man_ha + evo_cons_ha

Code

evo_est_v_man = evo_data[(evo_data.label=='groundwood')&(evo_data.conservation==0)].v.sum()/evo_man_ha
evo_est_v_cons = evo_data[(evo_data.label=='groundwood')&(evo_data.conservation==1)].v.sum()/evo_cons_ha
evo_est_v_tot = evo_data[(evo_data.label=='groundwood')].v.sum()/evo_tot_ha

Code

print(f'Estimated groundwood volume in managed forests: {evo_est_v_man:.2f} ha/m³')
print(f'Estimated groundwood volume in conserved forests: {evo_est_v_cons:.2f} ha/m³')
print(f'Estimated groundwood volume in both types: {evo_est_v_tot:.2f} ha/m³')

Estimated groundwood volume in managed forests: 7.08 ha/m³
Estimated groundwood volume in conserved forests: 13.97 ha/m³
Estimated groundwood volume in both types: 9.90 ha/m³

1.2.3 Training data generation

In order to ensure that no training data leaked into the test set, which would have given our models overly optimistic results, we selected 5 of the virtual plots as the holdout set to test our models. These virtual plots contained both managed and conserved areas, and were only used for testing the models. Furthermore, the remaining 28 plots were spatially split to training and validation sets with 85:15 ratio to ensure that the models were validated during training with data not used for training.

As our virtual plots had too large dimensions to be efficiently used as the training data for the deep learning models, the final preprocessing step for UAV plot images was to split each of them into 512 times 512 pixel image patches. We also split the the polygon data into separate files so that each file only covers this smaller area. Finally, we converted the polygon data into COCO format, in which the polygon coordinates are converted into pixel coordinates related to the corresponding image. Because mosaicking images this way meant that some of the annotated polygons were only partially present in image patches, we removed all partial annotations whose area was less than 25% of the original annotation. Also, all polygons that had bounding box smaller than 16 pixels were discarded, as that small targets can not be accurately found from the data. For test images, as the model outputs were in COCO format, the results were transformed into georeferenced polygon data.

Code

tile_folder = Path('../../data/raw/hiidenportti/virtual_plots/train/images/')
vector_folder = Path('../../data/raw/hiidenportti/virtual_plots/train/vectors/')

outpath = Path('../../data/processed/hiidenportti/train_512')

tiles = os.listdir(tile_folder)
vectors = [f for f in os.listdir(vector_folder) if f.endswith('geojson')]
assert len(tiles) == len(vectors)

Code

for t in tiles:
    if not os.path.exists(outpath/t[:-4]): os.makedirs(outpath/t[:-4])
    shp_fname = t.replace('tif', 'geojson')
    tilesize = 512
    tiler = Tiler(outpath=outpath/t[:-4], gridsize_x=tilesize, gridsize_y=tilesize, overlap=(0,0))
    tiler.tile_raster(str(tile_folder/t))
    tiler.tile_vector(vector_folder/shp_fname, min_area_pct=0.25)

Code

import shapely
from shapely.geometry import box
#Fix labelling, todo fix it in COCOProcessor

for p in os.listdir(outpath):
    files = [outpath/p/'vector_tiles'/f for f in os.listdir(outpath/p/'vector_tiles') if f.endswith('geojson')]
    for f in files: 
        gdf = gpd.read_file(f)
        bbox = box(*gdf.total_bounds)
        gdf['geometry'] = gdf.geometry.buffer(0) # fix faulty geometries
        gdf['geometry'] = gdf.apply(lambda row: fix_multipolys(row.geometry) if row.geometry.type == 'MultiPolygon' 
                                    else shapely.geometry.Polygon(row.geometry.exterior), axis=1)
        gdf.rename(columns={'groundwood':'label'}, inplace=True)
        gdf = gpd.clip(gdf, bbox, keep_geom_type=True)
        gdf['label'] = gdf.apply(lambda row: 'Standing' if row.label == 1 else 'Fallen', axis=1)
        gdf.to_file(f, driver='GeoJSON')

Code

# Convert to COCO format

from drone_detector.processing.coco import *

deadwood_categories = [
        {'supercategory':'deadwood', 'id':1, 'name': 'uprightwood'},
        {'supercategory':'deadwood', 'id':2, 'name': 'groundwood'},
    ]

from datetime import date

coco_info = {'description': 'Train dataset for deadwood detection in Hiidenportti',
             'version': 0.1,
             'year': 2022,
             'contributor': 'Janne Mäyrä',
             'date_created': date.today().strftime("%Y/%m/%d")
}

coco_licenses = {}

for p in os.listdir(outpath):
    coco_processor = COCOProcessor(outpath/p, outpath/p, coco_info=coco_info, coco_licenses=coco_licenses,
                                  coco_categories=deadwood_categories)
    coco_processor.shp_to_coco('layer')

Code

# Combine several coco-annotation .json files into one

import json

full_coco = None
image_id_modifier = 0
ann_id_modifier = 0
for p in os.listdir(outpath):
    with open(outpath/p/'coco.json') as f:
        coco = json.load(f)
    
    # update filename
    for i in coco['images']:
        i['file_name'] = f"{p}/raster_tiles/{i['file_name']}"
        
    if full_coco is None: 
        full_coco = coco
        image_id_modifier = full_coco['images'][-1]['id']
        ann_id_modifier = full_coco['annotations'][-1]['id']
    else:
        for i in coco['images']:
            i['id'] += image_id_modifier
        for a in coco['annotations']:
            a['image_id'] += image_id_modifier
            a['id'] += ann_id_modifier
        
        full_coco['images'].extend(coco['images'])
        full_coco['annotations'].extend(coco['annotations'])
        image_id_modifier = full_coco['images'][-1]['id'] + 1
        ann_id_modifier = full_coco['annotations'][-1]['id'] + 1
        
with open(outpath.parents[0]/'hiidenportti_train.json', 'w') as outfile: json.dump(full_coco, outfile)

Code

tile_folder = Path('../../data/raw/hiidenportti/virtual_plots/valid/images/')
vector_folder = Path('../../data/raw/hiidenportti/virtual_plots/valid/vectors/')

outpath = Path('../../data/processed/hiidenportti/valid_512')

tiles = os.listdir(tile_folder)
vectors = [f for f in os.listdir(vector_folder) if f.endswith('geojson')]
assert len(tiles) == len(vectors)

Code

for t in tiles:
    if not os.path.exists(outpath/t[:-4]): os.makedirs(outpath/t[:-4])
    shp_fname = t.replace('tif', 'geojson')
    tilesize = 512
    tiler = Tiler(outpath=outpath/t[:-4], gridsize_x=tilesize, gridsize_y=tilesize, overlap=(0,0))
    tiler.tile_raster(str(tile_folder/t))
    tiler.tile_vector(vector_folder/shp_fname, min_area_pct=0.25)

Code

#Fix labelling, todo fix it in COCOProcessor

for p in os.listdir(outpath):
    files = [outpath/p/'vector_tiles'/f for f in os.listdir(outpath/p/'vector_tiles') if f.endswith('geojson')]
    for f in files: 
        gdf = gpd.read_file(f)
        bbox = box(*gdf.total_bounds)
        gdf['geometry'] = gdf.geometry.buffer(0) # fix faulty geometries
        gdf['geometry'] = gdf.apply(lambda row: fix_multipolys(row.geometry) if row.geometry.type == 'MultiPolygon' 
                                    else shapely.geometry.Polygon(row.geometry.exterior), axis=1)
        gdf.rename(columns={'groundwood':'label'}, inplace=True)
        gdf = gpd.clip(gdf, bbox, keep_geom_type=True)
        gdf['label'] = gdf.apply(lambda row: 'Standing' if row.label == 1 else 'Fallen', axis=1)
        gdf.to_file(f, driver='GeoJSON')

Code

# Convert to COCO format

from drone_detector.processing.coco import *

deadwood_categories = [
        {'supercategory':'deadwood', 'id':1, 'name': 'uprightwood'},
        {'supercategory':'deadwood', 'id':2, 'name': 'groundwood'},
    ]

from datetime import date

coco_info = {'description': 'Validation dataset for deadwood detection in Hiidenportti',
             'version': 0.1,
             'year': 2022,
             'contributor': 'Janne Mäyrä',
             'date_created': date.today().strftime("%Y/%m/%d")
}

coco_licenses = {}

for p in os.listdir(outpath):
    coco_processor = COCOProcessor(outpath/p, outpath/p, coco_info=coco_info, coco_licenses=coco_licenses,
                                  coco_categories=deadwood_categories)
    coco_processor.shp_to_coco('layer')

Code

# Combine several coco-annotation .json files into one

import json

full_coco = None
image_id_modifier = 0
ann_id_modifier = 0
for p in os.listdir(outpath):
    with open(outpath/p/'coco.json') as f:
        coco = json.load(f)
    
    # update filename
    for i in coco['images']:
        i['file_name'] = f"{p}/raster_tiles/{i['file_name']}"
        
    if full_coco is None: 
        full_coco = coco
        image_id_modifier = full_coco['images'][-1]['id']
        ann_id_modifier = full_coco['annotations'][-1]['id']
    else:
        for i in coco['images']:
            i['id'] += image_id_modifier
        for a in coco['annotations']:
            a['image_id'] += image_id_modifier
            a['id'] += ann_id_modifier
        
        full_coco['images'].extend(coco['images'])
        full_coco['annotations'].extend(coco['annotations'])
        image_id_modifier = full_coco['images'][-1]['id'] + 1
        ann_id_modifier = full_coco['annotations'][-1]['id'] + 1
        
with open(outpath.parents[0]/'hiidenportti_valid.json', 'w') as outfile: json.dump(full_coco, outfile)

Code

tile_folder = Path('../../data/raw/hiidenportti/virtual_plots/test/images/')
vector_folder = Path('../../data/raw/hiidenportti/virtual_plots/test/vectors/')

outpath = Path('../../data/processed/hiidenportti/test_512')

tiles = os.listdir(tile_folder)
vectors = [f for f in os.listdir(vector_folder) if f.endswith('geojson')]
assert len(tiles) == len(vectors)

Code

for t in tiles:
    if not os.path.exists(outpath/t[:-4]): os.makedirs(outpath/t[:-4])
    shp_fname = t.replace('tif', 'geojson')
    tilesize = 512
    tiler = Tiler(outpath=outpath/t[:-4], gridsize_x=tilesize, gridsize_y=tilesize, overlap=(0,0))
    tiler.tile_raster(str(tile_folder/t))
    tiler.tile_vector(vector_folder/shp_fname, min_area_pct=0.25)

Code

#Fix labelling, todo fix it in COCOProcessor

for p in os.listdir(outpath):
    files = [outpath/p/'vector_tiles'/f for f in os.listdir(outpath/p/'vector_tiles') if f.endswith('geojson')]
    for f in files: 
        gdf = gpd.read_file(f)
        bbox = box(*gdf.total_bounds)
        gdf['geometry'] = gdf.geometry.buffer(0) # fix faulty geometries
        gdf['geometry'] = gdf.apply(lambda row: fix_multipolys(row.geometry) if row.geometry.type == 'MultiPolygon' 
                                    else shapely.geometry.Polygon(row.geometry.exterior), axis=1)
        gdf.rename(columns={'groundwood':'label'}, inplace=True)
        gdf = gpd.clip(gdf, bbox, keep_geom_type=True)
        gdf['label'] = gdf.apply(lambda row: 'Standing' if row.label == 1 else 'Fallen', axis=1)
        gdf.to_file(f, driver='GeoJSON')

Code

# Convert to COCO format

from drone_detector.processing.coco import *

deadwood_categories = [
        {'supercategory':'deadwood', 'id':1, 'name': 'uprightwood'},
        {'supercategory':'deadwood', 'id':2, 'name': 'groundwood'},
    ]

from datetime import date

coco_info = {'description': 'Test dataset for deadwood detection in Hiidenportti',
             'version': 0.1,
             'year': 2022,
             'contributor': 'Janne Mäyrä',
             'date_created': date.today().strftime("%Y/%m/%d")
}

coco_licenses = {}

for p in os.listdir(outpath):
    coco_processor = COCOProcessor(outpath/p, outpath/p, coco_info=coco_info, coco_licenses=coco_licenses,
                                  coco_categories=deadwood_categories)
    coco_processor.shp_to_coco('layer')

Code

# Combine several coco-annotation .json files into one

import json

full_coco = None
image_id_modifier = 0
ann_id_modifier = 0
for p in os.listdir(outpath):
    with open(outpath/p/'coco.json') as f:
        coco = json.load(f)
    
    # update filename
    for i in coco['images']:
        i['file_name'] = f"{p}/raster_tiles/{i['file_name']}"
        
    if full_coco is None: 
        full_coco = coco
        image_id_modifier = full_coco['images'][-1]['id']
        ann_id_modifier = full_coco['annotations'][-1]['id']
    else:
        for i in coco['images']:
            i['id'] += image_id_modifier
        for a in coco['annotations']:
            a['image_id'] += image_id_modifier
            a['id'] += ann_id_modifier
        
        full_coco['images'].extend(coco['images'])
        full_coco['annotations'].extend(coco['annotations'])
        image_id_modifier = full_coco['images'][-1]['id'] + 1
        ann_id_modifier = full_coco['annotations'][-1]['id'] + 1
        
with open(outpath.parents[0]/'hiidenportti_test.json', 'w') as outfile: json.dump(full_coco, outfile)

Code

tile_folder = Path('../../data/raw/sudenpesankangas/virtual_plots/train/images/')
vector_folder = Path('../../data/raw/sudenpesankangas/virtual_plots/train/vectors/')

outpath = Path('../../data/processed/sudenpesankangas/train_512')

tiles = os.listdir(tile_folder)
vectors = [f for f in os.listdir(vector_folder) if f.endswith('geojson')]
assert len(tiles) == len(vectors)

Code

for t in tiles:
    if not os.path.exists(outpath/t[:-4]): os.makedirs(outpath/t[:-4])
    shp_fname = t.replace('tif', 'geojson')
    tilesize = 512
    tiler = Tiler(outpath=outpath/t[:-4], gridsize_x=tilesize, gridsize_y=tilesize, overlap=(0,0))
    tiler.tile_raster(str(tile_folder/t))
    tiler.tile_vector(vector_folder/shp_fname, min_area_pct=0.25)

Code

#Fix labelling, todo fix it in COCOProcessor

for p in os.listdir(outpath):
    files = [outpath/p/'vector_tiles'/f for f in os.listdir(outpath/p/'vector_tiles') if f.endswith('geojson')]
    for f in files: 
        gdf = gpd.read_file(f)
        bbox = box(*gdf.total_bounds)
        gdf['geometry'] = gdf.geometry.buffer(0) # fix faulty geometries
        gdf['geometry'] = gdf.apply(lambda row: fix_multipolys(row.geometry) if row.geometry.type == 'MultiPolygon' 
                                    else shapely.geometry.Polygon(row.geometry.exterior), axis=1)
        gdf = gpd.clip(gdf, bbox, keep_geom_type=True)
        gdf.to_file(f, driver='GeoJSON')

Code

# Convert to COCO format

from drone_detector.processing.coco import *

deadwood_categories = [
        {'supercategory':'deadwood', 'id':1, 'name': 'uprightwood'},
        {'supercategory':'deadwood', 'id':2, 'name': 'groundwood'},
    ]

from datetime import date

coco_info = {'description': 'Train dataset for deadwood detection in Sudenpesänkangas',
             'version': 0.1,
             'year': 2022,
             'contributor': 'Janne Mäyrä',
             'date_created': date.today().strftime("%Y/%m/%d")
}

coco_licenses = {}

for p in os.listdir(outpath):
    coco_processor = COCOProcessor(outpath/p, outpath/p, coco_info=coco_info, coco_licenses=coco_licenses,
                                  coco_categories=deadwood_categories)
    coco_processor.shp_to_coco('label')

Code

# Combine several coco-annotation .json files into one

import json

full_coco = None
image_id_modifier = 0
ann_id_modifier = 0
for p in os.listdir(outpath):
    with open(outpath/p/'coco.json') as f:
        coco = json.load(f)
    
    # update filename
    for i in coco['images']:
        i['file_name'] = f"{p}/raster_tiles/{i['file_name']}"
        
    if full_coco is None: 
        full_coco = coco
        image_id_modifier = full_coco['images'][-1]['id']
        ann_id_modifier = full_coco['annotations'][-1]['id']
    else:
        for i in coco['images']:
            i['id'] += image_id_modifier
        for a in coco['annotations']:
            a['image_id'] += image_id_modifier
            a['id'] += ann_id_modifier
        
        full_coco['images'].extend(coco['images'])
        full_coco['annotations'].extend(coco['annotations'])
        image_id_modifier = full_coco['images'][-1]['id'] + 1
        ann_id_modifier = full_coco['annotations'][-1]['id'] + 1
        
with open(outpath.parents[0]/'sudenpesankangas_train.json', 'w') as outfile: json.dump(full_coco, outfile)

Code

tile_folder = Path('../../data/raw/sudenpesankangas/virtual_plots/valid/images/')
vector_folder = Path('../../data/raw/sudenpesankangas/virtual_plots/valid/vectors/')

outpath = Path('../../data/processed/sudenpesankangas/valid_512')

tiles = os.listdir(tile_folder)
vectors = [f for f in os.listdir(vector_folder) if f.endswith('geojson')]
assert len(tiles) == len(vectors)

Code

for t in tiles:
    if not os.path.exists(outpath/t[:-4]): os.makedirs(outpath/t[:-4])
    shp_fname = t.replace('tif', 'geojson')
    tilesize = 512
    tiler = Tiler(outpath=outpath/t[:-4], gridsize_x=tilesize, gridsize_y=tilesize, overlap=(0,0))
    tiler.tile_raster(str(tile_folder/t))
    tiler.tile_vector(vector_folder/shp_fname, min_area_pct=0.25)

Code

#Fix labelling, todo fix it in COCOProcessor

for p in os.listdir(outpath):
    files = [outpath/p/'vector_tiles'/f for f in os.listdir(outpath/p/'vector_tiles') if f.endswith('geojson')]
    for f in files: 
        gdf = gpd.read_file(f)
        bbox = box(*gdf.total_bounds)
        gdf['geometry'] = gdf.geometry.buffer(0) # fix faulty geometries
        gdf['geometry'] = gdf.apply(lambda row: fix_multipolys(row.geometry) if row.geometry.type == 'MultiPolygon' 
                                    else shapely.geometry.Polygon(row.geometry.exterior), axis=1)
        gdf = gpd.clip(gdf, bbox, keep_geom_type=True)
        gdf.to_file(f, driver='GeoJSON')

Code

# Convert to COCO format

from drone_detector.processing.coco import *

deadwood_categories = [
        {'supercategory':'deadwood', 'id':1, 'name': 'uprightwood'},
        {'supercategory':'deadwood', 'id':2, 'name': 'groundwood'},
    ]

from datetime import date

coco_info = {'description': 'Train dataset for deadwood detection in Sudenpesänkangas',
             'version': 0.1,
             'year': 2022,
             'contributor': 'Janne Mäyrä',
             'date_created': date.today().strftime("%Y/%m/%d")
}

coco_licenses = {}

for p in os.listdir(outpath):
    coco_processor = COCOProcessor(outpath/p, outpath/p, coco_info=coco_info, coco_licenses=coco_licenses,
                                  coco_categories=deadwood_categories)
    coco_processor.shp_to_coco('label')

Code

# Combine several coco-annotation .json files into one

import json

full_coco = None
image_id_modifier = 0
ann_id_modifier = 0
for p in os.listdir(outpath):
    with open(outpath/p/'coco.json') as f:
        coco = json.load(f)
    
    # update filename
    for i in coco['images']:
        i['file_name'] = f"{p}/raster_tiles/{i['file_name']}"
        
    if full_coco is None: 
        full_coco = coco
        image_id_modifier = full_coco['images'][-1]['id']
        ann_id_modifier = full_coco['annotations'][-1]['id']
    else:
        for i in coco['images']:
            i['id'] += image_id_modifier
        for a in coco['annotations']:
            a['image_id'] += image_id_modifier
            a['id'] += ann_id_modifier
        
        full_coco['images'].extend(coco['images'])
        full_coco['annotations'].extend(coco['annotations'])
        image_id_modifier = full_coco['images'][-1]['id'] + 1
        ann_id_modifier = full_coco['annotations'][-1]['id'] + 1
        
with open(outpath.parents[0]/'sudenpesankangas_valid.json', 'w') as outfile: json.dump(full_coco, outfile)

Code

tile_folder = Path('../../data/raw/sudenpesankangas/virtual_plots/test/images/')
vector_folder = Path('../../data/raw/sudenpesankangas/virtual_plots/test/vectors/')

outpath = Path('../../data/processed/sudenpesankangas/test_512')

tiles = os.listdir(tile_folder)
vectors = [f for f in os.listdir(vector_folder) if f.endswith('geojson')]
assert len(tiles) == len(vectors)

Code

for t in tiles:
    if not os.path.exists(outpath/t[:-4]): os.makedirs(outpath/t[:-4])
    shp_fname = t.replace('tif', 'geojson')
    tilesize = 512
    tiler = Tiler(outpath=outpath/t[:-4], gridsize_x=tilesize, gridsize_y=tilesize, overlap=(0,0))
    tiler.tile_raster(str(tile_folder/t))
    tiler.tile_vector(vector_folder/shp_fname, min_area_pct=0.25)

Code

#Fix labelling, todo fix it in COCOProcessor

for p in os.listdir(outpath):
    files = [outpath/p/'vector_tiles'/f for f in os.listdir(outpath/p/'vector_tiles') if f.endswith('geojson')]
    for f in files: 
        gdf = gpd.read_file(f)
        bbox = box(*gdf.total_bounds)
        gdf['geometry'] = gdf.geometry.buffer(0) # fix faulty geometries
        gdf['geometry'] = gdf.apply(lambda row: fix_multipolys(row.geometry) if row.geometry.type == 'MultiPolygon' 
                                    else shapely.geometry.Polygon(row.geometry.exterior), axis=1)
        gdf = gpd.clip(gdf, bbox, keep_geom_type=True)
        gdf.to_file(f, driver='GeoJSON')

Code

# Convert to COCO format

from drone_detector.processing.coco import *

deadwood_categories = [
        {'supercategory':'deadwood', 'id':1, 'name': 'uprightwood'},
        {'supercategory':'deadwood', 'id':2, 'name': 'groundwood'},
    ]

from datetime import date

coco_info = {'description': 'Test dataset for deadwood detection in Sudenpesänkangas',
             'version': 0.1,
             'year': 2022,
             'contributor': 'Janne Mäyrä',
             'date_created': date.today().strftime("%Y/%m/%d")
}

coco_licenses = {}

for p in os.listdir(outpath):
    coco_processor = COCOProcessor(outpath/p, outpath/p, coco_info=coco_info, coco_licenses=coco_licenses,
                                  coco_categories=deadwood_categories)
    coco_processor.shp_to_coco('label')

Code

# Combine several coco-annotation .json files into one

import json

full_coco = None
image_id_modifier = 0
ann_id_modifier = 0
for p in os.listdir(outpath):
    with open(outpath/p/'coco.json') as f:
        coco = json.load(f)
    
    # update filename
    for i in coco['images']:
        i['file_name'] = f"{p}/raster_tiles/{i['file_name']}"
        
    if full_coco is None: 
        full_coco = coco
        image_id_modifier = full_coco['images'][-1]['id']
        ann_id_modifier = full_coco['annotations'][-1]['id']
    else:
        for i in coco['images']:
            i['id'] += image_id_modifier
        for a in coco['annotations']:
            a['image_id'] += image_id_modifier
            a['id'] += ann_id_modifier
        
        full_coco['images'].extend(coco['images'])
        full_coco['annotations'].extend(coco['annotations'])
        image_id_modifier = full_coco['images'][-1]['id'] + 1
        ann_id_modifier = full_coco['annotations'][-1]['id'] + 1
        
with open(outpath.parents[0]/'sudenpesankangas_test.json', 'w') as outfile: json.dump(full_coco, outfile)