import rasterio as rio
import geopandas as gpd
from pathlib import Path
import rasterio.plot as rioplot
import matplotlib.pyplot as plt
COCO workflow
detectron2
# Root folder of the example data; every other path is derived from it.
path_to_data = Path('workflow_examples/')

# Training area: RGB orthomosaic and its polygon annotations.
train_raster = path_to_data/'104_28_Hiidenportti_Chunk1_orto.tif'
train_shp = path_to_data/'104_28_Hiidenportti_Chunk1_orto.geojson'

# Test area: RGB orthomosaic and its polygon annotations.
test_raster = path_to_data/'104_42_Hiidenportti_Chunk5_orto.tif'
test_shp = path_to_data/'104_42_Hiidenportti_Chunk5_orto.geojson'
Example data is RGB UAV imagery from Hiidenportti, and the task is to detect and segment different deadwood types. The reference data are annotated as polygons, and the target column is `layer`.
Training area looks like this.
# Show the training raster (left) next to its annotations coloured by class (right).
fig, axs = plt.subplots(1,2, dpi=150, figsize=(10,3))
with rio.open(train_raster) as src:
    rioplot.show(src, ax=axs[0])
train_gdf = gpd.read_file(train_shp)
train_gdf.plot(column='layer', ax=axs[1], cmap='seismic')
plt.suptitle('Train area')
plt.tight_layout()
plt.show()
And test area looks like this.
# Show the test raster (left) next to its annotations coloured by class (right).
fig, axs = plt.subplots(1,2, dpi=150, figsize=(5,3))
with rio.open(test_raster) as src:
    rioplot.show(src, ax=axs[0])
test_gdf = gpd.read_file(test_shp)
test_gdf.plot(column='layer', ax=axs[1], cmap='seismic')
plt.suptitle('Test area')
plt.tight_layout()
plt.show()
Install required dependencies
In order to install `detectron2`, follow the instructions provided here.
Create COCO-format dataset
In this example, the data are split into 256x256 pixel tiles with no overlap. Also set `min_bbox_area` to 8 pixels so that objects that are too small are discarded.
CLI
# Tile the training raster into 256x256 px patches and write a COCO dataset
# with polygon annotations. Every line except the last must end with a
# backslash so that the whole invocation is a single command.
geo2ml_create_coco_dataset \
example_data/workflow_examples/104_28_Hiidenportti_Chunk1_orto.tif \
example_data/workflow_examples/104_28_Hiidenportti_Chunk1_orto.geojson layer \
example_data/workflow_examples/coco/train example_train --gridsize_y 256 \
--gridsize_x 256 --min_bbox_area 8 \
--ann_format polygon
# Same tiling for the test raster: 256x256 px patches, COCO polygon annotations.
geo2ml_create_coco_dataset \
    example_data/workflow_examples/104_42_Hiidenportti_Chunk5_orto.tif \
    example_data/workflow_examples/104_42_Hiidenportti_Chunk5_orto.geojson \
    layer \
    example_data/workflow_examples/coco/test \
    example_test \
    --gridsize_y 256 \
    --gridsize_x 256 \
    --min_bbox_area 8 \
    --ann_format polygon
Python
from geo2ml.scripts.data import create_coco_dataset
# All COCO datasets go under <path_to_data>/coco, one subfolder per split.
outpath = path_to_data/'coco'

# Training split: 256x256 px tiles, polygon annotations, boxes under 8 px discarded.
create_coco_dataset(raster_path=train_raster, polygon_path=train_shp, target_column='layer',
                    outpath=outpath/'train', output_format='gpkg', save_grid=False, allow_partial_data=True,
                    dataset_name='example_train', gridsize_x=256, gridsize_y=256,
                    ann_format='polygon', min_bbox_area=8)

# Test split: same settings as the training split.
create_coco_dataset(raster_path=test_raster, polygon_path=test_shp, target_column='layer',
                    outpath=outpath/'test', output_format='gpkg', save_grid=False, allow_partial_data=True,
                    dataset_name='example_test', gridsize_x=256, gridsize_y=256,
                    ann_format='polygon', min_bbox_area=8)
Dataset structure
Above creates the dataset to `path_to_data/'coco'`, so that it contains folders `train` and `test`. Both of these folders contain
- folder `images`, which contains the tiled raster patches
- folder `vectors`, which contains geojson-files corresponding to each file in `images`, if the location contains any annotations
- file `coco_polygon.json`, which is the annotation file and info for the dataset
Train the model
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, DatasetEvaluators
import os
First we need to register the datasets:
# Register both splits so detectron2 can look them up by name.
register_coco_instances(name='example_train', # the name that identifies a dataset for this session
                        metadata={}, # extra metadata, can be left as an empty dict
                        json_file=outpath/'train/example_train.json', # Annotation file
                        image_root=outpath/'train/images/') # directory which contains all the images
register_coco_instances('example_test', {}, outpath/'test/example_test.json', outpath/'test/images')
And then modify the config file.
# Start from detectron2's default config and overlay the Mask R-CNN R50-FPN baseline.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("example_train",)
cfg.DATASETS.TEST = ("example_test",)
cfg.DATALOADER.NUM_WORKERS = 4
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.TEST.EVAL_PERIOD = 100
cfg.OUTPUT_DIR = str(outpath/'runs')
cfg.SOLVER.MAX_ITER = 300
# NOTE(review): the value was missing from this line; 2 matches the two classes
# (groundwood, uprightwood) reported in the evaluation results. Confirm.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
Next create a trainer. Here we use `DefaultTrainer` for demonstration purposes.
# Build the trainer from the config assembled in `cfg`.
trainer = DefaultTrainer(cfg)
First the trainer must `resume_or_load` the checkpoint.
# resume=False: do not resume a previous run.
trainer.resume_or_load(resume=False)
Then it can be trained:
# Run training with the settings held by `trainer`.
trainer.train()
For evaluation we need to build an evaluator like the following:
# Evaluate the trained model on the registered test split with a COCO evaluator.
results = trainer.test(cfg, trainer.model,
                       evaluators=DatasetEvaluators([COCOEvaluator('example_test',
                                                                   output_dir=cfg.OUTPUT_DIR)]))
Results are returned as an `OrderedDict`:
results
OrderedDict([('bbox',
{'AP': 26.524945427964656,
'AP50': 54.61366682355787,
'AP75': 23.264070383011905,
'APs': 25.132186571431554,
'APm': 21.41516566172374,
'APl': nan,
'AP-groundwood': 23.99694519893256,
'AP-uprightwood': 29.052945656996748}),
('segm',
{'AP': 24.178996439781,
'AP50': 50.930807248720775,
'AP75': 19.645957078082983,
'APs': 19.084138095771337,
'APm': 30.276207887160094,
'APl': nan,
'AP-groundwood': 18.747639759369484,
'AP-uprightwood': 29.610353120192507})])
Other libraries
MMDetection is another commonly used library for object detection from COCO formatted datasets. According to these instructions, below should work:
# the new config inherits the base configs to highlight the necessary modification
_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py'

# 1. dataset settings
dataset_type = 'CocoDataset'
# NOTE(review): these names presumably have to match the category names in the
# annotation file; the detectron2 results above report 'groundwood' and
# 'uprightwood' -- confirm before use.
classes = ('Standing', 'Fallen')
# NOTE(review): the opening quote was missing here; point this at the folder
# where the COCO dataset was actually written.
data_root = '/workflow_examples/coco/'

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    dataset=dict(
        type=dataset_type,
        metainfo=dict(classes=classes),
        data_root=data_root,
        ann_file='train/coco_polygon.json',
        data_prefix=dict(img='train/images')
        )
    )
# Remaining settings are omitted in this example.
...