这篇文章的目的:1. 解析CrowdHuman数据集;2. 准备CrowdHuman相应的配置,训练yolov5。
其中,我把图片文件名中的逗号全部替换成了下划线,例如将 273271,1017c000ac1360b7.jpg 改写为 273271_1017c000ac1360b7.jpg。
1.环境
ubuntu16.04
cuda10.1
cudnn7
python3.6
Cython
matplotlib>=3.2.2
numpy>=1.18.5
opencv-python>=4.1.2
Pillow
PyYAML>=5.3
scipy>=1.4.1
tensorboard>=2.2
torch>=1.7.0 (my 1.7.1)
torchvision>=0.8.1 (my 0.8.2)
tqdm>=4.41.0
seaborn>=0.11.0
easydict
thop
pycocotools>=2.0
2.下载
(1)crowdhuman数据集:CrowdHuman Dataset
(2)yolov5的工程
GitHub - ultralytics/yolov5: YOLOv5 🚀 in PyTorch > ONNX > CoreML > TFLite
(3)yolov5预训练模型下载
放到yolov5/weights目录下
3.解析Crowdhuman数据集
这里有两个参考:参考的处理crowdhuman数据集的源码,另一个是一篇应用的博客。
YOLOv5-Tools/CrowHuman2YOLO/data at main · shaoshengsong/YOLOv5-Tools · GitHub
目标检测 YOLOv5 CrowdHuman数据集格式转YOLOv5格式_深度学习-CSDN博客_crowdhuman数据集转yolo格式
由于我需要的图片格式和作者的稍微有点不同(比如目录结构,以及odgt中的图片名称等),所以还是需要花点时间调试一下。下面进入正题。
(1)首先将下载的源码YOLOv5-Tools/CrowHuman2YOLO/data目录下的数据,放到yolov5/data目录下:
(2)将下载完成的数据,放到raw目录下
(3)修改脚本prepare_data.sh 和 gen_txts.py
# prepare_data.sh: insert the following at lines 47-48.
# Rename every image whose name contains ',' so that ',' becomes '_'
# (e.g. 273271,1017c000ac1360b7.jpg -> 273271_1017c000ac1360b7.jpg).
cd "../crowdhuman-$1/"
for file in *,*.jpg
do
    # An unmatched glob stays literal; skip it instead of letting mv fail.
    [ -e "$file" ] || continue
    newfile="${file//,/_}"
    mv -- "$file" "$newfile"
    echo "Rename file : $newfile ..."
done
# gen_txts.py change line80:
ID = anno['ID'] # e.g. '273271,c9db000d5146c15'
# to
ID = anno['ID'].replace(',', '_') # e.g. '273271_c9db000d5146c15'
完整代码 prepare_data.sh 和 gen_txts.py:
#!/bin/bash
# prepare_data.sh
#
# Usage: bash ./prepare_data.sh {width}x{height}      (e.g. 608x608)
#
# Unzips the CrowdHuman archives found in raw/, hard-links all jpg images
# into crowdhuman-{width}x{height}/, replaces ',' with '_' in the image file
# names and finally runs gen_txts.py to generate the YOLO txt annotations.

set -e

# check argument
if [[ -z $1 || ! $1 =~ ^[[:digit:]]+x[[:digit:]]+$ ]]; then
    echo "ERROR: This script requires 1 argument, \"input dimension\" of the YOLO model."
    echo "The input dimension should be {width}x{height} such as 608x608 or 416x256."
    exit 1
fi

# prefer python3 when it is available
if which python3 > /dev/null; then
    PYTHON=python3
else
    PYTHON=python
fi

pushd "$(dirname "$0")/raw" > /dev/null

# get_file <gdown argument> <local file name>
# Helper kept for downloading the dataset with gdown; it is not called below
# because the zip files are expected to be placed in raw/ by hand.
get_file()
{
    # do download only if the file does not exist
    if [[ -f $2 ]]; then
        echo "Skipping $2"
    else
        echo "Downloading $2..."
        ${PYTHON} -m gdown.cli "$1"
    fi
}

echo "** Download dataset files"

# unzip image files (ignore CrowdHuman_test.zip for now)
echo "** Unzip dataset files"
for f in CrowdHuman_train01.zip CrowdHuman_train02.zip CrowdHuman_train03.zip CrowdHuman_val.zip ; do
    unzip -n "${f}"
done

echo "** Create the crowdhuman-$1/ subdirectory"
rm -rf "../crowdhuman-$1/"
mkdir "../crowdhuman-$1/"
ln Images/*.jpg "../crowdhuman-$1/"
# the crowdhuman/ subdirectory now contains all train/val jpg images

# replace ',' with '_' in the image file names
cd "../crowdhuman-$1/"
for file in *,*.jpg
do
    # An unmatched glob stays literal; skip it instead of letting mv fail
    # (which would abort the whole script because of "set -e").
    [ -e "$file" ] || continue
    newfile="${file//,/_}"
    mv -- "$file" "$newfile"
    echo "Rename file : $newfile ..."
done

echo "** Generate yolo txt files"
cd ..
${PYTHON} gen_txts.py "$1"

popd > /dev/null

echo "** Done."
"""gen_txts.py
To generate YOLO txt files from the original CrowdHuman annotations.
Please also refer to README.md in this directory.
Inputs:
* raw/annotation_train.odgt
* raw/annotation_val.odgt
* crowdhuman-{width}x{height}/[IDs].jpg
Outputs:
* crowdhuman-{width}x{height}/train.txt
* crowdhuman-{width}x{height}/test.txt
* crowdhuman-{width}x{height}/[IDs].txt (one annotation for each image in the training or test set)
"""
import json
from pathlib import Path
from argparse import ArgumentParser
import numpy as np
import cv2
# Input width/height of the YOLO model. Both start at 0 and are overwritten
# by main() from the command-line argument.
INPUT_WIDTH = INPUT_HEIGHT = 0

# Minimum rescaled width/height of an object; smaller boxes are not written
# to the txt files (i.e. they are not learned from).
MIN_W = MIN_H = 5

# Whether to run K-Means clustering to determine "anchor" sizes, and how
# many clusters to compute.
KMEANS_CLUSTERS = 9
DO_KMEANS = True

# (width, height) of every kept bbox, rescaled to the model input size.
BBOX_WHS = []
def image_shape(ID, image_dir):
    """Return the shape of the image ``<image_dir>/<ID>.jpg``.

    Args:
        ID: image file name without the ``.jpg`` extension.
        image_dir: ``pathlib.Path`` of the directory containing the image.

    Returns:
        The ``shape`` attribute of the array returned by ``cv2.imread``.

    Raises:
        AssertionError: if ``image_dir`` is None.
        OSError: if OpenCV could not read the image file.
    """
    assert image_dir is not None
    jpg_path = image_dir / ('%s.jpg' % ID)
    img = cv2.imread(jpg_path.as_posix())
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising, which would otherwise surface as an opaque
        # AttributeError on ``.shape``.
        raise OSError('cannot read image: %s' % jpg_path.as_posix())
    return img.shape
def txt_line(cls, bbox, img_w, img_h):
    """Generate 1 line in the txt file.

    The box is clipped to the image, and the line is only produced when the
    clipped box, rescaled to INPUT_WIDTH x INPUT_HEIGHT, is at least
    MIN_W x MIN_H.  When DO_KMEANS is set, the rescaled (width, height) of
    every kept box is appended to BBOX_WHS.

    Args:
        cls: integer class id written as the first field.
        bbox: ``(x, y, w, h)`` of the box in pixels (top-left corner + size).
        img_w: image width in pixels.
        img_h: image height in pixels.

    Returns:
        ``'<cls> <cx> <cy> <w> <h>\\n'`` with center and size normalized by
        the image size, or ``''`` when the box is too small to keep.
    """
    assert INPUT_WIDTH > 0 and INPUT_HEIGHT > 0
    x, y, w, h = bbox
    # Clip the box to the image by intersecting [x, x + w] with [0, img_w]
    # (same for y).  Clamping only the origin would leave the size unchanged
    # for boxes starting at negative coordinates, shifting and enlarging them.
    left = max(int(x), 0)
    top = max(int(y), 0)
    right = min(int(x) + int(w), img_w)
    bottom = min(int(y) + int(h), img_h)
    w = right - left
    h = bottom - top
    w_rescaled = float(w) * INPUT_WIDTH / img_w
    h_rescaled = float(h) * INPUT_HEIGHT / img_h
    if w_rescaled < MIN_W or h_rescaled < MIN_H:
        # also covers boxes lying completely outside the image (w or h <= 0)
        return ''
    if DO_KMEANS:
        BBOX_WHS.append((w_rescaled, h_rescaled))
    cx = (left + w / 2.) / img_w
    cy = (top + h / 2.) / img_h
    nw = float(w) / img_w
    nh = float(h) / img_h
    return '%d %.6f %.6f %.6f %.6f\n' % (cls, cx, cy, nw, nh)
def process(set_='test', annotation_filename='raw/annotation_val.odgt',
            output_dir=None):
    """Process either 'train' or 'test' set.

    Reads the annotation file (one JSON object per line), writes one
    ``<ID>.txt`` per image into ``output_dir`` and finally writes
    ``<output_dir>/<set_>.txt`` listing the jpg path of every image.

    Args:
        set_: name of the set; used as the stem of the list file.
        annotation_filename: path of the ``.odgt`` annotation file.
        output_dir: ``pathlib.Path`` of the directory that holds the images
            and receives the generated txt files.

    Raises:
        AssertionError: if ``output_dir`` is None, an image does not have 3
            channels, or an object has a tag other than 'person'/'mask'.
    """
    assert output_dir is not None
    output_dir.mkdir(exist_ok=True)
    jpgs = []
    with open(annotation_filename, 'r') as fanno:
        for raw_anno in fanno:
            if not raw_anno.strip():
                continue  # tolerate blank lines (e.g. at the end of file)
            anno = json.loads(raw_anno)
            # image files were renamed with ',' -> '_', so do the same here
            ID = anno['ID'].replace(',', '_')  # e.g. '273271_c9db000d5146c15'
            print('Processing ID: %s' % ID)
            img_h, img_w, img_c = image_shape(ID, output_dir)
            assert img_c == 3  # should be a BGR image
            txt_path = output_dir / ('%s.txt' % ID)
            # write a txt for each image
            with open(txt_path.as_posix(), 'w') as ftxt:
                for obj in anno['gtboxes']:
                    if obj['tag'] == 'mask':
                        continue  # ignore non-human
                    assert obj['tag'] == 'person'
                    if 'hbox' in obj:  # head -> class 0
                        line = txt_line(0, obj['hbox'], img_w, img_h)
                        if line:
                            ftxt.write(line)
                    if 'fbox' in obj:  # full body -> class 1
                        line = txt_line(1, obj['fbox'], img_w, img_h)
                        if line:
                            ftxt.write(line)
            jpgs.append('data/%s/%s.jpg' % (output_dir, ID))
    # write the 'data/crowdhuman/train.txt' or 'data/crowdhuman/test.txt'
    set_path = output_dir / ('%s.txt' % set_)
    with open(set_path.as_posix(), 'w') as fset:
        for jpg in jpgs:
            fset.write('%s\n' % jpg)
def rm_txts(output_dir):
    """Remove txt files in output_dir.

    Only regular files directly inside ``output_dir`` whose name matches
    ``*.txt`` are deleted; anything else that matches is left alone.

    Args:
        output_dir: ``pathlib.Path`` of the directory to clean.
    """
    # Snapshot the matches first so that deleting entries cannot interfere
    # with the directory listing that is still being iterated.
    for txt in list(output_dir.glob('*.txt')):
        if txt.is_file():
            txt.unlink()
def main():
    """Generate the YOLO txt files for the dimension given on the command line.

    Parses the ``dim`` argument ('{width}x{height}'), stores it in
    INPUT_WIDTH/INPUT_HEIGHT, regenerates all txt files inside
    ``crowdhuman-<dim>/``, writes ``crowdhuman-<dim>.data`` and, when
    DO_KMEANS is set and sklearn is importable, prints K-Means anchor sizes.

    Raises:
        SystemExit: if ``dim`` is malformed, not positive, not a multiple of
            32, or if the ``crowdhuman-<dim>`` directory does not exist.
    """
    global INPUT_WIDTH, INPUT_HEIGHT

    parser = ArgumentParser()
    parser.add_argument('dim', help='input width and height, e.g. 608x608')
    args = parser.parse_args()

    dim_split = args.dim.split('x')
    if len(dim_split) != 2:
        raise SystemExit('ERROR: bad spec of input dim (%s)' % args.dim)
    try:
        width, height = int(dim_split[0]), int(dim_split[1])
    except ValueError:
        # non-numeric width/height, e.g. 'axb'
        raise SystemExit('ERROR: bad spec of input dim (%s)' % args.dim)
    if width <= 0 or height <= 0 or width % 32 != 0 or height % 32 != 0:
        raise SystemExit('ERROR: bad spec of input dim (%s)' % args.dim)
    # only publish the dimension once it has been fully validated
    INPUT_WIDTH, INPUT_HEIGHT = width, height

    output_dir = Path('crowdhuman-%s' % args.dim)
    if not output_dir.is_dir():
        raise SystemExit('ERROR: %s does not exist.' % output_dir.as_posix())

    # start from a clean state, then regenerate both sets
    rm_txts(output_dir)
    process('test', 'raw/annotation_val.odgt', output_dir)
    process('train', 'raw/annotation_train.odgt', output_dir)

    with open('crowdhuman-%s.data' % args.dim, 'w') as f:
        f.write("""classes = 2
train = data/crowdhuman-%s/train.txt
valid = data/crowdhuman-%s/test.txt
names = data/crowdhuman.names
backup = backup/\n""" % (args.dim, args.dim))

    if DO_KMEANS:
        try:
            from sklearn.cluster import KMeans
        except ModuleNotFoundError:
            print('WARNING: no sklearn, skipping anchor clustering...')
        else:
            if len(BBOX_WHS) < KMEANS_CLUSTERS:
                # KMeans cannot fit more clusters than there are samples
                print('WARNING: only %d bboxes, skipping anchor clustering...'
                      % len(BBOX_WHS))
                return
            X = np.array(BBOX_WHS)
            kmeans = KMeans(n_clusters=KMEANS_CLUSTERS, random_state=0).fit(X)
            centers = kmeans.cluster_centers_
            centers = centers[centers[:, 0].argsort()]  # sort by bbox w
            print('\n** for yolov5-%dx%d, ' % (INPUT_WIDTH, INPUT_HEIGHT), end='')
            print('resized bbox width/height clusters are: ', end='')
            print(' '.join(['(%.2f, %.2f)' % (c[0], c[1]) for c in centers]))
            print('\nanchors = ', end='')
            print(', '.join(['%d,%d' % (int(c[0]), int(c[1])) for c in centers]))
# run main() only when executed as a script, not when imported
if __name__ == '__main__':
    main()
(4)运行数据处理脚本
bash ./prepare_data.sh 608x608
4.训练
添加一个yaml配置文件在data目录下:
# crowdhuman.yaml
# Dataset description for YOLOv5 training on CrowdHuman.
# train/val point at the image lists written by gen_txts.py for the 608x608
# run (train.txt and test.txt inside crowdhuman-608x608/).
# NOTE(review): the paths look relative to the yolov5 directory from which
# train.py is started -- confirm.
train: "./data/crowdhuman-608x608/train.txt"
val: "./data/crowdhuman-608x608/test.txt"
# number of classes
nc: 2
# Order matters: gen_txts.py writes class id 0 for head boxes (hbox) and
# class id 1 for full-body boxes (fbox).
names: ["head", "person"]
进入yolov5目录,运行以下脚本(注意:--cfg 指定的models中的模型配置文件,应与--weights 指定的weights中的预训练模型保持一致):
python train.py --data data/crowdhuman.yaml --cfg models/yolov5m.yaml --weights weights/yolov5m.pt --device 0
这里就欧克了。