# Showcase the result first — this little script took two days to write.
import os

# Must be set BEFORE TensorFlow is imported, otherwise it has no effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress TF info/warning logs

import cv2
import numpy as np
import tensorflow as tf
import keras as ks
import yolo_utils
import pandas as pd

# Pretrained YOLOv2 weights and the 80 COCO class names it predicts.
yolo2_model = ks.models.load_model("./model_data/yolov2.h5")
with open("./model_data/coco_classes.txt") as f:
    classes = [c.strip() for c in f.readlines()]
yolo2_model.summary()
# Load frames 0001.jpg .. 0120.jpg, resized to the 608x608 network input,
# converted BGR -> RGB (cv2 reads BGR, the model expects RGB).
pic_set = []
for i in range(1, 121):
    # Zero-pad to 4 digits instead of branching on the number's magnitude.
    path = "./images/" + str(i).zfill(4) + ".jpg"
    img = cv2.imread(path)
    if img is None:
        # Fail loudly here rather than letting cv2.resize crash on None.
        raise FileNotFoundError("could not read image: " + path)
    img = cv2.resize(img, (608, 608))
    pic_set.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
pic_set = np.array(pic_set)
pic_set_255 = pic_set.copy()  # keep the 0-255 copy for drawing later
pic_set = pic_set / 255  # normalize to [0, 1] for the network
block_size = 32  # pixel stride per output-grid cell (608 / 32 = 19 cells)
output = yolo2_model.predict(pic_set)
def decode(x):
    """Decode raw YOLOv2 output into its activated components.

    Parameters
    ----------
    x : np.ndarray
        Raw network output of shape (images, rows, cols, 5 * 85),
        i.e. 5 anchor boxes per cell, 85 values per box.

    Returns
    -------
    tuple of np.ndarray
        (xy, wh, pc, c), each shaped (images, rows, cols, 5, k) with
        k = 2, 2, 1, 80: sigmoid(xy) cell-relative centers, exp(wh)
        box extents, sigmoid(pc) objectness, softmax(c) class scores.

    The activations are pure elementwise NumPy; the original pushed each
    of them through a TF1 Session (four separate sess.run calls), which
    is both slow and needless for this computation.
    """
    x = x.reshape((x.shape[0], x.shape[1], x.shape[2], 5, 85))

    def _sigmoid(v):
        return 1.0 / (1.0 + np.exp(-v))

    xy = _sigmoid(x[..., 0:2])
    wh = np.exp(x[..., 2:4])
    pc = _sigmoid(x[..., 4:5])
    logits = x[..., 5:]
    # Subtract the per-box max before exponentiating for numerical
    # stability; the softmax result is mathematically unchanged.
    shifted = np.exp(logits - logits.max(axis=-1, keepdims=True))
    c = shifted / shifted.sum(axis=-1, keepdims=True)
    return xy, wh, pc, c
def get_boxes(xy, wh, pc, c, block_size, threshold=0.6):
    """Collect, per image, every anchor box whose objectness clears threshold.

    Each kept box becomes a 6-element record
    [up, left, down, right, confidence, class_name]. Because the class
    name (a string) is mixed into the numpy record, the whole record is
    stored as strings. Returns an array of per-image record arrays.
    """
    per_image = []
    for img in range(pc.shape[0]):
        records = []
        for row in range(pc.shape[1]):
            for col in range(pc.shape[2]):
                for anchor in range(pc.shape[3]):
                    confidence = pc[img, row, col, anchor, 0]
                    if confidence < threshold:
                        continue
                    # Cell-relative center scaled into pixel coordinates.
                    cx = block_size * col + xy[img, row, col, anchor, 0] * block_size
                    cy = block_size * row + xy[img, row, col, anchor, 1] * block_size
                    # NOTE(review): the full wh term is used as the extent on
                    # EACH side of the center (mirroring the original code,
                    # which never halves it) — confirm that is intended.
                    ext_x = wh[img, row, col, anchor, 0] * block_size
                    ext_y = wh[img, row, col, anchor, 1] * block_size
                    label = classes[np.argmax(c[img, row, col, anchor, :])]
                    records.append(np.array(
                        [cy - ext_y, cx - ext_x, cy + ext_y, cx + ext_x,
                         confidence, label]))
        per_image.append(np.array(records))
    return np.array(per_image)
def non_max_suppress(boxes, max_outputsize=20, iou_threshold=0.6):
    """Greedy non-max suppression over one image's candidate boxes.

    Parameters
    ----------
    boxes : array-like
        Rows of [up, left, down, right, score, label]; the first five
        columns must be parseable as floats.
    max_outputsize : int
        Maximum number of boxes to keep.
    iou_threshold : float
        A box whose IoU with an already-kept box is strictly greater
        than this is suppressed (same rule as
        tf.image.non_max_suppression, which the original called).

    Returns
    -------
    (coords, scores, labels) arrays for the kept boxes, best score first.

    Pure NumPy: removes the TF1 Session dance (the original both used the
    session as a context manager AND called sess.close()), and fixes the
    use of np.str, which was removed in NumPy 1.24 and crashes there.
    """
    arr = np.array(boxes)
    coords = arr[:, 0:4].astype(np.float32)
    scores = arr[:, 4].astype(np.float32)
    labels = arr[:, 5].astype(str)  # builtin str, not the removed np.str

    def _iou(a, b):
        # Boxes are [up, left, down, right].
        inter_h = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
        inter_w = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
        inter = inter_h * inter_w
        union = ((a[2] - a[0]) * (a[3] - a[1])
                 + (b[2] - b[0]) * (b[3] - b[1]) - inter)
        return inter / union if union > 0 else 0.0

    order = np.argsort(-scores)  # visit highest-scoring boxes first
    keep = []
    for idx in order:
        if len(keep) >= max_outputsize:
            break
        if all(_iou(coords[idx], coords[k]) <= iou_threshold for k in keep):
            keep.append(idx)
    keep = np.array(keep, dtype=int)
    return coords[keep], scores[keep], labels[keep]
def draw_box(src, box, score, what):
    """Annotate src with a rectangle and "score label" caption per box.

    Parameters
    ----------
    src : np.ndarray
        Image to draw on (cv2 draws in place; the same buffer is returned).
    box : np.ndarray
        (n, 4) rows of [up, left, down, right] pixel coordinates.
    score, what
        Per-box confidence values and class-name strings.

    Returns the annotated image. The original left `out` unassigned when
    box was empty, raising NameError; seeding it with src fixes that and
    simply returns the image untouched.
    """
    out = src
    for j in range(box.shape[0]):
        top_left = (int(box[j, 1]), int(box[j, 0]))
        bottom_right = (int(box[j, 3]), int(box[j, 2]))
        out = cv2.rectangle(out, top_left, bottom_right, (255, 0, 0),
                            thickness=3)
        title = str(np.around(score[j], 2)) + " " + str(what[j])
        # Caption sits 5 px above the box's top-left corner.
        out = cv2.putText(out, title, (top_left[0], top_left[1] - 5),
                          cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255))
    return out
xy, wh, pc, c = decode(output)
boxes = get_boxes(xy, wh, pc, c, block_size)
# NOTE(review): 'mp4v' fourcc inside an .avi container is an odd pairing;
# it works with many OpenCV builds, but 'XVID' is the conventional codec
# for .avi — confirm the output plays on the target machines.
writer = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(*'mp4v'), 3,
                         (608, 608), isColor=True)
try:
    for i in range(boxes.shape[0]):
        if boxes[i].shape[0]:  # at least one detection cleared the threshold
            box, score, what = non_max_suppress(boxes[i], 20, 0.5)
            frame = draw_box(pic_set_255[i], box, score, what)
        else:
            frame = pic_set_255[i]
        # Drawing happened on the RGB copy; VideoWriter expects BGR.
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
finally:
    # Always release so the container header is finalized, even if a
    # frame fails mid-loop (the original leaked the writer on error).
    writer.release()