目录
一、聚类分析
1、基本原理:
(1)选定K个中心,将数据集中的每个点划分到距离最近的中心所在的簇
(2)对每个已划分的簇,取簇内各点的平均向量作为新的中心值
(3)重复1、2步骤,直至中心值不再移动(即相邻两次迭代之间中心值的变化小于给定阈值)
2、示例
import numpy as np
from scipy.cluster.vq import vq, kmeans, whiten
import matplotlib.pyplot as plt
# Ten 2-D sample points used as the observations to cluster.
samples = np.array([
    [1.9, 2.0],
    [1.7, 2.5],
    [1.6, 3.1],
    [0.1, 0.1],
    [0.8, 0.3],
    [0.4, 0.3],
    [0.22, 0.1],
    [0.4, 0.3],
    [0.4, 0.5],
    [1.8, 1.9],
])

# Initial centroid guess: the first two samples, one centroid per row.
initial_centroids = samples[:2].copy()
print(type(initial_centroids))
print("book: \n", initial_centroids)

# Passing an array starts k-means from exactly these centroids.  Passing an
# integer instead, e.g. kmeans(samples, 2), asks for that many centroids and
# is the case in which the `iter` parameter is used.
centroids, distortion = kmeans(samples, initial_centroids)
print("codebook:", centroids)
print("distortion: ", distortion)

# Samples in green, resulting cluster centres in red.
sample_x, sample_y = samples.T
centre_x, centre_y = centroids.T
plt.scatter(sample_x, sample_y, c='g')
plt.scatter(centre_x, centre_y, c='r')
plt.show()
运行结果:红色为聚类中心
二、图像色彩聚类
(1)用PIL的resize或者thumbnail(缩略图)生成小尺寸的图片,以减少参与聚类的像素数量
(2)取出图像的色彩和频次,对色彩聚类并生成示意图
完整代码(其中colorz函数的定义见第三部分的main.py):
import numpy as np
from scipy.cluster.vq import vq, kmeans, whiten
import matplotlib.pyplot as plt
# NOTE: colorz() is not defined in this snippet.  It is the helper shown in
# the main.py listing (which needs `from PIL import Image`) and must be in
# scope before this script runs.
# Forward slashes work on every platform and avoid backslash escapes.
points = colorz('pic/image0.jpg', 3)
print(points[0:10])

# k-means needs floating-point observations.
fe = np.array(points, dtype=float)
print(fe[0:10])

# An integer second argument is the number of cluster centres to find.
# kmeans also accepts an array of initial centroids (one per row) in its
# place when a fixed starting point is wanted.
codebook, distortion = kmeans(fe, 7)
print("codebook:", codebook)

# Colour channels are integers: round each centre to the nearest value.
centers = np.rint(codebook).astype(int)
print(centers)
print("distortion: ", distortion)

# Plot channel 0 against channel 2: image colours in blue, centres in red.
plt.scatter(fe[:, 0], fe[:, 2], c='b')
plt.scatter(codebook[:, 0], codebook[:, 2], c='r')
plt.show()
运行结果:
生成了7个聚类中心
三、 将色彩提取体现在Flask网页中
色彩提取函数可以另建一个模块文件(如imgeColor.py)存放,也可以直接加到main.py中。
直接加函数的完整代码如下:
main.py
from flask import Flask, render_template, request
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from scipy.cluster.vq import vq, kmeans, whiten
import os
# Flask application object; the route handler below is registered on it.
app = Flask(__name__)
def aHash(img):
    """Return the 64-character average hash (aHash) of an image.

    The image is shrunk to 8x8 pixels, converted to grayscale, and every
    pixel is compared with the mean gray level.

    Args:
        img: Image array accepted by ``cv2.resize`` and by
            ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns:
        A string of 64 characters in row-major order: ``'1'`` where the
        pixel is brighter than the mean, ``'0'`` otherwise.
    """
    small = cv2.resize(img, (8, 8))
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    # Let NumPy compute the mean: adding uint8 pixels one by one to a Python
    # int stays uint8 under NumPy 2 promotion rules and wraps past 255.
    avg = gray.mean()
    brighter = (gray > avg).ravel()
    return ''.join('1' if bit else '0' for bit in brighter)
def cmpHash(hash1, hash2):
    """Return the Hamming distance between two hash strings.

    Args:
        hash1: First hash, e.g. a string produced by ``aHash``.
        hash2: Second hash.

    Returns:
        The number of positions at which the two hashes differ (0 means
        identical; larger means less similar), or -1 when the hashes have
        different lengths and therefore cannot be compared.
    """
    if len(hash1) != len(hash2):
        return -1
    return sum(a != b for a, b in zip(hash1, hash2))
def genFrame(v_path='static/ghz.mp4', image_save='static/hash',
             threshold=22, max_frames=1000):
    """Save the key frames of a video as JPEG files.

    The first frame is always written as ``image0.jpg``.  Every following
    frame is hashed with ``aHash`` and compared, via ``cmpHash``, with the
    most recently saved key frame; when more than ``threshold`` hash bits
    differ, the frame is written as ``image<N>.jpg`` (``N`` is the frame's
    position after the first frame, starting at 1) and becomes the new
    reference.

    Args:
        v_path: Path of the video file to read.
        image_save: Directory that receives the JPEG files; created if
            missing.
        threshold: Minimum number of differing hash bits (exclusive) for a
            frame to count as a new key frame.
        max_frames: Maximum number of frames to examine after the first
            one, or ``None`` to read until the video ends.

    Raises:
        RuntimeError: If not even the first frame can be read.
    """
    os.makedirs(image_save, exist_ok=True)
    cap = cv2.VideoCapture(v_path)
    try:
        ok, reference = cap.read()
        if not ok:
            raise RuntimeError(
                'cannot read a frame from {}'.format(v_path))
        cv2.imwrite('{}/image{}.jpg'.format(image_save, 0), reference)
        # Hash the reference once; it only changes when a key frame is saved.
        reference_hash = aHash(reference)

        index = 0
        while max_frames is None or index < max_frames:
            ok, frame = cap.read()
            if not ok:
                # End of the video (or a decode failure): nothing more to do.
                break
            index += 1
            frame_hash = aHash(frame)
            if cmpHash(reference_hash, frame_hash) > threshold:
                cv2.imwrite(
                    '{}/image{}.jpg'.format(image_save, index), frame)
                reference_hash = frame_hash
    finally:
        # Always free the decoder, even when reading or writing fails.
        cap.release()
def colorz(filename, n=3):
    """Return the distinct colours found in a thumbnail of an image file.

    Args:
        filename: Path of the image file to open with PIL.
        n: Unused; kept so that existing callers passing it keep working.

    Returns:
        A list of ``(r, g, b)`` tuples, one per distinct colour in the
        thumbnail (at most 200x200 pixels).  The per-colour pixel counts
        are discarded, so every colour appears exactly once.
    """
    with Image.open(filename) as img:
        # Normalise to RGB so palette, grayscale or RGBA files also yield
        # three-channel tuples.  convert() loads the pixel data, so the file
        # can be closed right after.
        rgb = img.convert('RGB')
    # The image is deliberately not rotated: orientation has no effect on
    # which colours occur in it.
    rgb.thumbnail((200, 200))
    w, h = rgb.size
    # w * h is an upper bound on the number of distinct colours, so
    # getcolors() cannot give up and return None.
    return [color for _count, color in rgb.getcolors(w * h)]
def kmeansColor(img, n):
    """Return up to ``n`` representative colours of an image via k-means.

    Args:
        img: Path of the image file; it is passed on to ``colorz``.
        n: Requested number of cluster centres.

    Returns:
        An integer NumPy array with one row per cluster centre and one
        column per colour channel.
    """
    points = colorz(img, 3)
    fe = np.array(points, dtype=float)  # kmeans needs float observations
    # kmeans picks its initial centres from the observations, so it cannot
    # be asked for more centres than there are distinct colours.
    k = min(n, len(fe))
    codebook, _distortion = kmeans(fe, k)
    # Round to the nearest channel value instead of truncating.
    return np.rint(codebook).astype(int)
@app.route('/')
def index():
    """Render the storyboard page: key frames plus one frame's colours.

    Query parameters:
        picname: Key frame whose colours are shown, for example
            ``static/hash/image12.jpg``.  Anything that is not one of the
            extracted frames falls back to ``image0.jpg``.

    Returns:
        The rendered ``index.html`` template.
    """
    path = 'static/hash'
    # Extracting frames decodes the video, so do it only when the frames are
    # missing instead of on every request.
    if not os.path.isfile('{}/image0.jpg'.format(path)):
        genFrame()

    # Keep only image<N>.jpg entries so that stray files in the directory
    # cannot break the numeric sort below.
    filename = [
        name for name in os.listdir(path)
        if name.startswith('image') and name.endswith('.jpg')
        and name[5:-4].isdecimal()
    ]
    filename.sort(key=lambda x: int(x[5:-4]))
    framecount = len(filename)

    # picname comes from the request: accept only the name of a listed frame
    # so a client cannot make the server open an arbitrary path.
    picname = request.args.get("picname", type=str)
    requested = os.path.basename(picname) if picname else ''
    if requested not in filename:
        requested = 'image0.jpg'
    imgcolors = kmeansColor('{}/{}'.format(path, requested), 5)

    pic = 'static/hash/image'
    return render_template('index.html', pic1=pic, imgcolors=imgcolors,
                           filename=filename, framecount=framecount)
if __name__ == "__main__":
    # Start Flask's built-in server on port 5080 when run as a script.
    app.run(port=5080)
index.html
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>Flask分镜</title>
</head>
<body style="background-color:#74759b">
    视频分镜
    <br>
    <!-- Source video; object/embed is the fallback content for browsers
         that cannot use the video element. -->
    <video width="640" height="480" controls autoplay>
        <source src="static/ghz.mp4" type="video/mp4">
        <object data="static/ghz.mp4" width="640" height="480">
            <embed width="640" height="480" src="static/ghz.mp4">
        </object>
    </video>
    <br>
    哈希分镜帧数:{{framecount}}<br>
    {# Raw cluster centres, followed by one coloured label per centre. #}
    {{imgcolors}}<br>
    {% for c in imgcolors %}
    <span style="color:rgb({{c[0]}},{{c[1]}},{{c[2]}})">图像色彩</span>
    {% endfor %}
    <br>
    {# Key-frame thumbnails; each links back to this page with picname set. #}
    {% for name in filename %}
    <a href="/?picname=static/hash/{{name}}"><img height="40" src="static/hash/{{name}}" alt="{{name}}" /></a>
    {{name}}
    {% endfor %}
    {# One swatch per cluster centre; the text has the same colour as the
       background and only gives the block its height. #}
    {% for c in imgcolors %}
    <div style="background-color:rgb({{c[0]}},{{c[1]}},{{c[2]}}); width: 40%; height: 30%;">
        <span style="color:rgb({{c[0]}},{{c[1]}},{{c[2]}});font-size:50px">1</span>
    </div>
    {% endfor %}
</body>
</html>
运行结果:
注意:
点击任一分镜头图像,页面即显示该图像聚类得到的色彩。