一、聚类分析

1、基本原理：

（1）选取K个中心，将数据集中的每个样本划分到距离最近的中心所在的簇

（2）将已分的数据集，根据平均向量再确定中心值

（3）重复（1）、（2）两步，直至中心值不再移动（即相邻两次迭代之间畸变（distortion）的变化不超过阈值）

2、示例

import numpy as np
from scipy.cluster.vq import vq, kmeans, whiten
import matplotlib.pyplot as plt

# Ten 2-D sample observations to cluster.
fe = np.array([
    [1.9, 2.0],
    [1.7, 2.5],
    [1.6, 3.1],
    [0.1, 0.1],
    [0.8, 0.3],
    [0.4, 0.3],
    [0.22, 0.1],
    [0.4, 0.3],
    [0.4, 0.5],
    [1.8, 1.9],
])

# Initial codebook: the first two observations, copied into a new 2x2 array.
book = fe[:2].copy()
print(type(book))
print("book: \n", book)

# Run k-means starting from the explicit initial centroids in `book`.
# Alternatively call kmeans(fe, 2): 2 is the number of centroids, and the
# `iter` parameter then takes effect.
codebook, distortion = kmeans(fe, book)
print("codebook:", codebook)
print("distortion: ", distortion)

# Observations in green, resulting cluster centers in red.
obs_x, obs_y = fe[:, 0], fe[:, 1]
center_x, center_y = codebook[:, 0], codebook[:, 1]
plt.scatter(obs_x, obs_y, c='g')
plt.scatter(center_x, center_y, c='r')
plt.show()

运行结果：红色为聚类中心

二、图像色彩聚类

（1）用PIL的resize或者thumbnail（缩略图）生成小尺寸的图片，再对其聚类

（2）取出图像的色彩和频次，对色彩聚类并生成示意图

完整代码：

import numpy as np
from scipy.cluster.vq import vq, kmeans, whiten
import matplotlib.pyplot as plt


# NOTE: colorz() is not defined in this snippet; it is the helper from the
# main.py listing further down in this file (it needs `from PIL import Image`).
# A forward slash is used because 'pic\image0.jpg' contains the invalid escape
# sequence "\i" and only works as a path separator on Windows.
points = colorz('pic/image0.jpg', 3)
print(points[0:10])

fe = np.array(points, dtype=float)   # k-means needs float or double observations
print(fe[0:10])
# Example of explicit initial centroids; only used by the alternative call
# mentioned below. Indexing fe[100] requires at least 101 distinct colors.
book = np.array((fe[100], fe[1], fe[8], fe[8]))
print(type(book))
print("book: \n", book)

# Alternative: start from the explicit centroids with kmeans(fe, book).
codebook, distortion = kmeans(fe, 7)   # 7 is the number of cluster centers
# With an integer such as kmeans(fe, 2) the number is the centroid count and
# the `iter` parameter takes effect.

print("codebook:", codebook)   # cluster centers (float)
centers = np.array(codebook, dtype=int)  # colors must be integers
print(centers)
print("distortion: ", distortion)

# Plot the first channel against the third: colors in blue, centers in red.
fe = np.array(points)
plt.scatter(fe[:, 0], fe[:, 2], c='b')
plt.scatter(codebook[:, 0], codebook[:, 2], c='r')   # cluster centers
plt.show()

运行结果：

生成了7个聚类中心

三、将色彩提取体现在Flask网页中

可以另起imageColor.py，也可以直接加函数

加函数完整代码呈现：

main.py

from flask import Flask, render_template, request
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from scipy.cluster.vq import vq, kmeans, whiten
import os

# Flask application object; the '/' route below is registered on it and the
# __main__ guard at the end of this listing runs it.
app = Flask(__name__)


def aHash(img):
    """Compute the 64-bit average hash (aHash) of an image.

    The image is resized to 8x8 and converted from BGR to grayscale; each
    pixel then contributes '1' if it is brighter than the mean gray level
    and '0' otherwise.

    Args:
        img: image array accepted by ``cv2.resize`` and
            ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns:
        A 64-character string of '0'/'1' in row-major pixel order.
    """
    small = cv2.resize(img, (8, 8))
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)

    # Let the array compute its own mean. Accumulating the pixel scalars into
    # a Python int (s = s + gray[i, j]) keeps the narrow pixel dtype under
    # NumPy 2 promotion rules, so the running sum wraps around and the
    # average comes out wrong.
    avg = gray.mean()

    # flatten() walks the pixels row by row, matching the original i/j loops.
    return ''.join('1' if pixel > avg else '0' for pixel in gray.flatten())


# Hash值对比
def cmpHash(hash1, hash2):
    """Count the positions at which two hashes differ.

    Both hashes are printed before being compared.

    Args:
        hash1: first hash (sequence of characters).
        hash2: second hash (sequence of characters).

    Returns:
        The number of differing positions (0 means identical), or -1 when
        the two hashes have different lengths, signalling a bad argument.
    """
    print(hash1)
    print(hash2)

    # Guard clause: hashes of unequal length cannot be compared.
    if len(hash1) != len(hash2):
        return -1

    return sum(1 for left, right in zip(hash1, hash2) if left != right)

def genFrame(v_path='static/ghz.mp4', image_save='static/hash',
             max_frames=1000, threshold=22):
    """Split a video into shots and save one JPEG per detected shot change.

    The first frame is always saved as ``image0.jpg``. Each following frame
    is compared with the most recently saved frame using the average hash;
    when more than ``threshold`` hash bits differ, the frame is saved as
    ``image<i+1>.jpg`` and becomes the new reference.

    Args:
        v_path: path of the video file to read.
        image_save: directory the frames are written to (created if missing).
        max_frames: maximum number of frames to examine after the first one.
        threshold: minimum hash difference that counts as a shot change.

    Returns:
        None. Frames are written to ``image_save`` as a side effect.
    """
    if not os.path.exists(image_save):
        print(image_save)
        # makedirs also creates missing parent directories.
        os.makedirs(image_save)

    cap = cv2.VideoCapture(v_path)
    try:
        fc = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        print(fc)

        ok, img1 = cap.read()
        if not ok:
            # Nothing could be decoded (missing or unreadable video).
            return
        cv2.imwrite('{}/image{}.jpg'.format(image_save, 0), img1)
        print(int(fc))

        # The reference hash only changes when a new frame is saved, so it
        # is computed once per reference instead of once per iteration.
        hash1 = aHash(img1)
        for i in range(max_frames):
            ok, img2 = cap.read()
            if not ok:
                # End of stream: the video is shorter than max_frames.
                break
            hash2 = aHash(img2)
            n = cmpHash(hash1, hash2)
            if n > threshold:
                cv2.imwrite('{}/image{}.jpg'.format(image_save, i + 1), img2)
                hash1 = hash2
    finally:
        # Always release the capture handle, even if hashing/writing fails.
        cap.release()

def colorz(filename, n=3):
    """Return the distinct colors of a down-scaled copy of an image.

    Args:
        filename: image path (or file object) passed to ``Image.open``.
        n: unused; kept so existing callers that pass it keep working.

    Returns:
        A list with one (r, g, b) tuple per distinct color found in the
        thumbnail. The per-color pixel counts are discarded.
    """
    # Close the file handle as soon as the pixel data is loaded. Converting
    # to RGB makes every entry a 3-tuple even for palette, grayscale or RGBA
    # input.
    with Image.open(filename) as src:
        img = src.convert('RGB')

    # The image is deliberately not rotated: rotating by 90 degrees without
    # expand=True crops a non-square frame and fills the uncovered area with
    # black, which distorts the extracted color set.
    img.thumbnail((200, 200))
    w, h = img.size

    # maxcolors = w * h can never be exceeded, so getcolors() returns a list.
    return [color for _count, color in img.getcolors(w * h)]


def kmeansColor(img, n):
    """Cluster the colors of an image and return the cluster centers.

    Args:
        img: image file name, forwarded to ``colorz``.
        n: number of cluster centers to look for (forwarded to ``kmeans``).

    Returns:
        An integer array with one row per cluster center. It can have fewer
        than ``n`` rows when the image has fewer distinct colors than that.
    """
    points = colorz(img, 3)
    fe = np.array(points, dtype=float)  # kmeans needs floating-point input

    # NOTE(review): kmeans picks its initial centroids from the observations,
    # so requesting more clusters than there are distinct colors (e.g. a
    # single-color frame) is expected to fail; clamp the count. Non-scalar
    # values (an explicit initial codebook) are passed through untouched.
    k = min(n, len(fe)) if np.isscalar(n) else n

    codebook, _distortion = kmeans(fe, k)
    return np.array(codebook, dtype=int)


def _frame_files(path):
    """Return the 'image<N>.jpg' file names in *path*, sorted by N."""
    # Ignore anything that is not a frame file (e.g. .DS_Store, Thumbs.db);
    # int() in the sort key would raise on such names.
    names = [
        name for name in os.listdir(path)
        if name.startswith('image') and name.endswith('.jpg')
        and name[5:-4].isdecimal()
    ]
    names.sort(key=lambda x: int(x[5:-4]))
    return names


@app.route('/')
def index():
    """Render the shot overview page with the colors of the selected frame.

    Query parameters:
        picname: frame to analyse, as linked from the page
            (``static/hash/image<N>.jpg``). Defaults to the first frame.

    Returns:
        The rendered ``index.html`` template.
    """
    path = 'static/hash'

    # Split the video only when no frames exist yet; re-processing it on
    # every request (including each thumbnail click) is needlessly slow.
    filename = _frame_files(path) if os.path.isdir(path) else []
    if not filename:
        genFrame()
        filename = _frame_files(path)
    framecount = len(filename)

    # picname comes from the query string, so it is untrusted: accept only
    # the name of an existing frame and never open an arbitrary path.
    picname = request.args.get("picname", type=str)
    selected = os.path.basename(picname) if picname else ''
    if selected not in filename:
        selected = 'image0.jpg'
    picname = path + '/' + selected

    pic = 'static/hash/image'
    imgcolors = kmeansColor(picname, 5)
    return render_template('index.html', pic1=pic, imgcolors=imgcolors,
                           filename=filename, framecount=framecount)


if __name__ == "__main__":
    # Run the Flask app on port 5080 when this file is executed directly.
    app.run(port="5080")

index.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Flask分镜</title>
</head>
<body style="background-color:#74759b">
{# Template variables used here: framecount, imgcolors (rows indexed 0..2), filename (list of frame file names). #}
视频分镜
<br>
{# Source video, with object/embed fallbacks for browsers without <video> support. #}
<video width="640" height="480" controls autoplay>
  <source src="static/ghz.mp4" type="video/mp4">
  <object data="static/ghz.mp4" width="640" height="480">
    <embed width="640" height="480" src="static/ghz.mp4">
  </object>
</video>
<br>
哈希分镜帧数：{{framecount}}<br>
{{imgcolors}}<br>
{# One text sample per cluster color; <span> replaces the obsolete <font> element. #}
{% for c in imgcolors %}
<span style="color:rgb({{c[0]}},{{c[1]}},{{c[2]}})">图像色彩</span>
{% endfor %}
<br>
{# Thumbnail strip: clicking a frame reloads the page with that frame selected. #}
{% for name in filename %}
<a href="/?picname=static/hash/{{name}}"><img height="40" src="static/hash/{{name}}" alt="{{name}}" /></a>
{{name}}
{% endfor %}
{# One color swatch per cluster center; the digit is drawn in the swatch color and only gives the block its height. #}
{% for c in imgcolors %}
<div style="background-color:rgb({{c[0]}},{{c[1]}},{{c[2]}}); width: 40%; height: 30%;">
    <span style="color:rgb({{c[0]}},{{c[1]}},{{c[2]}});font-size:50px">1</span>
</div>
{% endfor %}
</body>
</html>

运行结果：