一、背景介绍



二、数据准备
# 数据库连接和数据获取
import pandas as pd
import pymysql
from data.mapper import host, user, password, database
# Connect to the MySQL database using the credentials imported from data.mapper.
conn = pymysql.connect(
    host=host,
    user=user,
    password=password,
    database=database,
)
try:
    # Read the processed customer data from MySQL into a DataFrame.
    query = "SELECT * FROM processed_customer_data"
    df = pd.read_sql(query, conn)
finally:
    # Close the connection even when the query raises, so it is never leaked.
    conn.close()
三、模型训练与评估
3.1 随机森林模型
# 随机森林模型训练与评估
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Separate the features from the label column ('Attrition_Flag').
X = df.drop(columns=['Attrition_Flag'])
y = df['Attrition_Flag']

# Hold out 20% of the rows as the test set; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

# Train the random forest. The estimator is seeded as well: without it the
# bootstrap sampling and feature selection differ on every run, so the metrics
# below would not be reproducible even though the split is fixed.
rf_model = RandomForestClassifier(random_state=2)
rf_model.fit(X_train, y_train)

# Predict on the held-out test set.
y_pred = rf_model.predict(X_test)

# Evaluate: overall accuracy, per-class report, and confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

3.2 逻辑回归模型
# 逻辑回归模型训练与评估
from sklearn.linear_model import LogisticRegression
# Split the data with the same test_size and seed (random_state=2) used for the
# random forest in section 3.1, so both models are trained and scored on
# identical train/test partitions and their metrics are directly comparable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

# Train the logistic regression model with scikit-learn's default settings.
logreg_model = LogisticRegression()
logreg_model.fit(X_train, y_train)

# Predict on the held-out test set.
y_pred = logreg_model.predict(X_test)

# Evaluate: overall accuracy, per-class report, and confusion matrix.
# NOTE: these names are the same ones used in section 3.1, so the values
# computed there are overwritten here.
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

四、数据可视化
# Django视图函数中的数据可视化
from django.shortcuts import render
from pyecharts.charts import Bar, Pie, Line
from pyecharts import options as opts
from pyecharts.globals import CurrentConfig, ThemeType
from web.service.task_service import get_custormer_age, get_income_category, get_education_level, get_credit_limit, \
get_months_inactive_12_mon
def bar_chart(request):
    """Render the customer dashboard page with five embedded pyecharts charts.

    Builds two line charts (customer age, credit limit top 10), two bar charts
    (inactive months, income category) and one pie chart (education level)
    from the task_service helpers, renders each with ``render_embed()`` and
    passes the results to the ``charts/bar_chart.html`` template.

    Args:
        request: The incoming Django request, forwarded to ``render``.

    Returns:
        The response produced by ``render`` for ``charts/bar_chart.html`` with
        the context keys ``line``, ``bar``, ``pie``, ``line1`` and ``bar1``.
    """
    # Customer age distribution (line chart).
    ages, age_counts = get_custormer_age()
    line = (
        Line()
        .add_xaxis([str(age) for age in ages])
        .add_yaxis("Count", age_counts)
        .set_global_opts(
            title_opts=opts.TitleOpts(title="客户年龄分布图"),
            xaxis_opts=opts.AxisOpts(name="Age"),
            yaxis_opts=opts.AxisOpts(name="Count"),
        )
    )

    # Credit limit top-10 distribution (line chart).
    limits, limit_counts = get_credit_limit()
    line1 = (
        Line()
        .add_xaxis([str(limit) for limit in limits])
        .add_yaxis("Count", limit_counts)
        .set_global_opts(
            title_opts=opts.TitleOpts(title="客户信用卡额度top10分布图"),
            # The x-axis carries credit limits, not ages; the previous "Age"
            # label was copied over from the age chart above.
            xaxis_opts=opts.AxisOpts(name="Credit Limit"),
            yaxis_opts=opts.AxisOpts(name="Count"),
        )
    )

    # Months inactive over the last 12 months (bar chart).
    bar1 = Bar()
    inactive_months, inactive_counts = get_months_inactive_12_mon()
    bar1.add_xaxis(inactive_months)
    bar1.add_yaxis("客户去年非活跃月数分布", inactive_counts)

    # Income category (bar chart).
    bar = Bar()
    income_categories, income_counts = get_income_category()
    bar.add_xaxis(income_categories)
    bar.add_yaxis("收入范围趋势图", income_counts)

    # Education level distribution (pie chart). The local is no longer named
    # ``tuple`` so the builtin is not shadowed inside this view.
    pie = Pie()
    education_data = get_education_level()
    pie.add("教育水平分布图", education_data)

    # Render every chart to its embeddable form for the template.
    line_js = line.render_embed()
    bar_js = bar.render_embed()
    pie_js = pie.render_embed()
    bar1_js = bar1.render_embed()
    line1_js = line1.render_embed()

    context = {
        'line': line_js,
        'bar': bar_js,
        'pie': pie_js,
        'line1': line1_js,
        'bar1': bar1_js,
    }
    return render(request, 'charts/bar_chart.html', context)


五、总结