0
点赞
收藏
分享

微信扫一扫

协同过滤算法代码(itemCF\userCF)


基于物品的协同过滤算法:给用户推荐与他之前喜欢的物品相似的物品;

基于用户的协同过滤算法:给用户推荐与他兴趣相似的用户喜欢的物品;


实现协同过滤算法,大致几个关键步骤:

1:根据历史数据收集用户偏好

2:过滤噪音

3:计算相似度(余弦相似度、欧氏距离、Jaccard系数、皮尔逊相关系数),根据具体业务选择合适的计算方法

4:找到与目标最相邻的K个相似的用户(基于用户userCF)或物品(基于物品itemCF)

5:根据特定的业务规则过滤数据后进行推荐


userCF和itemCF区别

userCF推荐是与用户兴趣相同的用户们喜欢的物品,哪怕用户根本不认识这些人

itemCF推荐是与用户之前感兴趣的物品相似的物品

userCF更横向更社会化

itemCF更纵向更个性化

userCF要维护一个用户相似度矩阵,适合用户较少的场合

itemCF要维护一个物品相似度矩阵,适合物品较少的场合


处理用户

  // Weight attached to a (user, item) record or to an entry of the item similarity matrix.
  // It has two Double components; `hot` is the one the recommenders in this file sort by.
  case class Weight(hot:Double,score:Double)
// One input record: the weight that user `userId` carries for item `videoCode`.
case class UserBean(userId:String,videoCode:String,weight:Weight)
// An item id paired with a weight.
// NOTE(review): not referenced by any code visible in this section of the file.
case class ItemBean(videoCode:String,weight:Weight)

// User -> (item -> weight) index: for each user id, the items of that user and the weight of each.
// Filled by readData and read by the similarity/recommendation functions.
val userMap = mutable.HashMap[String,mutable.HashMap[String,Weight]]()
// Item -> users index: for each item id, the set of user ids that have that item.
// Filled by readData and read by userResult.
val itemMap = mutable.HashMap[String,mutable.HashSet[String]]()

/**
 * Loads user/item records into the two shared indexes `userMap` and `itemMap`.
 *
 * For every record the weight is stored under `userMap(userId)(videoCode)` (a later record for
 * the same user/item pair overwrites the earlier weight) and the user id is added to
 * `itemMap(videoCode)`.
 *
 * @param userArray records to index
 * @return the shared `userMap` after all records have been applied
 */
def readData(userArray: Array[UserBean]): mutable.HashMap[String,mutable.HashMap[String,Weight]] ={
  for (record <- userArray) {
    // user -> (item -> weight); the inner map is created the first time the user is seen
    val itemsOfUser = userMap.getOrElseUpdate(record.userId, mutable.HashMap[String,Weight]())
    itemsOfUser(record.videoCode) = record.weight

    // item -> users; the set is created the first time the item is seen
    val usersOfItem = itemMap.getOrElseUpdate(record.videoCode, mutable.HashSet[String]())
    usersOfItem += record.userId
  }

  userMap
}


itemCF数据计算

  /**
   * Builds the item-item similarity matrix from the shared `userMap`.
   *
   * Two weighting modes are supported:
   *  - co-occurrence (default): every user that has both items i and j adds
   *    `1 / log(1 + numberOfItemsOfThatUser)` to `hot` of entry (i, j), so users with many items
   *    contribute less. Afterwards each entry is divided by `sqrt(N(i) * N(j))`, where `N(x)` is
   *    the number of users that have item x, which damps the similarity of popular items.
   *    `score` is 0.0 in the result.
   *  - weighted: every such user adds the min/max ratio of their two `hot` values to `hot` and
   *    the min/max ratio of their two `score` values to `score`. No normalisation is applied.
   *
   * Other distance measures (cosine similarity, Euclidean distance, Pearson correlation, ...)
   * could be plugged in instead of either mode.
   *
   * @param useCooccurrence `true` (default) for the co-occurrence mode, `false` for the
   *                        weighted mode
   * @return item id -> (other item id -> similarity weight); an item never appears in its own row
   */
  def itemResult(useCooccurrence: Boolean = true): mutable.HashMap[String,mutable.HashMap[String,Weight]] ={

    // min/max ratio of two preference values; yields 0.0 instead of NaN when the larger one is 0
    def ratio(a: Double, b: Double): Double = {
      val hi = math.max(a, b)
      if (hi == 0.0) 0.0 else math.min(a, b) / hi
    }

    val itemMatrix = mutable.HashMap[String,mutable.HashMap[String,Weight]]()
    // number of users that have each item
    val itemCount = mutable.HashMap[String,Double]()

    userMap.foreach { case (_, items) =>
      items.foreach { case (item, itemWeight) =>
        val row = itemMatrix.getOrElseUpdate(item, mutable.HashMap[String,Weight]())
        itemCount(item) = itemCount.getOrElse(item, 0.0) + 1

        items.foreach { case (other, otherWeight) =>
          if (other != item) {
            val acc = row.getOrElse(other, Weight(0.0, 0.0))
            row(other) =
              if (useCooccurrence) {
                Weight(1.0 / math.log1p(items.size.toDouble) + acc.hot, acc.score)
              } else {
                // compare like with like: hot against hot, score against score
                Weight(ratio(itemWeight.hot, otherWeight.hot) + acc.hot,
                  ratio(itemWeight.score, otherWeight.score) + acc.score)
              }
          }
        }
      }
    }

    if (useCooccurrence) {
      itemMatrix.foreach { case (item, row) =>
        val itemUsers = itemCount.getOrElse(item, 0.0)
        // Compute the normalised entries from a snapshot first, so the row is never written to
        // while it is being iterated.
        val normalised = row.toList.map { case (other, w) =>
          other -> Weight(w.hot / math.sqrt(itemUsers * itemCount.getOrElse(other, 0.0)), 0.0)
        }
        row ++= normalised
      }
    }

    itemMatrix
  }


itemCF对用户进行推荐

  /**
   * Prints item-based recommendations for the user whose records are given.
   *
   * For each item of the user, the `k` most similar items (highest `hot`) are looked up in
   * `itemMatrix`; those the user does not already have are accumulated by summing `hot`.
   * The ten candidates with the highest accumulated `hot` are printed, separated by ';'.
   *
   * @param itemMatrix item id -> (similar item id -> similarity weight)
   * @param userArray  the records of one user
   * @param k          number of most similar items considered per item of the user
   */
  def getItemRecommend(itemMatrix:mutable.HashMap[String,mutable.HashMap[String,Weight]],userArray: Array[UserBean],
                       k:Int): Unit ={

    // items the user already has; these are never recommended
    val owned = mutable.HashSet[String]()
    userArray.foreach(record => owned += record.videoCode)

    val recommends = mutable.HashMap[String,Weight]()

    for (record <- userArray) {
      val neighbours = itemMatrix.get(record.videoCode) match {
        case Some(row) => row.toList
        case None      => Nil
      }
      // keep only the k neighbours with the highest similarity
      val topK = neighbours.sortBy(_._2.hot).reverse.take(k)
      topK.foreach { case (candidate, similarity) =>
        if (!owned.contains(candidate)) {
          // An alternative is to multiply the similarity by the user's own weight for the item.
          val hotSoFar = recommends.getOrElse(candidate, Weight(0.0, 0.0)).hot
          recommends(candidate) =
            Weight(hotSoFar + similarity.hot, (0.0 + similarity.score) * similarity.score)
        }
      }
    }

    val ranked = recommends.toList.sortBy(_._2.hot).reverse
    println(ranked.take(10).mkString(";"))
  }


userCF数据计算

  /**
   * Builds the user-user similarity matrix from the shared `itemMap`.
   *
   * Every item that users u and v both have adds `1 / log(1 + numberOfUsersOfThatItem)` to entry
   * (u, v), so items shared by many users contribute less. Each entry is then divided by
   * `sqrt(N(u) * N(v))`, where `N(x)` is the number of items user x has.
   *
   * @return user id -> (other user id -> similarity); a user never appears in their own row
   */
  def userResult(): mutable.HashMap[String,mutable.HashMap[String,Double]] ={
    // number of items each user has
    val userCount = mutable.HashMap[String,Double]()
    val userMatrix = mutable.HashMap[String,mutable.HashMap[String,Double]]()

    itemMap.foreach { case (_, users) =>
      // the contribution depends only on the item, so compute it once per item
      val contribution = 1.0 / math.log1p(users.size * 1.0)

      users.foreach { u =>
        val row = userMatrix.getOrElseUpdate(u, mutable.HashMap[String,Double]())
        userCount(u) = userCount.getOrElse(u, 0.0) + 1

        users.foreach { v =>
          if (v != u) row(v) = contribution + row.getOrElse(v, 0.0)
        }
      }
    }

    userMatrix.foreach { case (u, row) =>
      val itemsOfU = userCount.getOrElse(u, 0.0)
      // Compute the normalised entries from a snapshot first, so the row is never written to
      // while it is being iterated.
      val normalised = row.toList.map { case (v, raw) =>
        v -> raw / math.sqrt(itemsOfU * userCount.getOrElse(v, 0.0))
      }
      row ++= normalised
    }

    userMatrix
  }


userCF对用户进行推荐

  /**
   * Prints user-based recommendations for the user whose records are given.
   *
   * The user id is taken from the first record. The `k` most similar users are looked up in
   * `userMatrix`; every item of theirs (from the shared `userMap`) that the target user does not
   * already have is scored with the sum of the similarities of the neighbours that have it.
   * The ten best-scored items are printed as "userId : item1;item2;...".
   *
   * Nothing is printed when `userArray` is null or empty, or when the user has no row in
   * `userMatrix`.
   *
   * @param userMatrix user id -> (other user id -> similarity)
   * @param userArray  the records of one user
   * @param k          number of most similar users whose items are considered
   */
  def getUserRecommend(userMatrix : mutable.HashMap[String,mutable.HashMap[String,Double]],userArray: Array[UserBean],
                       k:Int): Unit ={
    for {
      records    <- Option(userArray)          // tolerate a null array from callers
      first      <- records.headOption         // tolerate an empty array
      neighbours <- userMatrix.get(first.userId)
    } {
      val userId = first.userId
      // items the user already has; these are never recommended
      val owned = records.map(_.videoCode).toSet
      val recommends = mutable.HashMap[String,Double]()

      // use the items of the k most similar users
      neighbours.toList.sortBy(_._2).reverse.take(k).foreach { case (neighbourId, similarity) =>
        userMap.get(neighbourId).foreach { videos =>
          videos.foreach { case (video, _) =>
            if (!owned.contains(video)) {
              // The score could also use the neighbour's own weight for the item, e.g.
              // similarity * weight.hot; here only the user similarity is summed.
              recommends(video) = recommends.getOrElse(video, 0.0) + similarity
            }
          }
        }
      }

      println(userId +" : "+recommends.toList.sortBy(_._2).reverse.take(10).mkString(";"))
    }
  }


数据测试

  /**
   * Demo entry point: indexes a small hard-coded data set, then prints itemCF and userCF
   * recommendations for users "1" to "4".
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    // Sample (user, item, weight) records, in the order they are indexed.
    val userArray = Array(
      UserBean("1","A",Weight(2.0,0.0)),
      UserBean("2","B",Weight(3.0,0.0)),
      UserBean("1","B",Weight(4.0,0.0)),
      UserBean("1","C",Weight(1.0,0.0)),
      UserBean("3","C",Weight(1.0,0.0)),
      UserBean("4","H",Weight(1.0,0.0)),
      UserBean("4","A",Weight(1.0,0.0)),
      UserBean("4","B",Weight(1.0,0.0)),
      UserBean("5","E",Weight(1.0,0.0)),
      UserBean("5","A",Weight(1.0,0.0))
    )

    readData(userArray)
    val recordsByUser = userArray.groupBy(_.userId)
    val targetUsers = List("1", "2", "3", "4")

    println("=======itemCF")
    val itemMatrix = itemResult()
    // Look users up as Option so an unknown id is skipped instead of passing null along.
    targetUsers.foreach { id =>
      recordsByUser.get(id).foreach(records => getItemRecommend(itemMatrix, records, 20))
    }

    println("=======userCF")
    val userMatrix = userResult()
    targetUsers.foreach { id =>
      recordsByUser.get(id).foreach(records => getUserRecommend(userMatrix, records, 20))
    }
  }

举报

相关推荐

0 条评论