0
点赞
收藏
分享

微信扫一扫

scala:关联规则

陈情雅雅 2022-04-25 阅读 82

FP-Growth算法用代码实现:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.fpm.FPGrowth
object FP {
  /** Mines frequent itemsets and association rules from a transaction file
    * using Spark MLlib's FP-Growth implementation.
    *
    * Each input line is one transaction: item names separated by single spaces.
    *
    * @param args optional; args(0) overrides the default input file path.
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("FP") // app name shown in the Spark UI
    val sc = new SparkContext(conf)
    try {
      // Input path may be supplied on the command line; falls back to the original default.
      val dataPath = args.headOption.getOrElse("D:\\a\\b\\c\\abc.txt")
      val transactions = sc.textFile(dataPath).map(_.split(" "))

      val minSupport = 0.2 // an itemset must occur in at least 20% of transactions
      val model = new FPGrowth().setMinSupport(minSupport).run(transactions)
      println(s"Number of frequent itemsets: ${model.freqItemsets.count()}")

      // Print the association rules that meet the minimum confidence of 0.8.
      model.generateAssociationRules(0.8).collect().foreach { rule =>
        println("[" + rule.antecedent.mkString(",")
          + "=>"
          + rule.consequent.mkString(",") + "]," + rule.confidence)
      }

      // Print every frequent itemset together with its support count.
      model.freqItemsets.collect().foreach { itemset =>
        println(itemset.items.mkString("[", ",", "]") + ", " + itemset.freq)
      }
    } finally {
      sc.stop() // release the SparkContext even if the job fails
    }
  }
}

Apriori算法的关联规则生成阶段用代码实现(基于已统计好的频繁项集,调用MLlib的AssociationRules):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.fpm.AssociationRules
import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset
 
object AssociationRule {
  /** Generates association rules from a hand-built collection of frequent
    * itemsets using Spark MLlib's `AssociationRules` — the rule-generation
    * stage shared by Apriori and FP-Growth.
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("ap")
    val sc = new SparkContext(conf)
    try {
      // Frequent itemsets with their absolute support counts:
      // {a} occurs 15 times, {b} 35 times, and {a, b} together 12 times.
      val freqItemsets = sc.parallelize(Seq(
        new FreqItemset(Array("a"), 15L),
        new FreqItemset(Array("b"), 35L),
        new FreqItemset(Array("a", "b"), 12L)
      ))

      // Keep only rules with confidence >= 0.8,
      // e.g. confidence(a => b) = freq({a, b}) / freq({a}) = 12/15 = 0.8.
      val rules = new AssociationRules().setMinConfidence(0.8).run(freqItemsets)

      rules.collect().foreach { rule =>
        println("[" + rule.antecedent.mkString(",")
          + "=>"
          + rule.consequent.mkString(",") + "]," + rule.confidence)
      }
    } finally {
      sc.stop() // release the SparkContext even if the job fails
    }
  }
}
举报

相关推荐

0 条评论