线性回归:高斯分布、最大似然估计(MLE)、最小二乘法的本质;Logistic回归:分类问题的首选算法;重要技术:梯度下降算法、最大似然估计、特征选择、交叉验证
y=ax+b (一个变量)
误差 $\varepsilon^{(i)}$($1 \le i \le m$)是独立同分布的,服从均值为 $0$、方差为某定值 $\sigma^{2}$ 的高斯分布。
实际问题中,很多随机现象可以看作众多因素的独立影响的综合反应,往往近似服从正态分布(中心极限定理)。例如:城市耗电量——大量用户的耗电量总和;测量误差——许多观察不到的、微小误差的总和。
似然函数
Elastic Net正则化
把样本分出一部分作为验证数据。如三折交叉验证:先留出测试数据,再把其余样本分成三份,轮流用其中两份作训练数据、一份作验证数据,即 训练数据-训练数据-验证数据-测试数据
// Cross-validator with 5 folds: fits `pipeline` for each candidate in `paramGrid`
// and scores it with a RegressionEvaluator using the "rmse" metric, comparing the
// "rating" label column against the "predict_rating" prediction column.
CrossValidator cv = new CrossValidator()
    .setEstimator(pipeline)
    .setEvaluator(
        new RegressionEvaluator()
            .setLabelCol("rating")
            .setPredictionCol("predict_rating")
            .setMetricName("rmse"))
    .setEstimatorParamMaps(paramGrid)
    .setNumFolds(5);
如特征为x1、x2 输出为y
LogisticRegression实现 分类同理
import java.io.PrintWriter
import java.util

import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute}
import org.apache.spark.ml.classification.{BinaryLogisticRegressionTrainingSummary, LogisticRegressionModel, LogisticRegression}
import org.apache.spark.mllib.classification.LogisticRegressionWithSGD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{SQLContext, DataFrame, Row}
import org.apache.spark.sql.types.{DataTypes, StructField}
import org.apache.spark.{SparkContext, SparkConf}

/**
 * Trains a logistic-regression model with Spark ML on a two-feature CSV file
 * and dumps the learned weights.
 *
 * Steps performed by `main`:
 *  1. Read "string.txt", where each line is `label,f1,f2`.
 *  2. Build a DataFrame with a `label` column and a `features` vector column
 *     whose two slots carry the attribute names "f1" and "f2".
 *  3. Fit a LogisticRegression (10 iterations, regParam 0.3, elasticNetParam 1).
 *  4. Print coefficients and intercept, save the model to "F:\mode", and write
 *     one coefficient per line to "F:\weights".
 */
object LogisticRegression {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("test").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val sql = new SQLContext(sc)

      // Example of loading LIBSVM-formatted data directly as a DataFrame.
      // NOTE(review): `training` is not used by the fit below; drop this load
      // if "a.txt" is not actually needed.
      val training: DataFrame = sql.read.format("libsvm").load("a.txt")
      // val training = sc.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")

      // Parse each "label,f1,f2" line into Row(label, denseVector(f1, f2)).
      val data: RDD[String] = sc.textFile("string.txt")
      val rw = data.map { row =>
        val split: Array[String] = row.split(",")
        Row(split(0).toDouble, Vectors.dense(split(1).toDouble, split(2).toDouble))
      }

      // Attach the names "f1" and "f2" to the slots of the "features" column
      // through ML attribute metadata.
      val defaultAttr = NumericAttribute.defaultAttr
      val attrs = Array("f1", "f2").map(defaultAttr.withName)
      val attrGroup = new AttributeGroup("features", attrs.asInstanceOf[Array[Attribute]])

      // Schema: nullable double "label" + the vector column described above.
      val fields = new util.ArrayList[StructField]
      fields.add(DataTypes.createStructField("label", DataTypes.DoubleType, true))
      fields.add(attrGroup.toStructField())
      val structType = DataTypes.createStructType(fields)

      val df: DataFrame = sql.createDataFrame(rw, structType)
      df.printSchema()
      df.show()

      val lr = new LogisticRegression()
        .setMaxIter(10)
        .setRegParam(0.3)
        .setElasticNetParam(1.0) // default 0 => L2 penalty; 1 => L1 penalty

      // Fit the model
      val lrModel: LogisticRegressionModel = lr.fit(df)

      // Print the coefficients and intercept for logistic regression
      println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
      lrModel.write.overwrite().save("F:\\mode")

      // `coefficients` replaces the deprecated `weights` accessor and is what
      // the println above already reports.
      val weights: Array[Double] = lrModel.coefficients.toArray

      // Write one weight per line, in feature-vector order. Only the values
      // are written; feature names are not included in the output.
      val pw = new PrintWriter("F:\\weights")
      try {
        weights.foreach(w => pw.println(w.toString))
        pw.flush()
      } finally {
        // Close the file even if a write fails.
        pw.close()
      }
    } finally {
      // Release Spark resources on both the success and the failure path.
      sc.stop()
    }
  }
}
0 1:5.1 2:3.5 3:1.4 4:0.20 1:4.9 2:3.0 3:1.4 4:0.20 1:4.7 2:3.2 3:1.3 4:0.20 1:4.6 2:3.1 3:1.5 4:0.20 1:5.0 2:3.6 3:1.4 4:0.20 1:5.4 2:3.9 3:1.7 4:0.40 1:4.6 2:3.4 3:1.4 4:0.30 1:5.0 2:3.4 3:1.5 4:0.20 1:4.4 2:2.9 3:1.4 4:0.20 1:4.9 2:3.1 3:1.5 4:0.10 1:5.4 2:3.7 3:1.5 4:0.20 1:4.8 2:3.4 3:1.6 4:0.20 1:4.8 2:3.0 3:1.4 4:0.10 1:4.3 2:3.0 3:1.1 4:0.10 1:5.8 2:4.0 3:1.2 4:0.20 1:5.7 2:4.4 3:1.5 4:0.40 1:5.4 2:3.9 3:1.3 4:0.40 1:5.1 2:3.5 3:1.4 4:0.30 1:5.7 2:3.8 3:1.7 4:0.30 1:5.1 2:3.8 3:1.5 4:0.30 1:5.4 2:3.4 3:1.7 4:0.20 1:5.1 2:3.7 3:1.5 4:0.40 1:4.6 2:3.6 3:1.0 4:0.20 1:5.1 2:3.3 3:1.7 4:0.50 1:4.8 2:3.4 3:1.9 4:0.20 1:5.0 2:3.0 3:1.6 4:0.20 1:5.0 2:3.4 3:1.6 4:0.40 1:5.2 2:3.5 3:1.5 4:0.20 1:5.2 2:3.4 3:1.4 4:0.20 1:4.7 2:3.2 3:1.6 4:0.20 1:4.8 2:3.1 3:1.6 4:0.20 1:5.4 2:3.4 3:1.5 4:0.40 1:5.2 2:4.1 3:1.5 4:0.10 1:5.5 2:4.2 3:1.4 4:0.20 1:4.9 2:3.1 3:1.5 4:0.10 1:5.0 2:3.2 3:1.2 4:0.20 1:5.5 2:3.5 3:1.3 4:0.20 1:4.9 2:3.1 3:1.5 4:0.10 1:4.4 2:3.0 3:1.3 4:0.20 1:5.1 2:3.4 3:1.5 4:0.20 1:5.0 2:3.5 3:1.3 4:0.30 1:4.5 2:2.3 3:1.3 4:0.30 1:4.4 2:3.2 3:1.3 4:0.20 1:5.0 2:3.5 3:1.6 4:0.60 1:5.1 2:3.8 3:1.9 4:0.40 1:4.8 2:3.0 3:1.4 4:0.30 1:5.1 2:3.8 3:1.6 4:0.20 1:4.6 2:3.2 3:1.4 4:0.20 1:5.3 2:3.7 3:1.5 4:0.20 1:5.0 2:3.3 3:1.4 4:0.21 1:7.0 2:3.2 3:4.7 4:1.41 1:6.4 2:3.2 3:4.5 4:1.51 1:6.9 2:3.1 3:4.9 4:1.51 1:5.5 2:2.3 3:4.0 4:1.31 1:6.5 2:2.8 3:4.6 4:1.51 1:5.7 2:2.8 3:4.5 4:1.31 1:6.3 2:3.3 3:4.7 4:1.61 1:4.9 2:2.4 3:3.3 4:1.01 1:6.6 2:2.9 3:4.6 4:1.31 1:5.2 2:2.7 3:3.9 4:1.41 1:5.0 2:2.0 3:3.5 4:1.01 1:5.9 2:3.0 3:4.2 4:1.51 1:6.0 2:2.2 3:4.0 4:1.01 1:6.1 2:2.9 3:4.7 4:1.41 1:5.6 2:2.9 3:3.6 4:1.31 1:6.7 2:3.1 3:4.4 4:1.41 1:5.6 2:3.0 3:4.5 4:1.51 1:5.8 2:2.7 3:4.1 4:1.01 1:6.2 2:2.2 3:4.5 4:1.51 1:5.6 2:2.5 3:3.9 4:1.11 1:5.9 2:3.2 3:4.8 4:1.81 1:6.1 2:2.8 3:4.0 4:1.31 1:6.3 2:2.5 3:4.9 4:1.51 1:6.1 2:2.8 3:4.7 4:1.21 1:6.4 2:2.9 3:4.3 4:1.31 1:6.6 2:3.0 3:4.4 4:1.41 1:6.8 2:2.8 3:4.8 4:1.41 1:6.7 2:3.0 3:5.0 4:1.71 1:6.0 2:2.9 3:4.5 4:1.51 1:5.7 2:2.6 3:3.5 
4:1.01 1:5.5 2:2.4 3:3.8 4:1.11 1:5.5 2:2.4 3:3.7 4:1.01 1:5.8 2:2.7 3:3.9 4:1.21 1:6.0 2:2.7 3:5.1 4:1.61 1:5.4 2:3.0 3:4.5 4:1.51 1:6.0 2:3.4 3:4.5 4:1.61 1:6.7 2:3.1 3:4.7 4:1.51 1:6.3 2:2.3 3:4.4 4:1.31 1:5.6 2:3.0 3:4.1 4:1.31 1:5.5 2:2.5 3:4.0 4:1.31 1:5.5 2:2.6 3:4.4 4:1.21 1:6.1 2:3.0 3:4.6 4:1.41 1:5.8 2:2.6 3:4.0 4:1.21 1:5.0 2:2.3 3:3.3 4:1.01 1:5.6 2:2.7 3:4.2 4:1.31 1:5.7 2:3.0 3:4.2 4:1.21 1:5.7 2:2.9 3:4.2 4:1.31 1:6.2 2:2.9 3:4.3 4:1.31 1:5.1 2:2.5 3:3.0 4:1.11 1:5.7 2:2.8 3:4.1 4:1.32 1:6.3 2:3.3 3:6.0 4:2.52 1:5.8 2:2.7 3:5.1 4:1.92 1:7.1 2:3.0 3:5.9 4:2.12 1:6.3 2:2.9 3:5.6 4:1.82 1:6.5 2:3.0 3:5.8 4:2.22 1:7.6 2:3.0 3:6.6 4:2.12 1:4.9 2:2.5 3:4.5 4:1.72 1:7.3 2:2.9 3:6.3 4:1.82 1:6.7 2:2.5 3:5.8 4:1.82 1:7.2 2:3.6 3:6.1 4:2.52 1:6.5 2:3.2 3:5.1 4:2.02 1:6.4 2:2.7 3:5.3 4:1.92 1:6.8 2:3.0 3:5.5 4:2.12 1:5.7 2:2.5 3:5.0 4:2.02 1:5.8 2:2.8 3:5.1 4:2.42 1:6.4 2:3.2 3:5.3 4:2.32 1:6.5 2:3.0 3:5.5 4:1.82 1:7.7 2:3.8 3:6.7 4:2.22 1:7.7 2:2.6 3:6.9 4:2.32 1:6.0 2:2.2 3:5.0 4:1.52 1:6.9 2:3.2 3:5.7 4:2.32 1:5.6 2:2.8 3:4.9 4:2.02 1:7.7 2:2.8 3:6.7 4:2.02 1:6.3 2:2.7 3:4.9 4:1.82 1:6.7 2:3.3 3:5.7 4:2.12 1:7.2 2:3.2 3:6.0 4:1.82 1:6.2 2:2.8 3:4.8 4:1.82 1:6.1 2:3.0 3:4.9 4:1.82 1:6.4 2:2.8 3:5.6 4:2.12 1:7.2 2:3.0 3:5.8 4:1.62 1:7.4 2:2.8 3:6.1 4:1.92 1:7.9 2:3.8 3:6.4 4:2.02 1:6.4 2:2.8 3:5.6 4:2.22 1:6.3 2:2.8 3:5.1 4:1.52 1:6.1 2:2.6 3:5.6 4:1.42 1:7.7 2:3.0 3:6.1 4:2.32 1:6.3 2:3.4 3:5.6 4:2.42 1:6.4 2:3.1 3:5.5 4:1.82 1:6.0 2:3.0 3:4.8 4:1.82 1:6.9 2:3.1 3:5.4 4:2.12 1:6.7 2:3.1 3:5.6 4:2.42 1:6.9 2:3.1 3:5.1 4:2.32 1:5.8 2:2.7 3:5.1 4:1.92 1:6.8 2:3.2 3:5.9 4:2.32 1:6.7 2:3.3 3:5.7 4:2.52 1:6.7 2:3.0 3:5.2 4:2.32 1:6.3 2:2.5 3:5.0 4:1.92 1:6.5 2:3.0 3:5.2 4:2.02 1:6.2 2:3.4 3:5.4 4:2.32 1:5.9 2:3.0 3:5.1 4:1.8