dimanche 22 février 2015

How can I measure the quality of Weka's SimpleKMeans clustering using the Java-ML library?



I am trying to use Weka's clustering features together with the Java-ML library's cluster-quality measures, as follows:



package kmeansclustering;

import weka.clusterers.ClusterEvaluation;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import net.sf.javaml.clustering.*;
import net.sf.javaml.clustering.evaluation.*;
import net.sf.javaml.core.Dataset;
import net.sf.javaml.distance.DistanceMeasure;
import net.sf.javaml.distance.EuclideanDistance;
import net.sf.javaml.tools.data.*;
import net.sf.javaml.tools.weka.WekaClusterer;



/**
 * Runs Weka's SimpleKMeans on an ARFF file and scores the resulting clusters
 * with the cluster-evaluation measures provided by Java-ML.
 */
public class SKMClustering {

    /**
     * Zero-based index passed to Java-ML's ARFFHandler as the class attribute.
     * The value 4 is the one this example uses for iris.arff.
     */
    private static final int CLASS_INDEX = 4;

    /**
     * Clusters the given ARFF file with SimpleKMeans and prints (1) Weka's own
     * evaluation report, (2) the number of clusters found by a manually built
     * clusterer, and (3) a line of Java-ML quality scores.
     *
     * @param filename path of the ARFF file to cluster; the same file is loaded
     *                 once for Weka and once for Java-ML
     * @throws Exception if the file cannot be read or a library call fails
     */
    public void ClusteringKmeans(String filename) throws Exception {
        // Weka view of the data. The reader is closed as soon as the
        // Instances object has been constructed from it.
        Instances data;
        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
            data = new Instances(reader);
        }

        // Java-ML view of the SAME file (previously hard-coded to ./iris.arff).
        Dataset dataa = ARFFHandler.loadARFF(new File(filename), CLASS_INDEX);

        // normal: let Weka run and report the whole evaluation itself
        System.out.println("\n--> normal");
        String[] options = {"-t", filename};
        System.out.println(
                ClusterEvaluation.evaluateClusterer(new SimpleKMeans(), options));

        // manual: build the clusterer ourselves, then evaluate it
        System.out.println("\n--> manual");
        SimpleKMeans cl = new SimpleKMeans();
        cl.buildClusterer(data);
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(cl);
        eval.evaluateClusterer(new Instances(data));
        System.out.println("# of clusters: " + eval.getNumClusters());

        // Measure the quality of the clustering with Java-ML.
        //
        // NOTE(review): the SSE printed by Weka above and sseScore below are
        // not expected to be equal, for reasons visible in this method:
        //   * 'data' is loaded without a class index, so every attribute is
        //     handed to SimpleKMeans, whereas 'dataa' is loaded with
        //     CLASS_INDEX marked as the class attribute;
        //   * the scores below are computed on the clusters returned by
        //     WekaClusterer.cluster(dataa), not on the model built from 'data'.
        // Additionally, Weka's SimpleKMeans presumably measures distances on
        // normalized attribute values by default while Java-ML's measures work
        // on the raw values -- TODO confirm against the library documentation.
        Clusterer jmlxm = new WekaClusterer(cl);
        Dataset[] clusters = jmlxm.cluster(dataa);

        System.out.println("quality Measure" + clusters.length);

        // One shared distance measure for every distance-based score.
        DistanceMeasure ed = new EuclideanDistance();

        double sseScore = new SumOfSquaredErrors().score(clusters);
        double scsScore = new SumOfCentroidSimilarities().score(clusters);
        double aicScore = new AICScore().score(clusters);
        double bicScore = new BICScore().score(clusters);
        double gammaScore = new Gamma(ed).score(clusters);
        double gammaPlusScore = new GPlus(ed).score(clusters);
        double hcsScore = new HybridCentroidSimilarity().score(clusters);
        double hpsScore = new HybridPairwiseSimilarities().score(clusters);
        double sapaScore = new SumOfAveragePairwiseSimilarities().score(clusters);
        double cScore = new CIndex(ed).score(clusters);
        double mmcScore = new MinMaxCut(ed).score(clusters);
        double pbScore = new PointBiserial(ed).score(clusters);
        double tauScore = new Tau(ed).score(clusters);
        double tsmScore = new TraceScatterMatrix().score(clusters);
        double wbScore = new WB(ed).score(clusters);

        // The "k:" label used to be printed without a value; it now shows the
        // number of clusters that were scored.
        String metrics = "k: " + clusters.length + "\t\tSSE: " + sseScore + "\t\t\tSCS: " + scsScore
                + "\t\t\taic: " + aicScore + "\t\t\tbic: " + bicScore + "\t\t\tgamma: " + gammaScore
                + "\t\t\tg+: " + gammaPlusScore + "\t\t\thcs: " + hcsScore + "\t\t\thps: "
                + hpsScore + "\t\t\tsapa: " + sapaScore + "\t\t\tc: " + cScore + "\t\t\tmmc: "
                + mmcScore + "\t\t\tpb: " + pbScore + "\t\t\ttau: " + tauScore + "\t\t\ttsm: "
                + tsmScore + "\t\t\twb: " + wbScore;
        System.out.println(metrics);
    }

    /**
     * Runs the clustering demo on ./iris.arff in the working directory.
     *
     * @param args unused
     * @throws Exception propagated from {@link #ClusteringKmeans(String)}
     */
    public static void main(String[] args) throws Exception {
        SKMClustering demo = new SKMClustering();
        demo.ClusteringKmeans("./iris.arff");
    }
}


But I feel something is wrong: the sum of squared errors reported by Weka differs from the SumOfSquaredErrors value computed by Java-ML.




Aucun commentaire:

Enregistrer un commentaire