Multi-search

Tas
Hi, I am trying to optimize the parameters of J48 using MultiSearch in Java, but I have run into a problem. At first I optimized only the confidence factor (-C) and minNumObj (-M), and after evaluation I got an accuracy of 71.7529%. Then I also included subtreeRaising and binarySplits, but the accuracy dropped to 71.4858%, even though I was now optimizing four parameters. How is this possible? Any ideas? Thanks.


import java.util.Random;

import weka.classifiers.Evaluation;
import weka.classifiers.meta.MultiSearch;
import weka.classifiers.meta.multisearch.DefaultEvaluationMetrics;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.SelectedTag;
import weka.core.Utils;
import weka.core.converters.ConverterUtils.DataSource;
import weka.core.setupgenerator.AbstractParameter;
import weka.core.setupgenerator.ListParameter;
import weka.core.setupgenerator.MathParameter;

public class J48ConfidenceFactor {

  public void read() throws Exception {
    // load the data and set the class attribute (last attribute by default)
    DataSource source = new DataSource("/file.csv");
    Instances data = source.getDataSet();
    if (data.classIndex() == -1)
      data.setClassIndex(data.numAttributes() - 1);
    this.optimizeJ48(data);
  }

  public void optimizeJ48(Instances data) throws Exception {
    // the classifier whose parameters we want to optimize
    J48 j48 = new J48();

    // confidence factor (-C): 0.1 to 0.5 in steps of 0.05; with the
    // expression "I" the raw iteration value is used directly, so the
    // base setting has no effect
    MathParameter conf = new MathParameter();
    conf.setProperty("confidenceFactor");
    conf.setBase(10);
    conf.setMin(0.1);
    conf.setMax(0.5);
    conf.setStep(0.05);
    conf.setExpression("I");

    // minimum number of instances per leaf (-M): 1 to 64 in steps of 1
    MathParameter conf2 = new MathParameter();
    conf2.setProperty("minNumObj");
    conf2.setBase(10);
    conf2.setMin(1);
    conf2.setMax(64);
    conf2.setStep(1);
    conf2.setExpression("I");

    // subtree raising: try both values
    ListParameter conf3 = new ListParameter();
    conf3.setProperty("subtreeRaising");
    conf3.setCustomDelimiter(",");
    conf3.setList("true,false");

    // binary splits: try both values
    ListParameter conf4 = new ListParameter();
    conf4.setProperty("binarySplits");
    conf4.setCustomDelimiter(",");
    conf4.setList("true,false");

    // configure the search to optimize classification accuracy
    MultiSearch multi = new MultiSearch();
    multi.setClassifier(j48);
    multi.setSearchParameters(new AbstractParameter[] { conf, conf2, conf3, conf4 });
    SelectedTag tag = new SelectedTag(DefaultEvaluationMetrics.EVALUATION_ACC,
      new DefaultEvaluationMetrics().getTags());
    multi.setEvaluation(tag);

    // output the configuration
    System.out.println("\nMultiSearch commandline:\n" + Utils.toCommandLine(multi));

    // optimize
    System.out.println("\nOptimizing...\n");
    multi.buildClassifier(data);

    // the summary and best setup are only available after the search has run
    System.out.println("Summary: " + multi.toSummaryString());
    System.out.println("Best setup:\n" + Utils.toCommandLine(multi.getBestClassifier()));
    System.out.println("Best parameter: " + multi.getGenerator().evaluate(multi.getBestValues()));
    System.out.println("Coordinates: " + multi.getGenerator().evaluate(multi.getBestCoordinates()));
    System.out.println("Revision: " + multi.getRevision());

    // estimate the performance of the tuned classifier
    this.evaluate(data, multi);
  }

  public void evaluate(Instances data, MultiSearch multi) throws Exception {
    // cross-validating MultiSearch itself repeats the parameter search
    // on each training fold
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(multi, data, 10, new Random(1));

    System.out.println("=== Stratified cross-validation ===\n=== Summary ===\n");
    System.out.println(eval.toSummaryString());
    System.out.println();
    System.out.println(eval.toClassDetailsString());

    System.out.println("=== Confusion Matrix ===");
    double[][] cmMatrix = eval.confusionMatrix();
    for (int i = 0; i < cmMatrix.length; i++) {
      System.out.println();
      for (int j = 0; j < cmMatrix[i].length; j++)
        System.out.print(cmMatrix[i][j] + "  ");
    }
    System.out.println();
  }

  /**
   * Runs the optimization. Note that the dataset path is currently
   * hardcoded in read().
   *
   * @param args
   *          ignored
   * @throws Exception
   *           if optimization fails for some reason
   */
  public static void main(String[] args) throws Exception {
    J48ConfidenceFactor j48 = new J48ConfidenceFactor();
    j48.read();

    // J48 opt: 0.3 24
  }
}

Re: Multi-search

Eibe Frank-2
Administrator
Presumably, the accuracy you are reporting was obtained by evaluating the MultiSearch classifier using cross-validation, a percentage split, or a separate test set?

In MultiSearch, parameter values are optimised by estimating performance using internal cross-validation. This is a very good heuristic but it is not guaranteed to give you best performance on new, external test data.

Having said that, assuming your test data isn’t extremely large, your observed difference (less than 0.3%) is unlikely to be statistically significant anyway.
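
To make the distinction concrete, here is a conceptual sketch of the inner loop (illustrative only: the class, method and variable names are made up, and the real MultiSearch implementation is more involved):

import java.util.List;
import java.util.Random;

import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.core.Instances;

public class InternalSearchSketch {

  // Simplified picture of parameter tuning via internal cross-validation:
  // 'setups' holds one configured classifier per parameter combination.
  public static Classifier pickBest(List<Classifier> setups, Instances train)
    throws Exception {
    Classifier best = null;
    double bestScore = -Double.MAX_VALUE;
    for (Classifier candidate : setups) {
      // estimate performance with cross-validation on the training data only
      Evaluation inner = new Evaluation(train);
      inner.crossValidateModel(candidate, train, 10, new Random(1));
      double score = inner.pctCorrect(); // the metric being optimised
      if (score > bestScore) {
        bestScore = score;
        best = candidate;
      }
    }
    // 'best' maximises the internal estimate; nothing guarantees that it
    // also maximises performance on new, external test data
    return best;
  }
}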

Cheers,
Eibe

> On 27/07/2017, at 2:45 PM, Tas <[hidden email]> wrote:
>
> Hi, I am trying to optimize the parameters of J48 using MultiSearch in
> Java, but I have run into a problem. At first I optimized only the
> confidence factor (-C) and minNumObj (-M), and after evaluation I got an
> accuracy of 71.7529%. Then I also included subtreeRaising and binarySplits,
> but the accuracy dropped to 71.4858%, even though I was now optimizing four
> parameters. How is this possible? Any ideas? Thanks.

Re: Multi-search

Tas
Dear Eibe thanks for the reply,

Yes, the accuracy I reported was obtained with 10-fold cross-validation, using the parameters MultiSearch gave me. What worries me is that the system gave me different values for the other parameters when I added subtreeRaising to the search, and it chose to set subtreeRaising to true. But the default value for subtreeRaising is already true, so it does not make sense for the other parameters to change, does it?

Something else that seems strange: when I optimised with MultiSearch for the area under the ROC curve (DefaultEvaluationMetrics.EVALUATION_AUC) on the same dataset with the same parameters, I got a higher accuracy than now, when I am optimising for accuracy (DefaultEvaluationMetrics.EVALUATION_ACC).

My dataset has 3000 instances.

Cheers!    

Re: Multi-search

Eibe Frank-2
Administrator

> On 27/07/2017, at 11:00 PM, Tas <[hidden email]> wrote:
>
> Yes, the accuracy I reported was obtained with 10-fold cross-validation,
> using the parameters MultiSearch gave me. What worries me is that the
> system gave me different values for the other parameters when I added
> subtreeRaising to the search, and it chose to set subtreeRaising to true.
> But the default value for subtreeRaising is already true, so it does not
> make sense for the other parameters to change, does it?

Can you perhaps make your question more precise?

As I said, the internal cross-validation will not necessarily choose the best parameter settings for the outer cross-validation (or whichever other evaluation option you are using). Perhaps you should read up on internal cross-validation for parameter tuning.

> Something else that seems strange: when I optimised with MultiSearch for
> the area under the ROC curve (DefaultEvaluationMetrics.EVALUATION_AUC) on
> the same dataset with the same parameters, I got a higher accuracy than
> now, when I am optimising for accuracy (DefaultEvaluationMetrics.EVALUATION_ACC).

This is definitely possible. I think there’s actually some research on this: sometimes it’s better to optimise for AUROC even if your main target is classification accuracy. AUROC is a more sensitive measure than classification accuracy.
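
If you want to compare the two settings programmatically, the only thing that changes is the SelectedTag passed to setEvaluation(). A small sketch using the constants from your own code (the helper class and method names are just for illustration):

import weka.classifiers.meta.MultiSearch;
import weka.classifiers.meta.multisearch.DefaultEvaluationMetrics;
import weka.core.SelectedTag;

public class MetricSelection {

  // Sets the metric that MultiSearch's internal cross-validation optimises.
  // Pass e.g. DefaultEvaluationMetrics.EVALUATION_AUC or EVALUATION_ACC;
  // everything else about the search stays unchanged.
  public static void optimiseFor(MultiSearch multi, int metric) {
    multi.setEvaluation(new SelectedTag(metric,
      new DefaultEvaluationMetrics().getTags()));
  }
}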

Cheers,
Eibe

Re: Multi-search

Tas
Sorry for the late reply, and thanks again for your help.

I tried to optimise the J48 algorithm, first with two parameters and then with those two parameters plus subtreeRaising (three parameters). In both runs subtreeRaising ended up set to true: the first time because that is the default value, the second time because MultiSearch selected it. I then trained and evaluated J48 with the resulting parameters, and the accuracy with three parameters was lower.

So my question is: how did this happen when I only gave MultiSearch one more parameter to optimise? (In fact, what I observe is that the more parameters I optimise, the lower the accuracy gets.) And if subtreeRaising is true in both cases, why did it give me parameters that train with lower accuracy the second time?

I will do some research on optimising for AUROC and try to find out whether this occurs with all my classification algorithms.

Thanks again!!

Re: Multi-search

Eibe Frank-2
Administrator
It is possible that parameter selection based on internal cross-validation chooses different “optimal” parameter values for each fold of the outer/external cross-validation. Hence, if you fix the parameters to the globally “optimal” values output by MultiSearch and then run a cross-validation, you may get a different result.

MultiSearch is a learning algorithm just like any other WEKA learning algorithm: if the training data changes, it may build a different model, i.e., choose different parameter values. You need to consider this when running a k-fold cross-validation or percentage split evaluation with MultiSearch.

If you pick the globally “optimal” parameters that MultiSearch chooses based on the *entire* training set loaded into the Preprocess panel (the ones shown in the output of the MultiSearch model) and use these to run a cross-validation with the base learner, you are effectively cheating because the globally “optimal” parameters are chosen based on the entire dataset, i.e., the training *and* test data used in the cross-validation.
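
In code, the difference between the two protocols looks roughly like this (a sketch only: the class and method names are illustrative, with 'multi' a configured MultiSearch and 'data' the full dataset, as in your earlier post):

import java.util.Random;

import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.meta.MultiSearch;
import weka.core.Instances;

public class ProtocolComparison {

  public static void compare(MultiSearch multi, Instances data) throws Exception {
    // Biased protocol: choose the "optimal" parameters using ALL the data,
    // then cross-validate the base learner with those fixed parameters.
    // The test folds of this cross-validation influenced the tuning.
    multi.buildClassifier(data);
    Classifier tuned = multi.getBestClassifier();
    Evaluation biased = new Evaluation(data);
    biased.crossValidateModel(tuned, data, 10, new Random(1));
    System.out.println("Biased estimate:\n" + biased.toSummaryString());

    // Unbiased protocol: cross-validate MultiSearch itself, so the parameter
    // search is repeated on each training fold and the corresponding test
    // fold is never seen during tuning.
    Evaluation unbiased = new Evaluation(data);
    unbiased.crossValidateModel(multi, data, 10, new Random(1));
    System.out.println("Unbiased estimate:\n" + unbiased.toSummaryString());
  }
}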

Cheers,
Eibe

> On 1/08/2017, at 9:37 AM, Tas <[hidden email]> wrote:
>
> So my question is: how did this happen when I only gave MultiSearch one
> more parameter to optimise? (In fact, what I observe is that the more
> parameters I optimise, the lower the accuracy gets.) And if subtreeRaising
> is true in both cases, why did it give me parameters that train with lower
> accuracy the second time?