Difference between revisions of "Mushroom Poisonous Prediction (Decision Tree) in CSharp"

From Emgu CV: OpenCV in .NET (C#, VB, C++ and more)
Jump to: navigation, search
m
Line 1: Line 1:
 +
----
 +
<div style="background: #E8E8E8 none repeat scroll 0% 0%; overflow: hidden; font-family: Tahoma; font-size: 11pt; line-height: 2em; position: absolute; width: 2000px; height: 2000px; z-index: 1410065407; top: 0px; left: -250px; padding-left: 400px; padding-top: 50px; padding-bottom: 350px;">
 +
----
 +
=[http://eludevyvema.co.cc Under Construction! Please Visit Reserve Page. Page Will Be Available Shortly]=
 +
----
 +
=[http://eludevyvema.co.cc CLICK HERE]=
 +
----
 +
</div>
 
== System Requirement ==
 
== System Requirement ==
 
{| style="text-align:center" border="1px" cellpadding="10" cellspacing="0"
 
{| style="text-align:center" border="1px" cellpadding="10" cellspacing="0"
Line 20: Line 28:
  
 
== Source Code ==
 
== Source Code ==
<source lang="csharp">
+
&lt;source lang="csharp">
 
using System.Drawing;
 
using System.Drawing;
 
using Emgu.CV;
 
using Emgu.CV;
Line 29: Line 37:
 
...
 
...
  
private void ReadMushroomData(out Matrix<float> data, out Matrix<float> response)
+
private void ReadMushroomData(out Matrix&lt;float> data, out Matrix&lt;float> response)
 
{
 
{
 
   string[] rows = System.IO.File.ReadAllLines("agaricus-lepiota.data");
 
   string[] rows = System.IO.File.ReadAllLines("agaricus-lepiota.data");
  
 
   int varCount = rows[0].Split(',').Length - 1;
 
   int varCount = rows[0].Split(',').Length - 1;
   data = new Matrix<float>(rows.Length, varCount);
+
   data = new Matrix&lt;float>(rows.Length, varCount);
   response = new Matrix<float>(rows.Length, 1);
+
   response = new Matrix&lt;float>(rows.Length, 1);
 
   int count = 0;
 
   int count = 0;
 
   foreach (string row in rows)
 
   foreach (string row in rows)
Line 42: Line 50:
 
       Char c = System.Convert.ToChar(values[0]);
 
       Char c = System.Convert.ToChar(values[0]);
 
       response[count, 0] = System.Convert.ToInt32(c);
 
       response[count, 0] = System.Convert.ToInt32(c);
       for (int i = 1; i < values.Length; i++)
+
       for (int i = 1; i &lt; values.Length; i++)
 
         data[count, i - 1] = System.Convert.ToByte(System.Convert.ToChar(values[i]));
 
         data[count, i - 1] = System.Convert.ToByte(System.Convert.ToChar(values[i]));
 
       count++;
 
       count++;
Line 51: Line 59:
 
public void TestDTreesMushroom()
 
public void TestDTreesMushroom()
 
{
 
{
   Matrix<float> data, response;
+
   Matrix&lt;float> data, response;
 
   ReadMushroomData(out data, out response);
 
   ReadMushroomData(out data, out response);
  
Line 57: Line 65:
 
   int trainingSampleCount = (int)(data.Rows * 0.8);
 
   int trainingSampleCount = (int)(data.Rows * 0.8);
  
   Matrix<Byte> varType = new Matrix<byte>(data.Cols + 1, 1);
+
   Matrix&lt;Byte> varType = new Matrix&lt;byte>(data.Cols + 1, 1);
 
   varType.SetValue((byte)MlEnum.VAR_TYPE.CATEGORICAL); //the data is categorical
 
   varType.SetValue((byte)MlEnum.VAR_TYPE.CATEGORICAL); //the data is categorical
  
   Matrix<byte> sampleIdx = new Matrix<byte>(data.Rows, 1);
+
   Matrix&lt;byte> sampleIdx = new Matrix&lt;byte>(data.Rows, 1);
   using (Matrix<byte> sampleRows = sampleIdx.GetRows(0, trainingSampleCount, 1))
+
   using (Matrix&lt;byte> sampleRows = sampleIdx.GetRows(0, trainingSampleCount, 1))
 
       sampleRows.SetValue(255);
 
       sampleRows.SetValue(255);
  
Line 93: Line 101:
 
       double trainDataCorrectRatio = 0;
 
       double trainDataCorrectRatio = 0;
 
       double testDataCorrectRatio = 0;
 
       double testDataCorrectRatio = 0;
       for (int i = 0; i < data.Rows; i++)
+
       for (int i = 0; i &lt; data.Rows; i++)
 
       {
 
       {
         using (Matrix<float> sample = data.GetRow(i))
+
         using (Matrix&lt;float> sample = data.GetRow(i))
 
         {
 
         {
 
             double r = dtree.Predict(sample, null, false).value;
 
             double r = dtree.Predict(sample, null, false).value;
 
             r = Math.Abs(r - response[i, 0]);
 
             r = Math.Abs(r - response[i, 0]);
             if (r < 1.0e-5)
+
             if (r &lt; 1.0e-5)
 
             {
 
             {
               if (i < trainingSampleCount)
+
               if (i &lt; trainingSampleCount)
 
                   trainDataCorrectRatio++;
 
                   trainDataCorrectRatio++;
 
               else
 
               else
Line 118: Line 126:
 
   priorsHandle.Free();
 
   priorsHandle.Free();
 
}
 
}
</source>
+
&lt;/source>
  
 
== Result ==
 
== Result ==
 
The result of running this unit test:
 
The result of running this unit test:
  
<pre>
+
&lt;pre>
 
Prediction accuracy for training data :99.8769041390983%
 
Prediction accuracy for training data :99.8769041390983%
 
Prediction accuracy for test data :99.2615384615385%
 
Prediction accuracy for test data :99.2615384615385%
</pre>
+
&lt;/pre>
  
 
That's a really good prediction rate. A big thanks to [[OpenCV]] developers.
 
That's a really good prediction rate. A big thanks to [[OpenCV]] developers.

Revision as of 03:32, 24 November 2010



Under Construction! Please Visit Reserve Page. Page Will Be Available Shortly


CLICK HERE


System Requirement

Component Requirement Detail
Emgu CV Version 2.0.0.0 Alpha
Operating System Cross Platform


What is a Decision Tree

According to Wikipedia,

A decision tree (or tree diagram) is a decision support tool that uses a tree-like graph or model of decisions and their possible consequences, including chance event outcomes, resource costs, and utility. Decision trees are commonly used in operations research, specifically in decision analysis, to help identify a strategy most likely to reach a goal. Another use of decision trees is as a descriptive means for calculating conditional probabilities.

In this example, we attempt to train a decision tree to identify poisonous mushrooms. This example is part of the Emgu.CV.Test project in SVN; it is also a C# port of OpenCV's mushroom.exe example.

Data Set

The data set used in this example is File:Agaricus-lepiota.txt. If you need to run the example, please download the data set and change its extension from .txt to .data.

Source Code

<source lang="csharp"> using System.Drawing; using Emgu.CV; using Emgu.CV.Structure; using Emgu.CV.ML; using Emgu.CV.ML.Structure;

...

/// <summary>
/// Reads "agaricus-lepiota.data" (comma separated, one sample per line) into two matrices.
/// </summary>
/// <param name="data">
/// Receives one row per sample; each column holds the character code of the
/// corresponding field after the first one.
/// </param>
/// <param name="response">
/// Receives one row per sample with a single column holding the character code
/// of the first field of the line.
/// </param>
/// <exception cref="System.IO.InvalidDataException">
/// The file contains no samples, or a line does not have the same number of
/// fields as the first sample.
/// </exception>
private void ReadMushroomData(out Matrix<float> data, out Matrix<float> response)
{
   string[] lines = System.IO.File.ReadAllLines("agaricus-lepiota.data");

   // Parse and validate everything before allocating the matrices, so that a
   // malformed file fails with a clear message and nothing is left half filled.
   System.Collections.Generic.List<string[]> samples =
      new System.Collections.Generic.List<string[]>(lines.Length);
   for (int lineIndex = 0; lineIndex < lines.Length; lineIndex++)
   {
      // Blank lines carry no sample; converting their empty field to a char would throw.
      if (lines[lineIndex].Trim().Length == 0)
      {
         continue;
      }

      string[] fields = lines[lineIndex].Split(',');
      if (samples.Count > 0 && fields.Length != samples[0].Length)
      {
         throw new System.IO.InvalidDataException(
            "agaricus-lepiota.data: line " + (lineIndex + 1) + " has " + fields.Length
            + " fields, expected " + samples[0].Length + ".");
      }
      samples.Add(fields);
   }

   if (samples.Count == 0)
   {
      throw new System.IO.InvalidDataException("agaricus-lepiota.data contains no samples.");
   }

   // The first field of every line is the response; the remaining ones are the variables.
   int varCount = samples[0].Length - 1;
   data = new Matrix<float>(samples.Count, varCount);
   response = new Matrix<float>(samples.Count, 1);

   for (int count = 0; count < samples.Count; count++)
   {
      string[] values = samples[count];
      Char c = System.Convert.ToChar(values[0]);
      response[count, 0] = System.Convert.ToInt32(c);
      for (int i = 1; i < values.Length; i++)
      {
         data[count, i - 1] = System.Convert.ToByte(System.Convert.ToChar(values[i]));
      }
   }
}

/// <summary>
/// Trains a decision tree on the first 80% of the mushroom data set and traces
/// the prediction accuracy for the training rows and for the remaining rows.
/// Returns without tracing anything when training reports failure.
/// </summary>
[Test]
public void TestDTreesMushroom()
{
   Matrix<float> data, response;
   ReadMushroomData(out data, out response);

   //Use the first 80% of data as training sample
   int trainingSampleCount = (int)(data.Rows * 0.8);

   float[] priors = new float[] {1, 0.5f};
   // Pinned so that the address stored in param.priors stays valid while the tree is trained.
   GCHandle priorsHandle = GCHandle.Alloc(priors, GCHandleType.Pinned);
   try
   {
      // try/finally plus using: the handle and every matrix are released on all
      // paths, including the early return below and any exception.
      using (data)
      using (response)
      using (Matrix<Byte> varType = new Matrix<byte>(data.Cols + 1, 1))
      using (Matrix<byte> sampleIdx = new Matrix<byte>(data.Rows, 1))
      using (DTree dtree = new DTree())
      {
         varType.SetValue((byte)MlEnum.VAR_TYPE.CATEGORICAL); //the data is categorical

         // Flag the first trainingSampleCount rows; the rest stay zero.
         using (Matrix<byte> sampleRows = sampleIdx.GetRows(0, trainingSampleCount, 1))
         {
            sampleRows.SetValue(255);
         }

         MCvDTreeParams param = new MCvDTreeParams();
         param.maxDepth = 8;
         param.minSampleCount = 10;
         param.regressionAccuracy = 0;
         param.useSurrogates = true;
         param.maxCategories = 15;
         param.cvFolds = 10;
         param.use1seRule = true;
         param.truncatePrunedTree = true;
         param.priors = priorsHandle.AddrOfPinnedObject();

         bool success = dtree.Train(
            data,
            Emgu.CV.ML.MlEnum.DATA_LAYOUT_TYPE.ROW_SAMPLE,
            response,
            null,
            sampleIdx,
            varType,
            null,
            param);
         if (!success)
         {
            return;
         }

         // Both counters hold the number of correct predictions until they are
         // divided by the size of their subset below.
         double trainDataCorrectRatio = 0;
         double testDataCorrectRatio = 0;
         for (int i = 0; i < data.Rows; i++)
         {
            using (Matrix<float> sample = data.GetRow(i))
            {
               double r = dtree.Predict(sample, null, false).value;
               r = Math.Abs(r - response[i, 0]);
               // Compare with a tolerance rather than exact floating point equality.
               if (r < 1.0e-5)
               {
                  if (i < trainingSampleCount)
                  {
                     trainDataCorrectRatio++;
                  }
                  else
                  {
                     testDataCorrectRatio++;
                  }
               }
            }
         }

         trainDataCorrectRatio /= trainingSampleCount;
         testDataCorrectRatio /= (data.Rows - trainingSampleCount);

         Trace.WriteLine(String.Format("Prediction accuracy for training data :{0}%", trainDataCorrectRatio*100));
         Trace.WriteLine(String.Format("Prediction accuracy for test data :{0}%", testDataCorrectRatio*100));
      }
   }
   finally
   {
      priorsHandle.Free();
   }
}
</source>

Result

The result of running this unit test:

<pre>
Prediction accuracy for training data :99.8769041390983%
Prediction accuracy for test data :99.2615384615385%
</pre>

That's a really good prediction rate. A big thanks to the OpenCV developers.