Skip to content

Commit

Permalink
Updating Codification filter to work without DataTables.
Browse files Browse the repository at this point in the history
  • Loading branch information
cesarsouza committed Nov 30, 2013
1 parent a4683cb commit c9b06a5
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 1 deletion.
122 changes: 121 additions & 1 deletion Sources/Accord.Statistics/Filters/Codification.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ namespace Accord.Statistics.Filters
using System.Collections.Generic;
using System.Data;
using System.ComponentModel;
using Accord.Math;

/// <summary>
/// Codification Filter class.
Expand Down Expand Up @@ -198,7 +199,33 @@ public Codification(DataTable data, params string[] columns)
this.Detect(data, columns);
}


/// <summary>
///   Creates a new Codification Filter for a single variable, building
///   its codebook directly from an array of string values rather than
///   from a <see cref="DataTable"/>.
/// </summary>
///
/// <param name="columnName">The name under which the variable is registered.</param>
/// <param name="values">The values the variable can assume. They are
///   passed on to <c>parseColumn</c>, which assigns one integer codeword
///   to each distinct value.</param>
///
public Codification(string columnName, params string[] values)
{
    this.parseColumn(columnName, values);
}

/// <summary>
///   Creates a new Codification Filter for several variables at once,
///   building one codebook per variable from arrays of string values.
/// </summary>
///
/// <param name="columnNames">The names under which the variables are registered.</param>
/// <param name="values">The values for each variable: <c>values[i]</c> holds
///   the values used to build the codebook of <c>columnNames[i]</c>.</param>
///
public Codification(string[] columnNames, string[][] values)
{
    this.Detect(columnNames, values);
}

/// <summary>
///   Creates a new Codification Filter for a single variable whose
///   values are supplied as a jagged array.
/// </summary>
///
/// <param name="columnName">The name under which the variable is registered.</param>
/// <param name="values">A jagged array of values for the variable. It is
///   forwarded as-is to <c>Detect(string, string[][])</c>, which builds a
///   single codebook for <paramref name="columnName"/>.</param>
///
public Codification(string columnName, string[][] values)
{
    this.Detect(columnName, values);
}


/// <summary>
/// Translates a value of a given variable
Expand Down Expand Up @@ -300,6 +327,57 @@ public int[] Translate(string[] columnNames, string[] values)
return result;
}

/// <summary>
///   Translates several values of a single variable into
///   their integer (codeword) representations.
/// </summary>
///
/// <param name="columnName">The name of the variable's data column.</param>
/// <param name="values">The values to be translated.</param>
///
/// <returns>An array of integers in which the i-th element is the
///   codeword stored in the variable's mapping for the i-th element
///   of <paramref name="values"/>.</returns>
///
public int[] Translate(string columnName, string[] values)
{
    int[] codewords = new int[values.Length];

    // All values belong to the same variable, so its options are looked up once.
    Options column = this.Columns[columnName];

    for (int index = 0; index < codewords.Length; index++)
    {
        string symbol = values[index];
        codewords[index] = column.Mapping[symbol];
    }

    return codewords;
}

/// <summary>
///   Translates a jagged array of values of a single variable
///   into their integer (codeword) representations.
/// </summary>
///
/// <param name="columnName">The name of the variable's data column.</param>
/// <param name="values">The values to be translated, organized as an array of arrays.</param>
///
/// <returns>A jagged array with the same layout as <paramref name="values"/>,
///   in which each element is the codeword stored in the variable's mapping
///   for the value at the same position.</returns>
///
public int[][] Translate(string columnName, string[][] values)
{
    int[][] codewords = new int[values.Length][];

    // Every row is translated with the same variable's mapping.
    Options column = this.Columns[columnName];

    for (int row = 0; row < codewords.Length; row++)
    {
        string[] symbols = values[row];
        int[] translated = new int[symbols.Length];

        for (int col = 0; col < translated.Length; col++)
        {
            translated[col] = column.Mapping[symbols[col]];
        }

        codewords[row] = translated;
    }

    return codewords;
}

/// <summary>
/// Translates an integer (codeword) representation of
/// the value of a given variable into its original
Expand All @@ -323,6 +401,26 @@ public string Translate(string columnName, int codeword)
return null;
}

/// <summary>
///   Translates integer (codeword) representations of the values
///   of a given variable back into their original values.
/// </summary>
///
/// <param name="columnName">The name of the variable's data column.</param>
/// <param name="codewords">The codewords to be translated.</param>
///
/// <returns>An array in which the i-th element is the result of translating
///   the i-th codeword with the single-codeword overload of this method.</returns>
///
public string[] Translate(string columnName, int[] codewords)
{
    string[] labels = new string[codewords.Length];

    for (int index = 0; index < labels.Length; index++)
    {
        int codeword = codewords[index];
        labels[index] = this.Translate(columnName, codeword);
    }

    return labels;
}

/// <summary>
/// Translates the integer (codeword) representations of
/// the values of the given variables into their original
Expand Down Expand Up @@ -431,6 +529,28 @@ public void Detect(DataTable data)
parseColumn(data, column);
}

/// <summary>
///   Detects the codebook of a single variable from a jagged array of values.
/// </summary>
///
/// <param name="columnName">The name under which the variable is registered.</param>
/// <param name="values">A jagged array containing the values of the variable.</param>
///
public void Detect(string columnName, string[][] values)
{
    // NOTE(review): Reshape(0) presumably flattens the jagged array into a
    // single vector so all entries share one codebook - confirm against the
    // Accord.Math extension method.
    string[] flattened = values.Reshape(0);

    this.parseColumn(columnName, flattened);
}

/// <summary>
///   Detects the codebooks of several variables, one array of values per variable.
/// </summary>
///
/// <param name="columnNames">The names under which the variables are registered.</param>
/// <param name="values">The values for each variable: <c>values[i]</c> is parsed
///   under the name <c>columnNames[i]</c>. Entries beyond
///   <c>columnNames.Length</c> are ignored.</param>
///
/// <exception cref="System.ArgumentNullException">
///   <paramref name="columnNames"/> or <paramref name="values"/> is null.</exception>
/// <exception cref="System.ArgumentException">
///   <paramref name="values"/> has fewer entries than <paramref name="columnNames"/>.</exception>
///
public void Detect(string[] columnNames, string[][] values)
{
    if (columnNames == null)
    {
        throw new System.ArgumentNullException("columnNames");
    }

    if (values == null)
    {
        throw new System.ArgumentNullException("values");
    }

    // Validate before registering anything: failing halfway through the loop
    // would leave the filter with only some of the requested columns added.
    if (values.Length < columnNames.Length)
    {
        throw new System.ArgumentException(
            "There must be one array of values for each column name.", "values");
    }

    for (int i = 0; i < columnNames.Length; i++)
    {
        parseColumn(columnNames[i], values[i]);
    }
}

/// <summary>
///   Registers a variable in this filter and builds its codebook
///   from an array of string values.
/// </summary>
///
/// <param name="name">The name under which the variable is registered.</param>
/// <param name="values">The values of the variable. Each distinct value
///   receives its index in the array returned by <c>values.Distinct()</c>
///   as its codeword.</param>
///
// NOTE(review): this method is public but uses the camelCase name of the
// private DataTable-based parseColumn; the signature is kept as-is so that
// existing callers are unaffected.
public void parseColumn(string name, string[] values)
{
    string[] symbols = values.Distinct();

    // The column options keep a reference to this dictionary, so the
    // entries added below become part of the registered column.
    var codebook = new Dictionary<string, int>();
    Columns.Add(new Options(name, codebook));

    int codeword = 0;
    foreach (string symbol in symbols)
    {
        codebook.Add(symbol, codeword);
        codeword++;
    }
}

private void parseColumn(DataTable data, DataColumn column)
{
// If the column has string type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,5 +230,33 @@ public void ApplyTest2()

}

[TestMethod()]
public void ApplyTest3()
{
    // Build the codebook straight from an array of labels (no DataTable).
    string[] names = { "child", "adult", "elder" };

    Codification codebook = new Codification("Label", names);

    // Each label is expected to map to its position in the array
    // ("child" -> 0, "adult" -> 1, "elder" -> 2), and each position
    // is expected to map back to the same label.
    for (int expected = 0; expected < names.Length; expected++)
    {
        // Text label -> discrete symbol
        int symbol = codebook.Translate("Label", names[expected]);

        // Discrete symbol -> text label
        string label = codebook.Translate("Label", expected);

        Assert.AreEqual(expected, symbol);
        Assert.AreEqual(names[expected], label);
    }
}
}
}

0 comments on commit c9b06a5

Please sign in to comment.