From c9b06a5b0b90aff155a9e70b1a6152c3fb1f52d2 Mon Sep 17 00:00:00 2001
From: Cesar Souza
Date: Sat, 30 Nov 2013 15:49:54 +0100
Subject: [PATCH] Updating Codification filter to work without DataTables.

---
 .../Accord.Statistics/Filters/Codification.cs | 122 +++++++++++++++++-
 .../Filters/CodificationFilterTest.cs         |  28 ++++
 2 files changed, 149 insertions(+), 1 deletion(-)

diff --git a/Sources/Accord.Statistics/Filters/Codification.cs b/Sources/Accord.Statistics/Filters/Codification.cs
index a4d48ab56..b0f345639 100644
--- a/Sources/Accord.Statistics/Filters/Codification.cs
+++ b/Sources/Accord.Statistics/Filters/Codification.cs
@@ -26,6 +26,7 @@ namespace Accord.Statistics.Filters
     using System.Collections.Generic;
     using System.Data;
     using System.ComponentModel;
+    using Accord.Math;
 
     /// <summary>
     ///   Codification Filter class.
@@ -198,7 +199,33 @@ public Codification(DataTable data, params string[] columns)
             this.Detect(data, columns);
         }
 
-        
+        /// <summary>
+        ///   Creates a new Codification Filter.
+        /// </summary>
+        ///
+        public Codification(string columnName, params string[] values)
+        {
+            parseColumn(columnName, values);
+        }
+
+        /// <summary>
+        ///   Creates a new Codification Filter.
+        /// </summary>
+        ///
+        public Codification(string[] columnNames, string[][] values)
+        {
+            Detect(columnNames, values);
+        }
+
+        /// <summary>
+        ///   Creates a new Codification Filter.
+        /// </summary>
+        ///
+        public Codification(string columnName, string[][] values)
+        {
+            Detect(columnName, values);
+        }
+
 
         /// <summary>
         ///   Translates a value of a given variable
@@ -300,6 +327,57 @@ public int[] Translate(string[] columnNames, string[] values)
             return result;
         }
 
+        /// <summary>
+        ///   Translates a value of the given variables
+        ///   into their integer (codeword) representation.
+        /// </summary>
+        ///
+        /// <param name="columnName">The names of the variable's data column.</param>
+        /// <param name="values">The values to be translated.</param>
+        ///
+        /// <returns>An array of integers in which each integer
+        ///   uniquely identifies the given value for the given
+        ///   variables.</returns>
+        ///
+        public int[] Translate(string columnName, string[] values)
+        {
+            int[] result = new int[values.Length];
+
+            Options options = this.Columns[columnName];
+            for (int i = 0; i < result.Length; i++)
+                result[i] = options.Mapping[values[i]];
+
+            return result;
+        }
+
+        /// <summary>
+        ///   Translates a value of the given variables
+        ///   into their integer (codeword) representation.
+        /// </summary>
+        ///
+        /// <param name="columnName">The names of the variable's data column.</param>
+        /// <param name="values">The values to be translated.</param>
+        ///
+        /// <returns>An array of integers in which each integer
+        ///   uniquely identifies the given value for the given
+        ///   variables.</returns>
+        ///
+        public int[][] Translate(string columnName, string[][] values)
+        {
+            int[][] result = new int[values.Length][];
+
+            Options options = this.Columns[columnName];
+
+            for (int i = 0; i < result.Length; i++)
+            {
+                result[i] = new int[values[i].Length];
+                for (int j = 0; j < result[i].Length; j++)
+                    result[i][j] = options.Mapping[values[i][j]];
+            }
+
+            return result;
+        }
+
         /// <summary>
         ///   Translates an integer (codeword) representation of
         ///   the value of a given variable into its original
@@ -323,6 +401,26 @@ public string Translate(string columnName, int codeword)
             return null;
         }
 
+        /// <summary>
+        ///   Translates an integer (codeword) representation of
+        ///   the value of a given variable into its original
+        ///   value.
+        /// </summary>
+        ///
+        /// <param name="columnName">The name of the variable's data column.</param>
+        /// <param name="codewords">The codeword to be translated.</param>
+        ///
+        /// <returns>The original meaning of the given codeword.</returns>
+        ///
+        public string[] Translate(string columnName, int[] codewords)
+        {
+            string[] result = new string[codewords.Length];
+            for (int i = 0; i < result.Length; i++)
+                result[i] = Translate(columnName, codewords[i]);
+
+            return result;
+        }
+
         /// <summary>
         ///   Translates the integer (codeword) representations of
         ///   the values of the given variables into their original
@@ -431,6 +529,28 @@ public void Detect(DataTable data)
                 parseColumn(data, column);
         }
 
+        public void Detect(string columnName, string[][] values)
+        {
+            parseColumn(columnName, values.Reshape(0));
+        }
+
+        public void Detect(string[] columnNames, string[][] values)
+        {
+            for (int i = 0; i < columnNames.Length; i++)
+                parseColumn(columnNames[i], values[i]);
+        }
+
+        public void parseColumn(string name, string[] values)
+        {
+            string[] distinct = values.Distinct();
+
+            var map = new Dictionary<string, int>();
+            Columns.Add(new Options(name, map));
+
+            for (int j = 0; j < distinct.Length; j++)
+                map.Add(distinct[j], j);
+        }
+
         private void parseColumn(DataTable data, DataColumn column)
         {
             // If the column has string type
diff --git a/Sources/Accord.Tests/Accord.Tests.Statistics/Filters/CodificationFilterTest.cs b/Sources/Accord.Tests/Accord.Tests.Statistics/Filters/CodificationFilterTest.cs
index 01ce3186d..a496d058b 100644
--- a/Sources/Accord.Tests/Accord.Tests.Statistics/Filters/CodificationFilterTest.cs
+++ b/Sources/Accord.Tests/Accord.Tests.Statistics/Filters/CodificationFilterTest.cs
@@ -230,5 +230,33 @@ public void ApplyTest2()
 
         }
 
+        [TestMethod()]
+        public void ApplyTest3()
+        {
+
+            string[] names = { "child", "adult", "elder" };
+
+            Codification codebook = new Codification("Label", names);
+
+
+            // After that, we can use the codebook to "translate"
+            // the text labels into discrete symbols, such as:
+
+            int a = codebook.Translate("Label", "child"); // returns 0
+            int b = codebook.Translate("Label", "adult"); // returns 1
+            int c = codebook.Translate("Label", "elder"); // returns 2
+
+            // We can also do the reverse:
+            string labela = codebook.Translate("Label", 0); // returns "child"
+            string labelb = codebook.Translate("Label", 1); // returns "adult"
+            string labelc = codebook.Translate("Label", 2); // returns "elder"
+
+            Assert.AreEqual(0, a);
+            Assert.AreEqual(1, b);
+            Assert.AreEqual(2, c);
+            Assert.AreEqual("child", labela);
+            Assert.AreEqual("adult", labelb);
+            Assert.AreEqual("elder", labelc);
+        }
     }
 }