Skip to content

Commit

Permalink
vstat -s
Browse files Browse the repository at this point in the history
  • Loading branch information
takenori-y committed Nov 1, 2023
1 parent 914c168 commit 8163414
Show file tree
Hide file tree
Showing 8 changed files with 243 additions and 37 deletions.
15 changes: 14 additions & 1 deletion include/SPTK/math/statistics_accumulation.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ class StatisticsAccumulation {
* @return True on success, false on failure.
*/
bool GetFirst(const StatisticsAccumulation::Buffer& buffer,
std::vector<double>* first) const;
std::vector<double>* first) const;

/**
* @param[in] buffer Buffer.
Expand Down Expand Up @@ -205,6 +205,19 @@ class StatisticsAccumulation {
bool Run(const std::vector<double>& data,
StatisticsAccumulation::Buffer* buffer) const;

/**
* Merge precomputed statistics into the buffer.
*
* If the buffer holds no data yet (its zeroth order statistics is zero), the
* given statistics are copied into the buffer as they are. Otherwise they are
* combined with the statistics already accumulated in the buffer.
*
* The call fails, among other cases, when the accumulator is invalid, when
* @c buffer is NULL, or, for a non-empty buffer, when @c num_data is not
* positive or the dimensions of @c first / @c second differ from those held
* by the buffer.
*
* @param[in] num_data Number of data.
* @param[in] first First order statistics.
* @param[in] second Second order statistics.
* @param[in,out] buffer Buffer.
* @return True on success, false on failure.
*/
bool Merge(int num_data, const std::vector<double>& first,
const SymmetricMatrix& second,
StatisticsAccumulation::Buffer* buffer) const;

private:
const int num_order_;
const int num_statistics_order_;
Expand Down
17 changes: 17 additions & 0 deletions include/SPTK/utils/misc_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,23 @@ bool ComputeFirstOrderRegressionCoefficients(int n,
bool ComputeSecondOrderRegressionCoefficients(
int n, std::vector<double>* coefficients);

/**
* Compute lower and upper bounds.
*
* NOTE(review): @c mean and @c variance are taken by value, so every call
* copies both vectors. Passing them by const reference would avoid the
* copies, but the declaration and the definition must be changed together.
*
* @param[in] confidence_level Confidence level (presumably in percent;
*            confirm against the definition).
* @param[in] num_data Number of data.
* @param[in] mean Mean vector.
* @param[in] variance Variance vector.
* @param[out] lower_bound Lower bound.
* @param[out] upper_bound Upper bound.
* @return True on success, false on failure.
*/
bool ComputeLowerAndUpperBounds(double confidence_level, int num_data,
const std::vector<double> mean,
const std::vector<double> variance,
std::vector<double>* lower_bound,
std::vector<double>* upper_bound);

} // namespace sptk

#endif // SPTK_UTILS_MISC_UTILS_H_
2 changes: 1 addition & 1 deletion src/main/imsvq.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ void PrintUsage(std::ostream* stream) {
*/
int main(int argc, char* argv[]) {
int num_order(kDefaultNumOrder);
std::vector<char*> codebook_vectors_file;
std::vector<const char*> codebook_vectors_file;

for (;;) {
const int option_char(getopt_long(argc, argv, "l:m:s:h", NULL, NULL));
Expand Down
2 changes: 1 addition & 1 deletion src/main/msvq.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ void PrintUsage(std::ostream* stream) {
*/
int main(int argc, char* argv[]) {
int num_order(kDefaultNumOrder);
std::vector<char*> codebook_vectors_file;
std::vector<const char*> codebook_vectors_file;

for (;;) {
const int option_char(getopt_long(argc, argv, "l:m:s:h", NULL, NULL));
Expand Down
118 changes: 89 additions & 29 deletions src/main/vstat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ enum OutputFormats {
kCorrelation,
kPrecision,
kMeanAndLowerAndUpperBounds,
kForMerge,
kSufficientStatistics,
kNumOutputFormats
};

Expand Down Expand Up @@ -68,7 +68,8 @@ void PrintUsage(std::ostream* stream) {
*stream << " 4 (correlation)" << std::endl;
*stream << " 5 (precision)" << std::endl;
*stream << " 6 (mean and lower/upper bounds)" << std::endl;
*stream << " 7 (statistics for merge)" << std::endl;
*stream << " 7 (sufficient statistics)" << std::endl;
*stream << " -s s : statistics file (string)[" << std::setw(5) << std::right << "N/A" << "]" << std::endl; // NOLINT
*stream << " -d : output only diagonal ( bool)[" << std::setw(5) << std::right << sptk::ConvertBooleanToString(kDefaultOutputOnlyDiagonalElementsFlag) << "]" << std::endl; // NOLINT
*stream << " elements" << std::endl;
*stream << " -e : use a neumerically ( bool)[" << std::setw(5) << std::right << sptk::ConvertBooleanToString(kDefaultNeumericallyStableFlag) << "]" << std::endl; // NOLINT
Expand All @@ -80,6 +81,7 @@ void PrintUsage(std::ostream* stream) {
*stream << " statistics (double)" << std::endl;
*stream << " notice:" << std::endl;
*stream << " -d is valid only if o = 0 or o = 2" << std::endl;
*stream << " -s can be specified multiple times" << std::endl;
*stream << std::endl;
*stream << " SPTK: version " << sptk::kVersion << std::endl;
*stream << std::endl;
Expand Down Expand Up @@ -160,37 +162,27 @@ bool OutputStatistics(const sptk::StatisticsAccumulation& accumulation,
}

if (kMeanAndLowerAndUpperBounds == output_format) {
int num_vector;
if (!accumulation.GetNumData(buffer, &num_vector)) {
return false;
}

const int degrees_of_freedom(num_vector - 1);
if (0 == degrees_of_freedom) {
return false;
}
double t;
if (!sptk::ComputePercentagePointOfTDistribution(
0.5 * (1.0 - confidence_level / 100.0), degrees_of_freedom, &t)) {
int num_data;
if (!accumulation.GetNumData(buffer, &num_data)) {
return false;
}
std::vector<double> mean(vector_length);
std::vector<double> variance(vector_length);
if (!accumulation.GetMean(buffer, &mean)) {
return false;
}
std::vector<double> variance(vector_length);
if (!accumulation.GetDiagonalCovariance(buffer, &variance)) {
return false;
}

const double inverse_degrees_of_freedom(1.0 / degrees_of_freedom);
std::vector<double> lower_bound(vector_length);
std::vector<double> upper_bound(vector_length);
for (int i(0); i < vector_length; ++i) {
const double error(std::sqrt(variance[i] * inverse_degrees_of_freedom));
lower_bound[i] = mean[i] - t * error;
upper_bound[i] = mean[i] + t * error;
if (!sptk::ComputeLowerAndUpperBounds(confidence_level, num_data, mean,
variance, &lower_bound,
&upper_bound)) {
return false;
}

if (!sptk::WriteStream(0, vector_length, lower_bound, &std::cout, NULL)) {
return false;
}
Expand All @@ -199,12 +191,12 @@ bool OutputStatistics(const sptk::StatisticsAccumulation& accumulation,
}
}

if (kForMerge == output_format) {
int zero;
if (!accumulation.GetNumData(buffer, &zero)) {
if (kSufficientStatistics == output_format) {
int num_data;
if (!accumulation.GetNumData(buffer, &num_data)) {
return false;
}
if (!sptk::WriteStream(static_cast<double>(zero), &std::cout)) {
if (!sptk::WriteStream(static_cast<double>(num_data), &std::cout)) {
return false;
}

Expand Down Expand Up @@ -250,7 +242,9 @@ bool OutputStatistics(const sptk::StatisticsAccumulation& accumulation,
* \arg @c 4 correlation
* \arg @c 5 precision
* \arg @c 6 mean and lower/upper bounds
* \arg @c 7 stats for merge
* \arg @c 7 sufficient statistics
* - @b -s @e str
* - statistics file
* - @b -d
* - output only diagonal elements
* - @b -e
Expand Down Expand Up @@ -356,6 +350,14 @@ bool OutputStatistics(const sptk::StatisticsAccumulation& accumulation,
* # 2, 7
* @endcode
*
* @code{.sh}
* cat data1.d data2.d | vstat -o 7 > data12.stat
* cat data3.d data4.d | vstat -o 7 > data34.stat
* echo | vstat -s data12.stat -s data34.stat -o 1 > data.mean
* # equivalent to the following line
* cat data?.d | vstat -o 1 > data.mean
* @endcode
*
* @param[in] argc Number of arguments.
* @param[in] argv Argument vector.
* @return 0 on success, 1 on failure.
Expand All @@ -365,11 +367,13 @@ int main(int argc, char* argv[]) {
int output_interval(kMagicNumberForEndOfFile);
double confidence_level(kDefaultConfidenceLevel);
OutputFormats output_format(kDefaultOutputFormat);
std::vector<const char*> statistics_file;
bool outputs_only_diagonal_elements(kDefaultOutputOnlyDiagonalElementsFlag);
bool neumerically_stable(kDefaultNeumericallyStableFlag);

for (;;) {
const int option_char(getopt_long(argc, argv, "l:m:t:c:o:deh", NULL, NULL));
const int option_char(
getopt_long(argc, argv, "l:m:t:c:o:s:deh", NULL, NULL));
if (-1 == option_char) break;

switch (option_char) {
Expand Down Expand Up @@ -433,6 +437,10 @@ int main(int argc, char* argv[]) {
output_format = static_cast<OutputFormats>(tmp);
break;
}
case 's': {
statistics_file.push_back(optarg);
break;
}
case 'd': {
outputs_only_diagonal_elements = true;
break;
Expand All @@ -452,6 +460,13 @@ int main(int argc, char* argv[]) {
}
}

if (kMagicNumberForEndOfFile != output_interval && !statistics_file.empty()) {
std::ostringstream error_message;
error_message << "Cannot specify -t option and -s option at the same time";
sptk::PrintErrorMessage("vstat", error_message);
return 1;
}

const int num_input_files(argc - optind);
if (1 < num_input_files) {
std::ostringstream error_message;
Expand Down Expand Up @@ -501,6 +516,51 @@ int main(int argc, char* argv[]) {
return 1;
}

for (const char* file : statistics_file) {
std::ifstream ifs;
ifs.open(file, std::ios::in | std::ios::binary);
if (ifs.fail()) {
std::ostringstream error_message;
error_message << "Cannot open file " << file;
sptk::PrintErrorMessage("vstat", error_message);
return 1;
}
std::istream& input_stream(ifs);

double num_data;
if (!sptk::ReadStream(&num_data, &input_stream)) {
std::ostringstream error_message;
error_message << "Failed to read statistics (zeroth order)";
sptk::PrintErrorMessage("vstat", error_message);
return 1;
}

std::vector<double> first(vector_length);
if (!sptk::ReadStream(false, 0, 0, vector_length, &first, &input_stream,
NULL)) {
std::ostringstream error_message;
error_message << "Failed to read statistics (first order)";
sptk::PrintErrorMessage("vstat", error_message);
return 1;
}

sptk::SymmetricMatrix second(vector_length);
if (!sptk::ReadStream(&second, &input_stream)) {
std::ostringstream error_message;
error_message << "Failed to read statistics (second order)";
sptk::PrintErrorMessage("vstat", error_message);
return 1;
}

if (!accumulation.Merge(static_cast<int>(num_data), first, second,
&buffer)) {
std::ostringstream error_message;
error_message << "Failed to merge statistics";
sptk::PrintErrorMessage("vstat_merge", error_message);
return 1;
}
}

std::vector<double> data(vector_length);
for (int vector_index(1);
sptk::ReadStream(false, 0, 0, vector_length, &data, &input_stream, NULL);
Expand All @@ -525,15 +585,15 @@ int main(int argc, char* argv[]) {
}
}

int num_actual_vector;
if (!accumulation.GetNumData(buffer, &num_actual_vector)) {
int num_data;
if (!accumulation.GetNumData(buffer, &num_data)) {
std::ostringstream error_message;
error_message << "Failed to accumulate statistics";
sptk::PrintErrorMessage("vstat", error_message);
return 1;
}

if (kMagicNumberForEndOfFile == output_interval && 0 < num_actual_vector) {
if (kMagicNumberForEndOfFile == output_interval && 0 < num_data) {
if (!OutputStatistics(accumulation, buffer, vector_length, output_format,
confidence_level, outputs_only_diagonal_elements)) {
std::ostringstream error_message;
Expand Down
69 changes: 69 additions & 0 deletions src/math/statistics_accumulation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -329,4 +329,73 @@ bool StatisticsAccumulation::Run(const std::vector<double>& data,
return true;
}

/**
 * Merge precomputed statistics into the buffer.
 *
 * When the buffer is empty the given statistics are adopted as they are.
 * Otherwise they are combined with the accumulated ones. In the numerically
 * stable mode the first order statistics are merged as a weighted average
 * and the second order statistics receive the pairwise-update correction
 * term m * n / (m + n) * (mu1 - mu2)(mu1 - mu2)^T; in the plain mode both
 * are simply summed.
 *
 * @param[in] num_data Number of data (must not be negative, and must be
 *            positive when the buffer already holds data).
 * @param[in] first First order statistics.
 * @param[in] second Second order statistics.
 * @param[in,out] buffer Buffer.
 * @return True on success, false on failure.
 */
bool StatisticsAccumulation::Merge(
    int num_data, const std::vector<double>& first,
    const SymmetricMatrix& second,
    StatisticsAccumulation::Buffer* buffer) const {
  // A negative count never describes real data; reject it before it can be
  // adopted verbatim by the empty-buffer shortcut below.
  if (!is_valid_ || NULL == buffer || num_data < 0) {
    return false;
  }

  // Nothing accumulated so far: take over the given statistics unchanged.
  if (0 == buffer->zeroth_order_statistics_) {
    buffer->zeroth_order_statistics_ = num_data;
    buffer->first_order_statistics_ = first;
    buffer->second_order_statistics_ = second;
    return true;
  }

  if (num_data <= 0 || first.size() != buffer->first_order_statistics_.size() ||
      second.GetNumDimension() !=
          buffer->second_order_statistics_.GetNumDimension()) {
    return false;
  }

  const bool needs_first(1 <= num_statistics_order_);
  const bool needs_second(2 <= num_statistics_order_);

  // The stable second order update needs the first order statistics as they
  // were before this merge, so keep a copy only in that case.
  std::vector<double> prev_first_order_statistics;
  if (needs_second && numerically_stable_) {
    prev_first_order_statistics = buffer->first_order_statistics_;
  }

  const int m(buffer->zeroth_order_statistics_);
  const int n(num_data);
  const int mn(m + n);
  buffer->zeroth_order_statistics_ = mn;

  if (needs_first) {
    if (numerically_stable_) {
      // Weighted average of the two first order statistics.
      const double a(static_cast<double>(n) / mn);
      const double b(static_cast<double>(m) / mn);
      std::transform(first.begin(), first.end(),
                     buffer->first_order_statistics_.begin(),
                     buffer->first_order_statistics_.begin(),
                     [a, b](double x, double y) { return a * x + b * y; });
    } else {
      std::transform(
          first.begin(), first.end(), buffer->first_order_statistics_.begin(),
          buffer->first_order_statistics_.begin(), std::plus<double>());
    }
  }

  if (needs_second) {
    if (numerically_stable_) {
      const double* mu1(&(prev_first_order_statistics[0]));
      const double* mu2(&(first[0]));
      // Multiply in double: m * n evaluated in int overflows for large counts.
      const double c(static_cast<double>(m) * n / mn);
      for (int i(0); i <= num_order_; ++i) {
        const double diff_i(mu1[i] - mu2[i]);
        for (int j(diagonal_ ? i : 0); j <= i; ++j) {
          // Product of differences avoids the cancellation that the expanded
          // form mu1*mu1 + mu2*mu2 - mu1*mu2 - mu2*mu1 suffers when the two
          // first order statistics are close to each other.
          buffer->second_order_statistics_[i][j] +=
              second[i][j] + c * diff_i * (mu1[j] - mu2[j]);
        }
      }
    } else {
      for (int i(0); i <= num_order_; ++i) {
        for (int j(diagonal_ ? i : 0); j <= i; ++j) {
          buffer->second_order_statistics_[i][j] += second[i][j];
        }
      }
    }
  }

  return true;
}

} // namespace sptk
Loading

0 comments on commit 8163414

Please sign in to comment.