Skip to content

Commit

Permalink
issue #32 - date anonymizer
Browse files Browse the repository at this point in the history
  • Loading branch information
pounard committed Mar 14, 2024
1 parent c6e9122 commit 016d3b3
Show file tree
Hide file tree
Showing 3 changed files with 420 additions and 1 deletion.
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"php": ">=8.1",
"doctrine/doctrine-bundle": "^2.10.0",
"doctrine/orm": "^2.15",
"makinacorpus/query-builder": "^0.3.1",
"makinacorpus/query-builder": "^1.1.2",
"symfony/config": "^6.0|^7.0",
"symfony/console": "^6.0|^7.0",
"symfony/dependency-injection": "^6.0|^7.0",
Expand Down
199 changes: 199 additions & 0 deletions src/Anonymization/Anonymizer/Core/DateAnonymizer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
<?php

declare(strict_types=1);

namespace MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Core;

use Doctrine\DBAL\Platforms\AbstractMySQLPlatform;
use Doctrine\DBAL\Platforms\SqlitePlatform;
use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\AbstractAnonymizer;
use MakinaCorpus\DbToolsBundle\Attribute\AsAnonymizer;
use MakinaCorpus\QueryBuilder\Query\Update;

#[AsAnonymizer(
    name: 'date',
    pack: 'core',
    description: <<<TXT
    Anonymize a column by changing the date it contains.
    You can either choose a 'min' and a 'max' date, case in which a random date will
    be selected between these bounds, or alternatively set a 'delta' which must be
    a valid date interval string (e.g. "1 week", "1 day 10 hours", ...).
    You should set the 'format' (default: 'datetime') value, this anonymizator can
    work with 'datetime' or 'date' formats.
    TXT
)]
class DateAnonymizer extends AbstractAnonymizer
{
    /**
     * Largest 'min'..'max' span, in seconds, for which second granularity is
     * used. Above this (roughly 68 years) the span is expressed in hours so
     * that the random integer bounds stay within a signed 32-bit integer.
     */
    private const MAX_SPAN_IN_SECONDS = 0x7FFFFFFF;

    /**
     * Configure the UPDATE query so the target column receives a random date.
     *
     * Two mutually exclusive modes are supported:
     *  - 'min' and 'max' options: a random date is picked between both bounds,
     *  - 'delta' option: the existing value is shifted by a random amount
     *    within [-delta, +delta].
     *
     * @inheritdoc
     *
     * @throws \InvalidArgumentException
     *   When 'format' is unknown, when only one of 'min'/'max' is given, when
     *   'delta' is combined with 'min'/'max', when a date or interval cannot
     *   be parsed, or when no mode is configured at all.
     */
    public function anonymize(Update $update): void
    {
        $format = $this->options->get('format', 'datetime');
        if (!\in_array($format, ['date', 'datetime'], true)) {
            throw new \InvalidArgumentException(\sprintf("'format' value is invalid, expected 'date' or 'datetime', got '%s'.", $format));
        }

        // @todo Propose getInt(), getDate(), getDateInterval() ... on Option class.
        $rawMin = $this->options->get('min');
        $rawMax = $this->options->get('max');
        $rawDelta = $this->options->get('delta');

        // Parse each bound as soon as it is known to be set, so that an
        // invalid value is reported before a missing counterpart.
        $min = $rawMin ? $this->parseDate($rawMin, 'min') : null;
        $max = $rawMax ? $this->parseDate($rawMax, 'max') : null;

        if ((null === $min) !== (null === $max)) {
            throw new \InvalidArgumentException("You must specify both 'min' and 'max' boundaries.");
        }

        if (null !== $min && null !== $max) {
            if ($rawDelta) {
                throw new \InvalidArgumentException("'delta' option cannot be specified if 'min' and 'max' are in use.");
            }

            $this->anonymizeWithDateRange($update, $format, $min, $max);

            return;
        }

        if ($rawDelta) {
            $this->anonymizeWithDelta($update, $format, $this->parseInterval($rawDelta));

            return;
        }

        throw new \InvalidArgumentException("Providing either the 'delta' option, or 'min' and 'max' options is required.");
    }

    /**
     * Pick a random date between two bounds.
     *
     * The range is turned into "middle date +/- half of the span", which lets
     * the delta based implementation be reused with a constant reference date.
     */
    private function anonymizeWithDateRange(Update $update, string $format, \DateTimeImmutable $min, \DateTimeImmutable $max): void
    {
        // Tolerate reversed bounds, otherwise generated dates would fall
        // outside of the requested range.
        if ($max < $min) {
            [$min, $max] = [$max, $min];
        }

        $diff = $min->diff($max);
        // 'days' is always populated on intervals created by diff(); it is
        // the exact day count, unlike a 30-days-per-month approximation.
        $days = (int) $diff->days;

        if ('date' === $format) {
            $unit = 'day';
            $span = $days;
        } else {
            $seconds = $days * 86400 + $diff->h * 3600 + $diff->i * 60 + $diff->s;

            if ($seconds <= self::MAX_SPAN_IN_SECONDS) {
                $unit = 'second';
                $span = $seconds;
            } else {
                // We hit UNIX timestamp maximum integer limit, and may cause
                // int overflow or other kind of crashes server side.
                $unit = 'hour';
                $span = $days * 24 + $diff->h;
            }
        }

        // Cut in half to compute middle date. Integer division is mandatory:
        // an odd span would otherwise yield a float, which is rejected by the
        // int typed parameter below under strict types.
        $delta = \intdiv($span, 2);
        $middleDate = $min->add(\DateInterval::createFromDateString(\sprintf("%d %s", $delta, $unit)));

        $this->anonymizeWithDeltaAndReferenceDate($update, $format, $middleDate, $delta, $unit);
    }

    /**
     * Shift the existing column value by a random amount within +/- delta.
     *
     * @throws \InvalidArgumentException
     *   When the interval has no usable component for the given format.
     */
    private function anonymizeWithDelta(Update $update, string $format, \DateInterval $delta): void
    {
        // @todo I wish for a better alternative...
        // query-builder can deal with \DateInterval by itself, but we are
        // randomizing values here, so we need to be able to apply a single
        // figure random delta, in order to be able to use SQL random at the
        // right place, otherwise the algorithm would be very complex..
        //
        // The smallest non-zero component of the interval decides the unit,
        // larger components are converted to it using 30 days months and
        // 360 days years. Time components are ignored for the 'date' format.
        $withTime = 'date' !== $format;

        if ($withTime && $delta->s) {
            // Please, never use seconds...
            $amount = $delta->s + $delta->i * 60 + $delta->h * 3600 + $delta->d * 86400 + $delta->m * 2592000 + $delta->y * 31104000;
            $unit = 'second';
        } elseif ($withTime && $delta->i) {
            $amount = $delta->i + $delta->h * 60 + $delta->d * 1440 + $delta->m * 43200 + $delta->y * 518400;
            $unit = 'minute';
        } elseif ($withTime && $delta->h) {
            $amount = $delta->h + $delta->d * 24 + $delta->m * 720 + $delta->y * 8640;
            $unit = 'hour';
        } elseif ($delta->d) {
            $amount = $delta->d + $delta->m * 30 + $delta->y * 360;
            $unit = 'day';
        } elseif ($delta->m) {
            $amount = $delta->m + $delta->y * 12;
            $unit = 'month';
        } elseif ($delta->y) {
            $amount = $delta->y;
            $unit = 'year';
        } else {
            throw new \InvalidArgumentException("'delta' option interval is empty.");
        }

        $columnExpr = $update->expression()->column($this->columnName, $this->tableName);

        $this->anonymizeWithDeltaAndReferenceDate($update, $format, $columnExpr, $amount, $unit);
    }

    /**
     * Set the column to "reference date + random(-delta, +delta) unit".
     *
     * @param mixed $referenceDate
     *   Either a constant date or a column expression, passed as-is to the
     *   query builder dateAdd() expression.
     * @param int $delta
     *   Maximum absolute offset, expressed in $unit.
     * @param string $unit
     *   Interval unit name, such as 'second', 'hour' or 'day'.
     */
    private function anonymizeWithDeltaAndReferenceDate(Update $update, string $format, mixed $referenceDate, int $delta, string $unit): void
    {
        $expr = $update->expression();
        $platform = $this->connection->getDatabasePlatform();

        $type = match (true) {
            'date' === $format => 'date',
            $platform instanceof AbstractMySQLPlatform => 'datetime',
            default => 'timestamp',
        };

        $dateAddExpr = $expr->dateAdd(
            $referenceDate,
            $expr->intervalUnit(
                // This additional cast is necessary for SQLite only because it
                // will mix up int addition and string concatenation, causing
                // the interval string to be malformed. For all other vendors,
                // it's a no-op.
                $expr->cast(
                    $this->getRandomIntExpression(
                        $delta,
                        0 - $delta,
                    ),
                    'text'
                ),
                $unit
            ),
        );

        // The result is not cast to a date type on SQLite, it is on every
        // other platform.
        $valueExpr = $platform instanceof SqlitePlatform
            ? $dateAddExpr
            : $expr->cast($dateAddExpr, $type);

        $update->set(
            $this->columnName,
            $this->getSetIfNotNullExpression($valueExpr)
        );
    }

    /**
     * Parse a user provided date option.
     *
     * @param string $value
     *   Any string accepted by the \DateTimeImmutable constructor.
     * @param string $option
     *   Option name, used in the error message only.
     *
     * @throws \InvalidArgumentException
     *   When the value cannot be parsed as a date.
     */
    private function parseDate(string $value, string $option): \DateTimeImmutable
    {
        try {
            return new \DateTimeImmutable($value);
        } catch (\Exception $e) {
            throw new \InvalidArgumentException(
                \sprintf(
                    "'%s' value is invalid, expected a valid date time format, got '%s'",
                    $option,
                    $value,
                ),
                previous: $e,
            );
        }
    }

    /**
     * Parse a user provided interval option.
     *
     * An ISO 8601 duration (e.g. "P1D") is tried first, then a relative date
     * string (e.g. "1 week").
     *
     * @throws \InvalidArgumentException
     *   When the value matches neither format.
     */
    private function parseInterval(string $value): \DateInterval
    {
        try {
            return new \DateInterval($value);
        } catch (\Exception) {
            // Not an ISO 8601 duration, try the relative format below.
        }

        $previous = null;
        try {
            // Returns false on failure before PHP 8.3, throws since then.
            $interval = \DateInterval::createFromDateString($value);
        } catch (\Exception $e) {
            $interval = false;
            $previous = $e;
        }

        if (!$interval) {
            throw new \InvalidArgumentException("'delta' option interval string format is invalid.", previous: $previous);
        }

        return $interval;
    }
}
Loading

0 comments on commit 016d3b3

Please sign in to comment.