From a01031303fb513d13edbea26e579b8697e24c12b Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Thu, 13 Sep 2018 23:50:09 +0200 Subject: [PATCH] Created migration which parses existing IP addresses, generating hashes and dropping already used IPs --- composer.json | 1 + data/migrations/Version20180913205455.php | 94 +++++++++++++++++++ docs/swagger/definitions/Visit.json | 16 +++- docs/swagger/definitions/VisitLocation.json | 26 +++++ .../v1_short-codes_{shortCode}_visits.json | 20 ++-- module/Common/src/Util/IpAddress.php | 24 ++--- module/Core/src/Entity/Visit.php | 12 ++- 7 files changed, 169 insertions(+), 24 deletions(-) create mode 100644 data/migrations/Version20180913205455.php create mode 100644 docs/swagger/definitions/VisitLocation.json diff --git a/composer.json b/composer.json index 9d46a882..6a2c3b14 100644 --- a/composer.json +++ b/composer.json @@ -14,6 +14,7 @@ "require": { "php": "^7.1", "ext-json": "*", + "ext-pdo": "*", "acelaya/ze-content-based-error-handler": "^2.2", "cocur/slugify": "^3.0", "doctrine/cache": "^1.6", diff --git a/data/migrations/Version20180913205455.php b/data/migrations/Version20180913205455.php new file mode 100644 index 00000000..78c07855 --- /dev/null +++ b/data/migrations/Version20180913205455.php @@ -0,0 +1,94 @@ +getTable('visits'); + if ($visits->hasColumn('remote_addr_hash')) { + return; + } + + $visits->addColumn('remote_addr_hash', Type::STRING, [ + 'notnull' => false, + 'length' => 128, + ]); + } + + /** + * @param Schema $schema + * @throws DBALException + */ + public function postUp(Schema $schema) + { + $qb = $this->connection->createQueryBuilder(); + $qb->select('id', 'remote_addr', 'visit_location_id') + ->from('visits'); + $st = $this->connection->executeQuery($qb->getSQL()); + + $qb = $this->connection->createQueryBuilder(); + $qb->update('visits', 'v') + ->set('v.remote_addr_hash', ':hash') + ->set('v.remote_addr', ':obfuscatedAddr') + ->where('v.id=:id'); + + while ($row = 
$st->fetch(\PDO::FETCH_ASSOC)) { + $addr = $row['remote_addr'] ?? null; + if ($addr === null) { + continue; + } + + $qb->setParameters([ + 'id' => $row['id'], + 'hash' => \hash('sha256', $addr), + 'obfuscatedAddr' => $this->determineAddress((string) $addr, $row), + ])->execute(); + } + } + + private function determineAddress(string $addr, array $row): ?string + { + // When the visit has already been located, drop the IP address + if (isset($row['visit_location_id'])) { + return null; + } + + if ($addr === IpAddress::LOCALHOST) { + return $addr; + } + + try { + return (string) IpAddress::fromString($addr)->getObfuscatedCopy(); + } catch (WrongIpException $e) { + return null; + } + } + + /** + * @param Schema $schema + * @throws SchemaException + */ + public function down(Schema $schema): void + { + $visits = $schema->getTable('visits'); + $visits->dropColumn('remote_addr_hash'); + } +} diff --git a/docs/swagger/definitions/Visit.json b/docs/swagger/definitions/Visit.json index 8e0bc944..ab80ad8a 100644 --- a/docs/swagger/definitions/Visit.json +++ b/docs/swagger/definitions/Visit.json @@ -2,17 +2,25 @@ "type": "object", "properties": { "referer": { - "type": "string" + "type": "string", + "description": "The origin from which the visit was performed" }, "date": { "type": "string", - "format": "date-time" + "format": "date-time", + "description": "The date in which the visit was performed" }, "remoteAddr": { - "type": "string" + "type": "string", + "description": "This value is deprecated and will always be null", + "deprecated": true }, "userAgent": { - "type": "string" + "type": "string", + "description": "The user agent from which the visit was performed" + }, + "visitLocation": { + "$ref": "./VisitLocation.json" } } } diff --git a/docs/swagger/definitions/VisitLocation.json b/docs/swagger/definitions/VisitLocation.json new file mode 100644 index 00000000..e4c9fc90 --- /dev/null +++ b/docs/swagger/definitions/VisitLocation.json @@ -0,0 +1,26 @@ +{ + "type": 
"object", + "properties": { + "cityName": { + "type": "string" + }, + "countryCode": { + "type": "string" + }, + "countryName": { + "type": "string" + }, + "latitude": { + "type": "string" + }, + "longitude": { + "type": "string" + }, + "regionName": { + "type": "string" + }, + "timezone": { + "type": "string" + } + } +} diff --git a/docs/swagger/paths/v1_short-codes_{shortCode}_visits.json b/docs/swagger/paths/v1_short-codes_{shortCode}_visits.json index f431fc00..ec8d372e 100644 --- a/docs/swagger/paths/v1_short-codes_{shortCode}_visits.json +++ b/docs/swagger/paths/v1_short-codes_{shortCode}_visits.json @@ -70,20 +70,28 @@ { "referer": "https://twitter.com", "date": "2015-08-20T05:05:03+04:00", - "remoteAddr": "10.20.30.40", - "userAgent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0 Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0" + "userAgent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0 Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0", + "visitLocation": null }, { "referer": "https://t.co", "date": "2015-08-20T05:05:03+04:00", - "remoteAddr": "11.22.33.44", - "userAgent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36" + "userAgent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36", + "visitLocation": { + "cityName": "Cupertino", + "countryCode": "US", + "countryName": "United States", + "latitude": "37.3042", + "longitude": "-122.0946", + "regionName": "California", + "timezone": "America/Los_Angeles" + } }, { "referer": null, "date": "2015-08-20T05:05:03+04:00", - "remoteAddr": "110.220.5.6", - "userAgent": "some_web_crawler/1.4" + "userAgent": "some_web_crawler/1.4", + "visitLocation": null } ] } diff --git a/module/Common/src/Util/IpAddress.php b/module/Common/src/Util/IpAddress.php index 
1671038f..efa6ba9e 100644 --- a/module/Common/src/Util/IpAddress.php +++ b/module/Common/src/Util/IpAddress.php @@ -27,13 +27,13 @@ final class IpAddress * @var string */ private $fourthOctet; - /** - * @var bool - */ - private $isLocalhost; - private function __construct() + private function __construct(string $firstOctet, string $secondOctet, string $thirdOctet, string $fourthOctet) { + $this->firstOctet = $firstOctet; + $this->secondOctet = $secondOctet; + $this->thirdOctet = $thirdOctet; + $this->fourthOctet = $fourthOctet; } /** @@ -49,17 +49,17 @@ final class IpAddress throw WrongIpException::fromIpAddress($address); } - $instance = new self(); - $instance->isLocalhost = $address === self::LOCALHOST; - [$instance->firstOctet, $instance->secondOctet, $instance->thirdOctet, $instance->fourthOctet] = $parts; - return $instance; + return new self(...$parts); } public function getObfuscatedCopy(): self { - $copy = clone $this; - $copy->fourthOctet = $this->isLocalhost ? $this->fourthOctet : self::OBFUSCATED_OCTET; - return $copy; + return new self( + $this->firstOctet, + $this->secondOctet, + $this->thirdOctet, + self::OBFUSCATED_OCTET + ); } public function __toString(): string diff --git a/module/Core/src/Entity/Visit.php b/module/Core/src/Entity/Visit.php index fd918587..5e2cca75 100644 --- a/module/Core/src/Entity/Visit.php +++ b/module/Core/src/Entity/Visit.php @@ -35,6 +35,7 @@ class Visit extends AbstractEntity implements \JsonSerializable private $remoteAddr; /** * @var string + * @ORM\Column(type="string", length=128, name="remote_addr_hash", nullable=true) */ private $remoteAddrHash; /** @@ -108,8 +109,9 @@ class Visit extends AbstractEntity implements \JsonSerializable private function obfuscateAddress(?string $address): ?string { - if ($address === null) { - return null; + // Localhost addresses do not need to be obfuscated + if ($address === null || $address === IpAddress::LOCALHOST) { + return $address; } try { @@ -124,6 +126,12 @@ class Visit 
extends AbstractEntity implements \JsonSerializable return $address ? \hash('sha256', $address) : null; } + public function resetObfuscatedAddr(): self + { + $this->remoteAddr = null; + return $this; + } + public function getUserAgent(): string { return $this->userAgent;