Created migration which parses existing IP addresses, generating hashes and dropping already used IPs

This commit is contained in:
Alejandro Celaya 2018-09-13 23:50:09 +02:00
parent 7808f6d182
commit a01031303f
7 changed files with 169 additions and 24 deletions

View File

@ -14,6 +14,7 @@
"require": {
"php": "^7.1",
"ext-json": "*",
"ext-pdo": "*",
"acelaya/ze-content-based-error-handler": "^2.2",
"cocur/slugify": "^3.0",
"doctrine/cache": "^1.6",

View File

@ -0,0 +1,94 @@
<?php
declare(strict_types=1);
namespace ShlinkMigrations;
use Doctrine\DBAL\DBALException;
use Doctrine\DBAL\Schema\Schema;
use Doctrine\DBAL\Schema\SchemaException;
use Doctrine\DBAL\Types\Type;
use Doctrine\Migrations\AbstractMigration;
use Shlinkio\Shlink\Common\Exception\WrongIpException;
use Shlinkio\Shlink\Common\Util\IpAddress;
/**
 * Adds the nullable `remote_addr_hash` column to the `visits` table and back-fills it.
 *
 * After the schema change is applied, every visit holding a remote address gets the SHA-256 hash of that
 * address stored in the new column. The address itself is then either dropped (visit already located, or
 * address not parseable), kept as is (localhost) or replaced by its obfuscated copy.
 */
final class Version20180913205455 extends AbstractMigration
{
    private const TABLE = 'visits';
    private const HASH_COLUMN = 'remote_addr_hash';

    /**
     * Creates the hash column, unless the table already has it.
     *
     * @param Schema $schema
     * @throws SchemaException
     */
    public function up(Schema $schema): void
    {
        $visits = $schema->getTable(self::TABLE);
        if ($visits->hasColumn(self::HASH_COLUMN)) {
            return;
        }

        // NOTE(review): the Visit entity mapping declares length=256 for this column, while 128 is used
        // here. Confirm which of the two lengths is the intended one.
        $visits->addColumn(self::HASH_COLUMN, Type::STRING, [
            'notnull' => false,
            'length' => 128,
        ]);
    }

    /**
     * Walks over all existing visits, storing the hash of their address and replacing the address itself
     * by the value returned by determineAddress(). Visits with no address are left untouched.
     *
     * @param Schema $schema
     * @throws DBALException
     */
    public function postUp(Schema $schema): void
    {
        $select = $this->connection->createQueryBuilder();
        $select->select('id', 'remote_addr', 'visit_location_id')
               ->from(self::TABLE);
        $st = $this->connection->executeQuery($select->getSQL());

        // No table alias is used on purpose. "UPDATE visits v SET v.column = ..." is not portable:
        // PostgreSQL rejects table-qualified column names inside SET, so plain column names are used.
        $update = $this->connection->createQueryBuilder();
        $update->update(self::TABLE)
               ->set(self::HASH_COLUMN, ':hash')
               ->set('remote_addr', ':obfuscatedAddr')
               ->where('id=:id');

        while ($row = $st->fetch(\PDO::FETCH_ASSOC)) {
            $addr = $row['remote_addr'] ?? null;
            if ($addr === null) {
                continue;
            }

            // The hash is always calculated over the address as it was stored, before obfuscating it
            $update->setParameters([
                'id' => $row['id'],
                'hash' => \hash('sha256', $addr),
                'obfuscatedAddr' => $this->determineAddress((string) $addr, $row),
            ])->execute();
        }
    }

    /**
     * Resolves the value that has to replace the address currently stored for a visit.
     *
     * @param string $addr The address as currently stored
     * @param array $row The fetched visit row. Only its "visit_location_id" key is read
     * @return string|null Null when the address has to be dropped, the new address value otherwise
     */
    private function determineAddress(string $addr, array $row): ?string
    {
        // When the visit has already been located, drop the IP address
        if (isset($row['visit_location_id'])) {
            return null;
        }

        // Localhost is returned untouched
        if ($addr === IpAddress::LOCALHOST) {
            return $addr;
        }

        try {
            return (string) IpAddress::fromString($addr)->getObfuscatedCopy();
        } catch (WrongIpException $e) {
            // Values which cannot be parsed as an IP address are dropped
            return null;
        }
    }

    /**
     * Removes the hash column again.
     *
     * @param Schema $schema
     * @throws SchemaException
     */
    public function down(Schema $schema): void
    {
        $visits = $schema->getTable(self::TABLE);

        // Mirrors the guard in up(), so that the column is dropped only when it is present
        if ($visits->hasColumn(self::HASH_COLUMN)) {
            $visits->dropColumn(self::HASH_COLUMN);
        }
    }
}

View File

@ -2,17 +2,25 @@
"type": "object",
"properties": {
"referer": {
"type": "string"
"type": "string",
"description": "The origin from which the visit was performed"
},
"date": {
"type": "string",
"format": "date-time"
"format": "date-time",
"description": "The date in which the visit was performed"
},
"remoteAddr": {
"type": "string"
"type": "string",
"description": "This value is deprecated and will always be null",
"deprecated": true
},
"userAgent": {
"type": "string"
"type": "string",
"description": "The user agent from which the visit was performed"
},
"visitLocation": {
"$ref": "./VisitLocation.json"
}
}
}

View File

@ -0,0 +1,26 @@
{
"type": "object",
"properties": {
"cityName": {
"type": "string"
},
"countryCode": {
"type": "string"
},
"countryName": {
"type": "string"
},
"latitude": {
"type": "string"
},
"longitude": {
"type": "string"
},
"regionName": {
"type": "string"
},
"timezone": {
"type": "string"
}
}
}

View File

@ -70,20 +70,28 @@
{
"referer": "https://twitter.com",
"date": "2015-08-20T05:05:03+04:00",
"remoteAddr": "10.20.30.40",
"userAgent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0 Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0"
"userAgent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0 Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0",
"visitLocation": null
},
{
"referer": "https://t.co",
"date": "2015-08-20T05:05:03+04:00",
"remoteAddr": "11.22.33.44",
"userAgent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
"userAgent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
"visitLocation": {
"cityName": "Cupertino",
"countryCode": "US",
"countryName": "United States",
"latitude": "37.3042",
"longitude": "-122.0946",
"regionName": "California",
"timezone": "America/Los_Angeles"
}
},
{
"referer": null,
"date": "2015-08-20T05:05:03+04:00",
"remoteAddr": "110.220.5.6",
"userAgent": "some_web_crawler/1.4"
"userAgent": "some_web_crawler/1.4",
"visitLocation": null
}
]
}

View File

@ -27,13 +27,13 @@ final class IpAddress
* @var string
*/
private $fourthOctet;
/**
* @var bool
*/
private $isLocalhost;
private function __construct()
private function __construct(string $firstOctet, string $secondOctet, string $thirdOctet, string $fourthOctet)
{
$this->firstOctet = $firstOctet;
$this->secondOctet = $secondOctet;
$this->thirdOctet = $thirdOctet;
$this->fourthOctet = $fourthOctet;
}
/**
@ -49,17 +49,17 @@ final class IpAddress
throw WrongIpException::fromIpAddress($address);
}
$instance = new self();
$instance->isLocalhost = $address === self::LOCALHOST;
[$instance->firstOctet, $instance->secondOctet, $instance->thirdOctet, $instance->fourthOctet] = $parts;
return $instance;
return new self(...$parts);
}
public function getObfuscatedCopy(): self
{
$copy = clone $this;
$copy->fourthOctet = $this->isLocalhost ? $this->fourthOctet : self::OBFUSCATED_OCTET;
return $copy;
return new self(
$this->firstOctet,
$this->secondOctet,
$this->thirdOctet,
self::OBFUSCATED_OCTET
);
}
public function __toString(): string

View File

@ -35,6 +35,7 @@ class Visit extends AbstractEntity implements \JsonSerializable
private $remoteAddr;
/**
* @var string
* @ORM\Column(type="string", length=256, name="remote_addr_hash", nullable=true)
*/
private $remoteAddrHash;
/**
@ -108,8 +109,9 @@ class Visit extends AbstractEntity implements \JsonSerializable
private function obfuscateAddress(?string $address): ?string
{
if ($address === null) {
return null;
// Localhost addresses do not need to be obfuscated
if ($address === null || $address === IpAddress::LOCALHOST) {
return $address;
}
try {
@ -124,6 +126,12 @@ class Visit extends AbstractEntity implements \JsonSerializable
return $address ? \hash('sha256', $address) : null;
}
/**
 * Clears the remote address held by this visit, by setting it to null.
 *
 * @return self This same instance, so that calls can be chained
 */
public function resetObfuscatedAddr(): self
{
$this->remoteAddr = null;
return $this;
}
public function getUserAgent(): string
{
return $this->userAgent;