Ensured only pending visits are imported when processing a short URL which already has imported visits

This commit is contained in:
Alejandro Celaya 2021-04-11 19:56:00 +02:00
parent 09414a8834
commit 9a78d1585d
7 changed files with 49 additions and 27 deletions

View File

@ -7,6 +7,8 @@ namespace Shlinkio\Shlink\Core\Entity;
use Cake\Chronos\Chronos;
use Doctrine\Common\Collections\ArrayCollection;
use Doctrine\Common\Collections\Collection;
use Doctrine\Common\Collections\Criteria;
use Doctrine\Common\Collections\Selectable;
use Shlinkio\Shlink\Common\Entity\AbstractEntity;
use Shlinkio\Shlink\Core\Exception\ShortCodeCannotBeRegeneratedException;
use Shlinkio\Shlink\Core\Model\ShortUrlEdit;
@ -164,6 +166,14 @@ class ShortUrl extends AbstractEntity
return count($this->visits);
}
public function importedVisitsCount(): int
{
/** @var Selectable $visits */
$visits = $this->visits;
$criteria = Criteria::create()->where(Criteria::expr()->eq('type', Visit::TYPE_IMPORTED));
return count($visits->matching($criteria));
}
/**
* @param Collection|Visit[] $visits
* @internal

View File

@ -16,6 +16,7 @@ use Shlinkio\Shlink\Importer\Model\ImportedShlinkVisit;
class Visit extends AbstractEntity implements JsonSerializable
{
public const TYPE_VALID_SHORT_URL = 'valid_short_url';
public const TYPE_IMPORTED = 'imported';
public const TYPE_INVALID_SHORT_URL = 'invalid_short_url';
public const TYPE_BASE_URL = 'base_url';
public const TYPE_REGULAR_404 = 'regular_404';
@ -44,9 +45,9 @@ class Visit extends AbstractEntity implements JsonSerializable
return $instance;
}
public static function fromImport(ImportedShlinkVisit $importedVisit, ShortUrl $shortUrl): self
public static function fromImport(ShortUrl $shortUrl, ImportedShlinkVisit $importedVisit): self
{
$instance = new self($shortUrl, self::TYPE_VALID_SHORT_URL);
$instance = new self($shortUrl, self::TYPE_IMPORTED);
$instance->userAgent = $importedVisit->userAgent();
$instance->referer = $importedVisit->referer();
$instance->date = Chronos::instance($importedVisit->date());

View File

@ -51,7 +51,7 @@ class ImportedLinksProcessor implements ImportedLinksProcessorInterface
$longUrl = $importedUrl->longUrl();
// Skip already imported URLs
if ($shortUrlRepo->importedUrlExists($importedUrl)) {
if ($shortUrlRepo->findOneByImportedUrl($importedUrl) !== null) {
// TODO If the URL exists, allow to merge visits instead of just skipping completely
$io->text(sprintf('%s: <comment>Skipped</comment>', $longUrl));
continue;
@ -74,6 +74,11 @@ class ImportedLinksProcessor implements ImportedLinksProcessorInterface
}
}
// private function getOrCreateShortUrl(ImportedShlinkUrl $url, bool $importShortCodes): ?ShortUrl
// {
//
// }
private function handleShortCodeUniqueness(
ImportedShlinkUrl $url,
ShortUrl $shortUrl,
@ -101,10 +106,17 @@ class ImportedLinksProcessor implements ImportedLinksProcessorInterface
private function importVisits(ImportedShlinkUrl $importedUrl, ShortUrl $shortUrl): int
{
// TODO Process only missing visits when possible: $importedUrl->visitsCount();
// If we know the amount of visits that can be imported, import only those left. Import all otherwise.
$importVisitsCount = $importedUrl->visitsCount();
$visitsLeft = $importVisitsCount !== null ? $importVisitsCount - $shortUrl->importedVisitsCount() : null;
$importedVisits = 0;
foreach ($importedUrl->visits() as $importedVisit) {
$this->em->persist(Visit::fromImport($importedVisit, $shortUrl));
if ($visitsLeft !== null && $importedVisits >= $visitsLeft) {
break;
}
$this->em->persist(Visit::fromImport($shortUrl, $importedVisit));
$importedVisits++;
}

View File

@ -264,12 +264,10 @@ class ShortUrlRepository extends EntitySpecificationRepository implements ShortU
return $qb->getQuery()->getOneOrNullResult();
}
public function importedUrlExists(ImportedShlinkUrl $url): bool
public function findOneByImportedUrl(ImportedShlinkUrl $url): ?ShortUrl
{
$qb = $this->getEntityManager()->createQueryBuilder();
$qb->select('COUNT(DISTINCT s.id)')
->from(ShortUrl::class, 's')
->andWhere($qb->expr()->eq('s.importOriginalShortCode', ':shortCode'))
$qb = $this->createQueryBuilder('s');
$qb->andWhere($qb->expr()->eq('s.importOriginalShortCode', ':shortCode'))
->setParameter('shortCode', $url->shortCode())
->andWhere($qb->expr()->eq('s.importSource', ':importSource'))
->setParameter('importSource', $url->source())
@ -277,8 +275,7 @@ class ShortUrlRepository extends EntitySpecificationRepository implements ShortU
$this->whereDomainIs($qb, $url->domain());
$result = (int) $qb->getQuery()->getSingleScalarResult();
return $result > 0;
return $qb->getQuery()->getOneOrNullResult();
}
private function whereDomainIs(QueryBuilder $qb, ?string $domain): void

View File

@ -40,5 +40,5 @@ interface ShortUrlRepositoryInterface extends ObjectRepository, EntitySpecificat
public function findOneMatching(ShortUrlMeta $meta): ?ShortUrl;
public function importedUrlExists(ImportedShlinkUrl $url): bool;
public function findOneByImportedUrl(ImportedShlinkUrl $url): ?ShortUrl;
}

View File

@ -416,7 +416,7 @@ class ShortUrlRepositoryTest extends DatabaseTestCase
}
/** @test */
public function importedShortUrlsAreSearchedAsExpected(): void
public function importedShortUrlsAreFoundWhenExpected(): void
{
$buildImported = static fn (string $shortCode, ?String $domain = null) =>
new ImportedShlinkUrl('', 'foo', [], Chronos::now(), $domain, $shortCode, null);
@ -429,11 +429,11 @@ class ShortUrlRepositoryTest extends DatabaseTestCase
$this->getEntityManager()->flush();
self::assertTrue($this->repo->importedUrlExists($buildImported('my-cool-slug')));
self::assertTrue($this->repo->importedUrlExists($buildImported('another-slug', 'doma.in')));
self::assertFalse($this->repo->importedUrlExists($buildImported('non-existing-slug')));
self::assertFalse($this->repo->importedUrlExists($buildImported('non-existing-slug', 'doma.in')));
self::assertFalse($this->repo->importedUrlExists($buildImported('my-cool-slug', 'doma.in')));
self::assertFalse($this->repo->importedUrlExists($buildImported('another-slug')));
self::assertNotNull($this->repo->findOneByImportedUrl($buildImported('my-cool-slug')));
self::assertNotNull($this->repo->findOneByImportedUrl($buildImported('another-slug', 'doma.in')));
self::assertNull($this->repo->findOneByImportedUrl($buildImported('non-existing-slug')));
self::assertNull($this->repo->findOneByImportedUrl($buildImported('non-existing-slug', 'doma.in')));
self::assertNull($this->repo->findOneByImportedUrl($buildImported('my-cool-slug', 'doma.in')));
self::assertNull($this->repo->findOneByImportedUrl($buildImported('another-slug')));
}
}

View File

@ -64,7 +64,7 @@ class ImportedLinksProcessorTest extends TestCase
];
$expectedCalls = count($urls);
$importedUrlExists = $this->repo->importedUrlExists(Argument::cetera())->willReturn(false);
$importedUrlExists = $this->repo->findOneByImportedUrl(Argument::cetera())->willReturn(null);
$ensureUniqueness = $this->shortCodeHelper->ensureShortCodeUniqueness(Argument::cetera())->willReturn(true);
$persist = $this->em->persist(Argument::type(ShortUrl::class));
@ -88,12 +88,14 @@ class ImportedLinksProcessorTest extends TestCase
];
$contains = fn (string $needle) => fn (string $text) => str_contains($text, $needle);
$importedUrlExists = $this->repo->importedUrlExists(Argument::cetera())->will(function (array $args): bool {
/** @var ImportedShlinkUrl $url */
[$url] = $args;
$importedUrlExists = $this->repo->findOneByImportedUrl(Argument::cetera())->will(
function (array $args): ?ShortUrl {
/** @var ImportedShlinkUrl $url */
[$url] = $args;
return contains(['foo', 'baz2', 'baz3'], $url->longUrl());
});
return contains(['foo', 'baz2', 'baz3'], $url->longUrl()) ? ShortUrl::fromImport($url, true) : null;
},
);
$ensureUniqueness = $this->shortCodeHelper->ensureShortCodeUniqueness(Argument::cetera())->willReturn(true);
$persist = $this->em->persist(Argument::type(ShortUrl::class));
@ -118,7 +120,7 @@ class ImportedLinksProcessorTest extends TestCase
];
$contains = fn (string $needle) => fn (string $text) => str_contains($text, $needle);
$importedUrlExists = $this->repo->importedUrlExists(Argument::cetera())->willReturn(false);
$importedUrlExists = $this->repo->findOneByImportedUrl(Argument::cetera())->willReturn(null);
$failingEnsureUniqueness = $this->shortCodeHelper->ensureShortCodeUniqueness(
Argument::any(),
true,