Added logic to import orphan visits

This commit is contained in:
Alejandro Celaya 2022-12-04 20:35:38 +01:00
parent 4b66aaba5c
commit 55c9773a02
11 changed files with 118 additions and 29 deletions

View File

@ -5,6 +5,7 @@ declare(strict_types=1);
namespace Shlinkio\Shlink\Core;
use Cake\Chronos\Chronos;
use Cake\Chronos\ChronosInterface;
use DateTimeInterface;
use Doctrine\ORM\Mapping\Builder\FieldBuilder;
use Jaybizzle\CrawlerDetect\CrawlerDetect;
@ -35,7 +36,7 @@ function generateRandomShortCode(int $length): string
function parseDateFromQuery(array $query, string $dateName): ?Chronos
{
return normalizeDate(empty($query[$dateName] ?? null) ? null : Chronos::parse($query[$dateName]));
return normalizeOptionalDate(empty($query[$dateName] ?? null) ? null : Chronos::parse($query[$dateName]));
}
function parseDateRangeFromQuery(array $query, string $startDateName, string $endDateName): DateRange
@ -46,7 +47,10 @@ function parseDateRangeFromQuery(array $query, string $startDateName, string $en
return buildDateRange($startDate, $endDate);
}
function normalizeDate(string|DateTimeInterface|Chronos|null $date): ?Chronos
/**
* @return ($date is null ? null : Chronos)
*/
function normalizeOptionalDate(string|DateTimeInterface|ChronosInterface|null $date): ?Chronos
{
$parsedDate = match (true) {
$date === null || $date instanceof Chronos => $date,
@ -57,6 +61,11 @@ function normalizeDate(string|DateTimeInterface|Chronos|null $date): ?Chronos
return $parsedDate?->setTimezone(date_default_timezone_get());
}
function normalizeDate(string|DateTimeInterface|ChronosInterface $date): Chronos
{
return normalizeOptionalDate($date);
}
function getOptionalIntFromInputFilter(InputFilter $inputFilter, string $fieldName): ?int
{
$value = $inputFilter->getValue($fieldName);

View File

@ -11,7 +11,10 @@ use Shlinkio\Shlink\Core\ShortUrl\Helper\ShortCodeUniquenessHelperInterface;
use Shlinkio\Shlink\Core\ShortUrl\Repository\ShortUrlRepositoryInterface;
use Shlinkio\Shlink\Core\ShortUrl\Resolver\ShortUrlRelationResolverInterface;
use Shlinkio\Shlink\Core\Util\DoctrineBatchHelperInterface;
use Shlinkio\Shlink\Core\Visit\Entity\Visit;
use Shlinkio\Shlink\Core\Visit\Repository\VisitRepositoryInterface;
use Shlinkio\Shlink\Importer\ImportedLinksProcessorInterface;
use Shlinkio\Shlink\Importer\Model\ImportedShlinkOrphanVisit;
use Shlinkio\Shlink\Importer\Model\ImportedShlinkUrl;
use Shlinkio\Shlink\Importer\Model\ImportResult;
use Shlinkio\Shlink\Importer\Params\ImportParams;
@ -20,6 +23,7 @@ use Symfony\Component\Console\Style\OutputStyle;
use Symfony\Component\Console\Style\StyleInterface;
use Throwable;
use function Shlinkio\Shlink\Core\normalizeDate;
use function sprintf;
class ImportedLinksProcessor implements ImportedLinksProcessorInterface
@ -36,12 +40,27 @@ class ImportedLinksProcessor implements ImportedLinksProcessorInterface
}
public function process(StyleInterface $io, ImportResult $result, ImportParams $params): void
{
$io->title('Importing short URLs');
$this->importShortUrls($io, $result->shlinkUrls, $params);
if ($params->importOrphanVisits) {
$io->title('Importing orphan visits');
$this->importOrphanVisits($io, $result->orphanVisits);
}
$io->success('Data properly imported!');
}
/**
* @param iterable<ImportedShlinkUrl> $shlinkUrls
*/
private function importShortUrls(StyleInterface $io, iterable $shlinkUrls, ImportParams $params): void
{
$importShortCodes = $params->importShortCodes;
$source = $params->source;
$iterable = $this->batchHelper->wrapIterable($result->shlinkUrls, $source === ImportSource::SHLINK ? 10 : 100);
$iterable = $this->batchHelper->wrapIterable($shlinkUrls, $source === ImportSource::SHLINK ? 10 : 100);
/** @var ImportedShlinkUrl $importedUrl */
foreach ($iterable as $importedUrl) {
$skipOnShortCodeConflict = static fn (): bool => $io->choice(sprintf(
'Failed to import URL "%s" because its short-code "%s" is already in use. Do you want to generate '
@ -105,4 +124,29 @@ class ImportedLinksProcessor implements ImportedLinksProcessorInterface
return $this->shortCodeHelper->ensureShortCodeUniqueness($shortUrl, false);
}
/**
* @param iterable<ImportedShlinkOrphanVisit> $orphanVisits
*/
private function importOrphanVisits(StyleInterface $io, iterable $orphanVisits): void
{
$iterable = $this->batchHelper->wrapIterable($orphanVisits, 100);
/** @var VisitRepositoryInterface $visitRepo */
$visitRepo = $this->em->getRepository(Visit::class);
$mostRecentOrphanVisit = $visitRepo->findMostRecentOrphanVisit();
$importedVisits = 0;
foreach ($iterable as $importedOrphanVisit) {
// Skip visits which are older than the most recent already imported visit's date
if ($mostRecentOrphanVisit?->getDate()->gte(normalizeDate($importedOrphanVisit->date))) {
continue;
}
$this->em->persist(Visit::fromOrphanImport($importedOrphanVisit));
$importedVisits++;
}
$io->text(sprintf('<info>Imported %s</info> orphan visits.', $importedVisits));
}
}

View File

@ -4,12 +4,12 @@ declare(strict_types=1);
namespace Shlinkio\Shlink\Core\Importer;
use Cake\Chronos\Chronos;
use Doctrine\ORM\EntityManagerInterface;
use Shlinkio\Shlink\Core\ShortUrl\Entity\ShortUrl;
use Shlinkio\Shlink\Core\Visit\Entity\Visit;
use Shlinkio\Shlink\Importer\Model\ImportedShlinkVisit;
use function Shlinkio\Shlink\Core\normalizeDate;
use function sprintf;
final class ShortUrlImporting
@ -38,7 +38,7 @@ final class ShortUrlImporting
$importedVisits = 0;
foreach ($visits as $importedVisit) {
// Skip visits which are older than the most recent already imported visit's date
if ($mostRecentImportedDate?->gte(Chronos::instance($importedVisit->date))) {
if ($mostRecentImportedDate?->gte(normalizeDate($importedVisit->date))) {
continue;
}

View File

@ -25,6 +25,8 @@ use Shlinkio\Shlink\Rest\Entity\ApiKey;
use function count;
use function Shlinkio\Shlink\Core\generateRandomShortCode;
use function Shlinkio\Shlink\Core\normalizeDate;
use function Shlinkio\Shlink\Core\normalizeOptionalDate;
class ShortUrl extends AbstractEntity
{
@ -109,19 +111,11 @@ class ShortUrl extends AbstractEntity
$instance = self::fromMeta(ShortUrlCreation::fromRawData($meta), $relationResolver);
$validSince = $url->meta->validSince;
if ($validSince !== null) {
$instance->validSince = Chronos::instance($validSince);
}
$validUntil = $url->meta->validUntil;
if ($validUntil !== null) {
$instance->validUntil = Chronos::instance($validUntil);
}
$instance->importSource = $url->source->value;
$instance->importOriginalShortCode = $url->shortCode;
$instance->dateCreated = Chronos::instance($url->createdAt);
$instance->validSince = normalizeOptionalDate($url->meta->validSince);
$instance->validUntil = normalizeOptionalDate($url->meta->validUntil);
$instance->dateCreated = normalizeDate($url->createdAt);
return $instance;
}

View File

@ -12,7 +12,7 @@ use Shlinkio\Shlink\Rest\Entity\ApiKey;
use function Shlinkio\Shlink\Core\getOptionalBoolFromInputFilter;
use function Shlinkio\Shlink\Core\getOptionalIntFromInputFilter;
use function Shlinkio\Shlink\Core\normalizeDate;
use function Shlinkio\Shlink\Core\normalizeOptionalDate;
use const Shlinkio\Shlink\DEFAULT_SHORT_CODES_LENGTH;
@ -68,8 +68,8 @@ final class ShortUrlCreation implements TitleResolutionModelInterface
}
$this->longUrl = $inputFilter->getValue(ShortUrlInputFilter::LONG_URL);
$this->validSince = normalizeDate($inputFilter->getValue(ShortUrlInputFilter::VALID_SINCE));
$this->validUntil = normalizeDate($inputFilter->getValue(ShortUrlInputFilter::VALID_UNTIL));
$this->validSince = normalizeOptionalDate($inputFilter->getValue(ShortUrlInputFilter::VALID_SINCE));
$this->validUntil = normalizeOptionalDate($inputFilter->getValue(ShortUrlInputFilter::VALID_UNTIL));
$this->customSlug = $inputFilter->getValue(ShortUrlInputFilter::CUSTOM_SLUG);
$this->maxVisits = getOptionalIntFromInputFilter($inputFilter, ShortUrlInputFilter::MAX_VISITS);
$this->findIfExists = $inputFilter->getValue(ShortUrlInputFilter::FIND_IF_EXISTS);

View File

@ -12,7 +12,7 @@ use Shlinkio\Shlink\Core\ShortUrl\Model\Validation\ShortUrlInputFilter;
use function array_key_exists;
use function Shlinkio\Shlink\Core\getOptionalBoolFromInputFilter;
use function Shlinkio\Shlink\Core\getOptionalIntFromInputFilter;
use function Shlinkio\Shlink\Core\normalizeDate;
use function Shlinkio\Shlink\Core\normalizeOptionalDate;
final class ShortUrlEdition implements TitleResolutionModelInterface
{
@ -69,8 +69,8 @@ final class ShortUrlEdition implements TitleResolutionModelInterface
$this->forwardQueryPropWasProvided = array_key_exists(ShortUrlInputFilter::FORWARD_QUERY, $data);
$this->longUrl = $inputFilter->getValue(ShortUrlInputFilter::LONG_URL);
$this->validSince = normalizeDate($inputFilter->getValue(ShortUrlInputFilter::VALID_SINCE));
$this->validUntil = normalizeDate($inputFilter->getValue(ShortUrlInputFilter::VALID_UNTIL));
$this->validSince = normalizeOptionalDate($inputFilter->getValue(ShortUrlInputFilter::VALID_SINCE));
$this->validUntil = normalizeOptionalDate($inputFilter->getValue(ShortUrlInputFilter::VALID_UNTIL));
$this->maxVisits = getOptionalIntFromInputFilter($inputFilter, ShortUrlInputFilter::MAX_VISITS);
$this->validateUrl = getOptionalBoolFromInputFilter($inputFilter, ShortUrlInputFilter::VALIDATE_URL) ?? false;
$this->tags = $inputFilter->getValue(ShortUrlInputFilter::TAGS);

View File

@ -10,7 +10,7 @@ use Shlinkio\Shlink\Core\Model\Ordering;
use Shlinkio\Shlink\Core\ShortUrl\Model\Validation\ShortUrlsParamsInputFilter;
use function Shlinkio\Shlink\Common\buildDateRange;
use function Shlinkio\Shlink\Core\normalizeDate;
use function Shlinkio\Shlink\Core\normalizeOptionalDate;
final class ShortUrlsParams
{
@ -59,8 +59,8 @@ final class ShortUrlsParams
$this->searchTerm = $inputFilter->getValue(ShortUrlsParamsInputFilter::SEARCH_TERM);
$this->tags = (array) $inputFilter->getValue(ShortUrlsParamsInputFilter::TAGS);
$this->dateRange = buildDateRange(
normalizeDate($inputFilter->getValue(ShortUrlsParamsInputFilter::START_DATE)),
normalizeDate($inputFilter->getValue(ShortUrlsParamsInputFilter::END_DATE)),
normalizeOptionalDate($inputFilter->getValue(ShortUrlsParamsInputFilter::START_DATE)),
normalizeOptionalDate($inputFilter->getValue(ShortUrlsParamsInputFilter::END_DATE)),
);
$this->orderBy = Ordering::fromTuple($inputFilter->getValue(ShortUrlsParamsInputFilter::ORDER_BY));
$this->itemsPerPage = (int) (

View File

@ -6,5 +6,10 @@ namespace Shlinkio\Shlink\Core\Util;
interface DoctrineBatchHelperInterface
{
/**
* @template T
* @param iterable<T> $resultSet
* @return iterable<T>
*/
public function wrapIterable(iterable $resultSet, int $batchSize): iterable;
}

View File

@ -10,12 +10,13 @@ use Shlinkio\Shlink\Common\Entity\AbstractEntity;
use Shlinkio\Shlink\Common\Exception\InvalidArgumentException;
use Shlinkio\Shlink\Common\Util\IpAddress;
use Shlinkio\Shlink\Core\ShortUrl\Entity\ShortUrl;
use Shlinkio\Shlink\Core\Visit\Entity\VisitLocation;
use Shlinkio\Shlink\Core\Visit\Model\Visitor;
use Shlinkio\Shlink\Core\Visit\Model\VisitType;
use Shlinkio\Shlink\Importer\Model\ImportedShlinkOrphanVisit;
use Shlinkio\Shlink\Importer\Model\ImportedShlinkVisit;
use function Shlinkio\Shlink\Core\isCrawler;
use function Shlinkio\Shlink\Core\normalizeDate;
class Visit extends AbstractEntity implements JsonSerializable
{
@ -46,11 +47,30 @@ class Visit extends AbstractEntity implements JsonSerializable
public static function fromImport(ShortUrl $shortUrl, ImportedShlinkVisit $importedVisit): self
{
$instance = new self($shortUrl, VisitType::IMPORTED);
return self::fromImportOrOrphanImport($importedVisit, VisitType::IMPORTED, $shortUrl);
}
public static function fromOrphanImport(ImportedShlinkOrphanVisit $importedVisit): self
{
$instance = self::fromImportOrOrphanImport(
$importedVisit,
VisitType::tryFrom($importedVisit->type) ?? VisitType::IMPORTED,
);
$instance->visitedUrl = $importedVisit->visitedUrl;
return $instance;
}
private static function fromImportOrOrphanImport(
ImportedShlinkVisit|ImportedShlinkOrphanVisit $importedVisit,
VisitType $type,
?ShortUrl $shortUrl = null,
): self {
$instance = new self($shortUrl, $type);
$instance->userAgent = $importedVisit->userAgent;
$instance->potentialBot = isCrawler($instance->userAgent);
$instance->referer = $importedVisit->referer;
$instance->date = Chronos::instance($importedVisit->date);
$instance->date = normalizeDate($importedVisit->date);
$importedLocation = $importedVisit->location;
$instance->visitLocation = $importedLocation !== null ? VisitLocation::fromImport($importedLocation) : null;

View File

@ -286,4 +286,19 @@ class VisitRepository extends EntitySpecificationRepository implements VisitRepo
return $this->getEntityManager()->createNativeQuery($nativeQb->getSQL(), $rsm)->getResult();
}
public function findMostRecentOrphanVisit(): ?Visit
{
$dql = <<<DQL
SELECT v
FROM Shlinkio\Shlink\Core\Visit\Entity\Visit AS v
WHERE v.shortUrl IS NULL
ORDER BY v.id DESC
DQL;
$query = $this->getEntityManager()->createQuery($dql);
$query->setMaxResults(1);
return $query->getOneOrNullResult();
}
}

View File

@ -65,4 +65,6 @@ interface VisitRepositoryInterface extends ObjectRepository, EntitySpecification
public function findNonOrphanVisits(VisitsListFiltering $filtering): array;
public function countNonOrphanVisits(VisitsCountFiltering $filtering): int;
public function findMostRecentOrphanVisit(): ?Visit;
}