From 80d41db90178f98a22fd0e31c7431eab9a9c0aae Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sat, 2 May 2020 22:47:59 +0200 Subject: [PATCH 1/8] Improved performance on query that returns the list of visits for a short URL --- .../Core/src/Repository/VisitRepository.php | 33 +++++++------------ 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/module/Core/src/Repository/VisitRepository.php b/module/Core/src/Repository/VisitRepository.php index 61b2afb8..e8a530f2 100644 --- a/module/Core/src/Repository/VisitRepository.php +++ b/module/Core/src/Repository/VisitRepository.php @@ -7,6 +7,7 @@ namespace Shlinkio\Shlink\Core\Repository; use Doctrine\ORM\EntityRepository; use Doctrine\ORM\QueryBuilder; use Shlinkio\Shlink\Common\Util\DateRange; +use Shlinkio\Shlink\Core\Entity\ShortUrl; use Shlinkio\Shlink\Core\Entity\Visit; class VisitRepository extends EntityRepository implements VisitRepositoryInterface @@ -82,15 +83,11 @@ class VisitRepository extends EntityRepository implements VisitRepositoryInterfa ?int $offset = null ): array { $qb = $this->createVisitsByShortCodeQueryBuilder($shortCode, $domain, $dateRange); - $qb->select('v') - ->orderBy('v.date', 'DESC'); - - if ($limit !== null) { - $qb->setMaxResults($limit); - } - if ($offset !== null) { - $qb->setFirstResult($offset); - } + $qb->select('v', 'vl') + ->leftJoin('v.visitLocation', 'vl') + ->orderBy('v.id', 'DESC') + ->setMaxResults($limit) + ->setFirstResult($offset); return $qb->getQuery()->getResult(); } @@ -108,20 +105,14 @@ class VisitRepository extends EntityRepository implements VisitRepositoryInterfa ?string $domain, ?DateRange $dateRange ): QueryBuilder { + /** @var ShortUrlRepositoryInterface $shortUrlRepo */ + $shortUrlRepo = $this->getEntityManager()->getRepository(ShortUrl::class); + $shortUrl = $shortUrlRepo->findOne($shortCode, $domain) ?? -1; + $qb = $this->getEntityManager()->createQueryBuilder(); $qb->from(Visit::class, 'v') - ->join('v.shortUrl', 'su') - ->where($qb->expr()->eq('su.shortCode', ':shortCode')) - ->setParameter('shortCode', $shortCode); - - // Apply domain filtering - if ($domain !== null) { - $qb->join('su.domain', 'd') - ->andWhere($qb->expr()->eq('d.authority', ':domain')) - ->setParameter('domain', $domain); - } else { - $qb->andWhere($qb->expr()->isNull('su.domain')); - } + ->where($qb->expr()->eq('v.shortUrl', ':shortUrl')) + ->setParameter('shortUrl', $shortUrl); // Apply date range filtering if ($dateRange !== null && $dateRange->getStartDate() !== null) { From c4ae89a2794ea8d5802a0edfbb642ec989b3d845 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sun, 3 May 2020 10:22:00 +0200 Subject: [PATCH 2/8] Removed DISTINCT when counting visits for a short URL --- module/Core/src/Repository/VisitRepository.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/Core/src/Repository/VisitRepository.php b/module/Core/src/Repository/VisitRepository.php index e8a530f2..e38980c4 100644 --- a/module/Core/src/Repository/VisitRepository.php +++ b/module/Core/src/Repository/VisitRepository.php @@ -95,7 +95,7 @@ class VisitRepository extends EntityRepository implements VisitRepositoryInterfa public function countVisitsByShortCode(string $shortCode, ?string $domain = null, ?DateRange $dateRange = null): int { $qb = $this->createVisitsByShortCodeQueryBuilder($shortCode, $domain, $dateRange); - $qb->select('COUNT(DISTINCT v.id)'); + $qb->select('COUNT(v.id)'); return (int) $qb->getQuery()->getSingleScalarResult(); } From 0e4bccc4bbb0525bb3b3efba7b488f80160d0dd9 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sun, 3 May 2020 10:44:01 +0200 Subject: [PATCH 3/8] Cached result of the count query on VisitsPaginatorAdapter --- .../Paginator/Adapter/VisitsPaginatorAdapter.php | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/module/Core/src/Paginator/Adapter/VisitsPaginatorAdapter.php b/module/Core/src/Paginator/Adapter/VisitsPaginatorAdapter.php index 247ea93e..6a42ecbc 100644 --- a/module/Core/src/Paginator/Adapter/VisitsPaginatorAdapter.php +++ b/module/Core/src/Paginator/Adapter/VisitsPaginatorAdapter.php @@ -15,6 +15,8 @@ class VisitsPaginatorAdapter implements AdapterInterface private ShortUrlIdentifier $identifier; private VisitsParams $params; + private ?int $count = null; + public function __construct( VisitRepositoryInterface $visitRepository, ShortUrlIdentifier $identifier, @@ -38,7 +40,17 @@ class VisitsPaginatorAdapter implements AdapterInterface public function count(): int { - return $this->visitRepository->countVisitsByShortCode( + // Since a new adapter instance is created every time visits are fetched, it is reasonably safe to internally + // cache the count value. + // The reason it is cached is because the Paginator is actually calling the method twice. + // An inconsistent value could be returned if between the first call and the second one, a new visit is created. + // However, it's almost instant, and then the adapter instance is discarded immediately after. + + if ($this->count !== null) { + return $this->count; + } + + return $this->count = $this->visitRepository->countVisitsByShortCode( $this->identifier->shortCode(), $this->identifier->domain(), $this->params->getDateRange(), From 8b0ce8e6f333ef8b1e585ca2cb8a047e7557a0a4 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sun, 3 May 2020 18:18:24 +0200 Subject: [PATCH 4/8] Improved performance when loading visits chuncks at high offsets --- .../Core/src/Repository/VisitRepository.php | 65 ++++++++++++++++--- 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/module/Core/src/Repository/VisitRepository.php b/module/Core/src/Repository/VisitRepository.php index e38980c4..4cb7ff42 100644 --- a/module/Core/src/Repository/VisitRepository.php +++ b/module/Core/src/Repository/VisitRepository.php @@ -5,10 +5,16 @@ declare(strict_types=1); namespace Shlinkio\Shlink\Core\Repository; use Doctrine\ORM\EntityRepository; +use Doctrine\ORM\Query\ResultSetMappingBuilder; use Doctrine\ORM\QueryBuilder; use Shlinkio\Shlink\Common\Util\DateRange; use Shlinkio\Shlink\Core\Entity\ShortUrl; use Shlinkio\Shlink\Core\Entity\Visit; +use Shlinkio\Shlink\Core\Entity\VisitLocation; + +use function preg_replace; + +use const PHP_INT_MAX; class VisitRepository extends EntityRepository implements VisitRepositoryInterface { @@ -82,19 +88,60 @@ class VisitRepository extends EntityRepository implements VisitRepositoryInterfa ?int $limit = null, ?int $offset = null ): array { - $qb = $this->createVisitsByShortCodeQueryBuilder($shortCode, $domain, $dateRange); - $qb->select('v', 'vl') - ->leftJoin('v.visitLocation', 'vl') + /** + * @var QueryBuilder $qb + * @var ShortUrl|int $shortUrl + */ + [$qb, $shortUrl] = $this->createVisitsByShortCodeQueryBuilder($shortCode, $domain, $dateRange); + $qb->select('v.id') ->orderBy('v.id', 'DESC') - ->setMaxResults($limit) - ->setFirstResult($offset); + // Falling back to values that will behave as no limit/offset, but will workaround MS SQL not allowing + // order on sub-queries without offset + ->setMaxResults($limit ?? PHP_INT_MAX) + ->setFirstResult($offset ?? 0); - return $qb->getQuery()->getResult(); + // FIXME Crappy way to resolve the params into the query. Best option would be to inject the sub-query with + // placeholders and then pass params to the main query + $shortUrlId = $shortUrl instanceof ShortUrl ? $shortUrl->getId() : $shortUrl; + $subQuery = $qb->getQuery()->getSQL(); + $subQuery = preg_replace('/\?/', $shortUrlId, $subQuery, 1); + if ($dateRange !== null && $dateRange->getStartDate() !== null) { + $subQuery = preg_replace( + '/\?/', + '\'' . $dateRange->getStartDate()->toDateTimeString() . '\'', + $subQuery, + 1, + ); + } + if ($dateRange !== null && $dateRange->getEndDate() !== null) { + $subQuery = preg_replace('/\?/', '\'' . $dateRange->getEndDate()->toDateTimeString() . '\'', $subQuery, 1); + } + + // A native query builder needs to be used here because DQL and ORM query builders do not accept + // sub-queries at "from" and "join" level. + // If no sub-query is used, then the performance drops dramatically while the "offset" grows. + $nativeQb = $this->getEntityManager()->getConnection()->createQueryBuilder(); + $nativeQb->select('v.*', 'vl.*') + ->from('visits', 'v') + ->join('v', '(' . $subQuery . ')', 'o', $nativeQb->expr()->eq('o.id_0', 'v.id')) + ->leftJoin('v', 'visit_locations', 'vl', $nativeQb->expr()->eq('v.visit_location_id', 'vl.id')) + ->orderBy('v.id', 'DESC'); + + $rsm = new ResultSetMappingBuilder($this->getEntityManager()); + $rsm->addRootEntityFromClassMetadata(Visit::class, 'v'); + $rsm->addJoinedEntityFromClassMetadata(VisitLocation::class, 'vl', 'v', 'visitLocation', [ + 'id' => 'visit_location_id', + ]); + + $query = $this->getEntityManager()->createNativeQuery($nativeQb->getSQL(), $rsm); + + return $query->getResult(); } public function countVisitsByShortCode(string $shortCode, ?string $domain = null, ?DateRange $dateRange = null): int { - $qb = $this->createVisitsByShortCodeQueryBuilder($shortCode, $domain, $dateRange); + /** @var QueryBuilder $qb */ + [$qb] = $this->createVisitsByShortCodeQueryBuilder($shortCode, $domain, $dateRange); $qb->select('COUNT(v.id)'); return (int) $qb->getQuery()->getSingleScalarResult(); @@ -104,7 +151,7 @@ class VisitRepository extends EntityRepository implements VisitRepositoryInterfa string $shortCode, ?string $domain, ?DateRange $dateRange - ): QueryBuilder { + ): array { /** @var ShortUrlRepositoryInterface $shortUrlRepo */ $shortUrlRepo = $this->getEntityManager()->getRepository(ShortUrl::class); $shortUrl = $shortUrlRepo->findOne($shortCode, $domain) ?? -1; @@ -124,6 +171,6 @@ class VisitRepository extends EntityRepository implements VisitRepositoryInterfa ->setParameter('endDate', $dateRange->getEndDate()); } - return $qb; + return [$qb, $shortUrl]; } } From 74ad3553cbf1be4a79373ac6b690cdb633db5002 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sun, 3 May 2020 19:02:13 +0200 Subject: [PATCH 5/8] Hardcoded types on date fields when filtering visits lists --- module/Core/src/Repository/VisitRepository.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/module/Core/src/Repository/VisitRepository.php b/module/Core/src/Repository/VisitRepository.php index 4cb7ff42..3efd263f 100644 --- a/module/Core/src/Repository/VisitRepository.php +++ b/module/Core/src/Repository/VisitRepository.php @@ -7,6 +7,7 @@ namespace Shlinkio\Shlink\Core\Repository; use Doctrine\ORM\EntityRepository; use Doctrine\ORM\Query\ResultSetMappingBuilder; use Doctrine\ORM\QueryBuilder; +use Shlinkio\Shlink\Common\Doctrine\Type\ChronosDateTimeType; use Shlinkio\Shlink\Common\Util\DateRange; use Shlinkio\Shlink\Core\Entity\ShortUrl; use Shlinkio\Shlink\Core\Entity\Visit; @@ -123,7 +124,7 @@ class VisitRepository extends EntityRepository implements VisitRepositoryInterfa $nativeQb = $this->getEntityManager()->getConnection()->createQueryBuilder(); $nativeQb->select('v.*', 'vl.*') ->from('visits', 'v') - ->join('v', '(' . $subQuery . ')', 'o', $nativeQb->expr()->eq('o.id_0', 'v.id')) + ->join('v', '(' . $subQuery . ')', 'sq', $nativeQb->expr()->eq('sq.id_0', 'v.id')) ->leftJoin('v', 'visit_locations', 'vl', $nativeQb->expr()->eq('v.visit_location_id', 'vl.id')) ->orderBy('v.id', 'DESC'); @@ -164,11 +165,11 @@ class VisitRepository extends EntityRepository implements VisitRepositoryInterfa // Apply date range filtering if ($dateRange !== null && $dateRange->getStartDate() !== null) { $qb->andWhere($qb->expr()->gte('v.date', ':startDate')) - ->setParameter('startDate', $dateRange->getStartDate()); + ->setParameter('startDate', $dateRange->getStartDate(), ChronosDateTimeType::CHRONOS_DATETIME); } if ($dateRange !== null && $dateRange->getEndDate() !== null) { $qb->andWhere($qb->expr()->lte('v.date', ':endDate')) - ->setParameter('endDate', $dateRange->getEndDate()); + ->setParameter('endDate', $dateRange->getEndDate(), ChronosDateTimeType::CHRONOS_DATETIME); } return [$qb, $shortUrl]; From 867659ea25d6e9ed79ea368c3af2090f4204e412 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sun, 3 May 2020 19:15:26 +0200 Subject: [PATCH 6/8] Created index on visits.date column --- data/migrations/Version20200503170404.php | 27 +++++++++++++++++++ .../Shlinkio.Shlink.Core.Entity.Visit.php | 2 ++ 2 files changed, 29 insertions(+) create mode 100644 data/migrations/Version20200503170404.php diff --git a/data/migrations/Version20200503170404.php b/data/migrations/Version20200503170404.php new file mode 100644 index 00000000..a102c2c8 --- /dev/null +++ b/data/migrations/Version20200503170404.php @@ -0,0 +1,27 @@ +getTable('visits'); + $this->skipIf($visits->hasIndex(self::INDEX_NAME)); + $visits->addIndex(['date'], self::INDEX_NAME); + } + + public function down(Schema $schema): void + { + $visits = $schema->getTable('visits'); + $this->skipIf(! $visits->hasIndex(self::INDEX_NAME)); + $visits->dropIndex(self::INDEX_NAME); + } +} diff --git a/module/Core/config/entities-mappings/Shlinkio.Shlink.Core.Entity.Visit.php b/module/Core/config/entities-mappings/Shlinkio.Shlink.Core.Entity.Visit.php index 803b9790..5143389b 100644 --- a/module/Core/config/entities-mappings/Shlinkio.Shlink.Core.Entity.Visit.php +++ b/module/Core/config/entities-mappings/Shlinkio.Shlink.Core.Entity.Visit.php @@ -32,6 +32,8 @@ return static function (ClassMetadata $metadata, array $emConfig): void { ->columnName('`date`') ->build(); + $builder->addIndex(['date'], 'IDX_visits_date'); + $builder->createField('remoteAddr', Types::STRING) ->columnName('remote_addr') ->length(Visitor::REMOTE_ADDRESS_MAX_LENGTH) From d5288f756e1ca578a1abfb8b8dc0a54898692595 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sun, 3 May 2020 19:52:40 +0200 Subject: [PATCH 7/8] Fixed entity mapping for visits without a visit location --- module/Core/src/Repository/VisitRepository.php | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/module/Core/src/Repository/VisitRepository.php b/module/Core/src/Repository/VisitRepository.php index 3efd263f..547493c5 100644 --- a/module/Core/src/Repository/VisitRepository.php +++ b/module/Core/src/Repository/VisitRepository.php @@ -104,8 +104,7 @@ class VisitRepository extends EntityRepository implements VisitRepositoryInterfa // FIXME Crappy way to resolve the params into the query. Best option would be to inject the sub-query with // placeholders and then pass params to the main query $shortUrlId = $shortUrl instanceof ShortUrl ? $shortUrl->getId() : $shortUrl; - $subQuery = $qb->getQuery()->getSQL(); - $subQuery = preg_replace('/\?/', $shortUrlId, $subQuery, 1); + $subQuery = preg_replace('/\?/', $shortUrlId, $qb->getQuery()->getSQL(), 1); if ($dateRange !== null && $dateRange->getStartDate() !== null) { $subQuery = preg_replace( '/\?/', @@ -120,16 +119,16 @@ class VisitRepository extends EntityRepository implements VisitRepositoryInterfa // A native query builder needs to be used here because DQL and ORM query builders do not accept // sub-queries at "from" and "join" level. - // If no sub-query is used, then the performance drops dramatically while the "offset" grows. + // If no sub-query is used, then performance drops dramatically while the "offset" grows. $nativeQb = $this->getEntityManager()->getConnection()->createQueryBuilder(); - $nativeQb->select('v.*', 'vl.*') + $nativeQb->select('v.id AS visit_id', 'v.*', 'vl.*') ->from('visits', 'v') ->join('v', '(' . $subQuery . ')', 'sq', $nativeQb->expr()->eq('sq.id_0', 'v.id')) ->leftJoin('v', 'visit_locations', 'vl', $nativeQb->expr()->eq('v.visit_location_id', 'vl.id')) ->orderBy('v.id', 'DESC'); $rsm = new ResultSetMappingBuilder($this->getEntityManager()); - $rsm->addRootEntityFromClassMetadata(Visit::class, 'v'); + $rsm->addRootEntityFromClassMetadata(Visit::class, 'v', ['id' => 'visit_id']); $rsm->addJoinedEntityFromClassMetadata(VisitLocation::class, 'vl', 'v', 'visitLocation', [ 'id' => 'visit_location_id', ]); From 313b6a59b922f840bf4139d94179d7356cce2398 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sun, 3 May 2020 20:02:50 +0200 Subject: [PATCH 8/8] Updated changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ed6d0eb..f50e81d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com), and this #### Changed -* *Nothing* +* [#692](https://github.com/shlinkio/shlink/issues/692) Drastically improved performance when loading visits. Specially noticeable when loading big result sets. #### Deprecated