mirror of
https://github.com/shlinkio/shlink.git
synced 2025-02-25 18:45:27 -06:00
Merge pull request #1091 from acelaya-forks/feature/improved-crawling
Feature/improved crawling
This commit is contained in:
commit
663ae9f6bb
6
.github/workflows/ci.yml
vendored
6
.github/workflows/ci.yml
vendored
@ -12,7 +12,7 @@ jobs:
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
matrix:
|
||||
php-version: ['7.4']
|
||||
php-version: ['8.0']
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
@ -30,7 +30,7 @@ jobs:
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
matrix:
|
||||
php-version: ['7.4']
|
||||
php-version: ['8.0']
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
@ -242,7 +242,7 @@ jobs:
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
matrix:
|
||||
php-version: ['7.4']
|
||||
php-version: ['8.0']
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
|
@ -24,6 +24,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com), and this
|
||||
* `disable_referrer_tracking`: If true, the referrer will not be tracked.
|
||||
* `disable_ua_tracking`: If true, the user agent will not be tracked.
|
||||
|
||||
* [#955](https://github.com/shlinkio/shlink/issues/955) Added a new option to flag short URLs as crawlable, so that they are listed in the robots.txt with an `Allow` directive.
|
||||
|
||||
### Changed
|
||||
* [#1036](https://github.com/shlinkio/shlink/issues/1036) Updated to `happyr/doctrine-specification` 2.0.
|
||||
* [#1039](https://github.com/shlinkio/shlink/issues/1039) Updated to `endroid/qr-code` 4.0.
|
||||
|
26
data/migrations/Version20210522051601.php
Normal file
26
data/migrations/Version20210522051601.php
Normal file
@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace ShlinkMigrations;
|
||||
|
||||
use Doctrine\DBAL\Schema\Schema;
|
||||
use Doctrine\DBAL\Types\Types;
|
||||
use Doctrine\Migrations\AbstractMigration;
|
||||
|
||||
/**
 * Adds the boolean "crawlable" column (defaulting to false) to the "short_urls" table.
 *
 * Both directions are guarded, so the migration is skipped when the schema is already in the target state.
 */
final class Version20210522051601 extends AbstractMigration
{
    private const TABLE_NAME = 'short_urls';
    private const COLUMN_NAME = 'crawlable';

    public function up(Schema $schema): void
    {
        $table = $schema->getTable(self::TABLE_NAME);
        $columnAlreadyExists = $table->hasColumn(self::COLUMN_NAME);

        // Nothing to do if the column was already created
        $this->skipIf($columnAlreadyExists);
        $table->addColumn(self::COLUMN_NAME, Types::BOOLEAN, ['default' => false]);
    }

    public function down(Schema $schema): void
    {
        $table = $schema->getTable(self::TABLE_NAME);
        $columnIsMissing = ! $table->hasColumn(self::COLUMN_NAME);

        // Nothing to revert if the column is not there
        $this->skipIf($columnIsMissing);
        $table->dropColumn(self::COLUMN_NAME);
    }
}
|
@ -116,6 +116,15 @@
|
||||
"domain": {
|
||||
"type": "string",
|
||||
"description": "The domain in which the short URL was created. Null if it belongs to default domain."
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"nullable": true,
|
||||
"description": "A descriptive title of the short URL."
|
||||
},
|
||||
"crawlable": {
|
||||
"type": "boolean",
|
||||
"description": "Tells if this URL will be included as 'Allow' in Shlink's robots.txt."
|
||||
}
|
||||
},
|
||||
"example": {
|
||||
@ -133,7 +142,9 @@
|
||||
"validUntil": null,
|
||||
"maxVisits": 100
|
||||
},
|
||||
"domain": "example.com"
|
||||
"domain": "example.com",
|
||||
"title": "The title",
|
||||
"crawlable": false
|
||||
}
|
||||
},
|
||||
"ShortUrlMeta": {
|
||||
|
@ -41,6 +41,10 @@
|
||||
"type": "string",
|
||||
"nullable": true,
|
||||
"description": "A descriptive title of the short URL."
|
||||
},
|
||||
"crawlable": {
|
||||
"type": "boolean",
|
||||
"description": "Tells if this URL will be included as 'Allow' in Shlink's robots.txt."
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -140,7 +140,8 @@
|
||||
"maxVisits": 100
|
||||
},
|
||||
"domain": null,
|
||||
"title": "Welcome to Steam"
|
||||
"title": "Welcome to Steam",
|
||||
"crawlable": false
|
||||
},
|
||||
{
|
||||
"shortCode": "12Kb3",
|
||||
@ -157,7 +158,8 @@
|
||||
"maxVisits": null
|
||||
},
|
||||
"domain": null,
|
||||
"title": null
|
||||
"title": null,
|
||||
"crawlable": false
|
||||
},
|
||||
{
|
||||
"shortCode": "123bA",
|
||||
@ -172,7 +174,8 @@
|
||||
"maxVisits": null
|
||||
},
|
||||
"domain": "example.com",
|
||||
"title": null
|
||||
"title": null,
|
||||
"crawlable": false
|
||||
}
|
||||
],
|
||||
"pagination": {
|
||||
@ -273,6 +276,10 @@
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "A descriptive title of the short URL."
|
||||
},
|
||||
"crawlable": {
|
||||
"type": "boolean",
|
||||
"description": "Tells if this URL will be included as 'Allow' in Shlink's robots.txt."
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -305,7 +312,9 @@
|
||||
"validUntil": null,
|
||||
"maxVisits": 500
|
||||
},
|
||||
"domain": null
|
||||
"domain": null,
|
||||
"title": null,
|
||||
"crawlable": false
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -74,7 +74,8 @@
|
||||
"maxVisits": 100
|
||||
},
|
||||
"domain": null,
|
||||
"title": null
|
||||
"title": null,
|
||||
"crawlable": false
|
||||
},
|
||||
"text/plain": "https://doma.in/abc123"
|
||||
}
|
||||
|
@ -54,7 +54,8 @@
|
||||
"maxVisits": 100
|
||||
},
|
||||
"domain": null,
|
||||
"title": null
|
||||
"title": null,
|
||||
"crawlable": false
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -147,6 +148,10 @@
|
||||
"type": "string",
|
||||
"description": "A descriptive title of the short URL.",
|
||||
"nullable": true
|
||||
},
|
||||
"crawlable": {
|
||||
"type": "boolean",
|
||||
"description": "Tells if this URL will be included as 'Allow' in Shlink's robots.txt."
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -184,7 +189,8 @@
|
||||
"maxVisits": 100
|
||||
},
|
||||
"domain": null,
|
||||
"title": "Shlink - The URL shortener"
|
||||
"title": "Shlink - The URL shortener",
|
||||
"crawlable": false
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -48,6 +48,7 @@ return [
|
||||
Action\RedirectAction::class => ConfigAbstractFactory::class,
|
||||
Action\PixelAction::class => ConfigAbstractFactory::class,
|
||||
Action\QrCodeAction::class => ConfigAbstractFactory::class,
|
||||
Action\RobotsAction::class => ConfigAbstractFactory::class,
|
||||
|
||||
ShortUrl\Resolver\PersistenceShortUrlRelationResolver::class => ConfigAbstractFactory::class,
|
||||
ShortUrl\Helper\ShortUrlStringifier::class => ConfigAbstractFactory::class,
|
||||
@ -57,6 +58,8 @@ return [
|
||||
Mercure\MercureUpdatesGenerator::class => ConfigAbstractFactory::class,
|
||||
|
||||
Importer\ImportedLinksProcessor::class => ConfigAbstractFactory::class,
|
||||
|
||||
Crawling\CrawlingHelper::class => ConfigAbstractFactory::class,
|
||||
],
|
||||
|
||||
'aliases' => [
|
||||
@ -129,6 +132,7 @@ return [
|
||||
ShortUrl\Helper\ShortUrlStringifier::class,
|
||||
'Logger_Shlink',
|
||||
],
|
||||
Action\RobotsAction::class => [Crawling\CrawlingHelper::class],
|
||||
|
||||
ShortUrl\Resolver\PersistenceShortUrlRelationResolver::class => ['em'],
|
||||
ShortUrl\Helper\ShortUrlStringifier::class => ['config.url_shortener.domain', 'config.router.base_path'],
|
||||
@ -146,6 +150,8 @@ return [
|
||||
Service\ShortUrl\ShortCodeHelper::class,
|
||||
Util\DoctrineBatchHelper::class,
|
||||
],
|
||||
|
||||
Crawling\CrawlingHelper::class => ['em'],
|
||||
],
|
||||
|
||||
];
|
||||
|
@ -95,4 +95,9 @@ return static function (ClassMetadata $metadata, array $emConfig): void {
|
||||
->columnName('title_was_auto_resolved')
|
||||
->option('default', false)
|
||||
->build();
|
||||
|
||||
$builder->createField('crawlable', Types::BOOLEAN)
|
||||
->columnName('crawlable')
|
||||
->option('default', false)
|
||||
->build();
|
||||
};
|
||||
|
@ -9,6 +9,14 @@ use Shlinkio\Shlink\Core\Action;
|
||||
return [
|
||||
|
||||
'routes' => [
|
||||
[
|
||||
'name' => Action\RobotsAction::class,
|
||||
'path' => '/robots.txt',
|
||||
'middleware' => [
|
||||
Action\RobotsAction::class,
|
||||
],
|
||||
'allowed_methods' => [RequestMethod::METHOD_GET],
|
||||
],
|
||||
[
|
||||
'name' => Action\RedirectAction::class,
|
||||
'path' => '/{shortCode}',
|
||||
|
49
module/Core/src/Action/RobotsAction.php
Normal file
49
module/Core/src/Action/RobotsAction.php
Normal file
@ -0,0 +1,49 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Shlinkio\Shlink\Core\Action;
|
||||
|
||||
use Fig\Http\Message\StatusCodeInterface;
|
||||
use GuzzleHttp\Psr7\Response;
|
||||
use Psr\Http\Message\ResponseInterface;
|
||||
use Psr\Http\Message\ServerRequestInterface;
|
||||
use Psr\Http\Server\RequestHandlerInterface;
|
||||
use Shlinkio\Shlink\Core\Crawling\CrawlingHelperInterface;
|
||||
|
||||
use function sprintf;
|
||||
|
||||
use const PHP_EOL;
|
||||
|
||||
/**
 * Serves a dynamically generated robots.txt, in which every crawlable short code is listed as "Allow",
 * followed by a final "Disallow: /" rule.
 */
class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
{
    private CrawlingHelperInterface $crawlingHelper;

    public function __construct(CrawlingHelperInterface $crawlingHelper)
    {
        $this->crawlingHelper = $crawlingHelper;
    }

    /**
     * Returns a 200 plain-text response whose body is produced from the robots generator.
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $headers = ['Content-type' => 'text/plain'];
        $body = $this->buildRobots();

        return new Response(self::STATUS_OK, $headers, $body);
    }

    /**
     * Yields the robots.txt contents chunk by chunk: the static header, one "Allow" line per
     * crawlable short code, and the closing "Disallow" rule.
     */
    private function buildRobots(): iterable
    {
        yield <<<ROBOTS
        # For more information about the robots.txt standard, see:
        # https://www.robotstxt.org/orig.html

        User-agent: *

        ROBOTS;

        // Short codes are only requested once the header chunk has been consumed
        foreach ($this->crawlingHelper->listCrawlableShortCodes() as $crawlableShortCode) {
            yield sprintf('Allow: /%s%s', $crawlableShortCode, PHP_EOL);
        }

        yield 'Disallow: /';
    }
}
|
26
module/Core/src/Crawling/CrawlingHelper.php
Normal file
26
module/Core/src/Crawling/CrawlingHelper.php
Normal file
@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Shlinkio\Shlink\Core\Crawling;
|
||||
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use Shlinkio\Shlink\Core\Entity\ShortUrl;
|
||||
use Shlinkio\Shlink\Core\Repository\ShortUrlRepositoryInterface;
|
||||
|
||||
/**
 * Lists crawlable short codes by delegating to the ShortUrl repository obtained from the entity manager.
 */
class CrawlingHelper implements CrawlingHelperInterface
{
    private EntityManagerInterface $em;

    public function __construct(EntityManagerInterface $em)
    {
        $this->em = $em;
    }

    /**
     * Lazily re-yields whatever the repository reports as crawlable short codes.
     *
     * Being a generator, the repository is not resolved or queried until iteration starts.
     */
    public function listCrawlableShortCodes(): iterable
    {
        /** @var ShortUrlRepositoryInterface $shortUrlRepo */
        $shortUrlRepo = $this->em->getRepository(ShortUrl::class);

        foreach ($shortUrlRepo->findCrawlableShortCodes() as $key => $shortCode) {
            yield $key => $shortCode;
        }
    }
}
|
13
module/Core/src/Crawling/CrawlingHelperInterface.php
Normal file
13
module/Core/src/Crawling/CrawlingHelperInterface.php
Normal file
@ -0,0 +1,13 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Shlinkio\Shlink\Core\Crawling;
|
||||
|
||||
/**
 * Contract for services able to list the short codes that are flagged as crawlable.
 */
interface CrawlingHelperInterface
|
||||
{
|
||||
/**
|
||||
* Provides the short codes of the short URLs flagged as crawlable.
*
* @return string[]|iterable
|
||||
*/
|
||||
public function listCrawlableShortCodes(): iterable;
|
||||
}
|
@ -42,6 +42,7 @@ class ShortUrl extends AbstractEntity
|
||||
private ?ApiKey $authorApiKey = null;
|
||||
private ?string $title = null;
|
||||
private bool $titleWasAutoResolved = false;
|
||||
private bool $crawlable = false;
|
||||
|
||||
private function __construct()
|
||||
{
|
||||
@ -78,6 +79,7 @@ class ShortUrl extends AbstractEntity
|
||||
$instance->authorApiKey = $meta->getApiKey();
|
||||
$instance->title = $meta->getTitle();
|
||||
$instance->titleWasAutoResolved = $meta->titleWasAutoResolved();
|
||||
$instance->crawlable = $meta->isCrawlable();
|
||||
|
||||
return $instance;
|
||||
}
|
||||
@ -200,6 +202,11 @@ class ShortUrl extends AbstractEntity
|
||||
return $this->title;
|
||||
}
|
||||
|
||||
/**
 * Tells if this short URL is flagged as crawlable.
 */
public function crawlable(): bool
|
||||
{
|
||||
return $this->crawlable;
|
||||
}
|
||||
|
||||
public function update(
|
||||
ShortUrlEdit $shortUrlEdit,
|
||||
?ShortUrlRelationResolverInterface $relationResolver = null
|
||||
@ -220,6 +227,9 @@ class ShortUrl extends AbstractEntity
|
||||
$relationResolver = $relationResolver ?? new SimpleShortUrlRelationResolver();
|
||||
$this->tags = $relationResolver->resolveTags($shortUrlEdit->tags());
|
||||
}
|
||||
if ($shortUrlEdit->crawlableWasProvided()) {
|
||||
$this->crawlable = $shortUrlEdit->crawlable();
|
||||
}
|
||||
if (
|
||||
$this->title === null
|
||||
|| $shortUrlEdit->titleWasProvided()
|
||||
|
@ -30,6 +30,8 @@ final class ShortUrlEdit implements TitleResolutionModelInterface
|
||||
private ?string $title = null;
|
||||
private bool $titleWasAutoResolved = false;
|
||||
private ?bool $validateUrl = null;
|
||||
private bool $crawlablePropWasProvided = false;
|
||||
private bool $crawlable = false;
|
||||
|
||||
private function __construct()
|
||||
{
|
||||
@ -61,6 +63,7 @@ final class ShortUrlEdit implements TitleResolutionModelInterface
|
||||
$this->maxVisitsPropWasProvided = array_key_exists(ShortUrlInputFilter::MAX_VISITS, $data);
|
||||
$this->tagsPropWasProvided = array_key_exists(ShortUrlInputFilter::TAGS, $data);
|
||||
$this->titlePropWasProvided = array_key_exists(ShortUrlInputFilter::TITLE, $data);
|
||||
$this->crawlablePropWasProvided = array_key_exists(ShortUrlInputFilter::CRAWLABLE, $data);
|
||||
|
||||
$this->longUrl = $inputFilter->getValue(ShortUrlInputFilter::LONG_URL);
|
||||
$this->validSince = parseDateField($inputFilter->getValue(ShortUrlInputFilter::VALID_SINCE));
|
||||
@ -69,6 +72,7 @@ final class ShortUrlEdit implements TitleResolutionModelInterface
|
||||
$this->validateUrl = getOptionalBoolFromInputFilter($inputFilter, ShortUrlInputFilter::VALIDATE_URL);
|
||||
$this->tags = $inputFilter->getValue(ShortUrlInputFilter::TAGS);
|
||||
$this->title = $inputFilter->getValue(ShortUrlInputFilter::TITLE);
|
||||
$this->crawlable = $inputFilter->getValue(ShortUrlInputFilter::CRAWLABLE);
|
||||
}
|
||||
|
||||
public function longUrl(): ?string
|
||||
@ -162,4 +166,14 @@ final class ShortUrlEdit implements TitleResolutionModelInterface
|
||||
{
|
||||
return $this->validateUrl;
|
||||
}
|
||||
|
||||
/**
 * Returns the crawlable value read from the input filter for this edit.
 */
public function crawlable(): bool
|
||||
{
|
||||
return $this->crawlable;
|
||||
}
|
||||
|
||||
/**
 * Tells if the "crawlable" key was present in the raw data used to build this edit.
 */
public function crawlableWasProvided(): bool
|
||||
{
|
||||
return $this->crawlablePropWasProvided;
|
||||
}
|
||||
}
|
||||
|
@ -31,6 +31,7 @@ final class ShortUrlMeta implements TitleResolutionModelInterface
|
||||
private array $tags = [];
|
||||
private ?string $title = null;
|
||||
private bool $titleWasAutoResolved = false;
|
||||
private bool $crawlable = false;
|
||||
|
||||
private function __construct()
|
||||
{
|
||||
@ -80,6 +81,7 @@ final class ShortUrlMeta implements TitleResolutionModelInterface
|
||||
$this->apiKey = $inputFilter->getValue(ShortUrlInputFilter::API_KEY);
|
||||
$this->tags = $inputFilter->getValue(ShortUrlInputFilter::TAGS);
|
||||
$this->title = $inputFilter->getValue(ShortUrlInputFilter::TITLE);
|
||||
$this->crawlable = $inputFilter->getValue(ShortUrlInputFilter::CRAWLABLE);
|
||||
}
|
||||
|
||||
public function getLongUrl(): string
|
||||
@ -188,4 +190,9 @@ final class ShortUrlMeta implements TitleResolutionModelInterface
|
||||
|
||||
return $copy;
|
||||
}
|
||||
|
||||
/**
 * Tells if the short URL described by this metadata should be flagged as crawlable.
 */
public function isCrawlable(): bool
|
||||
{
|
||||
return $this->crawlable;
|
||||
}
|
||||
}
|
||||
|
@ -288,4 +288,28 @@ class ShortUrlRepository extends EntitySpecificationRepository implements ShortU
|
||||
$qb->andWhere($qb->expr()->isNull('s.domain'));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Lazily yields the short codes of all short URLs flagged as crawlable.
 *
 * Rows are loaded in blocks using limit/offset pagination, so the whole result set is never hydrated at
 * once. The query is explicitly ordered by short code, because without a stable ORDER BY the database may
 * return rows in a different order for every page query, making offset pagination skip or repeat entries.
 *
 * @return string[]|iterable
 */
public function findCrawlableShortCodes(): iterable
{
    $blockSize = 1000;
    $qb = $this->getEntityManager()->createQueryBuilder();
    $qb->select('DISTINCT s.shortCode')
       ->from(ShortUrl::class, 's')
       ->where($qb->expr()->eq('s.crawlable', ':crawlable'))
       ->setParameter('crawlable', true)
       ->setMaxResults($blockSize)
       ->orderBy('s.shortCode');

    $page = 0;
    do {
        $qbClone = (clone $qb)->setFirstResult($blockSize * $page);
        $iterator = $qbClone->getQuery()->toIterable();
        $fetchedInBlock = 0;
        $page++;

        foreach ($iterator as ['shortCode' => $shortCode]) {
            $fetchedInBlock++;
            yield $shortCode;
        }

        // A block which is not full is the last one, so there is no need to run another (empty) query
    } while ($fetchedInBlock === $blockSize);
}
|
||||
}
|
||||
|
@ -41,4 +41,6 @@ interface ShortUrlRepositoryInterface extends ObjectRepository, EntitySpecificat
|
||||
public function findOneMatching(ShortUrlMeta $meta): ?ShortUrl;
|
||||
|
||||
public function findOneByImportedUrl(ImportedShlinkUrl $url): ?ShortUrl;
|
||||
|
||||
public function findCrawlableShortCodes(): iterable;
|
||||
}
|
||||
|
@ -66,11 +66,11 @@ class VisitRepository extends EntitySpecificationRepository implements VisitRepo
|
||||
|
||||
do {
|
||||
$qb = (clone $originalQueryBuilder)->andWhere($qb->expr()->gt('v.id', $lastId));
|
||||
$iterator = $qb->getQuery()->iterate();
|
||||
$iterator = $qb->getQuery()->toIterable();
|
||||
$resultsFound = false;
|
||||
|
||||
/** @var Visit $visit */
|
||||
foreach ($iterator as $key => [$visit]) {
|
||||
foreach ($iterator as $key => $visit) {
|
||||
$resultsFound = true;
|
||||
yield $key => $visit;
|
||||
}
|
||||
|
@ -35,6 +35,7 @@ class ShortUrlDataTransformer implements DataTransformerInterface
|
||||
'meta' => $this->buildMeta($shortUrl),
|
||||
'domain' => $shortUrl->getDomain(),
|
||||
'title' => $shortUrl->title(),
|
||||
'crawlable' => $shortUrl->crawlable(),
|
||||
];
|
||||
}
|
||||
|
||||
|
@ -32,6 +32,7 @@ class ShortUrlInputFilter extends InputFilter
|
||||
public const API_KEY = 'apiKey';
|
||||
public const TAGS = 'tags';
|
||||
public const TITLE = 'title';
|
||||
public const CRAWLABLE = 'crawlable';
|
||||
|
||||
private function __construct(array $data, bool $requireLongUrl)
|
||||
{
|
||||
@ -105,5 +106,7 @@ class ShortUrlInputFilter extends InputFilter
|
||||
$this->add($this->createTagsInput(self::TAGS, false));
|
||||
|
||||
$this->add($this->createInput(self::TITLE, false));
|
||||
|
||||
$this->add($this->createBooleanInput(self::CRAWLABLE, false));
|
||||
}
|
||||
}
|
||||
|
@ -436,4 +436,37 @@ class ShortUrlRepositoryTest extends DatabaseTestCase
|
||||
self::assertNull($this->repo->findOneByImportedUrl($buildImported('my-cool-slug', 'doma.in')));
|
||||
self::assertNull($this->repo->findOneByImportedUrl($buildImported('another-slug')));
|
||||
}
|
||||
|
||||
/**
 * Persists a mix of crawlable and non-crawlable short URLs, and verifies only the crawlable ones are listed.
 *
 * @test
 */
public function findCrawlableShortCodesReturnsExpectedResult(): void
{
    $crawlableFlags = [true, false, true, true, false];
    $shortUrls = [];

    foreach ($crawlableFlags as $index => $isCrawlable) {
        $shortUrl = ShortUrl::fromMeta(
            ShortUrlMeta::fromRawData(['crawlable' => $isCrawlable, 'longUrl' => 'foo.com']),
        );
        $this->getEntityManager()->persist($shortUrl);
        $shortUrls[$index] = $shortUrl;
    }
    $this->getEntityManager()->flush();

    // The repository returns a lazy iterable, so collect it before asserting
    $foundShortCodes = [];
    foreach ($this->repo->findCrawlableShortCodes() as $foundShortCode) {
        $foundShortCodes[] = $foundShortCode;
    }

    self::assertCount(3, $foundShortCodes);
    foreach ($shortUrls as $index => $shortUrl) {
        if ($crawlableFlags[$index]) {
            self::assertContains($shortUrl->getShortCode(), $foundShortCodes);
        } else {
            self::assertNotContains($shortUrl->getShortCode(), $foundShortCodes);
        }
    }
}
|
||||
}
|
||||
|
75
module/Core/test/Action/RobotsActionTest.php
Normal file
75
module/Core/test/Action/RobotsActionTest.php
Normal file
@ -0,0 +1,75 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace ShlinkioTest\Shlink\Core\Action;
|
||||
|
||||
use Laminas\Diactoros\ServerRequestFactory;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Prophecy\PhpUnit\ProphecyTrait;
|
||||
use Prophecy\Prophecy\ObjectProphecy;
|
||||
use Shlinkio\Shlink\Core\Action\RobotsAction;
|
||||
use Shlinkio\Shlink\Core\Crawling\CrawlingHelperInterface;
|
||||
|
||||
/**
 * Verifies the robots.txt response built by RobotsAction for different sets of crawlable short codes.
 */
class RobotsActionTest extends TestCase
{
    use ProphecyTrait;

    private RobotsAction $action;
    private ObjectProphecy $helper;

    protected function setUp(): void
    {
        $helperProphecy = $this->prophesize(CrawlingHelperInterface::class);

        $this->helper = $helperProphecy;
        $this->action = new RobotsAction($helperProphecy->reveal());
    }

    /**
     * @test
     * @dataProvider provideShortCodes
     */
    public function buildsRobotsLinesFromCrawlableShortCodes(array $shortCodes, string $expected): void
    {
        $listShortCodes = $this->helper->listCrawlableShortCodes()->willReturn($shortCodes);
        $request = ServerRequestFactory::fromGlobals();

        $response = $this->action->handle($request);
        $statusCode = $response->getStatusCode();
        // Casting the body to string is what consumes the generated robots lines
        $robots = (string) $response->getBody();
        $contentType = $response->getHeaderLine('Content-Type');

        self::assertEquals(200, $statusCode);
        self::assertEquals($expected, $robots);
        self::assertEquals('text/plain', $contentType);
        $listShortCodes->shouldHaveBeenCalledOnce();
    }

    /**
     * Provides pairs of [crawlable short codes, expected robots.txt contents].
     */
    public function provideShortCodes(): iterable
    {
        yield 'three short codes' => [['foo', 'bar', 'baz'], <<<ROBOTS
        # For more information about the robots.txt standard, see:
        # https://www.robotstxt.org/orig.html

        User-agent: *
        Allow: /foo
        Allow: /bar
        Allow: /baz
        Disallow: /
        ROBOTS];
        yield 'five short codes' => [['foo', 'bar', 'some', 'thing', 'baz'], <<<ROBOTS
        # For more information about the robots.txt standard, see:
        # https://www.robotstxt.org/orig.html

        User-agent: *
        Allow: /foo
        Allow: /bar
        Allow: /some
        Allow: /thing
        Allow: /baz
        Disallow: /
        ROBOTS];
        yield 'no short codes' => [[], <<<ROBOTS
        # For more information about the robots.txt standard, see:
        # https://www.robotstxt.org/orig.html

        User-agent: *
        Disallow: /
        ROBOTS];
    }
}
|
43
module/Core/test/Crawling/CrawlingHelperTest.php
Normal file
43
module/Core/test/Crawling/CrawlingHelperTest.php
Normal file
@ -0,0 +1,43 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace ShlinkioTest\Shlink\Core\Crawling;
|
||||
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Prophecy\PhpUnit\ProphecyTrait;
|
||||
use Prophecy\Prophecy\ObjectProphecy;
|
||||
use Shlinkio\Shlink\Core\Crawling\CrawlingHelper;
|
||||
use Shlinkio\Shlink\Core\Entity\ShortUrl;
|
||||
use Shlinkio\Shlink\Core\Repository\ShortUrlRepositoryInterface;
|
||||
|
||||
/**
 * Verifies CrawlingHelper resolves the ShortUrl repository and delegates the listing to it.
 */
class CrawlingHelperTest extends TestCase
{
    use ProphecyTrait;

    private CrawlingHelper $helper;
    private ObjectProphecy $em;

    protected function setUp(): void
    {
        $emProphecy = $this->prophesize(EntityManagerInterface::class);

        $this->em = $emProphecy;
        $this->helper = new CrawlingHelper($emProphecy->reveal());
    }

    /** @test */
    public function listCrawlableShortCodesDelegatesIntoRepository(): void
    {
        $repoProphecy = $this->prophesize(ShortUrlRepositoryInterface::class);
        $findShortCodesCall = $repoProphecy->findCrawlableShortCodes()->willReturn([]);
        $getRepoCall = $this->em->getRepository(ShortUrl::class)->willReturn($repoProphecy->reveal());

        $shortCodes = $this->helper->listCrawlableShortCodes();
        foreach ($shortCodes as $ignoredShortCode) {
            // The helper returns a generator, which does nothing until it gets iterated
        }

        $findShortCodesCall->shouldHaveBeenCalledOnce();
        $getRepoCall->shouldHaveBeenCalledOnce();
    }
}
|
@ -59,6 +59,7 @@ class MercureUpdatesGeneratorTest extends TestCase
|
||||
],
|
||||
'domain' => null,
|
||||
'title' => $title,
|
||||
'crawlable' => false,
|
||||
],
|
||||
'visit' => [
|
||||
'referer' => '',
|
||||
|
@ -26,6 +26,7 @@ class ListShortUrlsTest extends ApiTestCase
|
||||
],
|
||||
'domain' => null,
|
||||
'title' => 'My cool title',
|
||||
'crawlable' => true,
|
||||
];
|
||||
private const SHORT_URL_DOCS = [
|
||||
'shortCode' => 'ghi789',
|
||||
@ -41,6 +42,7 @@ class ListShortUrlsTest extends ApiTestCase
|
||||
],
|
||||
'domain' => null,
|
||||
'title' => null,
|
||||
'crawlable' => false,
|
||||
];
|
||||
private const SHORT_URL_CUSTOM_SLUG_AND_DOMAIN = [
|
||||
'shortCode' => 'custom-with-domain',
|
||||
@ -56,6 +58,7 @@ class ListShortUrlsTest extends ApiTestCase
|
||||
],
|
||||
'domain' => 'some-domain.com',
|
||||
'title' => null,
|
||||
'crawlable' => false,
|
||||
];
|
||||
private const SHORT_URL_META = [
|
||||
'shortCode' => 'def456',
|
||||
@ -73,6 +76,7 @@ class ListShortUrlsTest extends ApiTestCase
|
||||
],
|
||||
'domain' => null,
|
||||
'title' => null,
|
||||
'crawlable' => false,
|
||||
];
|
||||
private const SHORT_URL_CUSTOM_SLUG = [
|
||||
'shortCode' => 'custom',
|
||||
@ -88,6 +92,7 @@ class ListShortUrlsTest extends ApiTestCase
|
||||
],
|
||||
'domain' => null,
|
||||
'title' => null,
|
||||
'crawlable' => false,
|
||||
];
|
||||
private const SHORT_URL_CUSTOM_DOMAIN = [
|
||||
'shortCode' => 'ghi789',
|
||||
@ -105,6 +110,7 @@ class ListShortUrlsTest extends ApiTestCase
|
||||
],
|
||||
'domain' => 'example.com',
|
||||
'title' => null,
|
||||
'crawlable' => false,
|
||||
];
|
||||
|
||||
/**
|
||||
|
@ -35,6 +35,7 @@ class ShortUrlsFixture extends AbstractFixture implements DependentFixtureInterf
|
||||
'longUrl' => 'https://shlink.io',
|
||||
'tags' => ['foo'],
|
||||
'title' => 'My cool title',
|
||||
'crawlable' => true,
|
||||
]), $relationResolver),
|
||||
'2018-05-01',
|
||||
);
|
||||
|
@ -3,4 +3,3 @@ parameters:
|
||||
checkGenericClassInNonGenericObjectType: false
|
||||
ignoreErrors:
|
||||
- '#If condition is always false#'
|
||||
- '#setOrderBy\(\) expects array\<int, string\>, array\<string, string\> given#'
|
||||
|
@ -1,5 +0,0 @@
|
||||
# For more information about the robots.txt standard, see:
|
||||
# http://www.robotstxt.org/orig.html
|
||||
|
||||
User-agent: *
|
||||
Disallow: /
|
Loading…
Reference in New Issue
Block a user