From 7280b48cdcc19a2558200d20929ab0126d851247 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sat, 22 May 2021 07:15:34 +0200 Subject: [PATCH] Created action to dynamically build the robots.txt --- module/Core/config/dependencies.config.php | 4 + module/Core/config/routes.config.php | 8 ++ module/Core/src/Action/RobotsAction.php | 49 ++++++++++++ module/Core/src/Crawling/CrawlingHelper.php | 13 ++++ .../src/Crawling/CrawlingHelperInterface.php | 13 ++++ module/Core/test/Action/RobotsActionTest.php | 75 +++++++++++++++++++ public/robots.txt | 5 -- 7 files changed, 162 insertions(+), 5 deletions(-) create mode 100644 module/Core/src/Action/RobotsAction.php create mode 100644 module/Core/src/Crawling/CrawlingHelper.php create mode 100644 module/Core/src/Crawling/CrawlingHelperInterface.php create mode 100644 module/Core/test/Action/RobotsActionTest.php delete mode 100644 public/robots.txt diff --git a/module/Core/config/dependencies.config.php b/module/Core/config/dependencies.config.php index b84c74a4..4eb3d60d 100644 --- a/module/Core/config/dependencies.config.php +++ b/module/Core/config/dependencies.config.php @@ -48,6 +48,7 @@ return [ Action\RedirectAction::class => ConfigAbstractFactory::class, Action\PixelAction::class => ConfigAbstractFactory::class, Action\QrCodeAction::class => ConfigAbstractFactory::class, + Action\RobotsAction::class => ConfigAbstractFactory::class, ShortUrl\Resolver\PersistenceShortUrlRelationResolver::class => ConfigAbstractFactory::class, ShortUrl\Helper\ShortUrlStringifier::class => ConfigAbstractFactory::class, @@ -57,6 +58,8 @@ return [ Mercure\MercureUpdatesGenerator::class => ConfigAbstractFactory::class, Importer\ImportedLinksProcessor::class => ConfigAbstractFactory::class, + + Crawling\CrawlingHelper::class => InvokableFactory::class, ], 'aliases' => [ @@ -129,6 +132,7 @@ return [ ShortUrl\Helper\ShortUrlStringifier::class, 'Logger_Shlink', ], + Action\RobotsAction::class => [Crawling\CrawlingHelper::class], ShortUrl\Resolver\PersistenceShortUrlRelationResolver::class => ['em'], ShortUrl\Helper\ShortUrlStringifier::class => ['config.url_shortener.domain', 'config.router.base_path'], diff --git a/module/Core/config/routes.config.php b/module/Core/config/routes.config.php index a95e8e96..c3f4b66a 100644 --- a/module/Core/config/routes.config.php +++ b/module/Core/config/routes.config.php @@ -9,6 +9,14 @@ use Shlinkio\Shlink\Core\Action; return [ 'routes' => [ + [ + 'name' => Action\RobotsAction::class, + 'path' => '/robots.txt', + 'middleware' => [ + Action\RobotsAction::class, + ], + 'allowed_methods' => [RequestMethod::METHOD_GET], + ], [ 'name' => Action\RedirectAction::class, 'path' => '/{shortCode}', diff --git a/module/Core/src/Action/RobotsAction.php b/module/Core/src/Action/RobotsAction.php new file mode 100644 index 00000000..31539b92 --- /dev/null +++ b/module/Core/src/Action/RobotsAction.php @@ -0,0 +1,49 @@ +crawlingHelper = $crawlingHelper; + } + + public function handle(ServerRequestInterface $request): ResponseInterface + { + return new Response(self::STATUS_OK, ['Content-type' => 'text/plain'], $this->buildRobots()); + } + + private function buildRobots(): iterable + { + yield <<crawlingHelper->listCrawlableShortCodes(); + foreach ($shortCodes as $shortCode) { + yield sprintf('Allow: /%s%s', $shortCode, PHP_EOL); + } + + yield 'Disallow: /'; + } +} diff --git a/module/Core/src/Crawling/CrawlingHelper.php b/module/Core/src/Crawling/CrawlingHelper.php new file mode 100644 index 00000000..ef54761b --- /dev/null +++ b/module/Core/src/Crawling/CrawlingHelper.php @@ -0,0 +1,13 @@ +helper = $this->prophesize(CrawlingHelperInterface::class); + $this->action = new RobotsAction($this->helper->reveal()); + } + + /** + * @test + * @dataProvider provideShortCodes + */ + public function buildsRobotsLinesFromCrawlableShortCodes(array $shortCodes, string $expected): void + { + $getShortCodes = $this->helper->listCrawlableShortCodes()->willReturn($shortCodes); + + $response = $this->action->handle(ServerRequestFactory::fromGlobals()); + + self::assertEquals(200, $response->getStatusCode()); + self::assertEquals($expected, $response->getBody()->__toString()); + self::assertEquals('text/plain', $response->getHeaderLine('Content-Type')); + $getShortCodes->shouldHaveBeenCalledOnce(); + } + + public function provideShortCodes(): iterable + { + yield 'three short codes' => [['foo', 'bar', 'baz'], << [['foo', 'bar', 'some', 'thing', 'baz'], << [[], <<