From bd495adf22a05c37e70efc48e00d55d25afc215d Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sun, 1 May 2022 08:40:20 +0200 Subject: [PATCH 1/3] Set SemVer versions for some shlink package versions --- composer.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index 2446aace..1179e136 100644 --- a/composer.json +++ b/composer.json @@ -50,8 +50,8 @@ "shlinkio/shlink-common": "^4.4", "shlinkio/shlink-config": "^1.6", "shlinkio/shlink-event-dispatcher": "^2.3", - "shlinkio/shlink-importer": "dev-main#af0e05e as 3.0", - "shlinkio/shlink-installer": "dev-develop#fbbc8f5 as 7.1", + "shlinkio/shlink-importer": "^3.0", + "shlinkio/shlink-installer": "^7.1", "shlinkio/shlink-ip-geolocation": "^2.2", "symfony/console": "^6.0", "symfony/filesystem": "^6.0", From eea76999b2e89b21a9d69b2300cdd61af63295af Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sun, 1 May 2022 09:51:15 +0200 Subject: [PATCH 2/3] Ensured URL validation is doe via HEAD method when the title does not need to be resolved --- module/Core/src/Util/UrlValidator.php | 31 ++++++++++++++++------ module/Core/test/Util/UrlValidatorTest.php | 10 ++++--- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/module/Core/src/Util/UrlValidator.php b/module/Core/src/Util/UrlValidator.php index da061c0d..006ca372 100644 --- a/module/Core/src/Util/UrlValidator.php +++ b/module/Core/src/Util/UrlValidator.php @@ -11,6 +11,7 @@ use GuzzleHttp\RequestOptions; use Psr\Http\Message\ResponseInterface; use Shlinkio\Shlink\Core\Exception\InvalidUrlException; use Shlinkio\Shlink\Core\Options\UrlShortenerOptions; +use Throwable; use function preg_match; use function trim; @@ -36,7 +37,7 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface return; } - $this->validateUrlAndGetResponse($url, true); + $this->validateUrlAndGetResponse($url); } public function validateUrlWithTitle(string $url, bool $doValidate): ?string @@ -45,8 +46,13 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface return null; } - $response = $this->validateUrlAndGetResponse($url, $doValidate); - if ($response === null || ! $this->options->autoResolveTitles()) { + if (! $this->options->autoResolveTitles()) { + $this->validateUrlAndGetResponse($url, self::METHOD_HEAD); + return null; + } + + $response = $doValidate ? $this->validateUrlAndGetResponse($url) : $this->getResponse($url); + if ($response === null) { return null; } @@ -55,20 +61,29 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface return isset($matches[1]) ? trim($matches[1]) : null; } - private function validateUrlAndGetResponse(string $url, bool $throwOnError): ?ResponseInterface + /** + * @param self::METHOD_GET|self::METHOD_HEAD $method + * @throws InvalidUrlException + */ + private function validateUrlAndGetResponse(string $url, string $method = self::METHOD_GET): ResponseInterface { try { - return $this->httpClient->request(self::METHOD_GET, $url, [ + return $this->httpClient->request($method, $url, [ RequestOptions::ALLOW_REDIRECTS => ['max' => self::MAX_REDIRECTS], RequestOptions::IDN_CONVERSION => true, // Making the request with a browser's user agent makes the validation closer to a real user RequestOptions::HEADERS => ['User-Agent' => self::CHROME_USER_AGENT], ]); } catch (GuzzleException $e) { - if ($throwOnError) { - throw InvalidUrlException::fromUrl($url, $e); - } + throw InvalidUrlException::fromUrl($url, $e); + } + } + private function getResponse(string $url): ?ResponseInterface + { + try { + return $this->validateUrlAndGetResponse($url); + } catch (Throwable) { return null; } } diff --git a/module/Core/test/Util/UrlValidatorTest.php b/module/Core/test/Util/UrlValidatorTest.php index 55cb4d86..7a943ea0 100644 --- a/module/Core/test/Util/UrlValidatorTest.php +++ b/module/Core/test/Util/UrlValidatorTest.php @@ -107,7 +107,9 @@ class UrlValidatorTest extends TestCase /** @test */ public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsDisabledAndValidationIsEnabled(): void { - $request = $this->httpClient->request(Argument::cetera())->willReturn($this->respWithTitle()); + $request = $this->httpClient->request(RequestMethodInterface::METHOD_HEAD, Argument::cetera())->willReturn( + $this->respWithTitle(), + ); $this->options->autoResolveTitles = false; $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); @@ -119,7 +121,9 @@ class UrlValidatorTest extends TestCase /** @test */ public function validateUrlWithTitleResolvesTitleWhenAutoResolutionIsEnabled(): void { - $request = $this->httpClient->request(Argument::cetera())->willReturn($this->respWithTitle()); + $request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn( + $this->respWithTitle(), + ); $this->options->autoResolveTitles = true; $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); @@ -131,7 +135,7 @@ class UrlValidatorTest extends TestCase private function respWithTitle(): Response { $body = new Stream('php://temp', 'wr'); - $body->write(' Resolved title'); + $body->write(' Resolved title'); return new Response($body); } From 18f656fed236007e0a2d3db1fed76d4cc7275594 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sun, 1 May 2022 11:48:20 +0200 Subject: [PATCH 3/3] Changed logic when resolving the title of a URL, to ensure only html content is tried to be downloaded, and only until the title tag has been parsed --- CHANGELOG.md | 17 +++++++++ composer.json | 2 +- module/Core/src/Util/UrlValidator.php | 18 ++++++++-- module/Core/test/Util/UrlValidatorTest.php | 41 ++++++++++++++++++++-- 4 files changed, 72 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 882bd2ed..fdaa75e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com), and this project adheres to [Semantic Versioning](https://semver.org). +## [Unreleased] +### Added +* *Nothing* + +### Changed +* *Nothing* + +### Deprecated +* *Nothing* + +### Removed +* *Nothing* + +### Fixed +* [#1439](https://github.com/shlinkio/shlink/issues/1439) Fixed crash when trying to auto-resolve titles for URLs which serve large binary files. + + ## [3.1.0] - 2022-04-23 ### Added * [#1294](https://github.com/shlinkio/shlink/issues/1294) Allowed to provide a specific domain when importing URLs from YOURLS. diff --git a/composer.json b/composer.json index 1179e136..d310a103 100644 --- a/composer.json +++ b/composer.json @@ -139,7 +139,7 @@ "test:api": "bin/test/run-api-tests.sh", "test:api:ci": "GENERATE_COVERAGE=yes composer test:api", "infect:ci:base": "infection --threads=4 --log-verbosity=default --only-covered --only-covering-test-cases --skip-initial-tests", - "infect:ci:unit": "@infect:ci:base --coverage=build/coverage-unit --min-msi=85", + "infect:ci:unit": "@infect:ci:base --coverage=build/coverage-unit --min-msi=84", "infect:ci:db": "@infect:ci:base --coverage=build/coverage-db --min-msi=95 --configuration=infection-db.json", "infect:ci:api": "@infect:ci:base --coverage=build/coverage-api --min-msi=80 --configuration=infection-api.json", "infect:ci": "@parallel infect:ci:unit infect:ci:db infect:ci:api", diff --git a/module/Core/src/Util/UrlValidator.php b/module/Core/src/Util/UrlValidator.php index 006ca372..6bd8d76f 100644 --- a/module/Core/src/Util/UrlValidator.php +++ b/module/Core/src/Util/UrlValidator.php @@ -14,6 +14,9 @@ use Shlinkio\Shlink\Core\Options\UrlShortenerOptions; use Throwable; use function preg_match; +use function str_contains; +use function str_starts_with; +use function strtolower; use function trim; use const Shlinkio\Shlink\TITLE_TAG_VALUE; @@ -56,8 +59,18 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface return null; } - $body = $response->getBody()->__toString(); - preg_match(TITLE_TAG_VALUE, $body, $matches); + $contentType = strtolower($response->getHeaderLine('Content-Type')); + if (! str_starts_with($contentType, 'text/html')) { + return null; + } + + $collectedBody = ''; + $body = $response->getBody(); + // With streaming enabled, we can walk the body until the tag is found, and then stop + while (! str_contains($collectedBody, '') && ! $body->eof()) { + $collectedBody .= $body->read(1024); + } + preg_match(TITLE_TAG_VALUE, $collectedBody, $matches); return isset($matches[1]) ? trim($matches[1]) : null; } @@ -73,6 +86,7 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface RequestOptions::IDN_CONVERSION => true, // Making the request with a browser's user agent makes the validation closer to a real user RequestOptions::HEADERS => ['User-Agent' => self::CHROME_USER_AGENT], + RequestOptions::STREAM => true, // This ensures large files are not fully downloaded if not needed ]); } catch (GuzzleException $e) { throw InvalidUrlException::fromUrl($url, $e); diff --git a/module/Core/test/Util/UrlValidatorTest.php b/module/Core/test/Util/UrlValidatorTest.php index 7a943ea0..7e1f9a1b 100644 --- a/module/Core/test/Util/UrlValidatorTest.php +++ b/module/Core/test/Util/UrlValidatorTest.php @@ -132,11 +132,46 @@ class UrlValidatorTest extends TestCase $request->shouldHaveBeenCalledOnce(); } + /** @test */ + public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsEnabledAndReturnedContentTypeIsInvalid(): void + { + $request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn( + new Response('php://memory', 200, ['Content-Type' => 'application/octet-stream']), + ); + $this->options->autoResolveTitles = true; + + $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); + + self::assertNull($result); + $request->shouldHaveBeenCalledOnce(); + } + + /** @test */ + public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsEnabledAndBodyDoesNotContainTitle(): void + { + $request = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera())->willReturn( + new Response($this->createStreamWithContent('No title'), 200, ['Content-Type' => 'text/html']), + ); + $this->options->autoResolveTitles = true; + + $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); + + self::assertNull($result); + $request->shouldHaveBeenCalledOnce(); + } + private function respWithTitle(): Response { - $body = new Stream('php://temp', 'wr'); - $body->write(' Resolved title'); + $body = $this->createStreamWithContent(' Resolved title'); + return new Response($body, 200, ['Content-Type' => 'TEXT/html; charset=utf-8']); + } - return new Response($body); + private function createStreamWithContent(string $content): Stream + { + $body = new Stream('php://temp', 'wr'); + $body->write($content); + $body->rewind(); + + return $body; } }