Changed the logic used when resolving the title of a URL, to ensure that a download is attempted only for HTML content, and only until the title tag has been parsed

This commit is contained in:
Alejandro Celaya
2022-05-01 11:48:20 +02:00
parent eea76999b2
commit 18f656fed2
4 changed files with 72 additions and 6 deletions

View File

@@ -132,11 +132,46 @@ class UrlValidatorTest extends TestCase
$request->shouldHaveBeenCalledOnce();
}
/** @test */
public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsEnabledAndReturnedContentTypeIsInvalid(): void
{
    // Arrange: title auto-resolution is enabled and the mocked GET answers with a non-HTML content type.
    $this->options->autoResolveTitles = true;
    $binaryResponse = new Response('php://memory', 200, ['Content-Type' => 'application/octet-stream']);
    $getRequest = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera());
    $getRequest->willReturn($binaryResponse);

    // Act
    $resolvedTitle = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true);

    // Assert: no title is produced, yet the HTTP request was still issued exactly once.
    self::assertNull($resolvedTitle);
    $getRequest->shouldHaveBeenCalledOnce();
}
/** @test */
public function validateUrlWithTitleReturnsNullWhenAutoResolutionIsEnabledAndBodyDoesNotContainTitle(): void
{
    // Arrange: title auto-resolution is enabled and the mocked GET answers with HTML that has no <title> tag.
    $this->options->autoResolveTitles = true;
    $htmlWithoutTitle = $this->createStreamWithContent('<body>No title</body>');
    $titlelessResponse = new Response($htmlWithoutTitle, 200, ['Content-Type' => 'text/html']);
    $getRequest = $this->httpClient->request(RequestMethodInterface::METHOD_GET, Argument::cetera());
    $getRequest->willReturn($titlelessResponse);

    // Act
    $resolvedTitle = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true);

    // Assert: no title is produced, yet the HTTP request was still issued exactly once.
    self::assertNull($resolvedTitle);
    $getRequest->shouldHaveBeenCalledOnce();
}
/**
 * Builds a 200 response whose body is a rewound stream containing a <title> tag.
 *
 * The Content-Type header uses mixed case and carries a charset parameter
 * (presumably to exercise lenient content-type matching in the validator - confirm against its implementation).
 */
private function respWithTitle(): Response
{
    // The stream is created through the shared helper, so it is written and rewound in one place.
    $body = $this->createStreamWithContent('<title data-foo="bar"> Resolved title</title>');

    return new Response($body, 200, ['Content-Type' => 'TEXT/html; charset=utf-8']);
}
/**
 * Creates a php://temp backed stream holding the given content, positioned back at its start.
 *
 * @param string $content Text to write into the stream.
 * @return Stream The rewound stream.
 */
private function createStreamWithContent(string $content): Stream
{
    $stream = new Stream('php://temp', 'wr');
    $stream->write($content);
    // Writing leaves the pointer after the written bytes; rewind so reads start from the beginning.
    $stream->rewind();

    return $stream;
}
}