Use hash-based directory names in the remote image download cache (#12274)

Co-authored-by: Adam Turner <9087854+aa-turner@users.noreply.github.com>
This commit is contained in:
James Addison
2024-04-16 23:52:06 +01:00
committed by GitHub
parent 9078cf21df
commit 4f4d979b41

View File

@@ -25,10 +25,6 @@ logger = logging.getLogger(__name__)
MAX_FILENAME_LEN = 32
CRITICAL_PATH_CHAR_RE = re.compile('[:;<>|*" ]')
# Replace reserved Windows or Unix path characters with '/'.
_URI_TO_PATH = {
ord(k): '/' for k in ('"', '&', '*', '/', ':', '<', '>', '?', '\\', '|')
}
class BaseImageConverter(SphinxTransform):
@@ -68,11 +64,9 @@ class ImageDownloader(BaseImageConverter):
basename = sha1(filename.encode(), usedforsecurity=False).hexdigest() + ext
basename = CRITICAL_PATH_CHAR_RE.sub("_", basename)
dirname = node['uri'].replace('://', '/').translate(_URI_TO_PATH)
if len(dirname) > MAX_FILENAME_LEN:
dirname = sha1(dirname.encode(), usedforsecurity=False).hexdigest()
ensuredir(os.path.join(self.imagedir, dirname))
path = os.path.join(self.imagedir, dirname, basename)
uri_hash = sha1(node['uri'].encode(), usedforsecurity=False).hexdigest()
ensuredir(os.path.join(self.imagedir, uri_hash))
path = os.path.join(self.imagedir, uri_hash, basename)
headers = {}
if os.path.exists(path):
@@ -104,7 +98,7 @@ class ImageDownloader(BaseImageConverter):
if mimetype != '*' and os.path.splitext(basename)[1] == '':
# append a suffix if URI does not contain suffix
ext = get_image_extension(mimetype)
newpath = os.path.join(self.imagedir, dirname, basename + ext)
newpath = os.path.join(self.imagedir, uri_hash, basename + ext)
os.replace(path, newpath)
self.app.env.original_image_uri.pop(path)
self.app.env.original_image_uri[newpath] = node['uri']