mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-04 16:49:35 +00:00
[html] convertLazyLoading: Add parseSrcset()
Add srcset parser closer to the specifications
This commit is contained in:
parent
3055e69c23
commit
4f8eb2fd37
69
lib/html.php
69
lib/html.php
@ -226,6 +226,61 @@ function defaultLinkTo($dom, $url)
|
||||
return $dom;
|
||||
}
|
||||
|
||||
/**
 * Parse a srcset HTML attribute value and return descriptor => URL mappings
 *
 * A srcset contains a list of image URLs, each followed by a descriptor giving
 * either a width (e.g. 1024w), a height (e.g. 600h) or a pixel density (e.g. 2x or 1.5x).
 * The web browser is expected to pick the most appropriate image depending on
 * screen size and/or pixel density.
 *
 * This function takes a srcset string such as the following:
 *   header640.png 640w, header960.png 960w, header1024.png 1024w
 *
 * Returns an array such as the following:
 * [
 *   '640w' => 'header640.png',
 *   '960w' => 'header960.png',
 *   '1024w' => 'header1024.png'
 * ]
 *
 * Entries without a descriptor are ignored. When the same descriptor appears
 * several times, the last URL wins. HTML entities in URLs are decoded.
 *
 * @param string $srcset Content of srcset html attribute
 * @param bool $return_largest_url Instead of returning an array, return URL for the largest entry
 * @return array|string|null Content of srcset attribute as { descriptor => url } array,
 * or the URL of the entry with the largest numeric descriptor if requested
 * (null when no entry could be parsed)
 */
function parseSrcset($srcset, $return_largest_url = false)
{
    // The srcset format is more tricky to parse than it seems:
    // URLs may contain commas, and space after comma is not mandatory, so the following is valid:
    // image.png?resize=640,640 640w,image.png?resize=960,960 960w,image.png?resize=1024,1024 1024w
    // Since splitting by space or comma will not work, there is a precise algorithm to parse srcset attribute:
    // https://html.spec.whatwg.org/multipage/images.html#parse-a-srcset-attribute
    // To summarize, each srcset entry has the following format:
    // 1. Leading spaces and comma. Zero or more spaces, zero or at most one comma
    // 2. Any amount of characters up to the next whitespace (space, tab, newline...): This is the URL
    // 3. A nonnegative number (integer or decimal, e.g. 1.5) followed by lowercase w, x or h: This is the image size
    // We parse the srcset entries using a regex to mimic the above parser/tokenizer behavior.
    // The trailing lookahead requires the descriptor to be followed by whitespace, a comma or the end
    // of the string, so that a URL starting with digits (e.g. "2x.png") is not mistaken for a descriptor.
    $pattern = '/\s*,?\s*(\S+)\s+(\d*\.?\d+[wxh])(?=[\s,]|$)/';

    $matches = [];
    if (!preg_match_all($pattern, (string)$srcset, $matches, PREG_SET_ORDER)) {
        // No entry found, or regex engine failure: behave as an empty srcset
        $matches = [];
    }

    $entries = [];
    foreach ($matches as $match) {
        // $match[1] is the URL, $match[2] is the descriptor (e.g. "640w", "1.5x")
        $entries[$match[2]] = html_entity_decode($match[1]);
    }

    if (!$return_largest_url) {
        return $entries;
    }

    $largest_image_url = null;
    $largest_image_size = -1.0;
    foreach ($entries as $size => $url) {
        // Strip the trailing unit letter and compare numerically.
        // Units are not distinguished: only the numeric part is compared.
        $size_value = floatval(substr((string)$size, 0, -1));
        if ($size_value > $largest_image_size) {
            $largest_image_size = $size_value;
            $largest_image_url = $url;
        }
    }
    return $largest_image_url;
}
|
||||
|
||||
/**
|
||||
* Convert lazy-loading images and frames (video embeds) into static elements
|
||||
*
|
||||
@ -244,28 +299,18 @@ function convertLazyLoading($dom)
|
||||
$dom = str_get_html($dom);
|
||||
}
|
||||
|
||||
// Retrieve image URL from srcset attribute
|
||||
// https://developer.mozilla.org/en-US/docs/Web/API/HTMLImageElement/srcset
|
||||
// Example: convert "header640.png 640w, header960.png 960w, header1024.png 1024w" to "header1024.png"
|
||||
$srcset_to_src = function ($srcset) {
|
||||
$sources = explode(',', $srcset);
|
||||
$last_entry = trim($sources[array_key_last($sources)]);
|
||||
$url = explode(' ', $last_entry)[0];
|
||||
return $url;
|
||||
};
|
||||
|
||||
// Process standalone images, embeds and picture sources
|
||||
foreach ($dom->find('img, iframe, source') as $img) {
|
||||
if (!empty($img->getAttribute('data-src'))) {
|
||||
$img->src = $img->getAttribute('data-src');
|
||||
} elseif (!empty($img->getAttribute('data-srcset'))) {
|
||||
$img->src = $srcset_to_src($img->getAttribute('data-srcset'));
|
||||
$img->src = parseSrcset($img->getAttribute('data-srcset'));
|
||||
} elseif (!empty($img->getAttribute('data-lazy-src'))) {
|
||||
$img->src = $img->getAttribute('data-lazy-src');
|
||||
} elseif (!empty($img->getAttribute('data-orig-file'))) {
|
||||
$img->src = $img->getAttribute('data-orig-file');
|
||||
} elseif (!empty($img->getAttribute('srcset'))) {
|
||||
$img->src = $srcset_to_src($img->getAttribute('srcset'));
|
||||
$img->src = parseSrcset($img->getAttribute('srcset'));
|
||||
} else {
|
||||
continue; // Proceed to next element without removing attributes
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user