Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions src/Modules/Cache/PageCache.php
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,20 @@ class PageCache implements ModuleInterface {
*/
private $url_normalizer = null;

/**
* Maximum number of child sitemap documents to fetch per seed run.
*
* @var int
*/
private $preload_sitemap_child_limit = 20;

/**
* Maximum number of unique sitemap URLs to collect per seed run.
*
* @var int
*/
private $preload_sitemap_url_cap = 500;

/**
* Determine whether this module should be loaded.
*
Expand Down Expand Up @@ -656,9 +670,17 @@ private function fetch_urls_from_sitemap() {
return [];
}

$urls = [];
$urls = [];
$url_cap = max( 1, (int) $this->preload_sitemap_url_cap );
$child_sitemap_limit = max( 1, (int) $this->preload_sitemap_child_limit );
$child_sitemaps_fetched = 0;

if ( isset( $index->sitemap ) ) {
foreach ( $index->sitemap as $sitemap ) {
if ( count( $urls ) >= $url_cap || $child_sitemaps_fetched >= $child_sitemap_limit ) {
break;
}

if ( empty( $sitemap->loc ) ) {
continue;
}
Expand All @@ -668,6 +690,7 @@ private function fetch_urls_from_sitemap() {
continue;
}

++$child_sitemaps_fetched;
$child_response = wp_remote_get( $child_sitemap_url, [ 'timeout' => 8 ] );
if ( is_wp_error( $child_response ) ) {
continue;
Expand All @@ -684,17 +707,21 @@ private function fetch_urls_from_sitemap() {
}

foreach ( $child->url as $item ) {
if ( count( $urls ) >= $url_cap ) {
break;
}

if ( ! empty( $item->loc ) ) {
$item_url = esc_url_raw( (string) $item->loc );
if ( $this->is_site_url( $item_url ) ) {
$urls[] = $item_url;
$urls[ $item_url ] = true;
}
}
}
}
}

return array_slice( array_values( array_unique( array_filter( $urls ) ) ), 0, 500 );
return array_keys( $urls );
}

/**
Expand Down
81 changes: 81 additions & 0 deletions tests/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,22 @@ function admin_url( $path = '' ) {
}
}

if ( ! function_exists( 'home_url' ) ) {
	/**
	 * Test stub for home_url().
	 *
	 * The base URL comes from $GLOBALS['perform_test_home_url'] when that
	 * global is set, and falls back to 'https://example.com' otherwise.
	 *
	 * @param string $path Optional path to append. An absolute http(s) URL is returned unchanged.
	 * @return string The base URL, the untouched absolute URL, or base and path joined by one slash.
	 */
	function home_url( $path = '' ) {
		$site_root = 'https://example.com';
		if ( isset( $GLOBALS['perform_test_home_url'] ) ) {
			$site_root = (string) $GLOBALS['perform_test_home_url'];
		}

		// Only the exact empty string short-circuits; the base is returned verbatim.
		if ( '' === $path ) {
			return $site_root;
		}

		$requested = (string) $path;

		// Already-absolute URLs pass straight through.
		foreach ( [ 'http://', 'https://' ] as $scheme ) {
			if ( 0 === strncmp( $requested, $scheme, strlen( $scheme ) ) ) {
				return $requested;
			}
		}

		// Normalise to exactly one slash between base and path.
		return rtrim( $site_root, '/' ) . '/' . ltrim( $requested, '/' );
	}
}

if ( ! function_exists( 'esc_url' ) ) {
function esc_url( $url ) {
return (string) $url;
Expand Down Expand Up @@ -223,6 +239,71 @@ function esc_url_raw( $url ) {
}
}

if ( ! class_exists( 'WP_Error' ) ) {
	/**
	 * Minimal test stand-in for WP_Error: it only stores a code and a message.
	 */
	class WP_Error {
		/**
		 * Machine-readable error code, always stored as a string.
		 *
		 * @var string
		 */
		public $code = '';

		/**
		 * Human-readable error message, always stored as a string.
		 *
		 * @var string
		 */
		public $message = '';

		/**
		 * Store the given code and message after converting both to strings.
		 *
		 * @param string $code    Error code.
		 * @param string $message Error message.
		 */
		public function __construct( $code = '', $message = '' ) {
			$this->code    = strval( $code );
			$this->message = strval( $message );
		}
	}
}

if ( ! function_exists( 'is_wp_error' ) ) {
	/**
	 * Test stub: report whether a value is a WP_Error instance.
	 *
	 * @param mixed $thing Value to inspect.
	 * @return bool True only when $thing is an object of class WP_Error (or a subclass).
	 */
	function is_wp_error( $thing ) {
		if ( ! is_object( $thing ) ) {
			return false;
		}

		return is_a( $thing, 'WP_Error' );
	}
}

if ( ! function_exists( 'wp_remote_get' ) ) {
	/**
	 * Test stub for wp_remote_get(): records the call and replays a canned response.
	 *
	 * Every invocation is appended to $GLOBALS['perform_test_remote_get_calls'].
	 * The response is looked up by exact URL in $GLOBALS['perform_test_remote_get_map'].
	 *
	 * @param string $url  Requested URL; used as the lookup key.
	 * @param array  $args Request arguments; recorded but otherwise unused.
	 * @return mixed The registered response, or a WP_Error with code 'missing_mock' when none is registered.
	 */
	function wp_remote_get( $url, $args = [] ) {
		// Start a fresh call log if it is missing or has been replaced by a non-array.
		$log = isset( $GLOBALS['perform_test_remote_get_calls'] ) ? $GLOBALS['perform_test_remote_get_calls'] : null;
		if ( ! is_array( $log ) ) {
			$GLOBALS['perform_test_remote_get_calls'] = [];
		}

		$GLOBALS['perform_test_remote_get_calls'][] = compact( 'url', 'args' );

		// A missing or non-array map is treated as "nothing registered".
		$mocked = [];
		if ( isset( $GLOBALS['perform_test_remote_get_map'] ) && is_array( $GLOBALS['perform_test_remote_get_map'] ) ) {
			$mocked = $GLOBALS['perform_test_remote_get_map'];
		}

		if ( ! array_key_exists( $url, $mocked ) ) {
			return new WP_Error( 'missing_mock', 'No mocked response registered.' );
		}

		return $mocked[ $url ];
	}
}

if ( ! function_exists( 'wp_remote_retrieve_body' ) ) {
	/**
	 * Test stub: pull the body out of a mocked HTTP response.
	 *
	 * @param mixed $response Response value, normally an array with a 'body' entry.
	 * @return string The body cast to string, or '' when $response is not an array or has no non-null 'body'.
	 */
	function wp_remote_retrieve_body( $response ) {
		$has_body = is_array( $response ) && isset( $response['body'] );

		return $has_body ? (string) $response['body'] : '';
	}
}

if ( ! function_exists( 'sanitize_text_field' ) ) {
function sanitize_text_field( $value ) {
return is_scalar( $value ) ? trim( (string) $value ) : $value;
Expand Down
143 changes: 141 additions & 2 deletions tests/unit-tests/tests-page-cache.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,27 @@

final class Tests_Page_Cache extends TestCase {
/**
 * Reset the global fixtures used by these tests before each test runs.
 *
 * The stale duplicate of the transients array opener has been removed; it
 * left an unclosed array literal, which is a parse error.
 */
protected function setUp(): void {
	// Token the regeneration test sends in the X-Perform-Cache-Regen header.
	$GLOBALS['perform_test_transients']      = [
		'perform_cache_lock_test' => 'expected-token',
	];
	// NOTE(review): presumably read by option/filter stubs in tests/bootstrap.php — confirm.
	$GLOBALS['perform_test_options']         = [];
	$GLOBALS['perform_test_filters']         = [];
	// Base URL returned by the home_url() stub.
	$GLOBALS['perform_test_home_url']        = 'https://example.com';
	// Canned responses and call log used by the wp_remote_get() stub.
	$GLOBALS['perform_test_remote_get_map']  = [];
	$GLOBALS['perform_test_remote_get_calls'] = [];
	unset( $_SERVER['HTTP_X_PERFORM_CACHE_REGEN'] );
}

/**
 * Remove every global fixture installed by setUp() so no state leaks into later tests.
 *
 * The earlier two-argument unset() was redundant: both of its targets are
 * already listed in the single unset() below.
 */
protected function tearDown(): void {
	unset(
		$GLOBALS['perform_test_filters'],
		$GLOBALS['perform_test_home_url'],
		$GLOBALS['perform_test_options'],
		$GLOBALS['perform_test_remote_get_calls'],
		$GLOBALS['perform_test_remote_get_map'],
		$GLOBALS['perform_test_transients'],
		$_SERVER['HTTP_X_PERFORM_CACHE_REGEN']
	);
}

public function test_internal_regeneration_requires_matching_lock_token() {
Expand All @@ -31,4 +44,130 @@ public function test_internal_regeneration_requires_matching_lock_token() {
$_SERVER['HTTP_X_PERFORM_CACHE_REGEN'] = 'expected-token';
$this->assertTrue( $is_internal_regen_request->invoke( $page_cache ) );
}

/**
 * With a child-sitemap limit of 3 and an index listing 5 children, only the
 * first three children are requested and only their URLs are returned.
 */
public function test_fetch_urls_from_sitemap_limits_child_sitemap_requests_per_run() {
	$cache = new PageCache();

	$this->set_private_property( $cache, 'preload_sitemap_child_limit', 3 );
	$this->set_private_property( $cache, 'preload_sitemap_url_cap', 20 );

	// One index document plus five child sitemaps holding two pages each.
	$mocked_responses = [];
	$mocked_responses['https://example.com/wp-sitemap.xml'] = [ 'body' => $this->build_sitemap_index_xml( 5 ) ];
	$mocked_responses['https://example.com/sitemap-1.xml']  = [ 'body' => $this->build_urlset_xml( '/page-1', '/page-2' ) ];
	$mocked_responses['https://example.com/sitemap-2.xml']  = [ 'body' => $this->build_urlset_xml( '/page-3', '/page-4' ) ];
	$mocked_responses['https://example.com/sitemap-3.xml']  = [ 'body' => $this->build_urlset_xml( '/page-5', '/page-6' ) ];
	$mocked_responses['https://example.com/sitemap-4.xml']  = [ 'body' => $this->build_urlset_xml( '/page-7', '/page-8' ) ];
	$mocked_responses['https://example.com/sitemap-5.xml']  = [ 'body' => $this->build_urlset_xml( '/page-9', '/page-10' ) ];

	$GLOBALS['perform_test_remote_get_map'] = $mocked_responses;

	$fetch = new ReflectionMethod( $cache, 'fetch_urls_from_sitemap' );
	$fetch->setAccessible( true );

	$expected_urls = [
		'https://example.com/page-1',
		'https://example.com/page-2',
		'https://example.com/page-3',
		'https://example.com/page-4',
		'https://example.com/page-5',
		'https://example.com/page-6',
	];
	$collected     = $fetch->invoke( $cache );

	$this->assertSame( $expected_urls, $collected );

	// The index plus exactly three children must have been requested, in order.
	$expected_requests = [
		'https://example.com/wp-sitemap.xml',
		'https://example.com/sitemap-1.xml',
		'https://example.com/sitemap-2.xml',
		'https://example.com/sitemap-3.xml',
	];
	$requested         = array_column( $GLOBALS['perform_test_remote_get_calls'], 'url' );

	$this->assertSame( $expected_requests, $requested );
}

/**
 * With a URL cap of 3, collection stops at three unique URLs (the repeated
 * '/page-2' is counted once) and the second child sitemap is never requested.
 */
public function test_fetch_urls_from_sitemap_stops_after_reaching_url_cap() {
	$cache = new PageCache();

	$this->set_private_property( $cache, 'preload_sitemap_child_limit', 5 );
	$this->set_private_property( $cache, 'preload_sitemap_url_cap', 3 );

	// The first child alone holds more unique URLs than the cap allows.
	$first_child  = $this->build_urlset_xml( '/page-1', '/page-2', '/page-2', '/page-3', '/page-4' );
	$second_child = $this->build_urlset_xml( '/page-5' );

	$GLOBALS['perform_test_remote_get_map'] = [
		'https://example.com/wp-sitemap.xml' => [ 'body' => $this->build_sitemap_index_xml( 2 ) ],
		'https://example.com/sitemap-1.xml'  => [ 'body' => $first_child ],
		'https://example.com/sitemap-2.xml'  => [ 'body' => $second_child ],
	];

	$fetch = new ReflectionMethod( $cache, 'fetch_urls_from_sitemap' );
	$fetch->setAccessible( true );
	$collected = $fetch->invoke( $cache );

	$this->assertSame(
		[
			'https://example.com/page-1',
			'https://example.com/page-2',
			'https://example.com/page-3',
		],
		$collected
	);

	$requested = array_column( $GLOBALS['perform_test_remote_get_calls'], 'url' );

	$this->assertSame(
		[
			'https://example.com/wp-sitemap.xml',
			'https://example.com/sitemap-1.xml',
		],
		$requested
	);
}

/**
 * When the sitemap index request returns a WP_Error, the stored preload queue
 * is left unchanged and no request beyond the index is made.
 */
public function test_seed_preload_queue_keeps_existing_queue_when_sitemap_fetch_fails() {
	$existing_queue = [
		'https://example.com/existing-page',
	];

	$GLOBALS['perform_test_options'] = [
		'perform_settings'            => [
			'enable_cache_preload' => true,
		],
		'perform_cache_preload_queue' => $existing_queue,
	];

	// The only registered response is a failure for the sitemap index.
	$failure = new WP_Error( 'http_request_failed', 'Timeout' );

	$GLOBALS['perform_test_remote_get_map'] = [
		'https://example.com/wp-sitemap.xml' => $failure,
	];

	$cache = new PageCache();
	$cache->seed_preload_queue_from_sitemap_and_logs();

	$queue_after = $GLOBALS['perform_test_options']['perform_cache_preload_queue'];
	$this->assertSame( [ 'https://example.com/existing-page' ], $queue_after );

	$requested = array_column( $GLOBALS['perform_test_remote_get_calls'], 'url' );
	$this->assertSame( [ 'https://example.com/wp-sitemap.xml' ], $requested );
}

/**
 * Overwrite a non-public integer property on a PageCache instance via reflection.
 *
 * @param PageCache $page_cache    Instance to modify.
 * @param string    $property_name Name of the property to set.
 * @param int       $value         Value to assign.
 */
private function set_private_property( PageCache $page_cache, string $property_name, int $value ): void {
	$reflected = new ReflectionProperty( $page_cache, $property_name );

	$reflected->setAccessible( true );
	$reflected->setValue( $page_cache, $value );
}

/**
 * Build a sitemap index document listing sitemap-1.xml through sitemap-N.xml on example.com.
 *
 * @param int $child_count Number of <sitemap> entries to emit; values below 1 produce an empty index.
 * @return string XML document as a single-line string.
 */
private function build_sitemap_index_xml( int $child_count ): string {
	$entries  = '';
	$position = 0;

	// Entries are numbered from 1 up to and including $child_count.
	while ( $position < $child_count ) {
		++$position;
		$entries .= sprintf( '<sitemap><loc>https://example.com/sitemap-%d.xml</loc></sitemap>', $position );
	}

	return '<?xml version="1.0" encoding="UTF-8"?><sitemapindex>' . $entries . '</sitemapindex>';
}

/**
 * Build a <urlset> sitemap document with one <url> entry per given path on example.com.
 *
 * @param string ...$paths Paths appended verbatim to 'https://example.com'.
 * @return string XML document as a single-line string.
 */
private function build_urlset_xml( string ...$paths ): string {
	$entries = array_map(
		static function ( string $path ): string {
			return '<url><loc>https://example.com' . $path . '</loc></url>';
		},
		$paths
	);

	return '<?xml version="1.0" encoding="UTF-8"?><urlset>' . implode( '', $entries ) . '</urlset>';
}
}
Loading