diff --git a/src/Modules/Cache/PageCache.php b/src/Modules/Cache/PageCache.php
index 168de96..be542da 100644
--- a/src/Modules/Cache/PageCache.php
+++ b/src/Modules/Cache/PageCache.php
@@ -74,6 +74,20 @@ class PageCache implements ModuleInterface {
*/
private $url_normalizer = null;
+ /**
+ * Maximum number of child sitemap documents to fetch per seed run.
+ *
+ * fetch_urls_from_sitemap() clamps this to at least 1 and counts a child
+ * as fetched before the request is sent, so failed requests also count
+ * towards the limit.
+ *
+ * @var int
+ */
+ private $preload_sitemap_child_limit = 20;
+
+ /**
+ * Maximum number of unique sitemap URLs to collect per seed run.
+ *
+ * fetch_urls_from_sitemap() clamps this to at least 1 and stops reading
+ * sitemap entries once this many distinct URLs have been collected.
+ *
+ * @var int
+ */
+ private $preload_sitemap_url_cap = 500;
+
/**
* Determine whether this module should be loaded.
*
@@ -656,9 +670,17 @@ private function fetch_urls_from_sitemap() {
return [];
}
- $urls = [];
+ $urls = [];
+ $url_cap = max( 1, (int) $this->preload_sitemap_url_cap );
+ $child_sitemap_limit = max( 1, (int) $this->preload_sitemap_child_limit );
+ $child_sitemaps_fetched = 0;
+
if ( isset( $index->sitemap ) ) {
foreach ( $index->sitemap as $sitemap ) {
+ if ( count( $urls ) >= $url_cap || $child_sitemaps_fetched >= $child_sitemap_limit ) {
+ break;
+ }
+
if ( empty( $sitemap->loc ) ) {
continue;
}
@@ -668,6 +690,7 @@ private function fetch_urls_from_sitemap() {
continue;
}
+ ++$child_sitemaps_fetched;
$child_response = wp_remote_get( $child_sitemap_url, [ 'timeout' => 8 ] );
if ( is_wp_error( $child_response ) ) {
continue;
@@ -684,17 +707,21 @@ private function fetch_urls_from_sitemap() {
}
foreach ( $child->url as $item ) {
+ if ( count( $urls ) >= $url_cap ) {
+ break;
+ }
+
if ( ! empty( $item->loc ) ) {
$item_url = esc_url_raw( (string) $item->loc );
if ( $this->is_site_url( $item_url ) ) {
- $urls[] = $item_url;
+ $urls[ $item_url ] = true;
}
}
}
}
}
- return array_slice( array_values( array_unique( array_filter( $urls ) ) ), 0, 500 );
+ return array_keys( $urls );
}
/**
diff --git a/tests/bootstrap.php b/tests/bootstrap.php
index a5bc27e..87f38eb 100644
--- a/tests/bootstrap.php
+++ b/tests/bootstrap.php
@@ -143,6 +143,22 @@ function admin_url( $path = '' ) {
}
}
+if ( ! function_exists( 'home_url' ) ) {
+	/**
+	 * Test double for WordPress' home_url().
+	 *
+	 * The base URL is taken from $GLOBALS['perform_test_home_url'] when that
+	 * global is set, and falls back to 'https://example.com' otherwise.
+	 *
+	 * @param string $path Optional path, or an absolute http(s) URL.
+	 * @return string The base URL for an empty path, the argument unchanged when
+	 *                it already starts with http:// or https://, otherwise the
+	 *                base and path joined with exactly one slash.
+	 */
+	function home_url( $path = '' ) {
+		$site_root = 'https://example.com';
+		if ( isset( $GLOBALS['perform_test_home_url'] ) ) {
+			$site_root = (string) $GLOBALS['perform_test_home_url'];
+		}
+
+		// Strict check on the raw argument: only the empty string short-circuits.
+		if ( '' === $path ) {
+			return $site_root;
+		}
+
+		$is_plain_http  = 0 === strpos( (string) $path, 'http://' );
+		$is_secure_http = 0 === strpos( (string) $path, 'https://' );
+		if ( $is_plain_http || $is_secure_http ) {
+			return (string) $path;
+		}
+
+		$trimmed_root = rtrim( $site_root, '/' );
+		$trimmed_path = ltrim( (string) $path, '/' );
+
+		return $trimmed_root . '/' . $trimmed_path;
+	}
+}
+
if ( ! function_exists( 'esc_url' ) ) {
function esc_url( $url ) {
return (string) $url;
@@ -223,6 +239,71 @@ function esc_url_raw( $url ) {
}
}
+if ( ! class_exists( 'WP_Error' ) ) {
+	/**
+	 * Minimal stand-in for WordPress' WP_Error, defined only when the real
+	 * class is not already loaded. It stores a code and a message and nothing else.
+	 */
+	class WP_Error {
+		/**
+		 * Machine-readable error code.
+		 *
+		 * @var string
+		 */
+		public $code = '';
+
+		/**
+		 * Human-readable error message.
+		 *
+		 * @var string
+		 */
+		public $message = '';
+
+		/**
+		 * Store the given code and message, each cast to string.
+		 *
+		 * @param string $code    Error code.
+		 * @param string $message Error message.
+		 */
+		public function __construct( $code = '', $message = '' ) {
+			[ $this->code, $this->message ] = [ (string) $code, (string) $message ];
+		}
+	}
+}
+
+if ( ! function_exists( 'is_wp_error' ) ) {
+	/**
+	 * Report whether a value is a WP_Error instance.
+	 *
+	 * @param mixed $thing Value to inspect.
+	 * @return bool True only when $thing is an instance of WP_Error.
+	 */
+	function is_wp_error( $thing ) {
+		if ( $thing instanceof WP_Error ) {
+			return true;
+		}
+
+		return false;
+	}
+}
+
+if ( ! function_exists( 'wp_remote_get' ) ) {
+	/**
+	 * Test double for wp_remote_get() that never touches the network.
+	 *
+	 * Every call is appended to $GLOBALS['perform_test_remote_get_calls'] as a
+	 * [ 'url' => ..., 'args' => ... ] record. The response is looked up by URL
+	 * in $GLOBALS['perform_test_remote_get_map'].
+	 *
+	 * @param string $url  Requested URL, used as the lookup key.
+	 * @param array  $args Request arguments; recorded but otherwise unused.
+	 * @return mixed The registered response for $url, or a WP_Error with code
+	 *               'missing_mock' when none is registered.
+	 */
+	function wp_remote_get( $url, $args = [] ) {
+		$has_call_log = isset( $GLOBALS['perform_test_remote_get_calls'] )
+			&& is_array( $GLOBALS['perform_test_remote_get_calls'] );
+		if ( ! $has_call_log ) {
+			// Start a fresh log when the global is missing or not an array.
+			$GLOBALS['perform_test_remote_get_calls'] = [];
+		}
+
+		$call_record = [
+			'url'  => $url,
+			'args' => $args,
+		];
+		$GLOBALS['perform_test_remote_get_calls'][] = $call_record;
+
+		$canned_responses = [];
+		if ( isset( $GLOBALS['perform_test_remote_get_map'] ) && is_array( $GLOBALS['perform_test_remote_get_map'] ) ) {
+			$canned_responses = $GLOBALS['perform_test_remote_get_map'];
+		}
+
+		// array_key_exists (not isset) so an explicitly registered null is still returned.
+		if ( ! array_key_exists( $url, $canned_responses ) ) {
+			return new WP_Error( 'missing_mock', 'No mocked response registered.' );
+		}
+
+		return $canned_responses[ $url ];
+	}
+}
+
+if ( ! function_exists( 'wp_remote_retrieve_body' ) ) {
+	/**
+	 * Test double for wp_remote_retrieve_body().
+	 *
+	 * @param mixed $response Response as returned by the wp_remote_get() stub.
+	 * @return string The 'body' entry cast to string, or '' when $response is
+	 *                not an array or has no non-null 'body' entry.
+	 */
+	function wp_remote_retrieve_body( $response ) {
+		$has_body = is_array( $response ) && isset( $response['body'] );
+
+		return $has_body ? (string) $response['body'] : '';
+	}
+}
+
if ( ! function_exists( 'sanitize_text_field' ) ) {
function sanitize_text_field( $value ) {
return is_scalar( $value ) ? trim( (string) $value ) : $value;
diff --git a/tests/unit-tests/tests-page-cache.php b/tests/unit-tests/tests-page-cache.php
index f7b136f..82f1ca4 100644
--- a/tests/unit-tests/tests-page-cache.php
+++ b/tests/unit-tests/tests-page-cache.php
@@ -5,14 +5,27 @@
final class Tests_Page_Cache extends TestCase {
+ /**
+ * Reset the globals and server header the tests in this class rely on.
+ *
+ * perform_test_home_url is read by the home_url() test stub, and
+ * perform_test_remote_get_map / perform_test_remote_get_calls are read and
+ * written by the wp_remote_get() test stub.
+ */
protected function setUp(): void {
- $GLOBALS['perform_test_transients'] = [
+ $GLOBALS['perform_test_transients'] = [
'perform_cache_lock_test' => 'expected-token',
];
+ $GLOBALS['perform_test_options'] = [];
+ $GLOBALS['perform_test_filters'] = [];
+ $GLOBALS['perform_test_home_url'] = 'https://example.com';
+ // No canned HTTP responses and an empty call log at the start of each test.
+ $GLOBALS['perform_test_remote_get_map'] = [];
+ $GLOBALS['perform_test_remote_get_calls'] = [];
unset( $_SERVER['HTTP_X_PERFORM_CACHE_REGEN'] );
}
+ /**
+ * Remove every global and the server header that setUp() initialised or cleared.
+ */
protected function tearDown(): void {
- unset( $GLOBALS['perform_test_transients'], $_SERVER['HTTP_X_PERFORM_CACHE_REGEN'] );
+ unset(
+ $GLOBALS['perform_test_filters'],
+ $GLOBALS['perform_test_home_url'],
+ $GLOBALS['perform_test_options'],
+ $GLOBALS['perform_test_remote_get_calls'],
+ $GLOBALS['perform_test_remote_get_map'],
+ $GLOBALS['perform_test_transients'],
+ $_SERVER['HTTP_X_PERFORM_CACHE_REGEN']
+ );
}
public function test_internal_regeneration_requires_matching_lock_token() {
@@ -31,4 +44,130 @@ public function test_internal_regeneration_requires_matching_lock_token() {
$_SERVER['HTTP_X_PERFORM_CACHE_REGEN'] = 'expected-token';
$this->assertTrue( $is_internal_regen_request->invoke( $page_cache ) );
}
+
+	/**
+	 * Child sitemap requests stop once the per-run child limit is reached.
+	 *
+	 * The index lists five children while the limit is three, so only the index
+	 * and the first three children may be requested, and only the URLs from
+	 * those three children may be returned.
+	 */
+	public function test_fetch_urls_from_sitemap_limits_child_sitemap_requests_per_run() {
+		$page_cache = new PageCache();
+
+		$this->set_private_property( $page_cache, 'preload_sitemap_child_limit', 3 );
+		$this->set_private_property( $page_cache, 'preload_sitemap_url_cap', 20 );
+
+		$mocked_responses = [
+			'https://example.com/wp-sitemap.xml' => [ 'body' => $this->build_sitemap_index_xml( 5 ) ],
+			'https://example.com/sitemap-1.xml' => [ 'body' => $this->build_urlset_xml( '/page-1', '/page-2' ) ],
+			'https://example.com/sitemap-2.xml' => [ 'body' => $this->build_urlset_xml( '/page-3', '/page-4' ) ],
+			'https://example.com/sitemap-3.xml' => [ 'body' => $this->build_urlset_xml( '/page-5', '/page-6' ) ],
+			'https://example.com/sitemap-4.xml' => [ 'body' => $this->build_urlset_xml( '/page-7', '/page-8' ) ],
+			'https://example.com/sitemap-5.xml' => [ 'body' => $this->build_urlset_xml( '/page-9', '/page-10' ) ],
+		];
+
+		$GLOBALS['perform_test_remote_get_map'] = $mocked_responses;
+
+		// The method under test is private, so it is invoked through reflection.
+		$fetch_urls = new ReflectionMethod( $page_cache, 'fetch_urls_from_sitemap' );
+		$fetch_urls->setAccessible( true );
+		$collected_urls = $fetch_urls->invoke( $page_cache );
+
+		$expected_urls = [
+			'https://example.com/page-1',
+			'https://example.com/page-2',
+			'https://example.com/page-3',
+			'https://example.com/page-4',
+			'https://example.com/page-5',
+			'https://example.com/page-6',
+		];
+		$this->assertSame( $expected_urls, $collected_urls );
+
+		// Children 4 and 5 must never be requested.
+		$expected_requests = [
+			'https://example.com/wp-sitemap.xml',
+			'https://example.com/sitemap-1.xml',
+			'https://example.com/sitemap-2.xml',
+			'https://example.com/sitemap-3.xml',
+		];
+		$requested_urls = array_column( $GLOBALS['perform_test_remote_get_calls'], 'url' );
+		$this->assertSame( $expected_requests, $requested_urls );
+	}
+
+	/**
+	 * URL collection stops once the unique-URL cap is reached.
+	 *
+	 * The first child lists '/page-2' twice; with a cap of three the result
+	 * must be the first three distinct URLs, and the second child sitemap must
+	 * not be requested at all.
+	 */
+	public function test_fetch_urls_from_sitemap_stops_after_reaching_url_cap() {
+		$page_cache = new PageCache();
+
+		$this->set_private_property( $page_cache, 'preload_sitemap_child_limit', 5 );
+		$this->set_private_property( $page_cache, 'preload_sitemap_url_cap', 3 );
+
+		$mocked_responses = [
+			'https://example.com/wp-sitemap.xml' => [ 'body' => $this->build_sitemap_index_xml( 2 ) ],
+			'https://example.com/sitemap-1.xml' => [ 'body' => $this->build_urlset_xml( '/page-1', '/page-2', '/page-2', '/page-3', '/page-4' ) ],
+			'https://example.com/sitemap-2.xml' => [ 'body' => $this->build_urlset_xml( '/page-5' ) ],
+		];
+
+		$GLOBALS['perform_test_remote_get_map'] = $mocked_responses;
+
+		// The method under test is private, so it is invoked through reflection.
+		$fetch_urls = new ReflectionMethod( $page_cache, 'fetch_urls_from_sitemap' );
+		$fetch_urls->setAccessible( true );
+		$collected_urls = $fetch_urls->invoke( $page_cache );
+
+		$expected_urls = [
+			'https://example.com/page-1',
+			'https://example.com/page-2',
+			'https://example.com/page-3',
+		];
+		$this->assertSame( $expected_urls, $collected_urls );
+
+		$expected_requests = [
+			'https://example.com/wp-sitemap.xml',
+			'https://example.com/sitemap-1.xml',
+		];
+		$requested_urls = array_column( $GLOBALS['perform_test_remote_get_calls'], 'url' );
+		$this->assertSame( $expected_requests, $requested_urls );
+	}
+
+	/**
+	 * A failed sitemap index request must leave the stored preload queue untouched.
+	 *
+	 * The index URL is mocked to return a WP_Error; afterwards the queue option
+	 * must still hold its original entry and only the index must have been requested.
+	 */
+	public function test_seed_preload_queue_keeps_existing_queue_when_sitemap_fetch_fails() {
+		$plugin_settings = [
+			'enable_cache_preload' => true,
+		];
+		$stored_queue    = [
+			'https://example.com/existing-page',
+		];
+
+		$GLOBALS['perform_test_options'] = [
+			'perform_settings'            => $plugin_settings,
+			'perform_cache_preload_queue' => $stored_queue,
+		];
+
+		$index_failure = new WP_Error( 'http_request_failed', 'Timeout' );
+
+		$GLOBALS['perform_test_remote_get_map'] = [
+			'https://example.com/wp-sitemap.xml' => $index_failure,
+		];
+
+		$page_cache = new PageCache();
+		$page_cache->seed_preload_queue_from_sitemap_and_logs();
+
+		$queue_after_seed = $GLOBALS['perform_test_options']['perform_cache_preload_queue'];
+		$this->assertSame( [ 'https://example.com/existing-page' ], $queue_after_seed );
+
+		$requested_urls = array_column( $GLOBALS['perform_test_remote_get_calls'], 'url' );
+		$this->assertSame( [ 'https://example.com/wp-sitemap.xml' ], $requested_urls );
+	}
+
+	/**
+	 * Overwrite a non-public integer property on a PageCache instance via reflection.
+	 *
+	 * @param PageCache $page_cache    Instance to modify.
+	 * @param string    $property_name Name of the property to overwrite.
+	 * @param int       $value         New value.
+	 */
+	private function set_private_property( PageCache $page_cache, string $property_name, int $value ): void {
+		$reflected_property = new ReflectionProperty( $page_cache, $property_name );
+
+		// Required so the write below is allowed on a private property.
+		$reflected_property->setAccessible( true );
+		$reflected_property->setValue( $page_cache, $value );
+	}
+
+	/**
+	 * Build a sitemap index document that lists numbered child sitemaps.
+	 *
+	 * The code under test reads the parsed index as `$index->sitemap` entries,
+	 * each with a `loc` child, so every entry has to be emitted as a
+	 * <sitemap><loc> element inside a <sitemapindex> root.
+	 *
+	 * @param int $child_count Number of child sitemap entries to emit.
+	 * @return string XML whose entries point at https://example.com/sitemap-N.xml
+	 *                for N = 1..$child_count.
+	 */
+	private function build_sitemap_index_xml( int $child_count ): string {
+		$items = [];
+
+		for ( $index = 1; $index <= $child_count; $index++ ) {
+			$items[] = sprintf( '<sitemap><loc>https://example.com/sitemap-%d.xml</loc></sitemap>', $index );
+		}
+
+		return '<?xml version="1.0" encoding="UTF-8"?>'
+			. '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
+			. implode( '', $items )
+			. '</sitemapindex>';
+	}
+
+	/**
+	 * Build a child sitemap (urlset) document for the given site-relative paths.
+	 *
+	 * The code under test reads the parsed child as `$child->url` entries, each
+	 * with a `loc` child, so every path has to be emitted as a <url><loc>
+	 * element inside a <urlset> root. Paths are not escaped; callers pass
+	 * plain paths such as '/page-1'.
+	 *
+	 * @param string ...$paths Paths appended to https://example.com, in order;
+	 *                         duplicates are kept.
+	 * @return string XML urlset document with one <url> entry per path.
+	 */
+	private function build_urlset_xml( string ...$paths ): string {
+		$items = [];
+
+		foreach ( $paths as $path ) {
+			$items[] = '<url><loc>https://example.com' . $path . '</loc></url>';
+		}
+
+		return '<?xml version="1.0" encoding="UTF-8"?>'
+			. '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
+			. implode( '', $items )
+			. '</urlset>';
+	}
}