Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Data Liberation] Introduce WP_Entity_Reader_Iterator #2122

Draft
wants to merge 2 commits into
base: trunk
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,20 @@ public static function create_for_markdown_directory( $markdown_directory, $opti
return WP_Markdown_Importer::create(
function ( $cursor = null ) use ( $markdown_directory ) {
// @TODO: Handle $cursor
return new WP_Directory_Tree_Entity_Reader(
new WP_Filesystem(),
array(
'root_dir' => $markdown_directory,
'first_post_id' => 1,
'allowed_extensions' => array( 'md' ),
'index_file_patterns' => array( '#^index\.md$#' ),
'markup_converter_factory' => function ( $content ) {
return new WP_Markdown_To_Blocks( $content );
},
)
);
return new WP_Entity_Reader_Iterator(
new WP_Directory_Tree_Entity_Reader(
new WP_Filesystem(),
array(
'root_dir' => $markdown_directory,
'first_post_id' => 1,
'allowed_extensions' => array( 'md' ),
'index_file_patterns' => array( '#^index\.md$#' ),
'markup_converter_factory' => function ( $content ) {
return new WP_Markdown_To_Blocks( $content );
},
)
)
);
},
$options,
$cursor
Expand Down
1 change: 1 addition & 0 deletions packages/playground/data-liberation/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
require_once __DIR__ . '/src/import/WP_Stream_Importer.php';
require_once __DIR__ . '/src/import/WP_Entity_Iterator_Chain.php';
require_once __DIR__ . '/src/import/WP_Retry_Frontloading_Iterator.php';
require_once __DIR__ . '/src/entity-readers/WP_Entity_Reader_Iterator.php';
require_once __DIR__ . '/src/entity-readers/WP_Entity_Reader.php';
require_once __DIR__ . '/src/entity-readers/WP_HTML_Entity_Reader.php';

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@
*
* @TODO: Explore supporting a cursor to allow resuming from where we left off.
*/
class WP_Directory_Tree_Entity_Reader implements \Iterator {
class WP_Directory_Tree_Entity_Reader extends WP_Entity_Reader {
private $file_visitor;
private $filesystem;
private $entity;

private $is_finished = false;
private $pending_directory_index;
private $pending_files = array();
private $parent_ids = array();
private $next_post_id;
private $is_finished = false;
private $entities_read_so_far = 0;
private $allowed_extensions = array();
private $index_file_patterns = array();
Expand Down Expand Up @@ -154,6 +154,14 @@ public function next_entity() {
return false;
}

/**
 * Reports whether this reader has been marked as finished.
 *
 * @return bool The current value of the reader's internal is_finished flag.
 */
public function is_finished(): bool {
return $this->is_finished;
}

/**
 * Returns the last error encountered by the reader.
 *
 * This implementation does not record errors and always returns null.
 *
 * @return string|null Always null.
 */
public function get_last_error(): ?string {
return null;
}

/**
 * Returns the entity currently held by the reader.
 *
 * @return \WP_Imported_Entity|null The stored entity, or null when none is set.
 */
public function get_entity(): ?\WP_Imported_Entity {
return $this->entity;
}
Expand Down Expand Up @@ -308,35 +316,4 @@ protected function is_valid_file( $path ) {
return in_array( $extension, $this->allowed_extensions, true );
}

/**
* @TODO: Either implement this method, or introduce a concept of
* reentrant and non-reentrant entity readers.
*/
public function get_reentrancy_cursor() {
return '';
}

public function current(): mixed {
if ( null === $this->entity && ! $this->is_finished ) {
$this->next();
}
return $this->get_entity();
}

public function next(): void {
$this->next_entity();
}

public function key(): int {
return $this->entities_read_so_far - 1;
}

public function valid(): bool {
return ! $this->is_finished;
}

public function rewind(): void {
// @TODO: Either implement this method, or formalize the fact that
// entity readers are not rewindable.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* The reader implements Iterator so you can easily loop through entities:
* foreach ($reader as $entity) { ... }
*/
abstract class WP_Entity_Reader implements \Iterator {
abstract class WP_Entity_Reader {

/**
* Gets the current entity being processed.
Expand Down Expand Up @@ -57,39 +57,4 @@ public function get_reentrancy_cursor() {
return '';
}

// The iterator interface:

public function current(): object {
if ( null === $this->get_entity() && ! $this->is_finished() && ! $this->get_last_error() ) {
$this->next();
}
return $this->get_entity();
}

private $last_next_result = null;
public function next(): void {
// @TODO: Don't keep track of this. Just make sure the next_entity()
// call will make the is_finished() true.
$this->last_next_result = $this->next_entity();
}

public function key(): string {
return $this->get_reentrancy_cursor();
}

public function valid(): bool {
return false !== $this->last_next_result && ! $this->is_finished() && ! $this->get_last_error();
}

public function rewind(): void {
// Haven't started yet.
if ( null === $this->last_next_result ) {
return;
}
_doing_it_wrong(
__METHOD__,
'WP_WXR_Entity_Reader does not support rewinding.',
null
);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<?php

/**
 * An iterator that reads entities from a WP_Entity_Reader.
 *
 * The wrapped reader is forward-only. The first entity is read lazily, on the
 * first call to current(), key(), valid(), or next(). Keys are the number of
 * entities successfully read so far, so the first entity has key 1.
 *
 * The iterator can be consumed with foreach exactly once:
 *
 *     foreach ( new WP_Entity_Reader_Iterator( $reader ) as $entity ) { ... }
 */
class WP_Entity_Reader_Iterator implements Iterator {

	/**
	 * The reader that entities are pulled from.
	 *
	 * @var WP_Entity_Reader
	 */
	private $entity_reader;

	/**
	 * Whether the first entity has already been requested from the reader.
	 *
	 * @var bool
	 */
	private $is_initialized = false;

	/**
	 * Whether next() has been called, i.e. the iterator has moved past
	 * its starting position and can no longer be rewound.
	 *
	 * @var bool
	 */
	private $has_advanced = false;

	/**
	 * Number of entities successfully read from the reader so far.
	 *
	 * @var int
	 */
	private $key = 0;

	/**
	 * @param WP_Entity_Reader $entity_reader The reader to iterate over.
	 */
	public function __construct( WP_Entity_Reader $entity_reader ) {
		$this->entity_reader = $entity_reader;
	}

	/**
	 * Exposes the wrapped reader, e.g. to query its reentrancy cursor.
	 *
	 * @return WP_Entity_Reader
	 */
	public function get_entity_reader() {
		return $this->entity_reader;
	}

	/**
	 * Returns the entity the reader is currently positioned at.
	 *
	 * @return mixed Whatever the reader's get_entity() returns.
	 */
	#[\ReturnTypeWillChange]
	public function current() {
		$this->ensure_initialized();
		return $this->entity_reader->get_entity();
	}

	/**
	 * Moves the reader to the next entity.
	 */
	#[\ReturnTypeWillChange]
	public function next() {
		$this->ensure_initialized();
		// From now on the starting position is lost and rewind() must fail.
		$this->has_advanced = true;
		$this->advance_to_next_entity();
	}

	/**
	 * Returns the number of entities successfully read so far.
	 *
	 * @return int
	 */
	#[\ReturnTypeWillChange]
	public function key() {
		$this->ensure_initialized();
		return $this->key;
	}

	/**
	 * Tells whether there is a current entity to consume.
	 *
	 * @return bool False once the reader reports it is finished.
	 */
	#[\ReturnTypeWillChange]
	public function valid() {
		$this->ensure_initialized();
		return ! $this->entity_reader->is_finished();
	}

	/**
	 * Rewinds the iterator to its starting position.
	 *
	 * PHP's foreach always calls rewind() before the first valid() check, so
	 * rewinding an iterator that has not moved yet must succeed. Otherwise
	 * the iterator could never be used in a foreach loop. Once next() has
	 * been called the underlying reader cannot go back and rewinding fails.
	 *
	 * @throws Data_Liberation_Exception When called after next().
	 */
	#[\ReturnTypeWillChange]
	public function rewind() {
		if ( $this->has_advanced ) {
			throw new Data_Liberation_Exception( 'WP_Entity_Reader_Iterator does not support rewinding.' );
		}
		// Still at the starting position: nothing to do. The first entity
		// is read lazily by ensure_initialized().
	}

	/**
	 * Reads the first entity from the reader the first time the iterator
	 * is accessed. Subsequent calls are no-ops.
	 */
	private function ensure_initialized() {
		if ( $this->is_initialized ) {
			return;
		}
		$this->is_initialized = true;
		$this->advance_to_next_entity();
	}

	/**
	 * Asks the reader for the next entity and counts it when one was found.
	 */
	private function advance_to_next_entity() {
		if ( $this->entity_reader->next_entity() ) {
			++$this->key;
		}
	}

}
Original file line number Diff line number Diff line change
Expand Up @@ -905,39 +905,4 @@ private function after_entity() {
$this->last_opener_attributes = array();
}

public function current(): object {
// Lazily initialize the iterator when it is first accessed.
// The alternative is eager initialization in the constructor.
if ( null === $this->entity_data && ! $this->is_finished() && ! $this->get_last_error() ) {
$this->next();
}
return $this->get_entity();
}

private $last_next_result = null;
public function next(): void {
// @TODO: Don't keep track of this. Just make sure the next_entity()
// call will make the is_finished() true.
$this->last_next_result = $this->next_entity();
}

public function key(): string {
return $this->get_reentrancy_cursor();
}

public function valid(): bool {
return false !== $this->last_next_result && ! $this->is_finished() && ! $this->get_last_error();
}

public function rewind(): void {
// Haven't started yet.
if ( null === $this->last_next_result ) {
return;
}
_doing_it_wrong(
__METHOD__,
'WP_WXR_Reader does not support rewinding.',
null
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,9 @@ class WP_Stream_Importer {
public static function create_for_wxr_file( $wxr_path, $options = array(), $cursor = null ) {
return static::create(
function ( $cursor = null ) use ( $wxr_path ) {
return WP_WXR_Entity_Reader::create( WP_File_Reader::create( $wxr_path ), $cursor );
return new WP_Entity_Reader_Iterator(
WP_WXR_Entity_Reader::create( WP_File_Reader::create( $wxr_path ), $cursor )
);
},
$options,
$cursor
Expand All @@ -142,7 +144,9 @@ function ( $cursor = null ) use ( $wxr_path ) {
public static function create_for_wxr_url( $wxr_url, $options = array(), $cursor = null ) {
return static::create(
function ( $cursor = null ) use ( $wxr_url ) {
return WP_WXR_Entity_Reader::create( new WP_Remote_File_Reader( $wxr_url ), $cursor );
return new WP_Entity_Reader_Iterator(
WP_WXR_Entity_Reader::create( new WP_Remote_File_Reader( $wxr_url ), $cursor )
);
},
$options,
$cursor
Expand Down Expand Up @@ -240,11 +244,11 @@ public function get_reentrancy_cursor() {

protected static function parse_options( $options ) {
if ( ! isset( $options['new_site_url'] ) ) {
$options['new_site_url'] = get_site_url();
// $options['new_site_url'] = get_site_url();
}

if ( ! isset( $options['uploads_path'] ) ) {
$options['uploads_path'] = wp_get_upload_dir()['basedir'];
// $options['uploads_path'] = wp_get_upload_dir()['basedir'];
}
// Remove the trailing slash to make concatenation easier later.
$options['uploads_path'] = rtrim( $options['uploads_path'], '/' );
Expand Down Expand Up @@ -435,7 +439,7 @@ protected function index_next_entities( $count = 10000 ) {

$this->entity_iterator->next();
}
$this->resume_at_entity = $this->entity_iterator->get_reentrancy_cursor();
$this->resume_at_entity = $this->entity_iterator->get_entity_reader()->get_reentrancy_cursor();
return true;
}

Expand Down Expand Up @@ -570,7 +574,7 @@ protected function frontload_next_entity() {
* and enqueue them for download.
*/
$entity = $this->entity_iterator->current();
$cursor = $this->entity_iterator->get_reentrancy_cursor();
$cursor = $this->entity_iterator->get_entity_reader()->get_reentrancy_cursor();
$this->active_downloads[ $cursor ] = array();

$data = $entity->get_data();
Expand Down Expand Up @@ -715,7 +719,7 @@ protected function import_next_entity() {
/**
* @TODO: Update the progress information.
*/
$this->resume_at_entity = $this->entity_iterator->get_reentrancy_cursor();
$this->resume_at_entity = $this->entity_iterator->get_entity_reader()->get_reentrancy_cursor();
$this->entity_iterator->next();
return true;
}
Expand Down Expand Up @@ -744,7 +748,7 @@ protected function enqueue_attachment_download( string $raw_url, $options = arra
return false;
}

$entity_cursor = $this->entity_iterator->get_reentrancy_cursor();
$entity_cursor = $this->entity_iterator->get_entity_reader()->get_reentrancy_cursor();
$this->active_downloads[ $entity_cursor ][ $raw_url ] = true;
return true;
}
Expand Down