Split the code in the get_mw_pages function into three separate functions: one that gets the list of all pages and all file attachments, a second that gets the pages in the categories specified in the configuration file along with the files related to these pages, and a third that gets from MediaWiki the list of explicitly specified pages with their related file attachments. Signed-off-by: Pavel Volek <Pavel.Volek@xxxxxxxxxxxxxxx> Signed-off-by: NGUYEN Kim Thuat <Kim-Thuat.Nguyen@xxxxxxxxxxxxxxx> Signed-off-by: ROUCHER IGLESIAS Javier <roucherj@xxxxxxxxxxxxxxx> Signed-off-by: Matthieu Moy <Matthieu.Moy@xxxxxxx> --- contrib/mw-to-git/git-remote-mediawiki | 120 ++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 53 deletions(-) diff --git a/contrib/mw-to-git/git-remote-mediawiki b/contrib/mw-to-git/git-remote-mediawiki index a51e9ab..866bd6f 100755 --- a/contrib/mw-to-git/git-remote-mediawiki +++ b/contrib/mw-to-git/git-remote-mediawiki @@ -224,80 +224,94 @@ sub get_mw_pages { my $user_defined; if (@tracked_pages) { $user_defined = 1; - # The user provided a list of pages titles, but we - # still need to query the API to get the page IDs. - get_mw_first_pages(\@tracked_pages, \%pages); - - if ($import_media) { - get_mw_pages_for_linked_mediafiles(\@tracked_pages, \%pages); - } + get_mw_tracked_pages(\%pages); } if (@tracked_categories) { $user_defined = 1; - foreach my $category (@tracked_categories) { - if (index($category, ':') < 0) { - # Mediawiki requires the Category - # prefix, but let's not force the user - # to specify it. - $category = "Category:" . $category; - } - my $mw_pages = $mediawiki->list( { - action => 'query', - list => 'categorymembers', - cmtitle => $category, - cmlimit => 'max' } ) - || die $mediawiki->{error}->{code} . ': ' - . 
$mediawiki->{error}->{details}; - foreach my $page (@{$mw_pages}) { - $pages{$page->{title}} = $page; - } - - if ($import_media) { - my @titles = map $_->{title}, @{$mw_pages}; - get_mw_pages_for_linked_mediafiles(\@titles, \%pages); - } - } + get_mw_tracked_categories(\%pages); } if (!$user_defined) { - # No user-provided list, get the list of pages from - # the API. + get_mw_all_pages(\%pages); + } + return values(%pages); +} + +sub get_mw_all_pages { + my $pages = shift; + # No user-provided list, get the list of pages from the API. + my $mw_pages = $mediawiki->list({ + action => 'query', + list => 'allpages', + aplimit => 'max' + }); + if (!defined($mw_pages)) { + print STDERR "fatal: could not get the list of wiki pages.\n"; + print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; + print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; + exit 1; + } + foreach my $page (@{$mw_pages}) { + $pages->{$page->{title}} = $page; + } + + if ($import_media) { + # Attach list of all pages for media files from the API, + # they are in a different namespace, only one namespace + # can be queried at the same moment my $mw_pages = $mediawiki->list({ action => 'query', list => 'allpages', + apnamespace => get_mw_namespace_id("File"), aplimit => 'max' }); if (!defined($mw_pages)) { - print STDERR "fatal: could not get the list of wiki pages.\n"; + print STDERR "fatal: could not get the list of pages for media files.\n"; print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; exit 1; } foreach my $page (@{$mw_pages}) { - $pages{$page->{title}} = $page; + $pages->{$page->{title}} = $page; + } + } +} + +sub get_mw_tracked_pages { + my $pages = shift; + # The user provided a list of pages titles, but we + # still need to query the API to get the page IDs. 
+ get_mw_first_pages(\@tracked_pages, \%{$pages}); + + if ($import_media) { + get_mw_pages_for_linked_mediafiles(\@tracked_pages, \%{$pages}); + } +} + +sub get_mw_tracked_categories { + my $pages = shift; + foreach my $category (@tracked_categories) { + if (index($category, ':') < 0) { + # Mediawiki requires the Category + # prefix, but let's not force the user + # to specify it. + $category = "Category:" . $category; + } + my $mw_pages = $mediawiki->list( { + action => 'query', + list => 'categorymembers', + cmtitle => $category, + cmlimit => 'max' } ) + || die $mediawiki->{error}->{code} . ': ' + . $mediawiki->{error}->{details}; + foreach my $page (@{$mw_pages}) { + $pages->{$page->{title}} = $page; } if ($import_media) { - # Attach list of all pages for media files from the API, - # they are in a different namespace, only one namespace - # can be queried at the same moment - my $mw_pages = $mediawiki->list({ - action => 'query', - list => 'allpages', - apnamespace => get_mw_namespace_id("File"), - aplimit => 'max' - }); - if (!defined($mw_pages)) { - print STDERR "fatal: could not get the list of pages for media files.\n"; - print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; - print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; - exit 1; - } - foreach my $page (@{$mw_pages}) { - $pages{$page->{title}} = $page; - } + my @titles = map $_->{title}, @{$mw_pages}; + get_mw_pages_for_linked_mediafiles(\@titles, \%{$pages}); } } - return values(%pages); } sub get_mw_pages_for_linked_mediafiles { -- 1.7.10.2.552.gaa3bb87 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html