#!/usr/bin/python
# -*- coding: utf-8 -*-
#import botpywi
import catlib
import pagegenerators
import wikipedia
import re
import sys
import time
def add_to_log(msg, opt):
    """Append msg to the on-wiki move log; return False on edit conflict."""
    log_page = wikipedia.Page(opt.site, u"User:Phe/Log move page")
    new_text = log_page.get() + msg
    try:
        log_page.put(new_text, u'Journal des erreurs de déplacement')
    except wikipedia.EditConflict:
        return False
    return True
log_msg = u''
def flush_log(opt):
global log_msg
if not log_msg:
return
if not add_to_log(u'\n' + log_msg, opt):
time.sleep(10)
if not add_to_log(u'\n' + log_msg, opt):
print u'ERROR: Unable to log last error message'
log_msg = u''
log_msg = u''
def log_error(msg, page):
err_msg = u'Error, page: [[%s]], %s' % (page.title(), msg)
print err_msg
global log_msg
log_msg += u'*' + err_msg + u'\n'
def regex_from_title(old_title):
    """Build a regexp fragment matching old_title inside a wiki link.

    Every regex metacharacter is backslash-escaped (the original escape
    list u'()*.?+.' duplicated '.' and missed [, ], {, }, ^, $, | and
    the backslash itself, so titles containing those produced broken or
    over-matching patterns).  Runs of spaces are made interchangeable
    with underscores, as wiki link targets treat them the same way.
    """
    escaped = u''
    for ch in old_title:
        if ch in u'()[]{}*.?+^$|\\':
            escaped += u'\\'
        escaped += ch
    return escaped.replace(u' ', u'[ _]+')
def move_one_page(page, new_title, opt):
if not page.exists():
return
print 'move page from;', page.title(), 'to:', new_title
if not opt.dry_run:
# Passing leaveRedirect = false to move() is more efficient but is
# not enough stable actually (2009/10) and anyway it breaks things
# until we have corrected redirect in a later step. So we defer
# deletion of created redirect later.
# Leaving the redirect in place make also the process more robust,
# if the script stop on an exception it'll remain page to correct and
# redirect to delete but nothing will be broken on the wiki (no red
# link but link through redirect will exist)
while True:
# Should not be required but safer, as the throttle from
# pywikipedia is too small for a move operation.
time.sleep(10)
if page.move(new_title, u'Conventions sur les titres'):
break
print "move fail, sleeping 30 seconds"
page = wikipedia.Page(page.site(), page.title())
time.sleep(30)
# Some of the pages to modify may themselves already have been moved.
# Map such titles to their new names: we cannot use
# p.get(follow_redirect = False) because some linked pages were redirects
# before the move, and leaving the redirect in place does not help — we
# would edit the redirect instead of its target.
def fixup_pagename(pages, titles):
    """Return `pages` with every moved title replaced by its new Page."""
    remapped = set()
    for page in pages:
        new_names = [new for (old, new) in titles if page.title() == old]
        if new_names:
            for name in new_names:
                remapped.add(wikipedia.Page(page.site(), name))
        else:
            remapped.add(page)
    return remapped
def fix_redirect(pages, titles, opt):
if not opt.dry_run:
pages = fixup_pagename(pages, titles)
pages = pagegenerators.PreloadingGenerator(pages)
for p in pages:
text = p.get(get_redirect = True)
new_text = text
for t in titles:
regexp = u'\[\[[ ]*' + regex_from_title(t[0]) + u'[ ]*(\||\]|#)'
repl = u'[[' + t[1] + u'\\1'
new_text = re.sub(regexp, repl, new_text)
if opt.dry_run:
print 'Changing', p.title()
wikipedia.showDiff(text, new_text)
if not opt.dry_run:
p.put(new_text,
comment = u'Correction des redirects après renommage')
def delete_redirect(p, opt):
if p.exists():
if p.isRedirectPage():
print 'deleting page:', p.title()
if len(list(p.getReferences())):
if not opt.dry_run:
log_error(u'skipping deletion, linked page exists, please fix linked pages and delete the redirect manually', p)
else:
if not opt.dry_run:
if not p.delete(u'Nettoyage après correction des liens',
prompt = False):
log_error(u'Impossible de détruire la page', p)
else:
if not opt.dry_run:
log_error(u'skipping deletion, page is not a redirect, check linked page before deletion', p)
def delete_all_redirect(titles, opt):
    """Delete — or merely report — the redirect left behind for each title."""
    if opt.delete_redirect:
        for title in titles:
            delete_redirect(wikipedia.Page(opt.site, title), opt)
    elif not opt.dry_run:
        # Not allowed/asked to delete: just log the leftovers for review.
        page_list = [wikipedia.Page(opt.site, title) for title in titles]
        for page in pagegenerators.PreloadingGenerator(page_list):
            if page.exists():
                log_error(u'redirect created, check linked page before deletion', page)
# The next two functions must be edited to change the naming scheme: this
# one rewrites a title, the next one must return a true value when a title
# is a candidate for a change.  (Past schemes added extra replace()/re.sub()
# calls here, e.g. comma or colon to '/' for sub-pages, case fixes, or
# stripping trailing digits into sub-page numbers.)
def change_title(title):
    """Return the new title for `title` under the current naming scheme."""
    # ' - ' marks a sub-page boundary.
    title = title.replace(u' - ', u'/')
    # Always normalize apostrophes, at least on fr:.
    for straight in (u"'", u'‘'):
        title = title.replace(straight, u'’')
    return title
def filter_title(title):
    """Return a truthy match when `title` is a candidate for renaming."""
    candidate_marks = u".*('|‘| - ).*"
    return re.match(candidate_marks, title)
def move_pages(gen, opt):
    """Rename every main-namespace page from `gen` whose title matches
    filter_title(), then fix incoming links and clean up redirects.

    In sync mode (opt.sync_between_page) links are fixed and redirects
    deleted after each individual move; otherwise everything is
    accumulated and processed once at the end.
    """
    pages = set()   # pages linking to moved pages, to be re-targeted
    titles = set()  # (old_title, new_title) pairs
    seen = set()
    for page in gen:
        if page.namespace() != 0:
            continue
        # Strip any section anchor; the same page can be linked with
        # different anchors, hence the `seen` guard.
        old_title = page.title().split(u'#')[0]
        if old_title in seen:
            continue
        seen.add(old_title)
        if not filter_title(old_title):
            continue
        new_title = change_title(old_title)
        # Don't try to move a page onto itself.
        if old_title == new_title:
            continue
        # Collect linked pages first, to avoid querying the references of
        # a page moved a few seconds ago.
        pages = pages.union(pagegenerators.ReferringPageGenerator(page))
        move_one_page(page, new_title, opt)
        titles.add((old_title, new_title))
        talk_page = page.toggleTalkPage()
        titles.add((talk_page.title(), change_title(talk_page.title())))
        if opt.fix_redirect and opt.sync_between_page:
            fix_redirect(pages, titles, opt)
            # BUG FIX: the original reset `titles` to an empty set before
            # calling delete_all_redirect, so in sync mode the deletion
            # pass always received an empty list and no redirect was ever
            # deleted.  Delete (and flush the log) first, clear after.
            delete_all_redirect([old for old, new in titles], opt)
            flush_log(opt)
            pages = set()
            titles = set()
    if opt.fix_redirect and not opt.sync_between_page:
        fix_redirect(pages, titles, opt)
    delete_all_redirect([old for old, new in titles], opt)
    flush_log(opt)
if __name__ == '__main__':
try:
class Options:
pass
options = Options()
options.dry_run = False
options.fix_redirect = False
options.sync_between_page = True
options.delete_redirect = False
gen = None
options.site = wikipedia.getSite(code = 'fr', fam = 'wikisource')
for arg in sys.argv[1:]:
if arg == '-dry-run':
options.dry_run = True
elif arg.startswith('-cat:'):
cat_name = unicode(arg[len('-cat:'):], u'utf-8')
cat = catlib.Category(options.site, cat_name)
gen = pagegenerators.CategorizedPageGenerator(cat)
options.sync_between_page = True
elif arg.startswith('-prefix:'):
prefix_name = unicode(arg[len('-prefix;'):], 'utf-8')
gen = pagegenerators.PrefixingPageGenerator(prefix_name, includeredirects = False, site = options.site)
options.sync_between_page = False
elif arg.startswith('-links:'):
pagename = unicode(arg[len('-links:'):], 'utf-8')
page = wikipedia.Page(options.site, pagename)
gen = pagegenerators.LinkedPageGenerator(page)
options.sync_between_page = False
elif arg.startswith('-page:'):
pagename = unicode(arg[len('-page:'):], 'utf-8')
gen = [ wikipedia.Page(options.site, pagename) ]
options.sync_between_page = False
elif arg == '-fix-redirect':
options.fix_redirect = True
elif arg == '-delete-redirect':
options.delete_redirect = True
elif arg == '-help':
print sys.argv[0], '[-dry-run | -help | -cat:cat_name | -prefix:prefix_name | -fix-redirect | -delete-redirect ]'
sys.exit(1)
if options.delete_redirect:
if not options.site.isAllowed('delete', True):
print 'You asked to delete redirect but have not enough right todo that'
sys.exit(1)
if not options.fix_redirect:
print 'You asked to delete redirect but not to -fix-redirect'
sys.exit(1)
move_pages(gen, options)
finally:
flush_log(options)
wikipedia.stopme()