Gebruiker:Rozebotje/Inwonertal Duitse gemeente/code

Uit Wikipedia, de vrije encyclopedie
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""

Based on basic.py

The following parameters are supported:

&params;

    -debug         If given, doesn't do any real changes, but only shows
                   what would have been changed.

    -log           Writes output to logfile

    -transcludes   processes pages which use a certain template, e.g.:
                   "-transcludes:Infobox Duitse plaats plus"

    -cat           processes pages from a certain category, eg:
                   "-cat:Gemeente in Saarland"

Other standard arguments are also supported.

Every other argument is regarded as the title of a page to process;
enclose a page name in quotes if it includes spaces, e.g. "Taura (gemeente)"

"""
__version__ = '$Id$'
import wikipedia
import pagegenerators
import re
import sys
from time import strftime, localtime

# Needed for the help text shown when this script is run with -help:
# maps the '&params;' placeholder used in the module docstring above to
# the parameter help provided by the pagegenerators module.
docuReplacements = {'&params;': pagegenerators.parameterHelp}

class BasicBot:
    """
    Bot that rewrites plain-text population statements into a call of the
    {{Inwonertal Duitse gemeente}} template.

    For every page delivered by the generator it reads the "Bundesland" and
    "Gemeindeschlüssel" parameters from the page text, looks for sentences
    of the form "<subject> heeft|telt [ongeveer|ruim|bijna] <number>
    inwoners" and replaces the number by the template call. After all pages
    have been treated, a summary is appended to a log page on the wiki.
    """

    # Edit summary message that should be used.
    # NOTE: Put a good description here, and add translations, if possible!
    msg = {
        'nl': u'Bot: aantal inwoners dmv template genereren',
    }

    # Patterns locating the two parameters that are copied into the
    # template call; group 1 is the parameter value.
    _BUNDESLAND_RE = u'[Bb]undesland\\s*=\\s*([A-Za-zöÖüÜäÄß-]*)'
    _GEMEINDE_RE = u'[Gg]emeindeschlüssel\\s*=\\s*([0-9 ]*)'

    def __init__(self, generator, debug):
        """
        Constructor. Parameters:
            * generator - The page generator that determines on which pages
                          to work on.
            * debug     - If True, doesn't do any real changes, but only shows
                          what would have been changed.
        """
        self.generator = generator
        self.debug = debug
        # Set to True once the operator answers "All" to the confirmation.
        self.acceptall = False
        # Counters reported in the log written by createlog().
        self.processed = 0
        self.changecount = 0
        self.errorcount = 0
        # Filled in by run(); initialised here so createlog() never hits a
        # missing attribute.
        self.starttime = None
        self.endtime = None

    def createlog(self):
        """
        Append a summary of this run (start/end time, command line and the
        page counters) to the log page and show the resulting diff.

        The log page is only saved when not running in debug mode. Saving
        is best effort: a failure is reported but does not abort the bot.
        """
        logbook = 'Gebruiker:Rozebotje/Inwonertal Duitse gemeente/log'

        log_page = wikipedia.Page(wikipedia.getSite(), logbook)
        try:
            log_text = log_page.get()
        except (wikipedia.NoPage, wikipedia.IsRedirectPage):
            # No usable log page yet: start from an empty one.
            log_text = ''

        old_log_text = log_text

        # Script name as given, every further argument wrapped in quotes.
        args = [wikipedia.decodeArg(sys.argv[0])]
        args += [wikipedia.decodeArg('"%s"' % s) for s in sys.argv[1:]]

        log_text += '\n* Start: %s\n' % self.starttime
        log_text += r'* Command: <nowiki>' + u' '.join(args) + r'</nowiki>' + '\n'
        log_text += '* Processed: %d pages\n' % self.processed
        log_text += '* Changes: %d pages\n' % self.changecount
        log_text += '* Errors: %d pages\n' % self.errorcount
        log_text += '* End: %s\n' % self.endtime
        log_text += '----\n'

        com = wikipedia.translate(wikipedia.getSite(), self.msg) + ' (Log)'

        wikipedia.showDiff(old_log_text, log_text)

        if not self.debug:
            try:
                log_page.put(log_text, comment=com, minorEdit=True)
            except Exception:
                # Deliberately best effort, but unlike a bare "except:" this
                # lets KeyboardInterrupt and SystemExit through.
                wikipedia.output(u'Could not save log')

    def run(self):
        """
        Treat every page of the generator, then write the log entry.

        Records the start and end time of the run and sets the edit summary
        used for the page saves.
        """
        self.starttime = strftime("%d %b %Y %H:%M (%Z)")
        # Set the edit summary message
        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
        for page in self.generator:
            self.treat(page)
        self.endtime = strftime("%d %b %Y %H:%M (%Z)")
        self.createlog()

    @staticmethod
    def _find_parameter(pattern, text):
        """
        Return the stripped value captured by group 1 of `pattern` in
        `text`, or None when the pattern does not occur or the captured
        value is empty.
        """
        match = re.search(pattern, text)
        if match is None:
            return None
        # The patterns use "*" quantifiers, so the capture may be empty or
        # carry trailing blanks; neither belongs in the template call.
        return match.group(1).strip() or None

    @staticmethod
    def _build_search_pattern(title):
        """
        Return the regular expression that finds the population statement.

        Groups: 1 = subject (page title, page title without a trailing
        " (...)" disambiguation, or "De stad"/"De plaats"/"De gemeente"),
        2 = verb ("heeft"/"telt"), 3 = number, 4 = "inwoners". An optional
        "ongeveer"/"ruim"/"bijna" before the number is matched but not
        captured.
        """
        subjects = []
        base = re.match(r'(.*) \(.*\)', title)
        if base is not None:
            subjects.append(re.escape(base.group(1)))
        # Escape the whole title: it is literal text, not a pattern.
        subjects.append(re.escape(title))
        subjects.extend([u'De stad', u'De plaats', u'De gemeente'])

        pattern = u'(' + u'|'.join(subjects) + u') '
        pattern += r'(heeft|telt) '
        pattern += r'(?:ongeveer|ruim|bijna)? ?'
        pattern += r'([\d\.]+) '
        pattern += r'(inwoners)'
        return pattern

    @staticmethod
    def _insert_template(text, title, gemeinde, bundesland):
        """
        Return `text` with every population statement for page `title`
        rewritten to "<subject> telt {{Inwonertal Duitse gemeente|
        <gemeinde>|<bundesland>|<number>}} inwoners".
        """
        pattern = BasicBot._build_search_pattern(title)
        template_start = u'{{Inwonertal Duitse gemeente|%s|%s|' % (
            gemeinde, bundesland)

        def substitute(match):
            # A function replacement keeps the parameter values literal
            # (no backslash/group-reference processing).
            return u'%s telt %s%s}} %s' % (
                match.group(1), template_start,
                match.group(3), match.group(4))

        return re.sub(pattern, substitute, text)

    def treat(self, page):
        """
        Loads the given page, replaces the population statement by the
        template call, and saves the page if something changed.

        Pages that do not exist, are redirects, or lack one of the two
        required parameters are skipped. In debug mode the diff is shown
        but nothing is saved. Otherwise the operator is asked to confirm
        unless "All" was chosen before.
        """
        self.processed += 1

        # Show the title of the page we're working on.
        # Highlight the title in purple.
        wikipedia.output(u"\03{lightpurple}%s\03{default}:" % page.title())

        try:
            # Load the page
            original = page.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping." % page.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
            return

        # Find the parameters required for the template.
        bundesland = self._find_parameter(self._BUNDESLAND_RE, original)
        if bundesland is None:
            wikipedia.output(u"Bundesland not found")
            return

        gemeinde = self._find_parameter(self._GEMEINDE_RE, original)
        if gemeinde is None:
            wikipedia.output(u"Gemeindeschlüssel not found")
            return

        text = self._insert_template(original, page.title(),
                                     gemeinde, bundesland)

        # Nothing to do if the text is unchanged.
        if text == original:
            return

        # show what was changed
        wikipedia.showDiff(original, text)

        if self.debug:
            return

        if self.acceptall:
            choice = 'y'
        else:
            choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')

        if choice == 'a':
            choice = 'y'
            self.acceptall = True

        if choice != 'y':
            return

        try:
            # Save the page
            page.put(text)
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked; skipping." % page.aslink())
            self.errorcount += 1
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
            self.errorcount += 1
        except wikipedia.SpamfilterError as error:
            wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (page.title(), error.url))
            self.errorcount += 1
        else:
            # Count a change only once the save really succeeded.
            self.changecount += 1


def main():
    """
    Parse the command line, build the page generator and run the bot.

    "-debug" switches on debug mode. Arguments accepted by the generator
    factory select the pages to work on; every remaining argument is taken
    as a page title. Explicit page titles take precedence over a factory
    generator. Without any page source the help text is shown instead.
    """
    # Handles the command line arguments shared with other scripts that
    # determine which pages to work on.
    factory = pagegenerators.GeneratorFactory()
    gen = None
    titles = []
    debug = False

    for arg in wikipedia.handleArgs():
        if arg.startswith("-debug"):
            debug = True
            continue
        # A standard argument like -start:XYZ or -ref:Asdf yields a
        # generator; anything else is collected as a page title.
        candidate = factory.handleArg(arg)
        if candidate:
            gen = candidate
        else:
            titles.append(arg)

    if titles:
        gen = iter([wikipedia.Page(wikipedia.getSite(), title)
                    for title in titles])

    if not gen:
        wikipedia.showHelp()
        return

    # The preloading generator is responsible for downloading multiple
    # pages from the wiki simultaneously.
    preloading = pagegenerators.PreloadingGenerator(gen)
    bot = BasicBot(preloading, debug)
    bot.run()

if __name__ == '__main__':
    # Script entry point: wikipedia.stopme() is called whether main()
    # returns normally or raises.
    try:
        main()
    finally:
        wikipedia.stopme()