#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Based on basic.py
The following parameters are supported:
¶ms;
-debug If given, doesn't do any real changes, but only shows
what would have been changed.
-log Writes output to logfile
-transcludes processes pages which use a certain template, e.g.:
"-transcludes:Infobox Duitse plaats plus"
-cat processes pages from a certain category, eg:
"-cat:Gemeente in Saarland"
Other standard arguments are also supported.
All other arguments will be regarded as titles of pages to work on;
enclose a page name in quotes if it includes spaces, e.g. "Taura (gemeente)"
"""
# Version identifier of this script. '$Id$' looks like an unexpanded
# version-control keyword (presumably Subversion) -- TODO confirm.
__version__ = '$Id$'
import wikipedia
import pagegenerators
import re
import sys
from time import strftime, localtime
# Placeholder -> replacement text used for the help output that is shown
# when this script is run with the parameter -help. The key below also
# appears verbatim in the module docstring and is mapped to
# pagegenerators.parameterHelp (presumably substituted by
# wikipedia.showHelp() -- TODO confirm).
# NOTE(review): the key looks like a mangled '&params;' (HTML entity
# decoding). It still matches the identical text in the module docstring,
# so if it is ever corrected, both places must be changed together.
docuReplacements = {
'¶ms;': pagegenerators.parameterHelp
}
class BasicBot:
    """
    Bot that turns hard-coded population figures into a template call.

    For every page delivered by the generator the bot reads the
    ``Bundesland`` and ``Gemeindeschlüssel`` parameters from the page text
    and rewrites sentences of the form
    ``<subject> heeft|telt [ongeveer|ruim|bijna] <number> inwoners`` into
    ``<subject> telt {{Inwonertal Duitse gemeente|<Gemeindeschlüssel>|
    <Bundesland>|<number>}} inwoners``. After all pages have been handled
    a summary is appended to a log page on the wiki.
    """

    # Edit summary message that should be used.
    # NOTE: Put a good description here, and add translations, if possible!
    msg = {
        'nl': u'Bot: aantal inwoners dmv template genereren',
    }

    # Patterns that extract the two template arguments from the page text.
    # Backslashes are doubled (instead of using a raw literal) so that the
    # same source is valid for a unicode literal in Python 2 and Python 3.
    _BUNDESLAND_RE = re.compile(
        u'[Bb]undesland\\s*=\\s*([A-Za-zöÖüÜäÄß-]*)')
    _GEMEINDE_RE = re.compile(
        u'[Gg]emeindeschlüssel\\s*=\\s*([0-9 ]*)')

    def __init__(self, generator, debug):
        """
        Constructor. Parameters:
        * generator - The page generator that determines on which pages
                      to work on.
        * debug     - If True, doesn't do any real changes, but only shows
                      what would have been changed.
        """
        self.generator = generator
        self.debug = debug
        # Set to True once the user answers "All" to the confirmation
        # prompt; later pages are then saved without asking.
        self.acceptall = False
        # Counters reported in the log written by createlog().
        self.processed = 0
        self.changecount = 0
        self.errorcount = 0

    @staticmethod
    def _find_parameter(pattern, text):
        """
        Return the stripped first group of *pattern* found in *text*.

        Returns None when the pattern does not match or when the captured
        value is empty or consists only of whitespace, so that callers
        never build a template call with an empty argument.
        """
        match = pattern.search(text)
        if match is None:
            return None
        return match.group(1).strip() or None

    @staticmethod
    def _build_pattern(title):
        """
        Build the regular expression that finds the population sentence.

        Groups: 1 = subject (page title without a trailing parenthesised
        part, the full page title, or one of the generic Dutch subjects),
        2 = verb, 3 = the number, 4 = the word "inwoners".
        The title is passed through re.escape() so that characters such as
        '.', '+' or '?' in a title are matched literally.
        """
        subjects = []
        # "Taura (gemeente)" -> also accept the plain "Taura" as subject.
        without_suffix = re.match(r'(.*) \(.*\)', title)
        if without_suffix is not None:
            subjects.append(re.escape(without_suffix.group(1)))
        subjects.append(re.escape(title))
        subjects.extend(['De stad', 'De plaats', 'De gemeente'])
        return ('(' + '|'.join(subjects) + ') '
                + r'(heeft|telt) '
                # optional qualifier, not captured
                + r'(?:ongeveer|ruim|bijna)? ?'
                + r'([\d\.]+) '
                + r'(inwoners)')

    @staticmethod
    def _insert_template(text, title, gemeinde, bundesland):
        """
        Return *text* with every population sentence rewritten to use the
        {{Inwonertal Duitse gemeente}} template.

        * text       - The page text to work on.
        * title      - The page title, used as a possible sentence subject.
        * gemeinde   - Value of the Gemeindeschlüssel parameter.
        * bundesland - Value of the Bundesland parameter.
        """
        def build_sentence(match):
            # The verb (group 2) is always normalised to "telt"; an
            # optional qualifier such as "ongeveer" is dropped.
            return u'%s telt {{Inwonertal Duitse gemeente|%s|%s|%s}} %s' % (
                match.group(1), gemeinde, bundesland,
                match.group(3), match.group(4))

        return re.sub(BasicBot._build_pattern(title), build_sentence, text)

    def createlog(self):
        """
        Append a summary of this run to the log page on the wiki.

        The summary contains start and end time, the command line and the
        processed/changed/error counters. The diff of the log page is
        always shown; the page is only saved when not in debug mode.
        Requires self.starttime and self.endtime, which run() sets.
        """
        logbook = 'Gebruiker:Rozebotje/Inwonertal Duitse gemeente/log'
        log_page = wikipedia.Page(wikipedia.getSite(), logbook)
        try:
            log_text = log_page.get()
        except (wikipedia.NoPage, wikipedia.IsRedirectPage):
            # Start with an empty log if the page cannot be read as text.
            log_text = ''
        old_log_text = log_text
        # Script name as given, every further argument wrapped in quotes.
        args = [wikipedia.decodeArg(sys.argv[0])]
        args += [wikipedia.decodeArg('"%s"' % s) for s in sys.argv[1:]]
        log_text += '\n* Start: %s\n' % self.starttime
        log_text += r'* Command: <nowiki>' + u' '.join(args) + r'</nowiki>' + '\n'
        log_text += '* Processed: %d pages\n' % self.processed
        log_text += '* Changes: %d pages\n' % self.changecount
        log_text += '* Errors: %d pages\n' % self.errorcount
        log_text += '* End: %s\n' % self.endtime
        log_text += '----\n'
        com = wikipedia.translate(wikipedia.getSite(), self.msg) + ' (Log)'
        wikipedia.showDiff(old_log_text, log_text)
        if not self.debug:
            try:
                log_page.put(log_text, comment=com, minorEdit=True)
            except Exception:
                # Writing the log is best effort, but KeyboardInterrupt
                # and SystemExit must not be swallowed here.
                wikipedia.output(u'Could not save log')

    def run(self):
        """
        Treat every page of the generator, then write the log.
        """
        self.starttime = strftime("%d %b %Y %H:%M (%Z)")
        # Set the edit summary message
        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
        for page in self.generator:
            self.treat(page)
        self.endtime = strftime("%d %b %Y %H:%M (%Z)")
        self.createlog()

    def treat(self, page):
        """
        Loads the given page, does some changes, and saves it.

        The page is skipped when it does not exist, is a redirect, or
        lacks a non-empty Bundesland or Gemeindeschlüssel parameter.
        In debug mode the diff is shown but nothing is saved. Otherwise
        the user is asked for confirmation unless "All" was chosen before.
        changecount is only incremented after a successful save.
        """
        self.processed += 1
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        wikipedia.output(u"\03{lightpurple}%s\03{default}:" % page.title())
        try:
            # Load the page
            original = page.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping." % page.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
            return

        # Find the parameters required for the template.
        bundesland = self._find_parameter(self._BUNDESLAND_RE, original)
        if bundesland is None:
            wikipedia.output(u"Bundesland not found")
            return
        gemeinde = self._find_parameter(self._GEMEINDE_RE, original)
        if gemeinde is None:
            wikipedia.output(u"Gemeindeschlüssel not found")
            return

        text = self._insert_template(original, page.title(),
                                     gemeinde, bundesland)
        # Nothing to do if the text was not changed.
        if text == original:
            return
        # show what was changed
        wikipedia.showDiff(original, text)
        if self.debug:
            return

        if not self.acceptall:
            choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
            if choice == 'a':
                self.acceptall = True
            elif choice != 'y':
                return

        try:
            # Save the page
            page.put(text)
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked; skipping." % page.aslink())
            self.errorcount += 1
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
            self.errorcount += 1
        except wikipedia.SpamfilterError as error:
            wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (page.title(), error.url))
            self.errorcount += 1
        else:
            # Count the page as changed only when the save went through.
            self.changecount += 1
def main():
    """
    Parse the command line, build the page generator and run the bot.

    "-debug" switches the bot to dry-run mode. Every other argument is
    first offered to the pagegenerators factory; arguments the factory
    does not recognise are collected as page titles. If any page titles
    were given they replace a generator produced by the factory. Without
    any page source the help text is shown instead.
    """
    # Handles the command line arguments that are shared with other
    # scripts and that select the pages to work on.
    factory = pagegenerators.GeneratorFactory()
    page_source = None
    # Titles given directly on the command line.
    titles = []
    # When True, changes are only shown and never saved.
    dry_run = False

    for argument in wikipedia.handleArgs():
        if argument.startswith("-debug"):
            dry_run = True
            continue
        # Standard arguments such as -start:XYZ or -ref:Asdf yield a
        # generator; anything else is taken to be a page title.
        produced = factory.handleArg(argument)
        if produced:
            page_source = produced
        else:
            titles.append(argument)

    if titles != []:
        pages = []
        for title in titles:
            pages.append(wikipedia.Page(wikipedia.getSite(), title))
        page_source = iter(pages)

    if not page_source:
        wikipedia.showHelp()
        return

    # Wrap the source in the preloading generator before handing it to
    # the bot.
    page_source = pagegenerators.PreloadingGenerator(page_source)
    BasicBot(page_source, dry_run).run()
if __name__ == "__main__":
    # Script entry point: wikipedia.stopme() is called in all cases,
    # including when main() raises.
    try:
        main()
    finally:
        wikipedia.stopme()