ویکیپدیا:درخواستهای ربات/ربات مختصات جغرافیا/ویرایش ۴
ظاهر
# -*- coding: utf-8 -*-
#solving "{{coord|LAT|" problem
"""
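This bot copies geographic coordinates from the English Wikipedia. For each
page it works on, it follows the interwiki link to the English article, looks
there for a coordinate template or coordinate infobox parameters, and prepends
a matching {{Coord|...|display=title}} template to the local page.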
You can run the bot with the following commandline parameters:
-file - Work on all pages given in a local text file.
Will read any [[wiki link]] and use these articles.
Argument can also be given as "-file:filename".
-cat - Work on all pages which are in a specific category.
Argument can also be given as "-cat:categoryname".
-page - Only edit a specific page.
Argument can also be given as "-page:pagetitle". You can give this
parameter multiple times to edit multiple pages.
-ref - Work on all pages that link to a certain page.
Argument can also be given as "-ref:referredpagetitle".
-filelinks - Works on all pages that link to a certain image.
Argument can also be given as "-filelinks:ImageName".
-links - Work on all pages that are linked to from a certain page.
Argument can also be given as "-links:linkingpagetitle".
-start - Work on all pages in the wiki, starting at a given page. Choose
"-start:!" to start at the beginning.
NOTE: You are advised to use -xml instead of this option; this is
meant for cases where there is no recent XML dump.
-except:XYZ - Ignore pages which contain XYZ. If the -regex argument is given,
XYZ will be regarded as a regular expression.
-summary:XYZ - Set the summary message text for the edit to XYZ, bypassing the
predefined message texts with original and replacements inserted.
-template:XYZ - Accepted on the command line, but currently has no effect.
-namespace:n - Number of namespace to process. The parameter can be used
multiple times. It works in combination with all other
parameters, except for the -start parameter. If you e.g. want to
iterate over all user pages starting at User:M, use
-start:User:M.
-always - Don't prompt you for each replacement
other: -
NOTE: Only use either -xml or -file or -page, but don't mix them.
Examples:
"""
#
# [[Utente:Wiso]] 2007
#
# Distributed under the terms of the GPL licence
#
from __future__ import generators
import sys,re,pprint
import wikipedia,pagegenerators,catlib,config
# Version identifier of this script.
__version__ = '$Id: coordbot.py,v 0.1 $'
# Edit summary used when a page is saved. The %s placeholder is filled in by
# CoordRobot.run() with a link to the English page the coordinates were taken
# from; the bracketed wiki link at the end of the summary points at the wiki
# page holding this code.
msg = u'ربات:افزودن مختصات %s ([[وپ:درخواستهای ربات/ربات مختصات جغرافیا/ویرایش ۴|کد]])'
# Search/replace rules used to extract coordinates from the source wiki text.
#
# Each entry is a (pattern, replacement) pair: the pattern is searched in the
# text of the source page and the replacement (using the pattern's groups)
# builds the {{Coord|...|display=title}} template that is prepended to the
# local page. CoordRobot.compileregex() compiles every pattern in place;
# CoordRobot.run() only iterates over the 'safe' list.
templates = {
    'safe': [
        # Every Wiki:
        ( r'\{\{ ?[Cc]oord(.*?)\}\}',r"{{Coord\1|display=title}}\n" ),
        ( r'{{coor[_ ]title[_ ]d\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}',r"{{coord|\1|\2|\3|\4|\5|display=title}}\n" ),
        ( r'{{coor[_ ]title[_ ]dm\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^\}]*?)\}\}',r"{{coord|\1|\2|\3|\4|\5|\6|\7|display=title}}\n" ),
        ( r'{{coor[_ ]title[_ ]dms\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}',r"{{coord|\1|\2|\3|\4|\5|\6|\7|\8|\9|display=title}}\n" ),
        # The longitude group needs "+" like the latitude group: with a
        # single-character group the pattern could not match when extra
        # "|param" text follows a multi-character longitude.
        ( r'\{\{ ?[Cc]oor[ _]d\|([0-9\.+-]+)\|([0-9\.+-]+)(\|?[^\|]*)\}\}',r"{{Coord|\1|\2\3|display=title}}\n" ),
        ( r'\{\{.*latd *= *([0-9\.]+).*longd ?= ?([0-9\.]+)',r"{{Coord|\1|\2|display=title}}\n" ),
        ( r'.*\|lat_deg *= *([0-9\.]+).*\|lat_min *= *([0-9\.]+).*\n.*\|lon_deg *= *([0-9\.]+).*\|lon_min *= *([0-9\.]+).*',r"{{Coord|\1|\2|N|\3|\4|E|display=title}}\n" ),
        # English Wiki:
        ( r'.*\|lat_deg *= *([0-9\.]+).*\|lat_min *= *([0-9\.]+).*\|lat_sec *= *([0-9\.]+).*\n.*\|lon_deg *= *([0-9\.]+).*\|lon_min *= *([0-9\.]+).*\|lon_sec *= *([0-9\.]+).*',r"{{Coord|\1|\2|\3|N|\4|\5|\6|E|display=title}}\n" ),
        ( r'.*\|latd *= *([0-9\.]+).*\|*latm *= *([0-9\.]+).*\|*lats *= *([0-9\.]+).*\|*latNS *= (.*?[NS])\n.*\|longd *= *([0-9\.]+).*\|*longm *= *([0-9\.]+).*\|*longs *= *([0-9\.]+).*\| longEW = (.*?[EW])*',r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8|display=title}}\n" ),
        ( r'.*\|*source_lat_d *= *([0-9\.]+).*\|*source_lat_m *= *([0-9\.]+).*\|*source_lat_s *= *([0-9\.]+).*\|*source_lat_NS *=*(.*?[NS])\n.*\| source_long_d *= *([0-9\.]+).*\|*source_long_m *= *([0-9\.]+).*\|*source_long_s *= *([0-9\.]+).*\| source_long_EW =*(.*?[EW])*',r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8|display=title}}\n" ),
        # Italian Wiki:
        ( r'.*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|*\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|*',r"{{Coord|\1|\2|N|\3|\4|E|display=title}}\n" ),
        ( r'.*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|latSecondi *= *([0-9\.]+).*\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|longSecondi *= *([0-9\.]+).*',r"{{Coord|\1|\2|\3|N|\4|\5|\6|E|display=title}}\n" ),
        ( r'..*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|latSecondi *= *([0-9\.]+).*\n.*\|latitudineNS *=(.*?[NS])\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|longSecondi *= *([0-9\.]+).*\n.*\|longitudineEW *=(.*?[EW])*',r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8|display=title}}\n" ),
    ],
    # Compiled together with the 'safe' rules, but not applied by
    # CoordRobot.run().
    'notsafe': [
        ( r'\{\{ ?[Cc]oord[ _]dm\|([0-9]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}',r"{{Coord|\1|\2|\3|\4|\5|\6\7|display=title}}\n" ),
        ( r'\{\{ ?[Cc]oor[ _]dms\|([0-9]+)\|([0-9\.]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}',r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8\9|display=title}}\n" ),
    ]
}
# Add exceptions (for pages that don't need any coordinates or already have a Coord template):
# Regular expressions for page text that must be left alone: a page whose text
# matches any of these is skipped by CoordRobot.run(). The entries are compiled
# in place by CoordRobot.compileregex().
exceptions = [
    r'\{\{ *?Geobox',
    r'\{\{ ?[Cc]oord',
    r'\{\{ ?Template:[Cc]oord',
    r'\{\{ ?[mM]ontagna',
    r'\{\{ ?(Template:)?[cC]omune',
    # "Citta" / "Municipalita" end in a-grave (U+00E0). The letter is written
    # as the regex escape \xe0 so the pattern does not depend on how this
    # file is encoded or decoded.
    r'\{\{ ?[cC]itt\xe0',
    r'\{\{ ?[mM]unicipalit\xe0',
    r'\{\{ ?[aA]eroporto\|',
    r'\{\{ ?[Mm]unicipi',
    r'\{\{ ?[iI]nfobox[ _]Azienda\|',
    r'\{\{ ?[Ss]\|aziende',
    r'\{\{ ?[Dd]isambigua\|',
    r'\{\{ ?[Ff]razione',
    r'\{\{ ?[Ss]quadra',
    r'\{\{ ?[Pp]asso ?(\||\n)',
    r'\{\{ ?[Bb]undesland[ _]tedesco',
]
class CoordRobot:
    """
    A bot that imports coordinates from another Wikipedia.

    For each page supplied by the generator the bot looks for an interwiki
    link to the English Wikipedia, scans the English page text with the
    patterns in ``templates['safe']`` and, when one matches, prepends the
    resulting ``{{Coord ...}}`` template to the local page and saves it.
    """

    def __init__(self, generator, autoTitle=False, autoText=False):
        """
        Store the page generator and compile the module-level patterns.

        generator -- iterable of page objects to process
        autoTitle, autoText -- accepted for interface compatibility; they
                               are not used by this class
        """
        self.generator = generator
        self.compileregex()

    def compileregex(self):
        """
        Compile the patterns in ``templates`` and ``exceptions`` in place.

        Entries that are already compiled are left untouched, so creating a
        second robot does not fail (re.compile() rejects a compiled pattern
        combined with a flags argument).
        """
        for pairs in templates.values():
            for index, (old, new) in enumerate(pairs):
                if not hasattr(old, 'search'):
                    pairs[index] = re.compile(old, re.UNICODE), new
        for index, exception in enumerate(exceptions):
            if not hasattr(exception, 'search'):
                exceptions[index] = re.compile(exception)

    def checkExceptions(self, text):
        """
        Return the first piece of ``text`` matched by one of the compiled
        ``exceptions`` patterns, or None when no pattern matches.
        """
        for exception in exceptions:
            hit = exception.search(text)
            if hit:
                return hit.group(0)
        return None

    def change(self, page, new_text):
        """
        Save ``new_text`` to ``page``.

        Failures are reported through wikipedia.output() and never raised,
        so one page that cannot be saved does not stop the run.
        """
        try:
            page.put(new_text)
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
        except wikipedia.SpamfilterError as url:
            wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), url))
        except Exception as error:
            # Still best-effort, but no longer silent; KeyboardInterrupt and
            # SystemExit are deliberately not caught here.
            wikipedia.output(u'Cannot change %s because of an unexpected error: %r' % (page.title(), error))

    # The wiki the coordinates are taken from is hard-coded in run()
    # (currently the 'en' site).
    def run(self):
        """
        Process every page of the generator.

        An unexpected error while handling one page is reported and the bot
        moves on to the next page.
        """
        sen = wikipedia.Site('en')
        for page in self.generator:
            try:
                self._importCoordinates(page, sen)
            except Exception as error:
                wikipedia.output(u'Skipping page because of an unexpected error: %r' % (error,))

    def _importCoordinates(self, page, sen):
        """Copy a coordinate template from the ``sen`` interwiki partner of ``page``."""
        try:
            if not page.canBeEdited():
                wikipedia.output(u'Skipping locked page %s' % page.title())
                return
            text_it = page.get()
            match = self.checkExceptions(text_it)
            # skip all pages that contain certain texts
            if match:
                wikipedia.output(u'Skipping %s because it contains %s' % (page.title(), match))
                return
            interwiki_list = page.interwiki()
        except wikipedia.NoPage:
            wikipedia.output(u'Page %s not found' % page.title())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u'Page %s is a redirect, skip' % page.title())
            return

        # Pick the first interwiki link that points to the source site.
        page_en = None
        for candidate in interwiki_list:
            if candidate.site() == sen:
                page_en = candidate
                break
        if page_en is None:
            return

        wikipedia.output(page.title())
        wikipedia.output(u'en: %s' % page_en.title())
        try:
            text_en = page_en.get()
        except wikipedia.NoPage:
            wikipedia.output(u'Page %s not found' % page_en.title())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u'Page %s is a redirect, follow redirect' % page_en.title())
            # NOTE(review): presumably get_redirect=True returns the text of
            # the redirect page itself rather than of its target -- confirm.
            text_en = page_en.get(get_redirect=True)

        # Only source pages that mention "title"/"Title" at all are considered.
        if u'title' not in text_en and u'Title' not in text_en:
            return

        # Without any match the page text stays as it is; every matching rule
        # overwrites the candidate, so the last matching rule wins.
        new_text_it = text_it
        for old, new in templates['safe']:
            # Drop "Coord missing" markers and display= options so that the
            # replacement can append its own display=title.
            text_en = text_en.replace('{{Coord missing', '')
            text_en = re.sub(r"\|\s*display\s*\=\s*(inline,)?title(,inline)?", "", text_en)
            text_en = text_en.replace('|display=inline', '')
            match = old.search(text_en)
            if not match:
                continue
            found = match.group(0)
            wikipedia.output(u'Coord %s: ' % found)
            template_new = old.sub(new, found)
            template_new = template_new.replace(u'||', '|')
            wikipedia.output(template_new)
            if '{{Coord missing' in template_new:
                new_text_it = text_it
            else:
                new_text_it = template_new + text_it

        # NOTE(review): the indentation of the original source was lost; the
        # checks and the save below are placed after the template loop so the
        # page is saved at most once -- confirm against the intended layout.
        # Never save unfilled placeholder coordinates.
        for placeholder in ('{{coord|LAT|', '{{Coord|LAT|', '{{coord|lat|'):
            if placeholder in new_text_it:
                return
        if '{{Coord missing' in new_text_it:
            return
        # Nothing was prepended: saving would only produce an empty edit.
        if new_text_it == text_it:
            return
        wikipedia.setAction(msg % page_en.aslink())
        self.change(page, new_text_it)
def main():
    """
    Parse the command line, build the page generator and run the bot.

    Options handled here: -autotitle, -autotext, -page[:title],
    -except:text, -template:name, -namespace:n and -summary:text. Every
    other argument is passed to the pagegenerators factory. When no pages
    are selected the help text is shown and the script exits.

    Errors are no longer swallowed here; they propagate to the caller.
    """
    gen = None
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Value of -template:; accepted but not used by the bot.
    template = None
    page_titles = []
    autoText = False
    autoTitle = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Load default summary message.
    # BUG WARNING: This is probably incompatible with the -lang parameter.
    wikipedia.setAction(msg)
    # Read commandline parameters.
    for arg in wikipedia.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                page_titles.append(wikipedia.input(u'Which page do you want to chage?'))
            else:
                page_titles.append(arg[6:])
        elif arg.startswith('-except:'):
            # Don't edit pages which contain this text. The value is added,
            # as a literal, to the module-level exception patterns that the
            # robot compiles on start-up (previously it was stored in an
            # unused local list and had no effect).
            exceptions.append(re.escape(arg[8:]))
        elif arg.startswith('-template:'):
            template = arg[10:]
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        elif arg.startswith('-summary:'):
            # NOTE(review): CoordRobot.run() sets its own summary before
            # each save, so this value is likely overridden -- confirm.
            wikipedia.setAction(arg[9:])
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
    print(namespaces)
    if page_titles:
        pages = [wikipedia.Page(wikipedia.getSite(), title) for title in page_titles]
        gen = iter(pages)
    if not gen:
        # syntax error, show help text from the top of this file
        wikipedia.showHelp('coordbot')
        wikipedia.stopme()
        sys.exit()
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=120)
    bot = CoordRobot(preloadingGen, autoTitle, autoText)
    bot.run()
if __name__ == "__main__":
    try:
        main()
    except Exception:
        # Report the failure instead of hiding it. SystemExit and
        # KeyboardInterrupt are not caught, so sys.exit() and Ctrl-C behave
        # normally.
        import traceback
        traceback.print_exc()