ویکیپدیا:درخواستهای ربات/ربات مختصات جغرافیا/ویرایش ۵
<syntaxhighlight lang="python">
# -*- coding: utf-8 -*-
# solving "{{coord|LAT|" problem
""" This bot will make direct text replacements. It will retrieve information on which pages might need changes either from an XML dump or a text file, or only change a single page.
You can run the bot with the following commandline parameters:
-file - Work on all pages given in a local text file.
Will read any wiki link and use these articles. Argument can also be given as "-file:filename".
-cat - Work on all pages which are in a specific category.
Argument can also be given as "-cat:categoryname".
-page - Only edit a specific page.
Argument can also be given as "-page:pagetitle". You can give this parameter multiple times to edit multiple pages.
-ref - Work on all pages that link to a certain page.
Argument can also be given as "-ref:referredpagetitle".
-filelinks - Works on all pages that link to a certain image.
Argument can also be given as "-filelinks:ImageName".
-links - Work on all pages that are linked to from a certain page.
Argument can also be given as "-links:linkingpagetitle".
-start - Work on all pages in the wiki, starting at a given page. Choose
"-start:!" to start at the beginning. NOTE: You are advised to use -xml instead of this option; this is meant for cases where there is no recent XML dump.
-except:XYZ - Ignore pages which contain XYZ. If the -regex argument is given,
XYZ will be regarded as a regular expression.
-summary:XYZ - Set the summary message text for the edit to XYZ, bypassing the
predefined message texts with original and replacements inserted.
-template:XYZ - Accepted on the command line; the value is not used by this script.
-namespace:n - Number of namespace to process. The parameter can be used
multiple times. It works in combination with all other parameters, except for the -start parameter. If you e.g. want to iterate over all user pages starting at User:M, use -start:User:M.
-always - Don't prompt you for each replacement.
NOTE: Only use either -xml or -file or -page, but don't mix them.
Examples:
"""
# Utente:Wiso 2007
# Distributed under the terms of the GPL licence
from __future__ import generators

import pprint
import re
import sys

import catlib
import config
import pagegenerators
import wikipedia
__version__ = '$Id: coordbot.py,v 0.1 $'

# Default edit summary handed to wikipedia.setAction() by main().
# NOTE(review): the text still contains a raw '%s' placeholder that nothing
# in this file fills in before it is used as a summary -- confirm whether it
# is meant to be formatted first.
msg = u'ربات:افزودن مختصات %s'
# Regex -> replacement rules, grouped under the keys 'safe' and 'notsafe'.
# Each entry is a (pattern, replacement) pair; CoordRobot.compileregex()
# replaces every pattern string with its compiled form in place.
#
# NOTE(review): the replacement strings below are reproduced exactly as found.
# They read like rendered wiki output (a template link and "#coordinates"
# parser error messages) rather than template wikitext, so they were
# presumably "{{coord|\1|...}}"-style replacements before this script was
# pasted into a wiki page -- confirm against the original coordbot.py before
# relying on them.
templates = {
    'safe': [
        # Every Wiki:
        (r'\{\{ ?[Cc]oord(.*?)\}\}',
         r"الگو:Coord\1\n"),
        (r'{{coor[_ ]title[_ ]d\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}',
         r"متغیرهای نامعتبر در {{#coordinates:}} واردشده است.\n"),
        (r'{{coor[_ ]title[_ ]dm\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^\}]*?)\}\}',
         r"متغیرهای نامعتبر در {{#coordinates:}} واردشده است.\n"),
        (r'{{coor[_ ]title[_ ]dms\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}',
         r"متغیرهای نامعتبر در {{#coordinates:}} واردشده است.\n"),
        (r'\{\{ ?[Cc]oor[ _]d\|([0-9\.+-]+)\|([0-9\.+-])(\|?[^\|]*)\}\}',
         r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'\{\{.*latd *= *([0-9\.]+).*longd ?= ?([0-9\.]+)',
         r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'.*\|lat_deg *= *([0-9\.]+).*\|lat_min *= *([0-9\.]+).*\n.*\|lon_deg *= *([0-9\.]+).*\|lon_min *= *([0-9\.]+).*',
         r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        # English Wiki:
        (r'.*\|lat_deg *= *([0-9\.]+).*\|lat_min *= *([0-9\.]+).*\|lat_sec *= *([0-9\.]+).*\n.*\|lon_deg *= *([0-9\.]+).*\|lon_min *= *([0-9\.]+).*\|lon_sec *= *([0-9\.]+).*',
         r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'.*\|latd *= *([0-9\.]+).*\|*latm *= *([0-9\.]+).*\|*lats *= *([0-9\.]+).*\|*latNS *= (.*?[NS])\n.*\|longd *= *([0-9\.]+).*\|*longm *= *([0-9\.]+).*\|*longs *= *([0-9\.]+).*\| longEW = (.*?[EW])*',
         r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'.*\|*source_lat_d *= *([0-9\.]+).*\|*source_lat_m *= *([0-9\.]+).*\|*source_lat_s *= *([0-9\.]+).*\|*source_lat_NS *=*(.*?[NS])\n.*\| source_long_d *= *([0-9\.]+).*\|*source_long_m *= *([0-9\.]+).*\|*source_long_s *= *([0-9\.]+).*\| source_long_EW =*(.*?[EW])*',
         r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        # Italian Wiki:
        (r'.*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|*\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|*',
         r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'.*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|latSecondi *= *([0-9\.]+).*\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|longSecondi *= *([0-9\.]+).*',
         r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'..*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|latSecondi *= *([0-9\.]+).*\n.*\|latitudineNS *=(.*?[NS])\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|longSecondi *= *([0-9\.]+).*\n.*\|longitudineEW *=(.*?[EW])*',
         r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
    ],
    'notsafe': [
        (r'\{\{ ?[Cc]oord[ _]dm\|([0-9]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}',
         r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
        (r'\{\{ ?[Cc]oor[ _]dms\|([0-9]+)\|([0-9\.]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}',
         r"{{#coordinates:}}: عرض جغرافیایی نامعتبر\n"),
    ],
}

# Exception patterns follow: pages that need no coordinates or that already
# carry a Coord template.
# Regex patterns for wikitext that makes a page ineligible. A page whose text
# matches any of them is skipped by CoordRobot.run().
# CoordRobot.compileregex() replaces each string with its compiled pattern in
# place.
exceptions = [
    r'\{\{ *?Geobox',
    r'\{\{ ?[Cc]oord',
    r'\{\{ ?Template:[Cc]oord',
    r'\{\{ ?[mM]ontagna',
    r'\{\{ ?(Template:)?[cC]omune',
    # The next two entries had their final "a with grave accent" stored as
    # mojibake (UTF-8 bytes decoded with an Arabic code page), so they could
    # never match. They are written with an explicit \u escape in a unicode
    # literal so the pattern is the same text on Python 2 and Python 3.
    u'\\{\\{ ?[cC]itt\u00e0',
    u'\\{\\{ ?[mM]unicipalit\u00e0',
    r'\{\{ ?[aA]eroporto\|',
    r'\{\{ ?[Mm]unicipi',
    r'\{\{ ?[iI]nfobox[ _]Azienda\|',
    r'\{\{ ?[Ss]\|aziende',
    r'\{\{ ?[Dd]isambigua\|',
    r'\{\{ ?[Ff]razione',
    r'\{\{ ?[Ss]quadra',
    r'\{\{ ?[Pp]asso ?(\||\n)',
    r'\{\{ ?[Bb]undesland[ _]tedesco',
]
class CoordRobot:
    """Bot that imports title coordinates from the English Wikipedia.

    For every page yielded by the generator, run() locates the English
    interwiki page, reads the "params=" value of the coordinate link from that
    page's rendered HTML, turns it into a coord template and prepends the
    template to the local page text.

    NOTE(review): this class was reconstructed from a copy whose line
    structure and several string literals had been destroyed by wiki
    rendering. Literals that had to be reconstructed are flagged below.
    """

    # NOTE(review): the original literal here was lost and had become the empty
    # string, which is "in" every text and therefore skipped every page.
    # The check is disabled while this stays empty; restore the intended
    # marker if it is known.
    WIKITEXT_SKIP_MARKER = u""

    # NOTE(review): reconstructed. The original literal was also lost (it had
    # become an empty triple-quoted string). This is presumably the wrapper
    # that the coord template renders for title display -- confirm against
    # the live HTML of the target wiki.
    HTML_COORD_MARKER = u'<span id="coordinates">'

    # Kept exactly as found. NOTE(review): the literal looks truncated (an HTML
    # tag in front of it was probably stripped); as it stands it matches
    # nearly every HTML page, so the params regex does the real filtering.
    GEOHACK_LINK_MARKER = u"""<a href="""

    # The pasted copy read "¶ms=": the "&para" prefix of "&params" had been
    # decoded as the HTML entity for the pilcrow sign.
    # NOTE(review): if getUrl() returns HTML with escaped ampersands, the link
    # would contain "&amp;params=" instead -- confirm.
    GEOHACK_PARAMS_RE = re.compile(u"&params=(.+?)\"")

    EDIT_SUMMARY = u"ربات: افزودن مختصات از ویکیپدیای انگلیسی"

    def __init__(self, generator, autoTitle=False, autoText=False):
        """Store the page generator and compile the module-level patterns.

        autoTitle and autoText are accepted for interface compatibility; they
        are not used.
        """
        self.generator = generator
        self.compileregex()

    def compileregex(self):
        """Compile the module-level ``templates`` and ``exceptions`` in place.

        Entries that are already compiled are left alone, so constructing a
        second CoordRobot no longer fails when re.compile() is handed a
        compiled pattern together with flags.
        """
        for rules in templates.values():
            for index, (old, new) in enumerate(rules):
                if not hasattr(old, 'search'):
                    rules[index] = re.compile(old, re.UNICODE), new
        for index, pattern in enumerate(exceptions):
            if not hasattr(pattern, 'search'):
                exceptions[index] = re.compile(pattern)

    def checkExceptions(self, text):
        """Return the first exception match found in ``text``, else False."""
        for exception in exceptions:
            hit = exception.search(text)
            if hit:
                return hit.group(0)
        return False

    def change(self, page, new_text, summary=None):
        """Save ``new_text`` to ``page``, reporting failures instead of raising.

        ``summary`` is optional; when omitted, page.put() is called with the
        text only, exactly as before.
        """
        try:
            if summary is None:
                page.put(new_text)
            else:
                page.put(new_text, summary)
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict'
                             % (page.title()))
        except wikipedia.SpamfilterError as url:
            wikipedia.output(u'Cannot change %s because of blacklist entry %s'
                             % (page.title(), url))
        except Exception as error:
            # Stay best-effort (one failed save must not stop the run), but
            # say what happened instead of discarding the error.
            wikipedia.output(u'Could not save %s: %r' % (page.title(), error))

    @staticmethod
    def _coord_template(params):
        """Build the coord template text from a "params=" link value.

        ``params`` is split at the longitude hemisphere marker ("_W_" or
        "_E_"): underscores before it become "|" separators, and whatever
        follows is passed through as one extra template argument. Returns
        None when the hemisphere marker is missing or occurs more than once.

        NOTE(review): the opening "{{coord|" literal and the handling of the
        trailing part are reconstructed; the pasted copy showed a rendered
        error message in their place.
        """
        if u"_W_" in params:
            parts = params.split(u"_W_")
            hemisphere = u"|W"
        else:
            parts = params.split(u"_E_")
            hemisphere = u"|E"
        if len(parts) != 2:
            return None
        coords = (parts[0] + hemisphere).replace(u"_", u"|")
        template = u"{{coord|" + coords
        if parts[1]:
            template += u"|" + parts[1]
        return template + u"|display=title}}"

    def run(self):
        """Process every page of the generator (coordinates come from 'en')."""
        site_en = wikipedia.Site('en')
        # Kept separate from the English path computed further down: the
        # pasted copy reused one variable and overwrote the local path.
        local_path = wikipedia.getSite().family.nicepath('fa')
        for page in self.generator:
            try:
                text_it = page.get()
            except Exception:
                # Page could not be loaded (missing, redirect, ...): skip it.
                continue
            match = self.checkExceptions(text_it)
            marker = self.WIKITEXT_SKIP_MARKER
            if marker and marker in text_it:
                continue
            # Skip all pages that contain certain texts.
            if match:
                wikipedia.output(u'Skipping %s because it contains %s'
                                 % (page.title(), match))
                continue
            url = '%s%s' % (local_path, page.urlname())
            if self.HTML_COORD_MARKER in wikipedia.getSite().getUrl(url):
                wikipedia.output(u'Skipping %s because it contains coordinate'
                                 % page.title())
                continue

            # Find the English interwiki page.
            page_en = None
            for candidate in page.interwiki():
                if candidate.site() == site_en:
                    page_en = candidate
                    break
            if page_en is None:
                print(u"BAD")
                continue
            wikipedia.output(page.title())
            wikipedia.output(u'en: %s' % page_en.title())
            if u"#" in page_en.title():
                print(u"We don't go on page sections. I'm sorry but I have to skip")
                continue
            try:
                # Only used to learn whether the page exists or redirects.
                page_en.get()
            except wikipedia.NoPage:
                wikipedia.output(u'Page %s not found' % page_en.title())
                continue
            except wikipedia.IsRedirectPage:
                wikipedia.output(u'Page %s is a redirect, follow redirect'
                                 % page_en.title())
                page_en = page_en.getRedirectTarget()

            en_site = wikipedia.getSite('en')
            url = '%s%s' % (en_site.family.nicepath('en'), page_en.urlname())
            html = en_site.getUrl(url)
            print(u"good")
            if self.GEOHACK_LINK_MARKER not in html:
                continue
            found = self.GEOHACK_PARAMS_RE.findall(html)
            if len(found) > 2:
                print(u"something is wrong... skipped")
                continue
            if not found:
                # No coordinate link on the English page: nothing to copy.
                continue
            template_new = self._coord_template(found[-1])
            if template_new is None:
                continue
            wikipedia.output(template_new)
            self.change(page, template_new + u"\n" + text_it,
                        self.EDIT_SUMMARY)
def main():
    """Parse the command line, build the page generator and run CoordRobot.

    Options handled here: -autotitle, -autotext, -page[:title] (repeatable),
    -except:TEXT, -template:XYZ (accepted, value unused), -namespace:n
    (repeatable) and -summary:TEXT. Any other argument is offered to
    pagegenerators.GeneratorFactory. When no generator results, the help text
    is shown and the function returns.

    Errors are reported through wikipedia.output() rather than being
    discarded without any message.
    """
    try:
        gen = None
        # Which namespaces should be processed?
        # Default to [] which means all namespaces will be processed.
        namespaces = []
        page_titles = []
        autoText = False
        autoTitle = False
        # This factory is responsible for processing command line arguments
        # that are also used by other scripts and that determine on which
        # pages to work on.
        genFactory = pagegenerators.GeneratorFactory()
        # Load default summary message.
        # BUG WARNING: This is probably incompatible with the -lang parameter.
        wikipedia.setAction(msg)
        # Read commandline parameters.
        for arg in wikipedia.handleArgs():
            if arg == '-autotitle':
                autoTitle = True
            elif arg == '-autotext':
                autoText = True
            elif arg.startswith('-page'):
                if len(arg) == 5:
                    page_titles.append(wikipedia.input(
                        u'Which page do you want to chage?'))
                else:
                    page_titles.append(arg[6:])
            elif arg.startswith('-except:'):
                # These used to be collected in a local list that shadowed the
                # module-level ``exceptions`` and was never read, so the
                # option had no effect. The text is escaped because the
                # module-level entries are compiled as regular expressions.
                exceptions.append(re.escape(arg[8:]))
            elif arg.startswith('-template:'):
                # Accepted for compatibility; the value is not used.
                pass
            elif arg.startswith('-namespace:'):
                namespaces.append(int(arg[11:]))
            elif arg.startswith('-summary:'):
                wikipedia.setAction(arg[9:])
            else:
                generator = genFactory.handleArg(arg)
                if generator:
                    gen = generator
        print(namespaces)
        if page_titles:
            pages = [wikipedia.Page(wikipedia.getSite(), title)
                     for title in page_titles]
            gen = iter(pages)
        if not gen:
            # Syntax error, show help text from the top of this file.
            wikipedia.showHelp('coordbot')
            wikipedia.stopme()
            # The original called sys.exit() here, but its own bare except
            # swallowed the SystemExit, so the net effect was a plain return.
            return
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        # gen = pagegenerators.RedirectFilterPageGenerator(gen)
        preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=120)
        bot = CoordRobot(preloadingGen, autoTitle, autoText)
        bot.run()
    except Exception as error:
        # Keep the "never crash" behaviour, but make the failure visible.
        wikipedia.output(u'coordbot stopped because of an error: %r'
                         % (error,))
if __name__ == "__main__":
    # Run the bot. The former bare "except: a = 1" hid every failure,
    # including Ctrl-C; stopme() is now called however main() ends.
    try:
        main()
    finally:
        wikipedia.stopme()