ویکیپدیا:درخواستهای ربات/ربات استخراج جعبه از درون مقاله(جعبه)/ویرایش 0
ظاهر
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Reza(User:reza1615), 2011
#
# Distributed under the terms of the CC-BY-SA 3.0 .
#!/usr/bin/python
# -*- coding: utf-8 -*-
import codecs
# Running count of '@@@'-separated chunks visited by the main loop.
count = 0
# Name of the UTF-8 input file holding the page texts.
filesample = 'resultr2.txt'
# Read the whole input at once; the context manager closes the handle
# (the name ``text2`` is kept for backward compatibility).
with codecs.open(filesample, 'r', 'utf8') as text2:
    text = text2.read()
title, interwiki = ' ', ''
# To import templates, uncomment the first ``olgoo`` line below
# (the Persian word for "Template"); otherwise uncomment the second one.
#olgoo=u'الگو'
#olgoo=u' '
def box(section, interwiki):
    """Collect the infobox lines of one page chunk.

    The chunk is scanned line by line (blank lines are skipped):

    * A line containing ``^`` is treated as the page title.  It resets the
      collected text to ``'\\nAAA[[<title>]]AAA'`` and rebuilds ``interwiki``
      as ``'[[en:' + <title with '$' after every character> + ']]'``.
    * A line whose ``{`` count exceeds its ``}`` count by exactly 2 starts
      the collection and is appended.
    * Once collection has started, lines with balanced braces are appended.
    * The first line with more ``}`` than ``{`` is appended and ends the scan.

    Args:
        section: text of one page chunk.
        interwiki: interwiki link carried over from the previous call; it is
            returned unchanged when the chunk has no ``^`` title line.

    Returns:
        A ``(linebox, interwiki)`` tuple.  ``linebox`` starts as ``' '`` when
        no title line has been seen.  The tuple is returned even if no
        closing line is found, so callers can always unpack the result.
    """
    linebox = ' '
    newinterwiki = ''
    started = False
    for raw_line in section.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if '^' in line:
            page_title = line.replace(u'^', '')
            linebox = u'\nAAA[[' + page_title + ']]AAA'
            # Every character of the title is followed by '$'.  The buffer is
            # deliberately not reset, so a second title line in the same
            # chunk extends the first (original behaviour).
            newinterwiki += ''.join(ch + '$' for ch in page_title)
            interwiki = u'[[en:' + newinterwiki + ']]'
            continue
        # Net number of braces opened (positive) or closed (negative) here.
        # str.count replaces the un-imported ``string.count`` helper.
        balance = line.count('{') - line.count('}')
        if balance < 0:
            # Closing line: include it and stop.
            linebox = linebox + '\n' + line
            return linebox, interwiki
        if balance == 2:
            # A line that opens a template ("{{...") starts the collection.
            linebox = linebox + '\n' + line
            started = True
        elif balance == 0 and started:
            linebox = linebox + '\n' + line
    # No closing line was found: still return a tuple instead of None.
    return linebox, interwiki
# Template openers that mark a chunk as containing the wanted infobox.
# Change these names to extract a different template.
TEMPLATE_MARKERS = (u'{{Infobox Settlement', u'{{infobox Settlement')

# The input is a sequence of page texts separated by '@@@'.
for pag in text.split(u'@@@'):
    count = count + 1
    # The earlier XML-dump parsing (<title>, <revision> and <text> extraction
    # plus entity replacement) is disabled; each chunk is used as-is.
    section = pag.strip()
    if not any(marker in section for marker in TEMPLATE_MARKERS):
        continue
    resultdata, interwiki = box(section, interwiki)
    # Each extracted box is prefixed with an 'xxx\nxxx' separator.
    resultdata = u'xxx\nxxx' + resultdata + '\n'
    # Export file name; opened in append mode so boxes accumulate.
    # The ``with`` block closes the file, so no explicit close() is needed.
    with codecs.open('FileBox.txt', mode='a', encoding='utf8') as f:
        f.write(resultdata)
    # NOTE(review): the original indentation was lost; this is assumed to be
    # a per-record progress print. Parenthesised form runs on Python 2 and 3.
    print(count)