راهنما:استخراج آمار از پایگاه داده مدیاویکی/بومیسازی کدهای ویکیانگلیسی/نمونه
deletedfilesinarticles.py
<syntaxhighlight lang="python">
#!/usr/bin/env python2.5

# Copyright 2010 bjweeks, Multichil, MZMcBride

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# Standard library.
import datetime
import math

# Toolserver / pywikipedia framework (not on PyPI; provided by the
# toolserver environment and the pywikipedia checkout).
import MySQLdb as mysqldb
import wikipedia
import config

# Edit summaries and placeholder text (Persian), used when saving reports.
settings_editsumm = u'ربات:بهروزرسانی آمار'      # "Bot: updating statistics"
settings_blanksumm = u'ربات:موفق نشد'            # "Bot: did not succeed"
settings_blankcontent = u'ربات:محتوی خالی'       # "Bot: empty content"

# Target page; %i is the 1-based report page number.
report_title = 'user:reza1615/Articles containing deleted files/%i'

# Report page body: %s slots are (timestamp, joined table rows).
# NOTE(review): the original triple-quoted wikitable markup was garbled by
# extraction; this is a reconstruction from the surviving fragments
# ("No. | Article | File", "%s", table delimiters) — verify against a
# previously saved report page.
report_template = u'''Articles containing a deleted file; data as of %s.

{| class="wikitable sortable"
! No.
! Article
! File
|-
%s
|}
'''

# Maximum table rows written to a single report page.
rows_per_page = 1000
def has_been_deleted(cursor, il_to):
    """Return True if file title *il_to* (File namespace, NS 6) has at
    least one deletion entry in the logging table.

    ``cursor`` is an open DB-API cursor on the wiki replica; ``il_to`` is
    the raw (byte-string) file title from ``imagelinks``.
    """
    # Pass the parameter as a 1-tuple, per the DB-API; the original passed
    # the bare string, which MySQLdb tolerates but other drivers do not.
    cursor.execute('''
    /* deletedfilesinarticles.py SLOW_OK */
    SELECT
      1
    FROM logging_ts_alternative
    WHERE log_type = 'delete'
    AND log_action = 'delete'
    AND log_namespace = 6
    AND log_title = %s;
    ''', (il_to,))
    return len(cursor.fetchall()) > 0
# Connect to the Persian Wikipedia replica on the toolserver.
wiki = wikipedia.getSite('fa')
conn = mysqldb.connect("fawiki-p.db.toolserver.org", db=wiki.dbName(),
                       user=config.db_username, passwd=config.db_password)
cursor = conn.cursor()

# Find (article, file) pairs in the main namespace where the linked file
# exists neither in the local image table, nor on Commons, nor as a local
# File: page — i.e. candidates for "links to a deleted file".
cursor.execute('''
/* deletedfilesinarticles.py SLOW_OK */
SELECT
  page_title,
  il_to
FROM page
JOIN imagelinks
ON page_id = il_from
WHERE (NOT EXISTS (SELECT
                     1
                   FROM image
                   WHERE img_name = il_to))
AND (NOT EXISTS (SELECT
                   1
                 FROM commonswiki_p.page
                 WHERE page_title = CAST(il_to AS CHAR)
                 AND page_namespace = 6))
AND (NOT EXISTS (SELECT
                   1
                 FROM page
                 WHERE page_title = il_to
                 AND page_namespace = 6))
AND page_namespace = 0;
''')
# Build one wikitable row per (article, file) pair that really has a
# deletion log entry; skip titles that are not valid UTF-8.
i = 1
output = []
for row in cursor.fetchall():
    il_to = row[1]
    # The main query only proves the file is missing; require an actual
    # deletion log entry before reporting it.
    if not has_been_deleted(cursor, il_to):
        continue
    try:
        page_title = u'%s' % unicode(row[0], 'utf-8')
    except UnicodeDecodeError:
        continue
    try:
        il_to = u'File:%s' % unicode(il_to, 'utf-8')
    except UnicodeDecodeError:
        continue
    # NOTE(review): the original triple-quoted row template was garbled by
    # extraction; reconstructed from the surviving "|-" / "| %d | %s | %s"
    # fragments — verify against a previously saved report page.
    table_row = u'''|-
| %d | %s | %s''' % (i, page_title, il_to)
    output.append(table_row)
    i += 1
# Estimate replication lag from the newest recentchanges row, then stamp
# the report with the effective "data as of" time (UTC, lag-corrected).
cursor.execute('SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM recentchanges ORDER BY rc_timestamp DESC LIMIT 1;')
rep_lag = cursor.fetchone()[0]
current_of = (datetime.datetime.utcnow() -
              datetime.timedelta(seconds=rep_lag)).strftime('%H:%M, %d %B %Y (UTC)')
# Write the collected rows to numbered report subpages, rows_per_page
# rows per page.  The slice end is derived from the loop start, so the
# separate running "end" counter of the original is unnecessary.
page = 1
for start in range(0, len(output), rows_per_page):
    report = wikipedia.Page(wiki, report_title % page)
    report_text = report_template % (current_of,
                                     '\n'.join(output[start:start + rows_per_page]))
    report.put(report_text, settings_editsumm)
    page += 1
# If a previous run produced more pages than this one, blank the leftover
# subpages so stale data is not left behind.  int() the ceil result:
# math.ceil returns a float on Python 2 and the title uses %i.
page = int(math.ceil(len(output) / float(rows_per_page))) + 1
while 1:
    report = wikipedia.Page(wiki, report_title % page)
    report_text = settings_blankcontent
    # BUG FIX: the source read "if not report.exists: break", which tests
    # the bound method object (always truthy) so the loop could never
    # stop; pywikipedia's Page.exists is a method and must be called.
    # (Possibly the call parens were lost in extraction — either way the
    # call is required.)
    if not report.exists():
        break
    report.put(report_text, settings_blanksumm)
    page += 1

cursor.close()
conn.close()