User:TalBot/ceom equivalence.py
Jump to navigation
Jump to search
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Part of the Complete Encyclopedia of Music standardisation:
# Gather equivalence classes of pages by capitalisation, convert all but one
# members of a class to soft redirects
#
# Copyright (C) 2007, GrafZahl (en.wikisource.org user)
#
#
# run with standard args "-log -putthrottle:xx"
#

import pagegenerators, wikipedia

wikipedia.get_throttle.setDelay(5)

# Report every argument returned by handleArgs() as unrecognised; this script
# defines no options of its own.
# (The original loop bound the name ``args`` but formatted ``arg``, which
# raised NameError as soon as any such argument was present.)
for arg in wikipedia.handleArgs():
    wikipedia.output(u'(WWW) ignoring unrecognised argument: %s' % arg)

# Basic stuff

site = wikipedia.getSite()

# Prompt, answers and hotkeys handed to wikipedia.inputChoice() in
# user_choice() below.
q_text = u'(???) Create what redirect?'
q_answer = [ u'1 -> 2', u'2 -> 1', u'None' ]
q_hotkeys = [ u'1', u'2', u'N' ]
q_default = u'N'

# Templates for the replacement page text and the edit summary; both are
# filled with the link to the redirect target.
base_redirover = u'{{subst:dated soft redirect|"%s"}}'
base_summ = u'[bot] converting to soft redirect to %s'

# Page generators

ceom_pages = pagegenerators.PrefixingPageGenerator(u'Complete Encyclopaedia of Music')

# Equivalence classes

# There does not appear to be a multimap type present in Python. I'll use a
# normal mapping (dict) from the lower-cased title to the set of actual titles
# instead
eqc = dict()

# assess pages

wikipedia.output(u'(III) Assessing pages')

for page in ceom_pages:
    try:
        title = page.title()
        # Titles differing only in capitalisation share the same key.
        key = title.lower()
        if key not in eqc:
            eqc[key] = set()
        eqc[key].add(title)
    except wikipedia.Error:
        wikipedia.output(u'(EEE) Error assessing page [[%s]]' % page.title())

# print equivalence classes

wikipedia.output(u'(III) Equivalence classes:')

for key in eqc:
    wikipedia.output(u'\n* [[%s]]:' % key)
    for title in eqc[key]:
        wikipedia.output(u'** [[%s]]' % title)

# Interactive redirect creation
#
# We keep it simple:
# - refuse to change classes with more than two pages that are not redirects
# - ask user which page should be the redirect

def user_choice(page1, page2):
    """Ask the user which of two pages should become a soft redirect.

    A diff of the texts of page1 and page2 is shown, then the user is asked
    whether page1 should be a redirect to page2 or vice versa. Depending on
    the answer, the chosen page is overwritten with the soft redirect
    template pointing at the other page; any other answer changes nothing.

    page1, page2 -- page objects; the code calls title(), get(), aslink()
                    and put() on them.

    Returns None. Exceptions raised by the page operations are not handled
    here; the calling loop catches wikipedia.Error.
    """
    wikipedia.output(u' (III) Difference between texts 1 = [[%s]] and 2 = [[%s]]' % ( page1.title(), page2.title() ))
    wikipedia.showDiff(page1.get(), page2.get())
    # A copy of q_answer is passed, as in the original code
    # (NOTE(review): presumably inputChoice may modify its argument -- confirm).
    choice = wikipedia.inputChoice(q_text, q_answer[:], q_hotkeys, q_default)
    if choice == u'1':
        # 1 -> 2: page1 becomes a soft redirect to page2
        textlink = page2.aslink(textlink = True)
        redirover = base_redirover % textlink
        summ = base_summ % textlink
        page1.put(redirover, summ, minorEdit = False)
    elif choice == u'2':
        # 2 -> 1: page2 becomes a soft redirect to page1
        textlink = page1.aslink(textlink = True)
        redirover = base_redirover % textlink
        summ = base_summ % textlink
        page2.put(redirover, summ, minorEdit = False)

wikipedia.output(u'\n(III) redirect creation:')

for key in eqc:
    try:
        wikipedia.output(u' (III) processing equivalence class [[%s]]' % key)
        # Pages of this class whose text does not contain the soft redirect
        # marker, i.e. pages that still carry content.
        ctitles = set()
        for title in eqc[key]:
            page = wikipedia.Page(site, title)
            # 'oft redirect' matches the marker regardless of the case of
            # its first letter.
            if page.get().find(u'oft redirect') == -1:
                ctitles.add(page)
        if len(ctitles) == 0:
            wikipedia.output(u' (EEE) no content for this key')
        elif len(ctitles) == 1:
            wikipedia.output(u' (III) singleton, doing nothing')
        elif len(ctitles) == 2:
            # offer choice to user
            user_choice(ctitles.pop(), ctitles.pop())
        else:
            wikipedia.output(u' (WWW) complicated situation for this key, doing nothing')
    except wikipedia.Error:
        wikipedia.output(u'(EEE) Error processing class [[%s]]' % key)