User:Inductiveload/Scripts/Page namespace editor
Appearance
(Redirected from User:Inductiveload/Page namespace editor)
import pw_script_header
import wikipedia
import codecs
import re
# Name of the fix to apply. NOTE(review): not referenced anywhere in the
# visible part of this script -- confirm whether it is still needed.
FIX = 'newline'
# Path of the UTF-8 text file that main() reads page titles from, one per line.
FILE= r'/home/john/src/pw/zz_filelist0.txt'
# Edit summary passed to page.put() for every uploaded change.
SUMMARY = "[bot] Tidying formatting."
# Compiled once at import time instead of on every call.
# The header group is lazy and the body group is greedy so that any
# <noinclude> sections embedded in the body stay part of the body: the header
# ends at the first </noinclude> and the footer starts at the last <noinclude>.
# NOTE(review): with (?m), ^ and $ also match at line boundaries, so text
# before the first or after the last <noinclude> line is not captured.
_PAGE_SECTIONS_RE = re.compile(
    u'(?ms)^<noinclude>(.*?)</noinclude>(.*)<noinclude>(.*)</noinclude>$')


def decomposePage(wikiText):
    """Split the wikitext of a page into its header, body and footer.

    The expected layout is
    ``<noinclude>HEADER</noinclude>BODY<noinclude>FOOTER</noinclude>``.

    Args:
        wikiText: the complete wikitext of the page.

    Returns:
        A ``(header, body, footer)`` tuple of strings, or ``None`` (after
        printing a message) when the text does not have that layout.
    """
    match = _PAGE_SECTIONS_RE.search(wikiText)
    if match is None:
        print("Can't find header, body, footer")
        return None
    return match.group(1), match.group(2), match.group(3)
def composePage(header, body, footer):
    """Assemble page wikitext from its three sections.

    The header and the footer are each wrapped in a ``<noinclude>`` element;
    the body is placed between them without any wrapper.
    """
    template = '<noinclude>%s</noinclude>%s<noinclude>%s</noinclude>'
    sections = (header, body, footer)
    return template % sections
def process_body(body):
    """Join hard-wrapped lines of the page body into single lines.

    Every single line break that sits between two non-newline characters is
    replaced by one space, together with any spaces directly in front of it.
    Runs of two or more newlines (paragraph breaks) are left untouched.

    Args:
        body: the body wikitext of the page.

    Returns:
        The body with single line breaks replaced by spaces.
    """
    # Lookbehind/lookahead check the neighbouring characters without consuming
    # them. A pattern that consumes both neighbours cannot match two line
    # breaks separated by a single character, so "a\nI\nb" would only be
    # half joined.
    return re.sub(r'(?<=[^\n]) *\n(?=[^\n])', ' ', body)
def process_header(header):
    """Return the header section as received; no clean-up is applied to it."""
    processed = header
    return processed
def process_footer(footer):
    """Return the footer section as received; no clean-up is applied to it."""
    processed = footer
    return processed
def main():
    """Tidy every page listed in FILE and upload it after confirmation.

    For each title read from FILE (one per line, UTF-8) the page is fetched,
    split into header, body and footer, each part is run through its
    ``process_*`` function, and the page is reassembled. The diff and the new
    text are shown, and the page is saved with SUMMARY as a minor edit only
    when the operator confirms.
    """
    # 'with' guarantees the title list is closed, even if a page fails.
    with codecs.open(FILE, 'r', 'utf-8') as in_file:
        ws_site = wikipedia.getSite("en", "wikisource")

        for line in in_file:
            # Lines keep their trailing newline; drop it and skip blank lines
            # so they are not treated as page titles.
            page_title = line.strip()
            if not page_title:
                continue

            print('(INF) Processing page: %s' % page_title)

            page = wikipedia.Page(ws_site, page_title)
            old_wikitext = page.get()

            sections = decomposePage(old_wikitext)
            if sections is None:
                # decomposePage already printed why; unpacking None would
                # abort the whole run, so move on to the next page instead.
                continue
            header, body, footer = sections

            body = process_body(body)
            header = process_header(header)
            footer = process_footer(footer)

            # Rebuild a well-formed Page: namespace page.
            new_wikitext = composePage(header, body, footer)

            if new_wikitext == old_wikitext:
                # Nothing to upload, so do not bother the operator.
                print('(INF) No changes needed: %s' % page_title)
                continue

            wikipedia.showDiff(old_wikitext, new_wikitext)
            print(new_wikitext)

            cont = raw_input("Upload? [y/n]: ")
            # Accept any capitalisation and stray whitespace around y / yes.
            if cont.strip().lower() in ('y', 'yes'):
                page.put(new_wikitext, SUMMARY, minorEdit=True)
# Run the bot only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()