User:Pathoschild/standardise-dev.js
Appearance
Note: After saving, changes may not occur immediately. Click here to learn how to bypass your browser's cache.
- Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Cmd-R on a Mac)
- Google Chrome: Press Ctrl-Shift-R (Cmd-Shift-R on a Mac)
- Internet Explorer: Hold Ctrl while clicking Refresh, or press Ctrl-F5
- Opera: Clear the cache in Tools → Preferences
For details and instructions about other browsers, see Wikipedia:Bypass your cache.
Code that you insert on this page could contain malicious content capable of compromising your account. If you are unsure whether code you are adding to this page is safe, you can ask at the central discussion page, Scriptorium. The code will be executed when previewing this page under some skins, including Monobook. You can purge this page's cache in the interim if you wish to refresh the content sooner under another skin.
Documentation for this script can be added at User:Pathoschild/standardise-dev.
// <pre><nowiki>
/************************
*********
********* This is the cutting-edge development version, and is frequently
********* broken. You should use [[User:Pathoschild/standardise.js]] instead.
*********
************************/
/*************
*** Wikisource standardization extension (development version)
*** for the Regex menu framework 1.2+, http://meta.wikimedia.org/wiki/User:Pathoschild/Script:Regex_menu_framework
*** by [[m:user:Pathoschild]]
*************/
/**
 * Standardise the wikitext currently held in the edit box.
 *
 * Steps, in order:
 *   1. Swap <nowiki>, <poem> and <pre> blocks for '~exception~' placeholders
 *      so that none of the later rewrites touch them.
 *   2. Main namespace (wgNamespaceNumber 0): rebuild the {{header}} template
 *      as {{header2}} and join falsely broken lines.
 *   3. Author namespace (wgNamespaceNumber 102): rebuild the {{author}}
 *      template, filling years and names from the old parameters.
 *   4. All namespaces: tidy templates, headings, categories, links and lists,
 *      then move licenses, categories and interlanguage links to the end.
 *   5. Restore the protected blocks and set the edit summary.
 *
 * Relies on globals that are not defined in this file: editbox, wgTitle,
 * wgNamespaceNumber, regex(), regsearch() and setreason().
 * NOTE(review): regex(search, replace, repeat) presumably applies the
 * replacement to editbox.value `repeat` times, and regsearch(search)
 * presumably returns editbox.value.match(search) -- confirm against the
 * Regex menu framework.
 *
 * Takes no arguments and returns nothing; all results are written to
 * editbox.value and passed to setreason().
 */
function standardize() {
    /******************
    *** Local helpers
    ******************/
    // compare function for case-insensitive sorting
    // courtesy <http://www.webreference.com/js/tips/000430.html>
    function caseless(a, b) {
        var lowera = a.toLowerCase();
        var lowerb = b.toLowerCase();
        if (lowera < lowerb) return -1;
        if (lowera > lowerb) return 1;
        return 0;
    }

    // value of a '~name=' delimited parameter in the prepared header template,
    // or null when the parameter is absent (the last occurrence wins)
    function headerparam(template, name) {
        var found = new RegExp('^[\\s\\S]*~' + name + '=([^~}]*)[\\s\\S]*$').exec(template);
        return found ? found[1] : null;
    }

    // numeric value of a year string; years marked BCE are negative so that
    // chronological order equals numeric order (NaN when there is no number)
    function yearvalue(year) {
        var number = parseInt(year, 10);
        return (/BCE/i).test(year) ? -number : number;
    }

    // escape regular-expression metacharacters so text can be matched literally
    function escaperegex(text) {
        return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }

    /******************
    *** Content exceptions
    ******************/
    /* exception pattern */
    var exceptionpattern = new RegExp('<(nowiki|poem|pre)[^>]*>[\\s\\S]*?<\\/\\1>', 'ig'); // double-escaping needed for RegExp()
    /* store exceptions in an array */
    var exceptionvalues = editbox.value.match(exceptionpattern);
    /* replace exceptions with placeholders */
    // the same case-insensitive pattern is used for collecting and replacing,
    // so upper-case tags such as <NOWIKI> are protected as well
    if (exceptionvalues) {
        editbox.value = editbox.value.replace(exceptionpattern, '~exception~');
    }

    /*******************
    *** Mainspace
    *******************/
    if (wgNamespaceNumber == '0') {
        /*******************
        *** header normalization
        *******************/
        /* prepare template for parsing if present */
        if (editbox.value.match(/{{\s*(?:msg:|template:)?\s*header/i)) {
            /* header syntax */
            // the prefix is optional, matching the guard above, so that
            // '{{ header' is normalised too
            regex(/{{\s*(?:msg:|template:)?\s*header/i, '{{header');

            /* fix delimiters */
            // header parameters
            regex(/({{header2?[^\n]*)[\n\s]*\|[\s\n]*(previous|next|title|section|author|notes)\s*=\s*/ig, '$1~$2=', 6);
            // nested templates (up to 5)
            regex(/({{header[\s\S]*?{{[^\|}]*)\|/ig, '$1%%pipe%%', 5);
            regex(/({{header[\s\S]*?){{([^}]+)}}/ig, '$1%%leftcurlies%%$2%%rightcurlies%%', 5);

            /* cleanup header */
            // deprecated arrows (literal or as HTML entities) and brackets
            regex(/((?:previous|next)[^~]*?)(?:&larr;?|&rarr;?|←|→)/ig, '$1');
            regex(/(section\s*=\s*)\(([^~}]*)\)/, '$1$2');
            // trailing whitespace
            regex(/{{(header[^}]+)\s*}}/i, '{{$1}}');
            // non-semantic line-breaks
            regex(/({{header[^}]+(?:title|section|author)=[^~]*)<[^>]*br[^>]*>/ig, '$1', 8);
        }

        /* prepare values */
        // String(null) is 'null' when no header exists; no parameter matches it
        var headertemplate = String(editbox.value.match(/{{header[\s\S]+?}}/i));

        /* parse */
        // title (defaults to the root page name)
        var headertitle = headerparam(headertemplate, 'title');
        if (headertitle === null) {
            var rootpage = wgTitle.match(/^[^\/]+/);
            headertitle = rootpage ? rootpage[0] : '';
        }

        // author
        var headerauthor = headerparam(headertemplate, 'author');
        if (headerauthor === null) {
            headerauthor = '';
        }

        // section (defaults to the subpage name)
        var headersection = headerparam(headertemplate, 'section');
        if (headersection === null) {
            if (wgTitle.match(/.+\//)) {
                headersection = wgTitle.replace(/^.*\/([^\/]+)/, '$1');
            }
            else {
                headersection = '';
            }
        }

        // chapter number of the current page, if it is titled '.../Chapter N'
        var chaptermatch = wgTitle.match(/\/Chapter (\d+)$/);
        var chapternumber = chaptermatch ? parseInt(chaptermatch[1], 10) : null;

        // previous (defaults to the preceding chapter, if any)
        var headerprevious = headerparam(headertemplate, 'previous');
        if (headerprevious === null) {
            if (chapternumber !== null && chapternumber - 1 > 0) {
                headerprevious = '[[../Chapter ' + (chapternumber - 1) + '|Chapter ' + (chapternumber - 1) + ']]';
            }
            else {
                headerprevious = '';
            }
        }

        // next (defaults to the following chapter)
        var headernext = headerparam(headertemplate, 'next');
        if (headernext === null) {
            if (chapternumber !== null) {
                headernext = '[[../Chapter ' + (chapternumber + 1) + '|Chapter ' + (chapternumber + 1) + ']]';
            }
            else {
                headernext = '';
            }
        }

        // notes
        var headernotes = headerparam(headertemplate, 'notes');
        if (headernotes === null) {
            headernotes = '';
        }
        else {
            headernotes = headernotes.replace(/^([\s\S]*?)[\n\s]*$/, '$1'); // trailing whitespace
        }

        /* remove old template */
        editbox.value = editbox.value.replace(/{{header[^}]*}}\n*/ig, '');

        /* place new template */
        editbox.value = '{{header2'
            + '\n | title = ' + headertitle
            + '\n | author = ' + headerauthor
            + '\n | section = ' + headersection
            + '\n | previous = ' + headerprevious
            + '\n | next = ' + headernext
            + '\n | notes = ' + headernotes
            + '\n}}\n'
            + editbox.value;

        /*******************
        *** fix false newlines
        *******************/
        /* replace newlines with placeholders */
        regex(/([^\n])\n(\s*[^\n])/ig, '$1%%newline%%$2');

        /* selectively restore legitimate newlines */
        // paragraphs
        regex(/%%newline%%%%newline%%|%%newline%%\n|\n%%newline%%/ig, '\n\n');
        // templates
        regex(/%%newline%%(\s*(?:}}|\|))/ig, '\n$1'); // before
        regex(/}}\s*%%newline%%/ig, '}}\n'); // after
        // images, categories, interwiki links
        regex(/%%newline%%(\s*\[\[(?:Image|Category|[^:]+):[^\]]+\]\])/ig, '\n$1');
        regex(/(\s*\[\[(?:Image|Category|[^:]+):[^\]]+\]\])%%newline%%/ig, '$1\n');
        // lists
        regex(/%%newline%%([*#:;])/ig, '\n$1'); // lists
        regex(/([*#:;][^\n]*)%%newline%%/ig, '$1\n'); // newlines closing list items
        // tables
        regex(/%%newline%%{\|/ig, '\n{|');
        regex(/{\|%%newline%%/ig, '{|\n');
        // rules
        regex(/%%newline%%(----+)/g, '\n$1');
        regex(/(----+)%%newline%%/g, '$1\n');
        // tags
        regex(/(<[^>\n]+>)\s*%%newline%%/ig, '$1\n');
        regex(/%%newline%%(<[^>\n]+>)/ig, '\n$1');

        /* remove remaining */
        regex(/-%%newline%%([^\s])/ig, '-$1'); // hyphenated words
        regex(/\s*%%newline%%\s*/ig, ' '); // all others

        /*******************
        *** Cleanup
        *******************/
        /* restore delimiters */
        regex(/%%pipe%%/g, '|');
        regex(/%%leftcurlies%%/g, '{{');
        regex(/%%rightcurlies%%/g, '}}');
    }

    /*******************
    *** Authorspace
    *******************/
    if (wgNamespaceNumber == '102') {
        /*******************
        *** {{author}} normalization
        *******************/
        /* fix delimiters */
        regex(/[\n\s]*\|\s*((?:first|last)name|last_initial|(?:birth|death)year|description|image|(?:wikipedia|wikiquote|commons)_link|dates|name|defaultsort)\s*=\s*/ig, '~$1='); // author parameter delimiters
        regex(/({{author[\s\S]*?{{[^\|}]*)\|/ig, '$1%%pipe%%', 5); // other template pipes
        regex(/({{author[\s\S]*?){{([^}]+)}}/ig, '$1%%leftcurlies%%$2%%rightcurlies%%', 5); // other template delimiters

        /* cleanup */
        regex(/{{(author[^}]+)\s*}}/i, '{{$1}}'); // rm trailing whitespace

        /* place standard template and move like parameters */
        regex(/{{author/i, '{{author\n |firstname =\n |lastname =\n |last_initial =\n |birthyear =\n |deathyear =\n |description =\n |image =\n |wikipedia_link =\n |wikiquote_link =\n |commons_link =\n}}\n{{author');
        regex(/(author[\s\S]*?\|((?:first|last)?name|last_initial|(?:birth|death)year|description|image|(?:wikipedia|wikiquote|commons)_link|dates|defaultsort)\s*)=([\s\S]*?)~?\2=([^~]*)/i, '$1=$4$3', 10);

        /* get dates if necessary */
        if (regsearch(/(?:birth|death)year\s*=\s*\n/)) {
            // cannibalise categories
            regex(/(birthyear\s*)=(\s*\n[\s\S]*?)\n?\[\[\s*Category\s*:\s*(\d+(?:\s*BCE)?) births\s*[^\]]*\]\]/, '$1=$3$2');
            regex(/(deathyear\s*)=(\s*\n[\s\S]*?)\n?\[\[\s*Category\s*:\s*(\d+(?:\s*BCE)?) deaths\s*[^\]]*\]\]/, '$1=$3$2');

            // if that failed, parse from old template
            if (regsearch(/(?:birth|death)year\s*=\s*\n/) && regsearch(/~dates=[^~]/)) {
                /* get dates */
                var olddates = editbox.value.replace(/^[\s\S]*dates=[^\d~}]*([^~}]+)[\s\S]*$/, '$1'); // raw parameter
                olddates = olddates.replace(/^(\d+)\s*BC?E/ig, '$1 BCE'); // fix eras
                // extract dates; when the death-year pattern does not match,
                // deathyear is the whole raw string and fails the check below
                var birthyear = olddates.replace(/^(\d+(?: BCE)?)[\s\S]*$/ig, '$1');
                var deathyear = olddates.replace(/^\d+[^\d]+?(\d+(?: BCE)?)$/ig, '$1');

                /* fill in empty parameters */
                if (regsearch(/birthyear\s*=\s*\n/)) {
                    regex(/(birthyear\s*)=/, '$1=' + birthyear);
                }
                // compare as numbers: as strings '1037' would sort before '980'
                if (regsearch(/deathyear\s*=\s*\n/) && yearvalue(deathyear) > yearvalue(birthyear)) {
                    regex(/(deathyear\s*)=/, '$1=' + deathyear);
                }
            }
        }

        /* get names */
        // cannibalise name field
        if (regsearch(/(?:first|last)name\s*=\s*\n/)) {
            // extract
            var name = editbox.value.replace(/^[\s\S]*~name=([^~}]*)[\s\S]*/, '$1');
            var firstname = name.replace(/([\s\S]+)\s+[\s\S]*/, '$1');
            var lastname = name.replace(/[\s\S]+\s+([\s\S]*)/, '$1');

            // fill in empty parameters
            if (regsearch(/firstname\s*=\s*\n/)) {
                regex(/(firstname\s*)=/, '$1=' + firstname);
            }
            if (regsearch(/lastname\s*=\s*\n/)) {
                regex(/(lastname\s*)=/, '$1=' + lastname);
            }
        }

        /* cleanup */
        // remove old template
        regex(/({{author[\s\S]*?)[\n\s]*{{author[^}]*}}[\n\s]*/ig, '$1\n\n');
        // restore delimiters
        regex(/%%pipe%%/g, '|');
        regex(/%%leftcurlies%%/g, '{{');
        regex(/%%rightcurlies%%/g, '}}');
        // fix whitespace
        regex(/((?:(?:first|last)name|last_initial|(?:birth|death)year|description|image|(?:wikipedia|wikiquote|commons)_link)\s*)=\s*/ig, '$1= ');
        regex(/= \|/g, '= \n |');
        regex(/= }}/g, '= \n}}');

        /* remove old categories */
        regex(/\[\[\s*Category\s*:\s*\d+[^\]]*?(?:births|deaths)[^\]]*\]\]\n?/ig, ''); // authors by year
        regex(/\[\[\s*Category\s*:\s*(?:Ancient|Early modern|Medieval|Modern|Renaissance) authors[^\]]*\]\]\n?/ig, ''); // authors by era

        /*******************
        *** Other tweaks
        *******************/
        /* update license templates */
        regex(/{{\s*(?:msg:|template:)?(?:author-)?(PD-[^\|\}]+)(?:\|[^}]*)?}}/ig, '{{$1}}');

        /* normalize dates */
        regex(/^([#*:]+ \[\[[^\]]+\]\]),\s*(\d+)/mig, '$1 ($2)');
    }

    /*******************
    *** miscellaneous cleanup
    *******************/
    /* templates */
    regex(/{{\s*(?:msg:|template:)?([^}]+)}}/ig, '{{$1}}');

    /* syntax */
    // headers
    regex(/\n*^(=+)\s*(.*?)\s*\1\s*/mig, '\n\n$1$2$1\n'); // whitespace
    regex(/=\n+=/ig, '=\n='); // fix consecutive headers
    // categories
    regex(/\[\[\s*category\s*:\s*([^\|\]]+)(?:\s*(\|)([^\]]*))?\s*\]\]/ig, '[[category:$1$2$3]]');
    // links
    regex(/\[\[\s*([^\|\]]+?)\s*(?:(\|)\s*([^\]]+?)\s*)?\]\]/ig, '[[$1$2$3]]'); // redundant starting and ending whitespace
    regex(/\[\[([^\|\]]+?)\s*\|\s*\1\]\]/ig, '[[$1]]'); // redundant link text
    regex(/\[\[([^\|\]]+?)_/ig, '[[$1 ', 5); // underscores
    // lists
    regex(/^([*#:]+)\s*/mig, '$1 ');

    /*******************
    *** sort elements
    *******************/
    /* store elements and remove from code */
    // categories
    var categories = regsearch(/\[\[category:[^\]]+\]\]/ig);
    regex(/\[\[category:[^\]]+\]\]\n?/ig, '');

    // interlanguage links
    var interwikilinks = regsearch(/\[\[[a-z]{2,3}(?:-[^:\|\]]+)?:[^\]]+\]\]/ig); // get codes
    if (interwikilinks) {
        var languagelinks = [];
        for (var w = 0; w < interwikilinks.length; w++) {
            // known non-interlanguage prefixes are left where they are
            if (interwikilinks[w].match(/\[\[(?:c2|cej|dcc|mw|rev|rfc|svn|wqy):/i)) {
                continue;
            }
            // remove exactly this link (and one trailing newline) from the text;
            // it is re-added with the other interlanguage links below
            languagelinks.push(interwikilinks[w]);
            editbox.value = editbox.value.replace(new RegExp(escaperegex(interwikilinks[w]) + '\\n?'), '');
        }
        interwikilinks = languagelinks;
    }

    // license templates
    var licenses = regsearch(/{{(?:PD-|GFDL)[^}]*}}/ig);
    regex(/{{(?:PD-|GFDL)[^}]*}}\n?/ig, '');

    /* sort and re-add */
    // initial whitespace
    regex(/[\s\n]*$/, '\n\n');

    // licenses
    if (licenses && licenses.length > 0) {
        for (var l = 0; l < licenses.length; l++) {
            editbox.value = editbox.value + licenses[l] + '\n';
        }
        // whitespace
        editbox.value = editbox.value + '\n';
    }

    // categories
    if (categories && categories.length > 0) {
        // sort and place
        categories.sort(caseless);
        for (var c = 0; c < categories.length; c++) {
            editbox.value = editbox.value + categories[c] + '\n';
        }
        // whitespace
        editbox.value = editbox.value + '\n';
    }

    // interlanguage links
    if (interwikilinks) {
        // sort and place
        interwikilinks.sort(caseless);
        for (var k = 0; k < interwikilinks.length; k++) {
            editbox.value = editbox.value + interwikilinks[k] + '\n';
        }
    }

    /* restore exceptions */
    if (exceptionvalues) {
        var restore = function() {
            return exceptionvalues[e];
        };
        for (var e = 0; e < exceptionvalues.length; e++) {
            // a function replacement inserts the stored text verbatim; a string
            // replacement would expand '$&', '$1', ... found inside the block
            editbox.value = editbox.value.replace(/~exception~/, restore);
        }
    }

    /* edit summary */
    setreason('[[WS:STYLE|standardization]], updates, and cleanup with [[m:User:Pathoschild/Script:Regex menu framework|regex]]');
}
// </nowiki></pre>