User:Slaporte/US Code
Appearance
The US Code Formatting Tool will process a formatted ascii file of US Code.
Version history
[edit]- 8/16/2010 - fixed revisionIndex and related functions; improved documentation
- 8/14/2010 - Improved documentation; renamed functions & variables;
- 8/12/2010 - New code with docs
- 7/6/2010
This processes the HTML files downloadable from FDSys.
- 7/1/2010 - Demo code
Code
[edit]<?php
# This is a tool for batch processing the US Code, as
# downloaded from FDSys <http://www.gpo.gov/fdsys/>
#
# Last updated 8/16/2010
/**
* Split the bot file once the accumulated output reaches this size, in bytes
*/
$splitSize = 500000;
/**
* Report whether an integer is odd.
*
* @param $int int Number to test
* @return int 1 when $int is odd, 0 when it is even
*/
function is_odd($int){
    // The lowest bit of an integer is set exactly when the integer is odd.
    $lowestBit = $int & 1;
    return $lowestBit;
}
/**
* Gather defined data from comments in html.
*
* An html file from FDSys <http://www.gpo.gov/fdsys/> has
* some data in comments, such as <!-- AUTHORITIES-PUBLICATION-YEAR:2010 -->,
* at the start of the file.
*
* A field whose comment is not present in $txt is returned as an empty
* string, so the result always has all ten entries.
*
* @param $txt string Raw text from FDSys html file
* @return array
* [0] => AUTHORITIES-PUBLICATION-NAME
* [1] => AUTHORITIES-PUBLICATION-ID
* [2] => AUTHORITIES-PUBLICATION-YEAR
* [3] => AUTHORITIES-LAWS-ENACTED-THROUGH-DATE
* [4] => SEARCHABLE-LAWS-ENACTED-THROUGH-DATE
* [5] => AUTHORITIES-USC-TITLE-NAME
* [6] => AUTHORITIES-USC-TITLE-ENUM
* [7] => AUTHORITIES-USC-TITLE-STATUS
* [8] => CONVERSION-PROGRAM
* [9] => CONVERSION-DATETIME
*/
function collectPublicationData($txt){
    // Order matters: callers address the result by numeric index.
    $fields = array(
        "AUTHORITIES-PUBLICATION-NAME",
        "AUTHORITIES-PUBLICATION-ID",
        "AUTHORITIES-PUBLICATION-YEAR",
        "AUTHORITIES-LAWS-ENACTED-THROUGH-DATE",
        "SEARCHABLE-LAWS-ENACTED-THROUGH-DATE",
        "AUTHORITIES-USC-TITLE-NAME",
        "AUTHORITIES-USC-TITLE-ENUM",
        "AUTHORITIES-USC-TITLE-STATUS",
        "CONVERSION-PROGRAM",
        "CONVERSION-DATETIME",
    );
    $data = array();
    foreach($fields as $field){
        // Non-greedy capture so the value stops at the first " -->" even if
        // several comments share one line.
        if(preg_match("/<!-- ".$field.":(.*?) -->/",$txt,$match)){
            $data[] = $match[1];
        }else{
            $data[] = "";
        }
    }
    return $data;
}
/**
* Note on loadHTML()--
* Markup errors in loadHTML() are suppressed. We can still
* process invalid markup. If necessary, use libxml_use_internal_errors(true) and
* libxml_get_errors() to handle markup errors.
*/
/**
* Find the last Statutes at Large citation listed in the first paragraph.
*
* The result is the revision value that ends up in the index printed by
* printRevisionIndex().
*
* @param $txt string Text in html (the source credit of a section)
* @return string|null The last "NN Stat. NN" citation in the first <p>,
* "none" when the first <p> has no such citation, or null when $txt is
* empty or contains no <p> element at all.
*/
function determineRevisionKey($txt){
    // loadHTML() rejects an empty source (ValueError on PHP 8), and an empty
    // document has no paragraph anyway.
    if((string)$txt === ""){
        return null;
    }
    $doc = new DOMDocument();
    @$doc->loadHTML($txt);
    $xpath = new DOMXPath($doc);
    $firstParagraph = $xpath->query('//p')->item(0);
    if(!is_object($firstParagraph)){
        // Previously this path returned an undefined variable (null + notice).
        return null;
    }
    if(preg_match_all("/([0-9]+ Stat\. [0-9]+)/",$firstParagraph->nodeValue,$stat)){
        return end($stat[1]);
    }
    return "none";
}
/**
* Print the statute => revision pairs as PHP source for a $revisionIndex array.
*
* The printed text is an array literal assigned to $revisionIndex, one
* "name" => "revision" pair per line.
*
* @param $keys array Formatted statute => revision
*/
function printRevisionIndex($keys){
    $rows = "";
    foreach($keys as $name=>$revision){
        $rows .= '"'.$name.'" => "'.$revision.'",'."\n";
    }
    print '$revisionIndex = array('."\n".$rows.");";
}
/**
* Extract content from the first <h3> element as a level-2 wiki heading
*
* @param $txt string Text in html
* @return string "\n\n==CONTENT==\n\n" for the first <h3> element, or "" when
* $txt is empty or has no <h3>
*/
function getFieldHeadThree($txt){
    // loadHTML() rejects an empty source (ValueError on PHP 8).
    if((string)$txt === ""){
        return "";
    }
    $doc = new DOMDocument();
    @$doc->loadHTML($txt);
    $xpath = new DOMXPath($doc);
    $heading = $xpath->query('//h3')->item(0);
    if(!is_object($heading)){
        return "";
    }
    return "\n\n==".$heading->nodeValue."==\n\n";
}
/**
* Extract content from the first <h4> element as a level-3 wiki heading
*
* @param $txt string Text in html
* @return string "\n\n===CONTENT===\n\n" for the first <h4> element, or ""
* when $txt is empty or has no <h4>
*/
function getFieldHeadFour($txt){
    // loadHTML() rejects an empty source (ValueError on PHP 8).
    if((string)$txt === ""){
        return "";
    }
    $doc = new DOMDocument();
    @$doc->loadHTML($txt);
    $xpath = new DOMXPath($doc);
    $heading = $xpath->query('//h4')->item(0);
    if(!is_object($heading)){
        return "";
    }
    return "\n\n===".$heading->nodeValue."===\n\n";
}
/**
* Apply wikimarkup to indent each paragraph
*
* Indents according to the class of each <p> element. A class containing
* "statutory-body-Xem" gets X leading colons; any other class (including
* plain "statutory-body") gets no indent. The indent is computed afresh for
* every paragraph, so a shallow paragraph following a deep one is not
* over-indented.
*
* @param $txt string Text in html, with FDSys classes
* @return string Indented paragraphs, no longer in html; each paragraph is
* followed by "\n \n"
*/
function indentStatute($txt){
    // loadHTML() rejects an empty source (ValueError on PHP 8).
    if((string)$txt === ""){
        return "";
    }
    $doc = new DOMDocument();
    @$doc->loadHTML($txt);
    $xpath = new DOMXPath($doc);
    $text = "";
    foreach($xpath->query('//p') as $item){
        // Reset per paragraph: the depth must not carry over from the
        // previous paragraph.
        $depth = 0;
        if(preg_match("/statutory-body-([0-9]+)em/",$item->getAttribute('class'),$match)){
            $depth = (int)$match[1];
        }
        $text .= str_repeat(":", $depth).$item->nodeValue."\n \n";
    }
    return $text;
}
/**
* Create wikilist of sources
*
* Useful to turn a few paragraphs into a bulleted list. Every "; Pub." in the
* input starts a new list entry. A leading "(" and a trailing ")" around the
* whole credit are removed.
*
* @param $txt string Text in html
* @return string Text with each paragraph on a new line beginning with *;
* just "*" when there is no paragraph
*/
function getSourcecreditParagraph($txt){
    // loadHTML() rejects an empty source (ValueError on PHP 8).
    if((string)$txt === ""){
        return "*";
    }
    $txt = str_replace("; Pub.",";</p> <p>Pub.",$txt);
    $doc = new DOMDocument();
    @$doc->loadHTML($txt);
    $xpath = new DOMXPath($doc);
    $entries = array();
    foreach($xpath->query('//p') as $item){
        // The outer { } act as the regex delimiters, so this matches the text
        // "{page|N}". As before, the first paragraph is never filtered.
        if(count($entries) > 0 && preg_match("{{page\|[0-9]+}}",$item->nodeValue)){
            continue;
        }
        $entries[] = $item->nodeValue;
    }
    $result = implode("\n*", $entries);
    return "*".preg_replace("/^(\()|(\))$/","",$result);
}
/**
* Extract content from <p> element
*
* @param $txt string Text in html
* @return string Content of all <p> element(s), separated by a blank line;
* "" when $txt is empty or has no <p>
*/
function getP($txt){
    // loadHTML() rejects an empty source (ValueError on PHP 8).
    if((string)$txt === ""){
        return "";
    }
    $doc = new DOMDocument();
    @$doc->loadHTML($txt);
    $xpath = new DOMXPath($doc);
    $paragraphs = array();
    foreach($xpath->query('//p') as $item){
        $paragraphs[] = $item->nodeValue;
    }
    return implode("\n\n", $paragraphs);
}
/**
* Extract content from <div class="analysis">
*
* The text of every <div class="analysis"> is concatenated and, when at least
* one such div exists, followed by "\n|}" (the end of a wiki table). The text
* of every <p> in the input is then appended, separated by blank lines.
*
* @param $txt string Text in html
* @return string Content of <div class="analysis"> element(s) followed by the
* <p> element(s); "" when neither is present
*/
function getAnalysisSection($txt) {
    // loadHTML() rejects an empty source (ValueError on PHP 8).
    if((string)$txt === ""){
        return "";
    }
    $doc = new DOMDocument();
    @$doc->loadHTML($txt);
    $xpath = new DOMXPath($doc);
    // null means "nothing collected yet"; an empty string is a real value.
    $result = null;
    foreach($xpath->query('//div[@class="analysis"]') as $item){
        $result = ($result === null) ? $item->nodeValue : $result.$item->nodeValue;
    }
    if($result !== null){
        $result = $result."\n|}";
    }
    foreach($xpath->query('//p') as $item){
        $result = ($result === null) ? $item->nodeValue : $result."\n\n".$item->nodeValue;
    }
    // Previously an input without analysis divs or paragraphs returned an
    // undefined variable.
    return ($result === null) ? "" : $result;
}
/**
* Split the expcite html comment into an array of info on each document
*
* Each document in an FDSys html file includes an html comment
* <!-- expcite: ... --> that includes the name and number of the title, subtitle
* chapter, subchapter, part, and section. This information is separated by "!@!"
*
* Every entry that is not found in $input is false, including "level".
*
* @param $input string The info found in <!-- expcite: ... -->
* @return array
* "titleNo" => TITLE NUMBER
* "titleName" => TITLE FULL NAME
* "subtitleNo" => SUBTITLE NUMBER
* "subtitleName" => SUBTITLE FULL NAME
* "chapNo" => CHAPTER NUMBER
* "chapName" => CHAPTER FULL NAME
* "subchapNo" => SUBCHAPTER NUMBER
* "subchapName" => SUBCHAPTER FULL NAME
* "partNo" => PART NUMBER
* "partName" => PART FULL NAME
* "secNo" => SECTION NUMBER
* "level" => the last level identified in expcite
*/
function expcite($input){
    $result = array(
        "titleNo" => false,
        "titleName" => false,
        "subtitleNo" => false,
        "subtitleName" => false,
        "chapNo" => false,
        "chapName" => false,
        "subchapNo" => false,
        "subchapName" => false,
        "partNo" => false,
        "partName" => false,
        "secNo" => false,
        "level" => false,
    );
    foreach(explode("!@!",$input) as $cite){
        // Each piece is tested against every pattern; "level" ends up as the
        // last pattern that matched in the last piece that matched anything.
        if(preg_match("/^TITLE ([0-9]+)-([A-Za-z\s\-]+)/",$cite,$title)){
            $result["titleNo"] = $title[1];
            $result["titleName"] = $title[2];
            $result["level"] = "title";
        }
        if(preg_match("/S[Uu][Bb][Tt][Ii][Tt][Ll][Ee] ([A-Z]+)-([A-Za-z\s\-]+)/",$cite,$subtitle)){
            $result["subtitleNo"] = $subtitle[1];
            $result["subtitleName"] = $subtitle[2];
            $result["level"] = "subtitle";
        }
        if(preg_match("/^CHAPTER ([0-9A-Z]+)-([A-Za-z\s\-]+)/",$cite,$chap)){
            $result["chapNo"] = $chap[1];
            $result["chapName"] = $chap[2];
            $result["level"] = "chapter";
        }
        if(preg_match("/S[Uu][Bb][Cc][Hh][Aa][Pp][Tt][Ee][Rr] ([A-Z]+)-([A-Za-z\s\-]+)/",$cite,$subchap)){
            $result["subchapNo"] = $subchap[1];
            $result["subchapName"] = $subchap[2];
            $result["level"] = "subchapter";
        }
        if(preg_match("/P[Aa][Rr][Tt] ([A-Z0-9]+)-([A-Za-z\s0-9]+)/",$cite,$part)){
            $result["partNo"] = $part[1];
            $result["partName"] = $part[2];
            $result["level"] = "part";
        }
        if(preg_match("/Sec[s]*. ([0-9A-Za-z,\s\-]+)/",$cite,$sec)){
            $result["secNo"] = $sec[1];
            $result["level"] = "section";
        }
    }
    return $result;
}
/**
* Names the document, including brackets
*
* Builds a wiki link whose target is "United States Code (YEAR)" followed by
* one "/Level NUMBER" segment per level found in the expcite comment, and
* whose label is the deepest of those segments without its leading slash.
* When $txt has no expcite comment the result is
* "[[United States Code (YEAR)|]]".
*
* @param $txt string Html containing <!-- expcite: ... -->
* @param $year string The year, item [2] from collectPublicationData()
* @return string The name of the document inside double brackets
*/
function getNames($txt,$year) {
    // Outermost level first; the last level present supplies the label.
    $levels = array(
        "titleNo" => "/Title ",
        "subtitleNo" => "/Subtitle ",
        "chapNo" => "/Chapter ",
        "subchapNo" => "/Subchapter ",
        "partNo" => "/Part ",
        "secNo" => "/Sec. ",
    );
    $path = "";
    $display = "";
    // Only look into $cite when the comment exists; indexing the old
    // "false" placeholder raised warnings.
    if(preg_match("/<!-- expcite:(.*) -->/",$txt,$expcite)){
        $cite = expcite($expcite[1]);
        foreach($levels as $key => $prefix){
            if($cite[$key] != false){
                $display = $prefix.$cite[$key];
                $path = $path.$display;
            }
        }
    }
    return "[[United States Code (".$year.")".$path."|".trim($display,"/")."]]";
}
/**
* Compares the amendment from a current document with a previous
* year's amendment for the same document
*
* Since a new year's publication may include duplicate information, running a comparison
* allows transclusion (such as {{:Content}}) instead of reuploading duplicate info
*
* @param $name string Name of the section ("/Title N/.../Sec. N"), without the "United States Code (YEAR)" prefix
* @param $revisionThisYear string Revision determined by determineRevisionKey()
* @param $indexFromPreviousYear array Page name => revision pairs, as generated by printRevisionIndex() on the previous year's title; "" when there is none
*
* @return string|null The year taken from the first index entry that has the
* same name and the same non-empty revision; null when there is no such entry
*/
function compareAmendments($name, $revisionThisYear, $indexFromPreviousYear){
    if($indexFromPreviousYear == ""){
        return;
    }
    foreach($indexFromPreviousYear as $pageTitle => $previousRevision){
        // Separate the year ($parts[1]) from the rest of the page name
        // ($parts[2]); only names that end in a digit are recognised.
        preg_match("/United States Code \(([0-9]+)\)(.*[0-9]+$)/",$pageTitle,$parts);
        if(!isset($parts[2]) || !isset($parts[1])){
            continue;
        }
        if($parts[2] != $name){
            continue;
        }
        if($revisionThisYear == ""){
            continue;
        }
        if($revisionThisYear != $previousRevision){
            continue;
        }
        return $parts[1];
    }
}
/**
* Default for the previous year's index when none has been defined above.
* The name must be $revisionIndex (lower-case r): that is the global that
* formatFDSys() reads and the name that printRevisionIndex() prints.
*/
if(!isset($revisionIndex)){
    $revisionIndex = "";
}
/**
* Format one document of the title and print it as bot-file pages.
*
* Prints, each wrapped in {{-start-}} / {{-stop-}} markers:
*  - the statute's page, or, when compareAmendments() finds the same revision
*    in the previous year's index, a page that only transcludes that year's page;
*  - the talk page carrying {{textinfo-fdsys}};
*  - for sections, a "TITLE U.S.C. § SECTION (YEAR)" citation redirect.
* The page's revision is also recorded in the global $masterList.
*
* @param $html string Html of one document, as split off by the caller
* @param $data array Generated by collectPublicationData()
* @param $previous string Name of previous section, collected in array from getNames()
* @param $next string Name of next section, collected in array from getNames()
*
* @return int Length in bytes of the text collected in $fulltext (the page
* header and the transclusion page are printed directly and are not counted)
*/
function formatFDSys($html, $data, $previous, $next) {
    global $revisionIndex;
    global $masterList;
    $revision = "";
    $fulltext = "";
    $footnotes = array();
    $currentyear = $data[2];
    $title = $data[6];

    /**
    * A comment in the html includes some useful info in <!-- documentid:... -->,
    * so this is captured in $sectioninfo and put in {{textinfo-fdsys}} on the
    * section's talk page. Anything missing or empty is reported as "Unspecified".
    */
    $sectioninfo = array(
        'documentid' => "Unspecified",
        'usckey' => "Unspecified",
        'currentthrough' => "Unspecified",
        'PDFPage' => "Unspecified",
    );
    // field name => capture group of the pattern below
    $docinfoGroups = array('documentid' => 1, 'usckey' => 3, 'currentthrough' => 5, 'PDFPage' => 7);
    if(preg_match("/documentid:([0-9a-z_,\-]+)[ ]*(usckey:([0-9a-z]+))*[ ]*(currentthrough:([0-9]+))*[ ]*(documentPDFPage:([0-9]+))*/",$html,$docinfo)){
        foreach($docinfoGroups as $field => $group){
            if(isset($docinfo[$group]) && $docinfo[$group] !== ""){
                $sectioninfo[$field] = $docinfo[$group];
            }
        }
    }

    /**
    * Edit HTML
    */
    //wikifyCite() slows things down sometimes, so the next two lines are commented out
    //$x = wikifyCite($html,$formats);
    //$html = str_replace("<","<",$x[0]);
    /**
    * Removes extra spacing
    */
    $html = str_replace("<p class=\"note-body-flush0_hang4\"> ","<p class=\"note-body-flush0_hang4\">:",$html);
    /**
    * Replaces html small caps with Template:Small-caps
    */
    $html = preg_replace("/<cap-smallcap>/","{{sc|",$html);
    $html = preg_replace("/<\/cap-smallcap>/","}} ",$html);
    /**
    * Replaces html foot note links with Template:ref
    */
    $html = preg_replace("/<a href=\"#[0-9_]+_target\" name=\"([0-9_]+)\">([0-9]+)<\/a>/","{{ref|$1|$2}}",$html);
    $html = preg_replace("/<a href=\"#[0-9_]+\" name=\"([0-9_]+)_target\"><sup>([0-9]+)<\/sup>(.*)<\/a>/","{{note|$1|$2}} $3</a>",$html);
    /**
    * Build wiki tables
    */
    $html = preg_replace("/<div class=\"analysis-head-left\">(.*)<\/div>/","|-\n! $1 \n!",$html);
    $html = preg_replace("/<h4 class=\"analysis-subhead\">(.*)<\/h4>/","<div class=\"analysis\">\n|-\n! colspan=2 | $1</div>",$html);
    $html = preg_replace("/<div class=\"two-column-analysis-style-content-left\">(.+)\.<\/div><div class=\"two-column-analysis-style-content-right\">(.*)(<sup>.*<\/sup>)*<\/div>/","|- \n| [[$data[6] U.S.C. § $1 ($currentyear)|$1]]\n| $2 $3",$html);
    // The pattern ends with a line feed (written as \n here instead of a
    // literal line break inside the string).
    $html = preg_replace("/<div class=\"two-column-analysis-style-content-left\" id=\"wide\">(.*)<\/div><\/div>\n/","|- \n| colspan=2 | $1\n",$html);
    $html = preg_replace("/<div class=\"two-column-analysis-style-content-left\">(.*)<\/div><div class=\"two-column-analysis-style-content-right\" id=\"wide\">(.*)(<sup>.*<\/sup>)*<\/div>/","|- \n| colspan=2 | $1 $2",$html);
    /**
    * Title Table
    */
    $html = preg_replace("/<div class=\"three-column-analysis-style-content-left\">([0-9A-Za-z–]+)\.<\/div><div class=\"three-column-analysis-style-content-center\">(.*)[ ]*(<sup>.*<\/sup>)*<\/div><div class=\"three-column-analysis-style-content-right\">(.*)<\/div>/","|- \n| [[United States Code ($data[2])/Title $data[6]/Chapter $1|$1]]\n| $2 § $3 $4 $5",$html);
    $html = preg_replace("/<div><div class=\"three-column-analysis-style-content-left\">([0-9A-Za-z–]+)\.<\/div><div class=\"three-column-analysis-style-content-center\" id=\"wide\">(.*)<\/div><div class=\"three-column-analysis-style-content-right\">(.*)<\/div><\/div>/","|- \n| [[United States Code ($data[2])/Title $title/Chapter $1|$1]]\n| $2 § $3",$html);
    /**
    * Chapter Table
    */
    // As above, the pattern ends with a line feed.
    $html = preg_replace("/<table class=\"uscdispo2col\" cellspacing=\"0\" width=\"700\"><caption>(.*)<\/caption>\n/","<p>\n{| class=wikitable \n|- \n| colspan=2 | $1\n",$html);
    $html = preg_replace("/<tr><th id=\"row0col0\">(.*)<\/th><th id=\"row0col1\">(.*)<\/th><\/tr>/","|- \n! $1\n! $2",$html);
    $html = preg_replace("#<tr>\n<td class=\"left\">(.*)<\/td>\n<td class=\"right\">(.*)<\/td>\n<\/tr>#","|- \n| $1\n| $2",$html);
    $html = str_replace("</table>","|}\n</p>",$html);
    $html = preg_replace("/<p class=\"intabledata\">(.*)<p\/>/","'''$1'''",$html);
    /**
    * Replace html comment with PDFPage to Template:page
    */
    //$html = preg_replace("/<!-- PDFPage:([0-9]+) -->/","<p>{{page|$1}}</p>",$html);

    /**
    * run expcite() when the expcite comment is found; otherwise every level
    * is false so the lookups below stay defined
    */
    $cite = array(
        "titleNo" => false, "titleName" => false,
        "subtitleNo" => false, "subtitleName" => false,
        "chapNo" => false, "chapName" => false,
        "subchapNo" => false, "subchapName" => false,
        "partNo" => false, "partName" => false,
        "secNo" => false, "level" => false,
    );
    if(preg_match("/<!-- expcite:(.*) -->/",$html,$expcite)){
        $cite = expcite($expcite[1]);
    }

    /**
    * Split the document into (field name, field html) pairs. With
    * PREG_SPLIT_DELIM_CAPTURE the pieces alternate: text before the first
    * marker, then field name, field content, field name, field content, ...
    */
    $html = str_replace("<br />"," ",$html);
    $split = preg_split("/<!-- field-start:([A-Za-z\-]+) -->/",$html,0,PREG_SPLIT_DELIM_CAPTURE);
    $txt = array(array("intro", $split[0]));
    for($k = 1; $k < count($split); $k += 2){
        $txt[] = array($split[$k], trim($split[$k+1]));
    }

    /**
    * Build name: one "/Level NUMBER" segment per level present, outermost
    * first. (The subtitle used to overwrite the title segment.)
    */
    $levels = array(
        "titleNo" => "/Title ",
        "subtitleNo" => "/Subtitle ",
        "chapNo" => "/Chapter ",
        "subchapNo" => "/Subchapter ",
        "partNo" => "/Part ",
        "secNo" => "/Sec. ",
    );
    $name = "";
    foreach($levels as $key => $prefix){
        if($cite[$key] != false){
            $name = $name.$prefix.$cite[$key];
        }
    }

    /**
    * The revision is needed before the body is built, to decide whether the
    * previous year's page can be transcluded.
    */
    foreach($txt as $section) {
        if($section[0] == "sourcecredit"){
            $revision = determineRevisionKey($section[1]);
        }
    }

    $prevYearCheck = compareAmendments($name, $revision, $revisionIndex);
    if(preg_match("/[0-9]+/",(string)$prevYearCheck,$prevYearFound)){
        /**
        * if content already exists, transclude it rather than uploading it.
        * The page is closed with {{-stop-}} like every other page here
        * (this one used to end with {{-end-}}).
        */
        print "{{-start-}} \n'''United States Code (".$currentyear.")".$name."''' \n\n{{:United States Code (".$prevYearFound[0].")".$name."}} \n\n{{-stop-}}\n";
    }else{
        /**
        * go through each section and get headers and text
        */
        foreach($txt as $section){
            switch($section[0]){
                case "head":
                    $fulltext = $fulltext . getFieldHeadThree($section[1]);
                    break;
                case "statute":
                    $fulltext = $fulltext . indentStatute($section[1]);
                    break;
                case "analysis":
                    $fulltext = $fulltext . "\n{| class=wikitable".getAnalysisSection($section[1]);
                    break;
                case "sourcecredit":
                    $fulltext = $fulltext . "\n\n===Source(s)===\n\n".getSourcecreditParagraph($section[1]);
                    break;
                case "historicalandrevision-note":
                    $fulltext = $fulltext . "\n\n===Historical and revision notes===\n\n".getP($section[1]);
                    break;
                case "miscellaneous-note":
                    $fulltext = $fulltext . "\n\n===Miscellaneous notes===\n\n".getP($section[1]);
                    break;
                case "amendment-note":
                    $fulltext = $fulltext . "\n\n===Amendment notes===\n\n".getP($section[1]);
                    break;
                case "shorttitle-amendment-note":
                    $fulltext = $fulltext . "\n\n===Short title amendment notes===\n\n".getP($section[1]);
                    break;
                case "savingsprovision-note":
                    $fulltext = $fulltext . "\n\n===Savings provision===\n\n".getP($section[1]);
                    break;
                // structural headings: <h3>, <h4>, then the paragraphs
                case "titlehead":
                case "structuralhead":
                case "repealedhead":
                case "omittedhead":
                    $fulltext = $fulltext . getFieldHeadThree($section[1]) .getFieldHeadFour($section[1]).getP($section[1]);
                    break;
                // collected here, appended after all other fields
                case "priorprovisions-note":
                    $footnotes[] = getFieldHeadFour($section[1]).getP($section[1]);
                    break;
                // notes that carry their own <h4> heading
                case "effectivedate-note":
                case "executivedate-amendment-note":
                case "shorttitle-note":
                case "codification-note":
                case "changeofname-note":
                case "referenceintext-note":
                case "function-transfer-note":
                case "repealsummary":
                case "effectivedate-repeal-note":
                case "futureamendment-note":
                case "terminationdate-amendment-note":
                case "effectivedate-termination-amendment-note":
                case "effectivedate-terminationdate-note":
                case "terminationdate-note":
                case "function-delegation-note":
                case "repeal-note":
                case "construction-amendment-note":
                case "construction-note":
                case "function-abolition-construction":
                case "titleenactmentcredit":
                case "terminationdate-repeal-note":
                case "function-transfer-repeal-savingsclause-similarprovisions-note":
                    $fulltext = $fulltext . getFieldHeadFour($section[1]).getP($section[1]);
                    break;
            }
        }
        /**
        * add start marker, title, and {{USC-header}}
        */
        print "{{-start-}} \n'''United States Code (".$currentyear.")".$name."''' \n<noinclude>\n{{USC-header | cite = ".$cite['titleNo']." USC Sec. ".$cite['secNo']." | year = ".$data[2]." | title = ".$cite['titleNo']." | title-name = ".$cite['titleName']." | subtitle = ".$cite['subtitleNo']." | subtitle-name = ".$cite['subtitleName']." | chapter = ".$cite['chapNo']." | chapter-name = ".$cite['chapName']." | subchapter = ".$cite['subchapNo']." | subchapter-name = ".$cite['subchapName']." | part = ".$cite['partNo']." | part-name = ".$cite['partName']." | section = ".$cite['secNo']." | level = ".$cite['level']." | previous = ".$previous." | next = ".$next." | revision = ".$revision." | current = | notes = }}\n</noinclude>\n\n";
        /**
        * Build footnotes section. This has to happen after the loop above,
        * which is where $footnotes is filled; the heading is only added when
        * there is something to put under it.
        */
        if(count($footnotes) > 0){
            $fulltext = $fulltext . "\n\n===Foot notes===\n\n".implode("\n", $footnotes);
        }
        /**
        * add a Last-amended category, for checking a section against a previous year
        */
        if($revision != "") {
            $fulltext = $fulltext . "\n[[Category:Last-amended ".$revision."]]";
        }else{
            $fulltext = $fulltext . "[[Category:Last-amendment unknown]]";
        }
        $fulltext = $fulltext . "\n{{-stop-}}\n";
    }
    /**
    * statute's talk page
    * NOTE(review): "Authorities-Pulication-Year" looks like a typo, but it is a
    * template parameter name; confirm against Template:textinfo-fdsys before changing.
    */
    $fulltext = $fulltext . "\n{{-start-}} \n'''Talk:United States Code (".$currentyear.")".$name."''' \n\n{{textinfo-fdsys | Authorities-Publication-Name = ".$data[0]." | Authorities-Publication-ID = ".$data[1]." | Authorities-Pulication-Year = ".$data[2]." | Authorities-Laws-Enacted-Through-Date = ".$data[3]." | Searchable-Laws-Enacted-Through-Date = ".$data[4]." | Authorities-USC-Title-Name = ".$data[5]." | Authorities-USC-Title-Enum = ".$data[6]." | Authorities-USC-Title-Status = ".$data[7]." | Conversion-Program = ".$data[8]." | Conversion-Datetime = ".$data[9]."| documentid = ".$sectioninfo['documentid']." | usckey = ".$sectioninfo['usckey']." | currentthrough = ".$sectioninfo['currentthrough']." | PDFPage = ".$sectioninfo['PDFPage']." }} \n\n{{-stop-}} \n";
    /**
    * create citation redirect, for convenient linking. Only sections get one,
    * and it points at the page name built above. (The name parts used to be
    * reset to false right before this check, so no redirect was ever written.)
    */
    if($cite["secNo"] != false) {
        $fulltext = $fulltext."\n{{-start-}}\n'''".$title." U.S.C. § ".$cite['secNo']." (".$currentyear.")'''\n\n\n#REDIRECT [[United States Code (".$currentyear.")".$name."]]\n\n\n{{-stop-}}\n";
    }
    $sectionSize = strlen($fulltext);
    print $fulltext;
    /**
    * generate master list of sections
    */
    $masterList["United States Code (".$data[2].")".$name] = $revision;
    return $sectionSize;
}
print "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />";
// $us is the list of URLs to process, separated by | in the "u" query
// parameter. On the first visit there is no "u", so the list is empty.
$us = isset($_GET["u"]) ? explode("|",$_GET["u"]) : array();
?>
<html>
<head>
<title>Format Title of U.S. Code</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<style>
fieldset {
width:70em;
}
</style>
</head>
<fieldset>
<legend>Enter URL of HTML file to format</legend>
<form method="get">
<label for="u">URL</label>
<input type="text" name="u" value="<?php /* escaped: the value comes straight from the query string */ if(isset($_GET['u'])) { print htmlspecialchars($_GET['u'], ENT_QUOTES, 'UTF-8'); } ?>"><br />
<button type="submit">Submit</button><br />
</form></fieldset>
<fieldset>
<legend>Create bot file</legend>
<?php
/**
* assemble the full code: for every URL in $us, download the file, print its
* publication data, print every document through formatFDSys() into
* textareas of roughly $splitSize bytes, and finally print the revision key.
*/
foreach($us as $u){
    // Nothing submitted (or an empty entry between two |): nothing to fetch.
    if($u == ""){
        continue;
    }
    $targetURL = $u;
    $userAgent = '';
    // Per-file state: names and the revision key belong to one title only.
    $names = array();
    $masterList = array();
    /**
    * Open up each file specified in $us. Multiple files are separated by |
    */
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
    curl_setopt($ch, CURLOPT_URL,$targetURL);
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_AUTOREFERER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 100);
    // The URL is user input: only allow web fetches, so that e.g. file://
    // cannot be used to read local files.
    curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    $html= curl_exec($ch);
    // Check the download before doing anything with it.
    if (!$html) {
        echo "\ncURL error number:" .curl_errno($ch)."\ncURL error:" . curl_error($ch);
        exit;
    }
    /**
    * Pieces alternate: [0] everything before the first matching documentid
    * comment, then [odd] the start of a documentid comment and [odd+1] the
    * rest of that document.
    */
    $sections = preg_split("/(<!-- documentid:[0-9]+_[\-ch0-9]+)/",$html,-1,PREG_SPLIT_DELIM_CAPTURE);
    $data = collectPublicationData($html);
    $labels = array(
        "AUTHORITIES-PUBLICATION-NAME",
        "AUTHORITIES-PUBLICATION-ID",
        "AUTHORITIES-PUBLICATION-YEAR",
        "AUTHORITIES-LAWS-ENACTED-THROUGH-DATE",
        "SEARCHABLE-LAWS-ENACTED-THROUGH-DATE",
        "AUTHORITIES-USC-TITLE-NAME",
        "AUTHORITIES-USC-TITLE-ENUM",
        "AUTHORITIES-USC-TITLE-STATUS",
        "CONVERSION-PROGRAM",
        "CONVERSION-DATETIME",
    );
    $summary = "<ul> \n";
    foreach($labels as $i => $label){
        // Values come from the downloaded file, so escape them for html.
        $value = isset($data[$i]) ? htmlspecialchars((string)$data[$i], ENT_QUOTES, 'UTF-8') : "";
        $summary = $summary."<li>".$label.": ".$value."</li> \n";
    }
    print $summary."</ul>";
    /**
    * get a list of names, for the "Next" and "Previous" sections.
    * $names[k] belongs to $sections[k]; only the even entries come from
    * pieces that can contain an expcite comment.
    */
    foreach($sections as $section){
        $names[] = getNames($section,$data[2]);
    }
    $sizeTotal = 0;
    /**
    * print each section via formatFDSys()
    */
    print "<textarea cols=100 rows=30>";
    foreach($sections as $key=>$section){
        if($key == 0){
            /**
            * The text before the first split point is formatted once (it used
            * to be passed twice, concatenated with itself). Its "next" is the
            * first document's name, $names[2].
            * NOTE(review): presumably this chunk holds the title-level
            * document; confirm against a real FDSys file.
            */
            $next = isset($names[2]) ? $names[2] : "";
            $sectionSize = formatFDSys($section, $data, "", $next);
        }elseif(is_odd($key)){
            /**
            * if it is the last section in the title, there is no "next"
            */
            $next = isset($names[$key+3]) ? $names[$key+3] : "";
            $sectionSize = formatFDSys($section.$sections[$key+1],$data,$names[$key-1],$next);
        }else{
            // Even pieces were already consumed together with the odd piece
            // in front of them.
            continue;
        }
        $sizeTotal = $sizeTotal + $sectionSize;
        /**
        * Separate each section into manageable chunks of text
        */
        if($sizeTotal >= $splitSize){
            print "</textarea><br/> \nSplit at: ".$sizeTotal."b <br/> \n<textarea cols=100 rows=30>";
            $sizeTotal = 0;
        }
    }
    print "</textarea>";
    /**
    * Print the revision key collected in $masterList by formatFDSys().
    * printRevisionIndex() prints its output itself, so it is called between
    * the two halves of the textarea instead of being concatenated.
    */
    print "<h2>Revision key for Title ".htmlspecialchars((string)$data[5], ENT_QUOTES, 'UTF-8').", ".htmlspecialchars((string)$data[2], ENT_QUOTES, 'UTF-8')."</h2> \n<textarea name='revisionkey' cols=100 rows=30>\n";
    printRevisionIndex($masterList);
    print " \n</textarea>";
}
?>
</fieldset>
</html>