User:Slaporte/Automated categorization
Jump to navigation
Jump to search
This is the code I use to automatically suggest categories for United States Supreme Court decisions.
- 7/14/2010: Added new categories and an example script.
Code
// Keyword => category table used by categoryGuess().
// Each key is a regular-expression fragment (written without delimiters) that is
// matched case-sensitively against the text of a decision; each value is the
// category wikitext suggested when the key matches. Several keys may map to the
// same category.
$catlist = array(
    "tax" => "[[Category:United States Supreme Court decisions on taxation]]",
    "civil.*jurisdiction" => "[[Category:United States Supreme Court decisions on civil procedure]]",
    "Sherman" => "[[Category:United States Supreme Court decisions on antitrust]]",
    "antitrust" => "[[Category:United States Supreme Court decisions on antitrust]]",
    "copyright" => "[[Category:United States Supreme Court decisions on copyright]]",
    "impeach" => "[[Category:United States Supreme Court decisions on evidence]]",
    "class action" => "[[Category:United States Supreme Court decisions on class action]]",
    "ERISA" => "[[Category:United States Supreme Court decisions on ERISA]]",
    //"employee benefit" => "[[Category:United States Supreme Court decisions on ERISA]]",
    "treaty" => "[[Category:United States Supreme Court decisions on treaties]]",
    "constitutional" => "[[Category:United States Supreme Court decisions on constitutionality]]",
    "delegation of .* power" => "[[Category:United States Supreme Court decisions on separation of Powers]]",
    "discrimination" => "[[Category:United States Supreme Court decisions on civil rights]]",
    "§ 1983" => "[[Category:United States Supreme Court decisions on civil rights]]",
    "ethical obligation" => "[[Category:United States Supreme Court decisions on professional responsibility]]",
    "Rule 12\(b\)\(6\)" => "[[Category:United States Supreme Court decisions on civil procedure]]",
    // Same category as the "environmental" keyword below (previously misspelled "environmental aw").
    "NEPA" => "[[Category:United States Supreme Court decisions on environmental law]]",
    "environmental" => "[[Category:United States Supreme Court decisions on environmental law]]",
    "law enforcement" => "[[Category:United States Supreme Court decisions on criminal law]]",
    "First Amendment" => "[[Category:United States Supreme Court decisions on the First Amendment]]",
    "Second Amendment" => "[[Category:United States Supreme Court decisions on the Second Amendment]]",
    "Fourth Amendment" => "[[Category:United States Supreme Court decisions on the Fourth Amendment]]",
    "Fifth Amendment" => "[[Category:United States Supreme Court decisions on the Fifth Amendment]]",
    "Eighth Amendment" => "[[Category:United States Supreme Court decisions on the Eighth Amendment]]",
    "[Dd]ue [Pp]rocess" => "[[Category:United States Supreme Court decisions on due process]]",
    "community property" => "[[Category:United States Supreme Court decisions on property]]",
    "disparate-impact" => "[[Category:United States Supreme Court decisions on civil rights]]",
    "freedom of speech" => "[[Category:United States Supreme Court decisions on freedom of speech]]",
    "time, place, and manner" => "[[Category:United States Supreme Court decisions on freedom of speech]]",
    "clear and present danger" => "[[Category:United States Supreme Court decisions on freedom of speech]]",
    "free exercise of religion" => "[[Category:United States Supreme Court decisions on religion]]",
    "Establishment Clause" => "[[Category:United States Supreme Court decisions on religion]]",
    "Sixth Amendment" => "[[Category:United States Supreme Court decisions on the Sixth Amendment]]",
    "Commerce Clause" => "[[Category:United States Supreme Court decisions on the Commerce Clause]]",
    "justiciable" => "[[Category:United States Supreme Court decisions on justiciability]]",
    "justiciability" => "[[Category:United States Supreme Court decisions on justiciability]]",
    "abortion" => "[[Category:United States Supreme Court decisions on abortion]]",
    "SEC" => "[[Category:United States Supreme Court decisions on securities]]",
    "arbitrary and capricious" => "[[Category:United States Supreme Court decisions on statutory interpretation]]",
    "complete diversity" => "[[Category:United States Supreme Court decisions on civil procedure]]",
);
/**
 * Suggest categories for a text by testing keyword patterns against it.
 *
 * @param string $txt  Text to scan.
 * @param array  $list Map of regular-expression fragment (no delimiters) => category wikitext.
 * @return array The distinct categories whose pattern matched, in list order, followed by
 *               the "Automated categorization" marker; or only the "Uncategorized" category
 *               when no pattern matched.
 */
function categoryGuess($txt, $list){
    // Keys are raw regex fragments, so a "/" inside one would terminate a
    // "/"-delimited pattern early and make preg_match() fail. A control
    // character is a legal PCRE delimiter and will not occur in a keyword.
    $delimiter = "\x01";
    $categories = array();
    foreach($list as $key => $cat) {
        $key = (string) $key;
        // An empty pattern matches every text, so it carries no signal; a key
        // containing the delimiter itself cannot be wrapped safely.
        if($key === "" || strpos($key, $delimiter) !== false){
            continue;
        }
        // preg_match() yields 1 on a match, 0 on no match and false on an invalid pattern.
        if(preg_match($delimiter.$key.$delimiter, $txt) === 1 && !in_array($cat, $categories, true)){
            $categories[] = $cat;
        }
    }
    if(count($categories) === 0){
        $categories[] = "[[Category:Uncategorized United States Supreme Court decision]]";
    } else {
        $categories[] = "[[Category:Automated categorization]]";
    }
    return $categories;
}
Example script
<?php
/**
 * Download the body of a URL with cURL.
 *
 * @param string $url Address to fetch.
 * @return string|bool The response body, or false when the transfer fails
 *                     (the value of curl_exec() is returned unchanged).
 */
function getRawText($url){
    $ch = curl_init();
    $timeout = 5;
    curl_setopt($ch,CURLOPT_URL,$url);
    curl_setopt($ch,CURLOPT_RETURNTRANSFER,1);
    curl_setopt($ch,CURLOPT_CONNECTTIMEOUT,$timeout);
    // Follow redirects (for example http:// answered with a redirect to https://)
    // so the caller receives the page text rather than the redirect response.
    curl_setopt($ch,CURLOPT_FOLLOWLOCATION,1);
    curl_setopt($ch,CURLOPT_MAXREDIRS,5);
    // CURLOPT_USERAGENT takes only the header value; cURL adds the "User-Agent: " name itself.
    curl_setopt($ch, CURLOPT_USERAGENT, 'guessingcategory/1 [[User:Slaporte]]');
    $data = curl_exec($ch);
    curl_close($ch);
    return $data;
}
/**
 * Suggest categories for a text by testing keyword patterns against it.
 *
 * @param string $txt  Text to scan.
 * @param array  $list Map of regular-expression fragment (no delimiters) => category wikitext.
 * @return array The distinct categories whose pattern matched, in list order, followed by
 *               the "Automated categorization" marker; or only the "Uncategorized" category
 *               when no pattern matched.
 */
function categoryGuess($txt, $list){
    // Keys are raw regex fragments, so a "/" inside one would terminate a
    // "/"-delimited pattern early and make preg_match() fail. A control
    // character is a legal PCRE delimiter and will not occur in a keyword.
    $delimiter = "\x01";
    $categories = array();
    foreach($list as $key => $cat) {
        $key = (string) $key;
        // An empty pattern matches every text, so it carries no signal; a key
        // containing the delimiter itself cannot be wrapped safely.
        if($key === "" || strpos($key, $delimiter) !== false){
            continue;
        }
        // preg_match() yields 1 on a match, 0 on no match and false on an invalid pattern.
        if(preg_match($delimiter.$key.$delimiter, $txt) === 1 && !in_array($cat, $categories, true)){
            $categories[] = $cat;
        }
    }
    if(count($categories) === 0){
        $categories[] = "[[Category:Uncategorized United States Supreme Court decision]]";
    } else {
        $categories[] = "[[Category:Automated categorization]]";
    }
    return $categories;
}
/**
 * Fetch the live keyword list from the wiki and parse it into a lookup table.
 *
 * The page is read as raw wikitext with one "keyword => category" entry per
 * line; surrounding <nowiki>/<pre> wrapper tags are stripped before parsing.
 *
 * @return array Map of trimmed keyword => trimmed category text. Empty when the
 *               page could not be fetched or contains no well-formed entries.
 */
function getList(){
    $cats = array();
    $txt = getRawText("https://en.wikisource.org/w/index.php?action=raw&title=User:Slaporte/Automated_categorization/list");
    // getRawText() hands back curl_exec()'s result, which is not a string on failure.
    if(!is_string($txt) || $txt === ""){
        return $cats;
    }
    // Same replacements, in the same order, as four separate str_replace() calls.
    $txt = str_replace(array("<nowiki>\n", "<pre>\n", "\n</nowiki>", "\n</pre>"), "", $txt);
    foreach(explode("\n", $txt) as $item) {
        $pair = explode("=>", $item);
        // Skip blank lines and anything else that is not a "keyword => category" entry.
        if(!isset($pair[1])){
            continue;
        }
        $keyword = trim($pair[0]);
        $category = trim($pair[1]);
        // A half-empty entry would either match every text or suggest nothing.
        if($keyword === "" || $category === ""){
            continue;
        }
        $cats[$keyword] = $category;
    }
    return $cats;
}
/**
 * Print each category on its own line, escaped for inclusion in HTML.
 *
 * The output is written into the page's <textarea>, and the category text
 * originates from a wiki page, so markup characters are escaped to keep a
 * stray "</textarea>" or tag from breaking out into the page.
 *
 * @param array $cats Category strings to print.
 * @return void
 */
function displayCats($cats){
    foreach($cats as $cat){
        print htmlspecialchars((string) $cat, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')."\n";
    }
}
// Remember the submitted title so the form can be re-populated and the lookup
// run. Non-string values (e.g. PageName[]=x) are ignored.
if(isset($_POST["PageName"]) && is_string($_POST["PageName"])){
$page = $_POST["PageName"];
}
?>
<html>
<head><title>CategoryGuesser</title>
</head>
<body>
<div id=main>
<h1>Court Case Category Suggestion Tool</h1>
<form method="post" action="categoryguess.php">
<label for="PageName">Page title:</label>
<input type="text" name="PageName" id="PageName" value="<?PHP /* escape user input before echoing it into an attribute */ if(isset($page)){print htmlspecialchars($page, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');} ?>">
<div class="subtitle"><p>Enter the title of the page on wikisource, such as <i>International Shoe v. State of Washington</i></p></div>
<button type="submit" value="Submit" id="find">Submit</button>
<br />
<div id='results'>
<?php
if(isset($page)){
// Wiki titles use underscores for spaces; percent-encode the rest so characters
// such as "&", "#" or "?" in a title cannot alter the query string.
$title = rawurlencode(str_replace(" ","_",$page));
$url = "https://en.wikisource.org/w/index.php?action=raw&title=".$title;
print "<br/><label for='cats'>Suggested Categories:</label><br/><br/>";
print "<textarea cols=70 rows=30 name='cats' id='cats'>";
displayCats(categoryGuess(getRawText($url),getList()));
print "</textarea>";
}
?>
</div>
</form>
</div>
<p><a href="http://en.wikisource.org/wiki/User:Slaporte/Automated_categorization/list">Add or edit</a> category suggestions (live!). <a href="http://en.wikisource.org/wiki/User:Slaporte/Automated_categorization">source code and documentation</a> available.</p><p>leave <a href="http://en.wikisource.org/wiki/User_talk:Slaporte/Automated_categorization">feedback</a>.</p>
</body>
</html>