User:Mcktimo/scripts/discussgrab2.php
< User:Mcktimo | scripts
<?php
// Everything this script prints is sent as XML (a MediaWiki export document).
header('Content-Type: text/xml');
// Scraper library; presumably the source of file_get_html(), which the
// functions below call but this file does not define.
include_once('simple_html_dom.php');
$dir = "obout/"; //directory of all pages; each file name is used as a page name
// Keep only the pages whose remote message list shows a discussion.
$urllist = whichPagesHaveDiscussion($dir);
//print_r($urllist);
?>
<mediawiki xml:lang="en" xmlns="http://www.mediawiki.org/xml/export-0.5/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.5/ http://www.mediawiki.org/xml/export-0.5.xsd" version="0.5" >
<?php
// Full opening tag on purpose: a bare "<?" is only parsed as PHP when
// short_open_tag is enabled, otherwise the code below is printed verbatim.
// createDiscussionXML() echoes one <page> element per URL as it goes; the
// returned wikitext is only kept for debugging.
$theXML = createDiscussionXML($urllist);
//echo $theXML;
/**
 * List the message-list URLs of the pages that have a discussion.
 *
 * Every regular file found in $dir is treated as a page name and appended to
 * the wiki's message-list URL. A page is reported when the fetched document
 * contains a th[class="pagination"] element whose string form is longer than
 * 5 characters. Pages that cannot be fetched are skipped.
 *
 * @param string $dir Directory to scan. File names are appended to it
 *                    directly, so it must end with a directory separator.
 * @return array List of URLs, each one at most once.
 */
function whichPagesHaveDiscussion($dir){
    // Collect the names of the regular files in $dir; ".", ".." and
    // sub-directories are filtered out by the filetype() test.
    $farr = array();
    if (is_dir($dir)) {
        if ($dh = opendir($dir)) {
            while (($file = readdir($dh)) !== false) {
                if (filetype($dir . $file) == 'file') {
                    $farr[] = $file;
                }
            }
            closedir($dh);
        }
    }

    $urll = array();
    foreach ($farr as $pah) {
        $url = "http://occupyboston.wikispaces.com/message/list/" . $pah;
        $html = file_get_html($url);
        if (!$html) {
            // Fetch or parse failed; calling find() on false would be fatal.
            continue;
        }
        foreach ($html->find('th[class="pagination"] ') as $dd) {
            if (strlen($dd) > 5) { //if this tag is found then there is discussion
                $urll[] = $url;
                // One hit is enough: a second pagination cell on the same
                // page must not add the URL (and later the page) twice.
                break;
            }
        }
        // Release the parsed DOM before fetching the next page so memory
        // does not grow with the number of files.
        $html->clear();
        unset($html);
    }
    return $urll;
}
/**
 * Echo one MediaWiki export <page> element per discussion URL.
 *
 * For each URL the topic links (td[class="w_subject"] a) are collected and
 * handed to createDiscussionPage(); the wikitext it returns becomes the
 * <text> of a User_talk page named after the part of the URL that follows
 * the message-list prefix (spaces replaced by underscores). URLs that cannot
 * be fetched are skipped.
 *
 * @param array $urllist Message-list URLs; each is expected to start with
 *                       "http://occupyboston.wikispaces.com/message/list/".
 * @return string Concatenated wikitext of every page that was written
 *                ("" when nothing was written).
 */
function createDiscussionXML($urllist){
    // Start from an empty string so the first ".=" and the final return never
    // touch an undefined variable.
    $xmlf = "";
    // The page name is whatever follows this prefix (48 characters).
    $prefixLength = strlen("http://occupyboston.wikispaces.com/message/list/");
    foreach ($urllist as $url) {
        $pgtitle = str_replace(" ", "_", substr($url, $prefixLength));
        $html = file_get_html($url);
        if (!$html) {
            // Fetch or parse failed; calling find() on false would be fatal.
            continue;
        }
        $dl = array(); // href of every topic listed on this page
        foreach ($html->find('td[class="w_subject"] a') as $d) {
            $dl[] = strip_tags($d->href);
        }
        // Only the href strings are needed from here on; free the DOM.
        $html->clear();
        unset($html);
        //print_r($dl);
        $pg = createDiscussionPage($dl);
        // The title is derived from a file name, so escape XML metacharacters.
        // NOTE(review): $pg is echoed unescaped; it is strip_tags() output and
        // presumably still carries the source page's HTML entities - confirm
        // before escaping it as well.
?>
<page>
<title>User_talk:Mcktimo/take1/<?php echo htmlspecialchars($pgtitle); ?></title>
<revision>
<timestamp>2011-10-19T01:01:00Z</timestamp>
<contributor>
<username>Mcktimo</username>
<id>3</id>
</contributor>
<text xml:space="preserve">
<?php
        echo $pg;
?>
</text>
</revision>
</page>
<?php
        $xmlf .= $pg;
    }
    return $xmlf;
}
/**
 * Return $list[$index], or "" when that element does not exist.
 *
 * Keeps PHP notices about missing offsets out of the generated XML.
 */
function discussionField($list, $index) {
    return isset($list[$index]) ? $list[$index] : "";
}

/**
 * Build the wikitext for one talk page from a list of topic paths.
 *
 * Each path is appended to "http://occupyboston.wikispaces.com" and fetched.
 * Per topic the output is a "==title==" heading, a ":[[user:..]] .." line and
 * the first message body, followed by a "====..====" section for every
 * further message. Topics that cannot be fetched are skipped; fields missing
 * from a fetched page are written as empty strings.
 *
 * @param array $dlist Topic paths (each beginning with "/").
 * @return string Wikitext of all topics, "" for an empty list.
 */
function createDiscussionPage($dlist) {
    $pg = "";
    foreach ($dlist as $d) {
        $url = "http://occupyboston.wikispaces.com" . $d;
        $html = file_get_html($url);
        if (!$html) {
            // Fetch or parse failed; calling find() on false would be fatal.
            continue;
        }
        //echo $url."\n\n";

        // Topic title: first h1.noSpacing on the page.
        $dti = $html->find('h1[class="noSpacing"] ');
        $dtit = isset($dti[0]) ? strip_tags($dti[0]) : "";

        // <strong> texts inside message bodies; used as reply headings below.
        $dat = array();
        foreach ($html->find('td[class="w_body"] strong') as $ut) {
            $dat[] = strip_tags($ut);
        }
        // Link texts inside message bodies, in document order.
        $uid = array();
        foreach ($html->find('td[class="w_body"] a') as $ut) {
            $uid[] = strip_tags($ut);
        }
        // Text of every div.wiki block.
        $raw = array();
        foreach ($html->find('div[class="wiki"] ') as $rt) {
            $raw[] = strip_tags($rt);
        }
        // All needed text has been copied into plain strings; free the DOM.
        $html->clear();
        unset($html);

        // Message bodies are the div.wiki blocks minus the first one and the
        // last two (presumably page chrome - confirm against the site markup).
        // Computed once, after the loop, so it is defined even with no blocks.
        $raa = array_slice($raw, 1, -2);

        // Links are consumed in groups of three: the first of each group is
        // used as the user name, the second is printed next to it (presumably
        // the post date); the third is ignored.
        $ui = array();
        $di = array();
        $linkCount = count($uid);
        for ($i = 0; $i < $linkCount; $i++) {
            if ($i % 3 == 0) {
                $ui[] = $uid[$i];
            } elseif ($i % 3 == 1) {
                $di[] = $uid[$i];
            }
        }
        //print_r($dat);
        //print_r($di);
        //print_r($ui);
        //print_r($raa);

        // First message opens the topic section.
        $topic = "\n";
        $topic .= "==" . $dtit . "==\n";
        $topic .= ":[[user:" . discussionField($ui, 0) . "]] " . discussionField($di, 0) . "\n";
        $topic .= discussionField($raa, 0) . "\n";
        // Every further message becomes a sub-section; heading i-1 belongs to
        // message i because the first message has no <strong> heading entry.
        $messageCount = count($raa);
        for ($i = 1; $i < $messageCount; $i++) {
            $topic .= "====" . discussionField($dat, $i - 1) . "====\n";
            $topic .= "::[[user:" . discussionField($ui, $i) . "]] " . discussionField($di, $i) . "\n";
            $topic .= $raa[$i] . "\n";
        }
        $pg .= $topic;
    }
    return $pg;
}
?>
</mediawiki>
