User:Mcktimo/scripts/discussgrab2.php

  <?php
// Script entry point: build the discussion pages for every URL in $urllist.
//
// The full "<?php" tag is used instead of the short "<?" form so the script
// does not depend on the short_open_tag ini setting.
//
// NOTE(review): $urllist is not assigned anywhere in the visible source.
// Presumably it should come from whichPagesHaveDiscussion($dir) or from an
// including script -- confirm and set it before this call.
$theXML = createDiscussionXML($urllist);

// createDiscussionXML() already echoes each page as it is built, so the
// returned string is only printed when debugging.
//echo $theXML;

/**
 * Find the wiki pages that have a discussion thread.
 *
 * The name of every regular file in $dir is appended to the wikispaces
 * message-list URL; that page is fetched and checked for a non-trivial
 * `th.pagination` cell, which is treated as the sign that a discussion exists.
 *
 * Requires file_get_html() from the Simple HTML DOM library to be loaded.
 *
 * @param string $dir Directory to scan. A trailing slash is optional.
 * @return array Message-list URLs of the pages that have a discussion, each
 *               listed once. Empty when $dir is not a readable directory.
 */
function whichPagesHaveDiscussion($dir)
{
    $urll = array();
    if (!is_dir($dir)) {
        return $urll;
    }

    // Normalise the directory so "$base . $file" is a valid path whether or
    // not the caller passed a trailing separator.
    $base = rtrim($dir, '/\\') . DIRECTORY_SEPARATOR;

    // Collect the names of the regular files only ("." / ".." and
    // sub-directories report a filetype of "dir" and are skipped).
    $farr = array();
    $dh = opendir($dir);
    if ($dh !== false) {
        while (($file = readdir($dh)) !== false) {
            if (filetype($base . $file) === 'file') {
                $farr[] = $file;
            }
        }
        closedir($dh);
    }

    foreach ($farr as $pah) {
        $url = 'http://occupyboston.wikispaces.com/message/list/' . $pah;
        $html = file_get_html($url);
        if (!$html) {
            // Fetch or parse failed; there is nothing to inspect for this page.
            continue;
        }
        foreach ($html->find('th[class="pagination"] ') as $dd) {
            // A pagination cell with real content means the page has messages.
            if (strlen((string) $dd) > 5) {
                $urll[] = $url;
                // One hit is enough: stop so a page with several pagination
                // cells is not listed (and later exported) more than once.
                break;
            }
        }
    }

    return $urll;
}

/**
 * Build the discussion text for every page in $urllist, writing one record
 * per page to the output as it goes.
 *
 * For each message-list URL the topic links (`td.w_subject a`) are collected
 * and handed to createDiscussionPage(); the resulting text is echoed after a
 * fixed header line and also appended to the return value.
 *
 * Requires file_get_html() from the Simple HTML DOM library to be loaded.
 *
 * NOTE(review): the literal header text below (title, timestamp, user name,
 * id) looks like the field values of an XML page record whose element tags
 * are missing -- confirm against the intended import format. It is emitted
 * here exactly as found.
 *
 * @param array $urllist Message-list URLs, e.g. from whichPagesHaveDiscussion().
 * @return string Concatenated discussion text of all pages ('' for an empty list).
 */
function createDiscussionXML($urllist)
{
    // Everything after this prefix in a message-list URL is the page name.
    $prefix = 'http://occupyboston.wikispaces.com/message/list/';

    $xmlf = '';
    foreach ($urllist as $url) {
        $pgtitle = str_replace(' ', '_', substr($url, strlen($prefix)));

        $html = file_get_html($url);
        if (!$html) {
            // Fetch or parse failed; skip this page rather than abort the run.
            continue;
        }

        // Paths of the individual discussion topics linked from the list page.
        $dl = array();
        foreach ($html->find('td[class="w_subject"] a') as $d) {
            $dl[] = strip_tags($d->href);
        }

        $pg = createDiscussionPage($dl);

        echo "\t\t\t User_talk:Mcktimo/take1/" . $pgtitle . " 2011-10-19T01:01:00Z Mcktimo 3\n\n ";
        echo $pg;
        echo ' ';

        $xmlf .= $pg;
    }

    return $xmlf;
}

/**
 * Render a list of discussion topics as wiki text.
 *
 * Each entry of $dlist is a site-relative path to a topic page. The page is
 * fetched and turned into a block of the form
 *
 *   ==<topic title>==
 *   :user:<author> <date>
 *   <first message>
 *   ====<reply heading>====
 *   ::user:<author> <date>
 *   <reply text>
 *
 * Requires file_get_html() from the Simple HTML DOM library to be loaded.
 *
 * @param array $dlist Site-relative topic paths (appended to the site root).
 * @return string Wiki text for all topics, '' when $dlist is empty.
 */
function createDiscussionPage($dlist)
{
    $pg = '';
    foreach ($dlist as $d) {
        $url = 'http://occupyboston.wikispaces.com' . $d;
        $html = file_get_html($url);
        if (!$html) {
            // Fetch or parse failed; leave this topic out.
            continue;
        }

        // Topic title.
        $dti = $html->find('h1[class="noSpacing"] ');
        $dtit = isset($dti[0]) ? strip_tags((string) $dti[0]) : '';

        // Text of the <strong> elements in the message cells; used below as
        // the heading of each reply.
        $dat = array();
        foreach ($html->find('td[class="w_body"] strong') as $ut) {
            $dat[] = strip_tags((string) $ut);
        }

        // Text of every link in the message cells.
        $uid = array();
        foreach ($html->find('td[class="w_body"] a') as $ut) {
            $uid[] = strip_tags((string) $ut);
        }

        // Message bodies. The first and the last two "wiki" divs are dropped
        // (presumably page furniture rather than messages -- TODO confirm).
        $raw = array();
        foreach ($html->find('div[class="wiki"] ') as $rt) {
            $raw[] = strip_tags((string) $rt);
        }
        $raa = array_slice($raw, 1, -2);

        // The links are consumed in groups of three per message: the first is
        // used as the user, the second as the date, the third is ignored.
        $ui = array();
        $di = array();
        foreach ($uid as $i => $text) {
            if ($i % 3 === 0) {
                $ui[] = $text;
            } elseif ($i % 3 === 1) {
                $di[] = $text;
            }
        }

        // Opening message of the topic. Missing pieces render as empty text.
        $topic = "\n";
        $topic .= '==' . $dtit . "==\n";
        $topic .= ':user:' . (isset($ui[0]) ? $ui[0] : '') . ' ' . (isset($di[0]) ? $di[0] : '') . "\n";
        $topic .= (isset($raa[0]) ? $raa[0] : '') . "\n";

        // Replies: reply $i pairs with heading $i - 1.
        $total = count($raa);
        for ($i = 1; $i < $total; $i++) {
            $topic .= '====' . (isset($dat[$i - 1]) ? $dat[$i - 1] : '') . "====\n";
            $topic .= '::user:' . (isset($ui[$i]) ? $ui[$i] : '') . ' ' . (isset($di[$i]) ? $di[$i] : '') . "\n";
            $topic .= $raa[$i] . "\n";
        }

        $pg .= $topic;
    }

    return $pg;
}

// NOTE(review): the closing PHP tag that followed this function is omitted,
// since it is optional at the end of a script and avoids stray trailing
// output. Restore it if the full file has markup after this point.