User:Iridibot/Source

From Wikinews, the free news source you can write!
Jump to navigation Jump to search
<?php

/* the list of things that are wrong here:
 * 1. ugly.
 * 2. not enough error checking
 * 3. no lag time checking at all
 * 4. UGLY.
 * 5. mwapi does not support article editing, using really nasty hacky solution
 * 6. we're not using the php return format, but is that a bad thing?
 * 7. u-g-l-y.
 * 8. extremely limited in scope compared to what it has the capacity to be
 * 9. no objective code; might be a good idea to migrate
 * 10. code is basically un-reusable (functionally equivalent to 9, but still)
 * 11. way too much output by default
 * 12. no logs
 * 13. gives up really easily, but that's not critical
 * 14. at least 4 security concerns which i won't outline here
 * 15. yuck. ugly ugly ugly code.
*/

// Base URL of the wiki's script directory; a script name such as 'api.php' is appended to it.
// HTTPS is required: Wikimedia wikis answer plain-http requests with a redirect, and the
// cURL handles built in this file never enable redirect following, so an http base would
// make every request come back without a usable body.
define('APIBASE', 'https://en.wikinews.org/w/');
// Name used in the User-Agent header and in edit summaries.
define('BOTNAME', 'iridibot');
// Wiki account credentials. Left blank here; they must be filled in before running.
define('BOTLOGIN', '');
define('BOTPASSWORD', '');
define('BOTVERSION', '1.02o'); // o=official
// Parameter string that every POST body built by curl_makePOST() starts with.
define('DEFAULTAPIPARAMS', 'format=xml');

/**
 * Create a cURL handle for a script below APIBASE, preconfigured for this bot.
 *
 * The handle is set up as a POST request whose response body is returned by
 * curl_exec() (headers excluded). Cookies are read from and written back to
 * /tmp/currentBot.cookies, and the User-Agent identifies the bot name,
 * version and login.
 *
 * @param string $uri script (plus optional query string) appended to APIBASE
 * @return resource|false the configured handle, or false if curl_init() fails
 */
function newCurlObject($uri='api.php')
{
	$handle = curl_init(APIBASE.$uri);
	if ($handle === false)
		return false;

	$cookieStore = '/tmp/currentBot.cookies';
	$settings = array(
		CURLOPT_HEADER         => false,
		CURLOPT_POST           => true,
		CURLOPT_RETURNTRANSFER => true,
		CURLOPT_COOKIEFILE     => $cookieStore,
		CURLOPT_COOKIEJAR      => $cookieStore,
		CURLOPT_USERAGENT      => BOTNAME.' v'.BOTVERSION.' (as '.BOTLOGIN.')',
	);

	// applied one at a time, in the order listed above
	foreach ($settings as $option => $value)
		curl_setopt($handle, $option, $value);

	return $handle;
}

/**
 * Attach a URL-encoded POST body to a cURL handle.
 *
 * The body starts with DEFAULTAPIPARAMS and continues with one name=value
 * pair per entry of $p, joined by '&'. Values are passed through urlencode();
 * names are sent as given. Nothing is done when $p is not an array.
 *
 * @param resource $c cURL handle to configure
 * @param array    $p map of parameter name => value
 * @return void
 */
function curl_makePOST($c, $p)
{
	if (!is_array($p))
		return;

	$body = DEFAULTAPIPARAMS;
	foreach ($p as $name => $value)
	{
		$pair = $name.'='.urlencode($value);
		// only put a separator in front when something precedes this pair
		$body = ($body === '') ? $pair : $body.'&'.$pair;
	}

	curl_setopt($c, CURLOPT_POSTFIELDS, $body);
}

/**
 * Log the bot in through the API's action=login using BOTLOGIN/BOTPASSWORD.
 *
 * The handle comes from newCurlObject(), so cookies received here are kept in
 * its cookie jar for the requests that follow.
 *
 * @return bool true only when the reply's <login result="..."> is "Success";
 *              false when no handle could be created, the transfer failed,
 *              the reply was empty or not well-formed XML, or the API
 *              reported any other result
 */
function login()
{
	$c = newCurlObject();
	if ($c === false) return false;
	// treat this as a new cookie session: session cookies loaded from the cookie file are ignored
	curl_setopt($c, CURLOPT_COOKIESESSION, true);
	curl_makePOST($c, array('action'=>'login','lgname'=>BOTLOGIN,'lgpassword'=>BOTPASSWORD));
	sleep(5); // fixed pause before the request (presumably a crude rate limit)
	$result = curl_exec($c);
	curl_close($c);
	unset($c);

	// curl_exec() yields false on a failed transfer; an empty body cannot be parsed either
	if (!is_string($result) || $result === '')
		return false;

	try
	{
		$xml = new SimpleXMLElement($result);
	}
	catch (Exception $e)
	{
		// the constructor throws on malformed XML; report it as a failed login
		return false;
	}

	return ((string)$xml->login['result'] === 'Success');
}

/**
 * Fetch the pages in a category, newest first, through the API.
 *
 * Runs action=query with generator=categorymembers (sorted by timestamp,
 * descending) and prop=info, and collects the <page> nodes of the reply.
 *
 * @param string $cat   category name, sent as gcmcategory
 * @param array  $props values joined with '|' and sent as gcmprop
 * @return array|false list of SimpleXMLElement <page> nodes (empty when the
 *                     reply lists no pages), or false when no handle could be
 *                     created, the transfer failed, or the reply was empty or
 *                     not well-formed XML
 */
function fetchCategoryMembers($cat, $props)
{
	$c = newCurlObject();
	if ($c === false) return false;
	curl_makePOST($c, array('action'=>'query','generator'=>'categorymembers','gcmcategory'=>$cat,
							'gcmsort'=>'timestamp','gcmdir'=>'desc','prop'=>'info','gcmprop'=>join('|',$props)));
	sleep(5); // fixed pause before the request (presumably a crude rate limit)
	$result = curl_exec($c);
	curl_close($c);
	unset($c);

	// curl_exec() yields false on a failed transfer; an empty body cannot be parsed either
	if (!is_string($result) || $result === '')
		return false;

	try
	{
		$xml = new SimpleXMLElement($result);
	}
	catch (Exception $e)
	{
		return false; // malformed XML
	}

	// Copy the nodes into a real array so callers can test the result with is_array()
	// and still foreach over it exactly as they would over the SimpleXMLElement.
	$pages = array();
	if (isset($xml->query->pages->page))
	{
		foreach ($xml->query->pages->page as $page)
			$pages[] = $page;
	}

	return $pages;
}

/**
 * Fetch the text of a page's revision through the API.
 *
 * Runs action=query with prop=revisions and rvprop=content for the given
 * title and returns the text of the <rev> element in the reply.
 *
 * @param string $article page title, sent as titles
 * @return string|false the revision text, or false when no handle could be
 *                      created, the transfer failed, the reply was empty or
 *                      not well-formed XML, or it contained no revision
 */
function fetchArticleContent($article)
{
	$c = newCurlObject();
	if ($c === false) return false;
	curl_makePOST($c, array('action'=>'query','prop'=>'revisions','titles'=>$article,'rvprop'=>'content'));
	sleep(12); // fixed pause before the request (presumably a crude rate limit)
	$result = curl_exec($c);
	curl_close($c);
	unset($c);

	// curl_exec() yields false on a failed transfer; an empty body cannot be parsed either
	if (!is_string($result) || $result === '')
		return false;

	try
	{
		$xml = new SimpleXMLElement($result);
	}
	catch (Exception $e)
	{
		return false; // malformed XML
	}

	// no <rev> in the reply (e.g. the page does not exist): report failure rather than ''
	if (!isset($xml->query->pages->page->revisions->rev))
		return false;

	return((string) $xml->query->pages->page->revisions->rev);
}

/**
 * Replace the text of a wiki page.
 *
 * Works in two requests: first the API is asked for an edit token
 * (action=query, prop=info, intoken=edit); then the ordinary edit form is
 * submitted to index.php?action=submit with that token. The form submission
 * is the "hacky solution" this file's header refers to, used because the API
 * offered no edit action.
 *
 * @param string $article page title
 * @param string $content new page text (wpTextbox1)
 * @param string $summary edit summary (wpSummary)
 * @return bool false when a handle could not be created, either transfer
 *              failed, the token reply was empty or not well-formed XML, or
 *              no edit token was returned; true once the form was submitted.
 *              The form response itself is not inspected, so true does not
 *              prove the wiki accepted the edit.
 */
function editWikiArticle($article, $content, $summary)
{
	// Step 1: obtain an edit token for the page.
	$c = newCurlObject();
	if ($c === false) return false;
	curl_makePOST($c, array('action'=>'query','prop'=>'info','titles'=>$article,'intoken'=>'edit'));
	sleep(4); // fixed pause before the request (presumably a crude rate limit)
	$result = curl_exec($c);
	curl_close($c);
	unset($c);

	// curl_exec() yields false on a failed transfer; an empty body cannot be parsed either
	if (!is_string($result) || $result === '')
		return false;

	try
	{
		$xml = new SimpleXMLElement($result);
	}
	catch (Exception $e)
	{
		return false; // malformed XML
	}

	$token = '';
	if (isset($xml->query->pages->page['edittoken']))
		$token = (string)$xml->query->pages->page['edittoken'];
	unset($xml);

	// submitting the form without a token cannot succeed, so stop here
	if ($token === '')
		return false;

	// Step 2: submit the edit form directly.
	$c = newCurlObject('index.php?title='.urlencode($article).'&action=submit'); // hax
	if ($c === false) return false;
	$date = gmdate('YmdHis'); // dumb: "now" is used for both the start time and the edit time
	curl_makePOST($c, array('wpSummary'=>$summary,'wpTextbox1'=>$content,'wpSave'=>'Save page','wpEditToken'=>$token,'wpStarttime'=>$date,'wpEdittime'=>$date));
	sleep(7); // fixed pause before the request (presumably a crude rate limit)
	$result = curl_exec($c);
	curl_close($c);
	unset($c);

	// only a transport failure is detectable here; the response body may legitimately be empty
	if ($result === false)
		return false;

	return true;
}

/**
 * Pull the story name and update number out of a {{current|...}} template.
 *
 * The first "{{current|" / "{{Current|" occurrence is located, extended to the
 * last "}}" on the same line, and searched for "|story=" and "|update="
 * parameters.
 *
 * @param string $text wikitext to scan
 * @return array|false array('story' => string, 'update' => int) where update
 *                     defaults to 1 when the parameter is absent; false when
 *                     there is no such template or it has no story parameter
 */
function parseCurrentTemplate($text)
{
	$found = array();
	if (preg_match('/{{[Cc]urrent\|.*}}/', $text, $found) == 0)
		return false;

	$template = $found[0];
	$parsed = array();

	// the story parameter is mandatory
	$storyHit = array();
	$hits = preg_match('/\|story\=(.*?)(\|.*}}|}})/', $template, $storyHit);
	if (!($hits > 0 and sizeof($storyHit) > 0))
		return false;
	$parsed['story'] = $storyHit[1];

	// the update parameter is optional and falls back to 1
	$updateHit = array();
	$hits = preg_match('/\|update\=(.*?)(\|.*}}|}})/', $template, $updateHit);
	$parsed['update'] = ($hits > 0 and sizeof($updateHit) > 0) ? (int)$updateHit[1] : 1;

	return $parsed;
}

/**
 * Look up the story ID (sid) for a story name, creating a row if none exists.
 *
 * @param string $storyName story name as found in the template
 * @return int|false the existing sid, the auto-generated ID of a freshly
 *                   inserted row, or false when either query fails
 */
function getStoryID($storyName)
{
	$safeName = mysql_real_escape_string($storyName);

	$lookup = mysql_query('SELECT sid FROM updates WHERE story = "'.$safeName.'"');
	if ($lookup === false)
		return false; // can't query the database for the story ID

	if (mysql_num_rows($lookup) != 0)
	{
		// known story: hand back its stored ID
		$row = mysql_fetch_row($lookup);
		mysql_free_result($lookup);

		return (int)$row[0];
	}

	// unknown story: allocate a row for it
	$insert = mysql_query('INSERT INTO updates SET story = "'.$safeName.'"');
	if ($insert === false)
		return false; // can't allocate a new story ID

	return mysql_insert_id();
}

/**
 * Return the stored title of an article.
 *
 * @param int $aid article ID (cast to int before it is put into the query)
 * @return string|false the value of the article column, or false when the
 *                      query fails or no row has that aid
 */
function getArticleName($aid)
{
	$res = mysql_query('SELECT article FROM articles WHERE aid='.(int)$aid);
	if ($res === false)
		return false; // query error

	if (mysql_num_rows($res) == 0)
		return false; // no article with that ID

	$row = mysql_fetch_row($res);
	mysql_free_result($res);

	return $row[0];
}

/**
 * Report whether a story's recorded update number is behind a given one.
 *
 * Despite the name this function writes nothing; it only reads updates.uid
 * for the story and compares it with $uid.
 *
 * @param int $sid story ID
 * @param int $aid article ID (accepted for the signature but not used)
 * @param int $uid update number to compare against
 * @return bool true when the stored update number is lower than $uid; false
 *              when it is already >= $uid, the story ID is unknown, or the
 *              query fails
 */
function updateStory($sid, $aid, $uid)
{
	$q = mysql_query('SELECT uid FROM updates WHERE sid='.(int)$sid);
	if ($q === false or mysql_num_rows($q) == 0)
		return false; // not a valid story ID
	
	$r = mysql_fetch_row($q);
	mysql_free_result($q);
	
	// Cast both sides: callers may pass a numeric string or a SimpleXMLElement attribute,
	// and comparing an int with an object is not a numeric comparison.
	if ((int)$r[0] >= (int)$uid)
		return false; // story is already up to date

	return true;
}

/**
 * Record an article's current story, update number, title and revision.
 *
 * Inserts a row when the article ID is new; otherwise rewrites the row when
 * the stored story ID, update number or revision differs from the values
 * given. The revision is part of that comparison so that notTouched() can
 * recognise the article as unchanged on later runs even when an edit left
 * the story and update number alone.
 *
 * @param string $article article title
 * @param int    $aid     article (page) ID
 * @param int    $sid     story ID
 * @param int    $uid     update number
 * @param int    $rev     latest revision ID of the article
 * @return bool true when the row is (now) current, false when a query fails
 *              or the insert affected no rows
 */
function updateArticle($article, $aid, $sid, $uid, $rev)
{
	$q = mysql_query('SELECT uid,sid,rev FROM articles WHERE aid='.(int)$aid);
	if ($q === false)
		return false; // unable to issue this query
	
	if (mysql_num_rows($q) == 0)
	{
		mysql_free_result($q);
		
		$q = mysql_query('INSERT INTO articles SET aid='.(int)$aid.', sid='.(int)$sid.', uid='.(int)$uid.
							', article="'.mysql_real_escape_string($article).'", rev='.(int)$rev);
		if ($q === false or mysql_affected_rows() == 0)
			return false; // oh my, we're in trouble, out of disk space?
		
		return true;
	}
	
	$r = mysql_fetch_row($q);
	mysql_free_result($q);
	
	if ((int)$r[0] != (int)$uid or (int)$r[1] != (int)$sid or (int)$r[2] != (int)$rev)
	{
		$q = mysql_query('UPDATE articles SET sid='.(int)$sid.', uid='.(int)$uid.', rev='.(int)$rev.
							', article="'.mysql_real_escape_string($article).'" WHERE aid='.(int)$aid);
		if ($q === false)
			return false; // can't issue our query
	}
	
	// row already matched or was just rewritten: success either way
	return true;
}

/**
 * Tell whether an article is already recorded at exactly this revision.
 *
 * This check is really dumb; something smarter should be done here.
 *
 * @param int $aid article ID
 * @param int $rev revision ID
 * @return bool true when the articles table has a row with this aid and rev;
 *              false when it has none or the query fails
 */
function notTouched($aid, $rev)
{
	$sql = 'SELECT aid FROM articles WHERE aid='.(int)$aid.' AND rev='.(int)$rev;

	$res = mysql_query($sql);
	if ($res === false)
		return false;

	$matches = mysql_num_rows($res);
	mysql_free_result($res);

	return ($matches > 0) ? true : false;
}

/**
 * Work out which article carries the highest update number for a story and,
 * if that differs from what the updates table records, store the new values.
 *
 * @param int $sid story ID
 * @return string|false "<update number>/<article title>" (the text to put on
 *                      the story's template page) when the record changed;
 *                      false when nothing needs updating, no article uses the
 *                      story, the article title cannot be resolved, or a
 *                      lookup query fails
 */
function bestUpdate($sid)
{
	$q = mysql_query('SELECT MAX(uid) FROM articles WHERE sid='.(int)$sid);
	if ($q === false)
		return false; // can't issue our query
	
	$r = mysql_fetch_row($q);
	mysql_free_result($q);
	
	$bestUID = $r[0];
	
	if ($bestUID == 0)
		return false; // no articles use this story
	
	$q = mysql_query('SELECT aid FROM articles WHERE uid='.(int)$bestUID.' AND sid='.(int)$sid.' LIMIT 1');
	if ($q === false or mysql_num_rows($q) == 0)
		return false; // some problem with the query
	
	$r = mysql_fetch_row($q);
	mysql_free_result($q);
	
	$bestAID = $r[0];
	
	$q = mysql_query('SELECT uid, aid, story FROM updates WHERE sid='.(int)$sid);
	if ($q === false or mysql_num_rows($q) == 0)
		return false; // can't find the referenced story
	
	$r = mysql_fetch_row($q);
	mysql_free_result($q);
	
	if ($r[0] != $bestUID or $r[1] != $bestAID)
	{
		// Resolve the title before touching the table: without it the returned text would be
		// "<uid>/" with nothing after the slash, and the story would be marked as updated anyway.
		$bestArticle = getArticleName($bestAID);
		if ($bestArticle === false)
			return false; // can't resolve the article title
		
		// Best effort: a failed write is not treated as fatal, the caller still gets the text.
		mysql_query('UPDATE updates SET uid='.(int)$bestUID.', aid='.(int)$bestAID.' WHERE sid='.(int)$sid);
		return($bestUID.'/'.$bestArticle); // return string to insert into story page
	}
	
	return false; // no update needed
}

// Open the bookkeeping database. Every helper in this file queries through the default
// connection, so there is no point in continuing when it cannot be established.
if (mysql_connect('localhost','currentBot','currentBot') === false)
	die("Database connection failed. Giving up.\n\n");
if (mysql_select_db('currentBot') === false)
	die("Database selection failed. Giving up.\n\n");

// Authenticate first; nothing below works without a session.
print("Logging in... ");
if (login() === false)
	die("Login failed. Giving up.\n\n");
print("done.\n");

print("Fetching category members... ");
$articleList = fetchCategoryMembers('Updateable Stories', array('ids','title'));
print("done.\n");

// story ID => story name, filled while walking the articles and processed afterwards
$batch = array();

print("Iterating through category members...\n");
// The page list may arrive as a SimpleXMLElement, which foreach can walk but which is not
// an array; an is_array()-only test would reject every successful fetch. Accept anything
// iterable and bail only on real failures (false, null, ...).
if (!is_array($articleList) and !($articleList instanceof Traversable))
{
	print("-* Looks like category fetch failed. Bailing.\n");
	die();
}

// Walk every page of the category: skip the ones whose revision is already recorded, parse
// the template of the others, refresh the bookkeeping tables and collect the stories whose
// recorded update number is behind.
foreach($articleList as $article)
{
	// The attributes are SimpleXMLElement objects; convert them once to plain scalars.
	$title = (string)$article['title'];
	$pageid = (int)$article['pageid'];
	$lastrev = (int)$article['lastrevid'];
	
	print('-* Fetching '.$title."\n");
	if (notTouched($pageid, $lastrev))
	{
		print(" * * Content hasn't changed, skipping.\n");
		continue;
	}
	$content = fetchArticleContent($title);
	if ($content === false)
	{
		print('Failed to fetch the contents of: '.$title."\n\n");
		continue;
	}
	print(" * Parsing contents\n");
	$curParams = parseCurrentTemplate($content);
	unset($content);
	if ($curParams === false)
	{
		print('Could not find {{current}} template in: '.$title."\n\n");
		continue;
	}
	print(' * * Story: '.$curParams['story']."\n");
	print(' * * Update: '.$curParams['update']."\n");
	print(" * Finding story information\n");
	$sid = getStoryID($curParams['story']);
	if ($sid === false)
	{
		print('Unable to get story ID for story: '.$curParams['story']."\n\n");
		continue;
	}
	print(' * * Story ID: '.$sid."\n");
	print(" * Updating article data\n");
	$r = updateArticle($title, $pageid, $sid, $curParams['update'], $lastrev);
	if ($r === false)
	{
		print('Unable to update article information on article: '.$title."\n\n");
		continue;
	}
	
	print(" * Updating story data\n");
	// updateStory() takes (story ID, article ID, update number) in that order; passing the
	// update number last makes it compare update numbers rather than a page ID.
	if (updateStory($sid, $pageid, $curParams['update']))
		$batch[$sid] = $curParams['story']; // mark story for processing
}
print("...done.\n");

// Second pass: for every story marked above, work out its best update and, when the
// record changed, write the result to the story's Template:Current/<story> page.
print("Iterating through batched stories...\n");
foreach($batch as $sid=>$story)
{
	print('-* Processing story: '.$story.' (sid='.$sid.")\n");
	
	$templateText = bestUpdate($sid);
	if ($templateText === false)
	{
		print(" * No updates needed.\n");
		continue;
	}
	
	print(" * Updating wiki... ");
	$edited = editWikiArticle('Template:Current/'.$story,$templateText,BOTNAME.' v'.BOTVERSION.' updating current story');
	print(($edited === false) ? "failed.\n" : "done.\n");
}
?>