User:Iridibot/Source
Jump to navigation
Jump to search
<?php
/* the list of things that are wrong here:
* 1. ugly.
* 2. not enough error checking
* 3. no lag time checking at all
* 4. UGLY.
* 5. mwapi does not support article editing, using really nasty hacky solution
* 6. we're not using the php return format, but is that a bad thing?
* 7. u-g-l-y.
* 8. extremely limited in scope when they have the capacity to be
* 9. no objective code; might be a good idea to migrate
* 10. code is basically un-reusable (functionally equivalent to 9, but still)
* 11. way too much output by default
* 12. no logs
* 13. gives up really easily, but that's not critical
* 14. at least 4 security concerns which i won't outline here
* 15. yuck. ugly ugly ugly code.
*/
// Base URL that api.php and index.php request paths are appended to.
// HTTPS is used so the login POST (user name and password) and the session
// cookies are not transmitted in clear text.
define('APIBASE', 'https://en.wikinews.org/w/');
// Bot name; appears in the User-Agent header and in edit summaries.
define('BOTNAME', 'iridibot');
// Wiki account credentials sent with action=login. Both are empty here.
define('BOTLOGIN', '');
define('BOTPASSWORD', '');
define('BOTVERSION', '1.02o'); // o=official
// Parameter string that every POST body built by curl_makePOST() starts with.
define('DEFAULTAPIPARAMS', 'format=xml');
/**
 * Create a cURL handle for a path below APIBASE, preconfigured for the bot.
 *
 * The handle is set up to POST, to return the response body from curl_exec()
 * instead of printing it, to omit response headers from that body, to read
 * and write cookies in /tmp/currentBot.cookies, and to send a User-Agent
 * built from BOTNAME, BOTVERSION and BOTLOGIN.
 *
 * @param string $uri path appended to APIBASE (defaults to 'api.php')
 * @return mixed the configured cURL handle, or false if curl_init() failed
 */
function newCurlObject($uri='api.php')
{
    $handle = curl_init(APIBASE.$uri);
    if ($handle === false) {
        return false;
    }

    // The same file serves as cookie source and cookie jar.
    $cookieStore = '/tmp/currentBot.cookies';
    $settings = array(
        CURLOPT_HEADER         => false,
        CURLOPT_POST           => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_COOKIEFILE     => $cookieStore,
        CURLOPT_COOKIEJAR      => $cookieStore,
        CURLOPT_USERAGENT      => BOTNAME.' v'.BOTVERSION.' (as '.BOTLOGIN.')',
    );
    foreach ($settings as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    return $handle;
}
/**
 * Build a POST body from key/value pairs and attach it to a cURL handle.
 *
 * The body starts with DEFAULTAPIPARAMS (when that is non-empty) followed by
 * one "key=value" pair per array entry, all joined with '&'. Values are
 * passed through urlencode(); keys are used verbatim.
 *
 * @param mixed $c cURL handle to configure
 * @param array $p parameters to send; if this is not an array the function
 *                 returns without touching the handle
 * @return void
 */
function curl_makePOST($c, $p)
{
    if (!is_array($p)) {
        return;
    }

    // Seed the list with the default parameters unless they are empty, so
    // the joined string never starts with a stray '&'.
    $segments = array();
    if (strlen(DEFAULTAPIPARAMS) > 0) {
        $segments[] = DEFAULTAPIPARAMS;
    }
    foreach ($p as $name => $value) {
        $segments[] = $name.'='.urlencode($value);
    }

    curl_setopt($c, CURLOPT_POSTFIELDS, implode('&', $segments));
}
/**
 * Log the bot in through the API (action=login) using BOTLOGIN/BOTPASSWORD.
 *
 * CURLOPT_COOKIESESSION is enabled on the handle so cURL ignores session
 * cookies left in the cookie file by an earlier run.
 *
 * @return bool true only when the response contains a <login> element whose
 *              "result" attribute is "Success"; false on any failure
 *              (handle creation, transport error, unparsable response,
 *              or a non-success result)
 */
function login()
{
    $c = newCurlObject();
    if ($c === false) return false;
    curl_setopt($c, CURLOPT_COOKIESESSION, true);
    curl_makePOST($c, array('action'=>'login','lgname'=>BOTLOGIN,'lgpassword'=>BOTPASSWORD));
    sleep(5); // fixed pause before the request goes out
    $result = curl_exec($c);
    curl_close($c);
    unset($c);

    // curl_exec() returns false on transport errors; an empty body cannot be
    // parsed either. SimpleXMLElement would throw on both.
    if (!is_string($result) or $result === '')
        return false;
    try {
        $xml = new SimpleXMLElement($result);
    } catch (Exception $e) {
        return false; // response was not well-formed XML
    }

    // A missing <login> element or attribute casts to '' and so fails.
    return ((string)$xml->login['result'] === 'Success');
}
/**
 * Fetch the pages of a category through the API, newest first.
 *
 * Uses the categorymembers generator (sorted by timestamp, descending)
 * together with prop=info, so each returned <page> element carries the
 * attributes the API emits for that combination.
 *
 * @param string $cat   category name passed as gcmcategory
 * @param array  $props values joined with '|' and passed as gcmprop
 * @return mixed array of SimpleXMLElement <page> nodes (empty when the
 *               response lists no pages), or false when the request fails,
 *               the response is not XML, or the API reports an error
 */
function fetchCategoryMembers($cat, $props)
{
    $c = newCurlObject();
    if ($c === false) return false;
    curl_makePOST($c, array('action'=>'query','generator'=>'categorymembers','gcmcategory'=>$cat,
        'gcmsort'=>'timestamp','gcmdir'=>'desc','prop'=>'info','gcmprop'=>join('|',$props)));
    sleep(5); // fixed pause before the request goes out
    $result = curl_exec($c);
    curl_close($c);
    unset($c);

    // curl_exec() yields false on transport errors; SimpleXMLElement would
    // throw on that (or on any non-XML body), so check first.
    if (!is_string($result) or $result === '')
        return false;
    try {
        $xml = new SimpleXMLElement($result);
    } catch (Exception $e) {
        return false;
    }
    if (isset($xml->error))
        return false; // API-level error response

    // Return a real array: callers test the result with is_array(), which a
    // SimpleXMLElement node list never satisfies.
    $pages = array();
    if (isset($xml->query->pages->page))
    {
        foreach ($xml->query->pages->page as $page)
            $pages[] = $page;
    }
    return $pages;
}
/**
 * Fetch the wikitext of a page's current revision through the API.
 *
 * @param string $article page title passed as "titles"
 * @return mixed the revision text as a string, or false when the request
 *               fails, the response is not XML, or the response contains no
 *               <rev> element for the page
 */
function fetchArticleContent($article)
{
    $c = newCurlObject();
    if ($c === false) return false;
    curl_makePOST($c, array('action'=>'query','prop'=>'revisions','titles'=>$article,'rvprop'=>'content'));
    sleep(12); // fixed pause before the request goes out
    $result = curl_exec($c);
    curl_close($c);
    unset($c);

    // Guard against transport failure and unparsable bodies; both would make
    // SimpleXMLElement throw.
    if (!is_string($result) or $result === '')
        return false;
    try {
        $xml = new SimpleXMLElement($result);
    } catch (Exception $e) {
        return false;
    }

    // No <rev> element means there is no content to hand back; report that
    // as a failure, since callers test for false.
    if (!isset($xml->query->pages->page->revisions->rev))
        return false;
    return((string) $xml->query->pages->page->revisions->rev);
}
/**
 * Overwrite a wiki page with new content.
 *
 * Works in two requests: first an edit token is requested from api.php
 * (prop=info, intoken=edit); then the edit form of index.php is submitted
 * directly (action=submit) with that token, because the edit is not done
 * through the API.
 *
 * @param string $article page title
 * @param string $content new page text (sent as wpTextbox1)
 * @param string $summary edit summary (sent as wpSummary)
 * @return bool false when a handle cannot be created, the token request
 *              fails or yields no token, or the submit request fails at the
 *              transport level or with an HTTP status >= 400; true otherwise.
 *              NOTE(review): true does not prove the edit was saved - the
 *              wiki may answer a rejected edit with a normal page; confirm
 *              whether a stricter check is wanted.
 */
function editWikiArticle($article, $content, $summary)
{
    // Step 1: obtain an edit token for the page.
    $c = newCurlObject();
    if ($c === false) return false;
    curl_makePOST($c, array('action'=>'query','prop'=>'info','titles'=>$article,'intoken'=>'edit'));
    sleep(4); // fixed pause before the request goes out
    $result = curl_exec($c);
    curl_close($c);
    unset($c);
    if (!is_string($result) or $result === '')
        return false; // transport failure or empty reply
    try {
        $xml = new SimpleXMLElement($result);
    } catch (Exception $e) {
        return false; // reply was not well-formed XML
    }
    $token = isset($xml->query->pages->page['edittoken'])
        ? (string)$xml->query->pages->page['edittoken']
        : '';
    unset($xml);
    if ($token === '')
        return false; // submitting without a token cannot succeed

    // Step 2: submit the edit form directly.
    $c = newCurlObject('index.php?title='.urlencode($article).'&action=submit'); // hax
    if ($c === false) return false;
    // Both form timestamps are set to "now".
    // NOTE(review): presumably this sidesteps edit-conflict detection - confirm.
    $date = gmdate('YmdHis'); // dumb
    curl_makePOST($c, array('wpSummary'=>$summary,'wpTextbox1'=>$content,'wpSave'=>'Save page','wpEditToken'=>$token,'wpStarttime'=>$date,'wpEdittime'=>$date));
    sleep(7); // fixed pause before the request goes out
    $result = curl_exec($c);
    // Read the status before closing the handle; 0 stands for "no response".
    $status = ($result === false) ? 0 : (int)curl_getinfo($c, CURLINFO_HTTP_CODE);
    curl_close($c);
    if ($result === false or $status >= 400)
        return false;
    return true;
}
/**
 * Extract the "story" and "update" parameters of a {{Current|...}} template.
 *
 * The template is located with a pattern in which '.' does not match
 * newlines, so the opening "{{Current|" (or "{{current|") and a closing "}}"
 * must be on the same line; the match runs to the last "}}" on that line.
 *
 * @param string $text wikitext to search
 * @return mixed false when no template is found or it has no "|story="
 *               parameter; otherwise array('story' => string,
 *               'update' => int), where update defaults to 1 when the
 *               template has no "|update=" parameter
 */
function parseCurrentTemplate($text)
{
    $found = array();
    if (!preg_match('/{{[Cc]urrent\|.*}}/', $text, $found)) {
        return false;
    }
    $template = $found[0];

    // The story name is mandatory.
    $storyHit = array();
    $storyCount = preg_match('/\|story\=(.*?)(\|.*}}|}})/', $template, $storyHit);
    if ($storyCount < 1 || count($storyHit) < 1) {
        return false;
    }

    // The update number is optional and falls back to 1.
    $updateHit = array();
    $updateCount = preg_match('/\|update\=(.*?)(\|.*}}|}})/', $template, $updateHit);
    $updateNumber = ($updateCount > 0 && count($updateHit) > 0) ? (int)$updateHit[1] : 1;

    return array('story' => $storyHit[1], 'update' => $updateNumber);
}
/**
 * Look up the story ID (sid) for a story name in the "updates" table,
 * inserting a new row for the story when none exists yet.
 *
 * @param string $storyName story name as found in the template
 * @return mixed the existing sid as int, the ID generated by the INSERT for
 *               a new story, or false when either query fails
 */
function getStoryID($storyName)
{
    $name = mysql_real_escape_string($storyName);
    $q = mysql_query('SELECT sid FROM updates WHERE story = "'.$name.'"');
    if ($q === false)
        return false; // can't query the database for the story ID
    $r = mysql_fetch_row($q); // false when the story is not known yet
    mysql_free_result($q);    // release the result on every path
    if ($r === false)
    {
        $q = mysql_query('INSERT INTO updates SET story = "'.$name.'"');
        if ($q === false)
            return false; // can't allocate a new story ID
        return mysql_insert_id();
    }
    return((int)$r[0]);
}
/**
 * Look up the stored title of an article by its article ID.
 *
 * @param mixed $aid article ID (cast to int for the query)
 * @return mixed the "article" column of the matching row, or false when the
 *               query fails or no row has that aid
 */
function getArticleName($aid)
{
    $q = mysql_query('SELECT article FROM articles WHERE aid='.(int)$aid);
    if ($q === false)
        return false; // query error
    $r = mysql_fetch_row($q); // false when there is no such article
    mysql_free_result($q);    // release the result on every path
    if ($r === false)
        return false; // no article with that ID
    return($r[0]);
}
/**
 * Decide whether a story needs processing because an article carries a
 * newer update number than the one recorded for the story.
 *
 * Despite its name this function only reads the "updates" table.
 *
 * @param mixed $sid story ID (cast to int for the query)
 * @param mixed $aid article ID; accepted for signature compatibility but
 *                   not used
 * @param mixed $uid update number found in the article (compared as int)
 * @return bool true when the recorded uid is lower than $uid; false when the
 *              story is already up to date, the sid is unknown, or the
 *              query fails
 */
function updateStory($sid, $aid, $uid)
{
    $q = mysql_query('SELECT uid FROM updates WHERE sid='.(int)$sid);
    if ($q === false)
        return false; // query error
    $r = mysql_fetch_row($q);
    mysql_free_result($q); // release the result on every path
    if ($r === false)
        return false; // not a valid story ID
    // Compare as integers: callers may pass strings or SimpleXML attributes.
    if ((int)$r[0] >= (int)$uid)
        return false; // story is already up to date
    return true;
}
/**
 * Record an article's current story, update number, revision and title in
 * the "articles" table, inserting the row when the article is new.
 *
 * An existing row is rewritten when its story ID, update number or revision
 * differs from the values given. Keeping the revision current matters
 * because notTouched() matches on (aid, rev) to skip unchanged articles.
 *
 * @param mixed $article article title (stored in the "article" column)
 * @param mixed $aid     article ID (page ID)
 * @param mixed $sid     story ID
 * @param mixed $uid     update number from the template
 * @param mixed $rev     revision ID of the article
 * @return bool true when the row is present and current afterwards, false
 *              when a query fails or the INSERT affected no rows
 */
function updateArticle($article, $aid, $sid, $uid, $rev)
{
    $aid = (int)$aid;
    $sid = (int)$sid;
    $uid = (int)$uid;
    $rev = (int)$rev;
    $title = mysql_real_escape_string((string)$article);

    $q = mysql_query('SELECT uid,sid,rev FROM articles WHERE aid='.$aid);
    if ($q === false)
        return false; // unable to issue this query
    $r = mysql_fetch_row($q); // false when the article is not known yet
    mysql_free_result($q);    // release the result on every path

    if ($r === false)
    {
        $q = mysql_query('INSERT INTO articles SET aid='.$aid.', sid='.$sid.', uid='.$uid.
            ', article="'.$title.'", rev='.$rev);
        if ($q === false or mysql_affected_rows() == 0)
            return false; // oh my, we're in trouble, out of disk space?
        return true;
    }

    if ((int)$r[0] != $uid or (int)$r[1] != $sid or (int)$r[2] != $rev)
    {
        $q = mysql_query('UPDATE articles SET sid='.$sid.', uid='.$uid.', rev='.$rev.
            ', article="'.$title.'" WHERE aid='.$aid);
        if ($q === false)
            return false; // can't issue our query
    }
    return true; // row already matched or was updated
}
/**
 * Report whether the "articles" table already holds this article at exactly
 * this revision.
 *
 * this function is really dumb; we should be doing something smarter here.
 *
 * @param mixed $aid article ID (cast to int for the query)
 * @param mixed $rev revision ID (cast to int for the query)
 * @return bool true when at least one matching row exists; false when none
 *              does or the query fails
 */
function notTouched($aid, $rev)
{
    $sql = 'SELECT aid FROM articles WHERE aid='.(int)$aid.' AND rev='.(int)$rev;
    $result = mysql_query($sql);
    if ($result === false) {
        return false;
    }

    $matches = mysql_num_rows($result);
    mysql_free_result($result);

    return $matches > 0;
}
/**
 * Find the article with the highest update number for a story and, when the
 * story's record in "updates" points elsewhere, repoint it.
 *
 * @param mixed $sid story ID (cast to int for the queries)
 * @return mixed the string "<uid>/<article title>" to write to the story
 *               page when the record was changed; false when nothing needs
 *               updating, no article uses the story, the article title
 *               cannot be resolved, or a query fails
 */
function bestUpdate($sid)
{
    $sid = (int)$sid;

    // Highest update number among the story's articles.
    $q = mysql_query('SELECT MAX(uid) FROM articles WHERE sid='.$sid);
    if ($q === false)
        return false; // can't issue our query
    $r = mysql_fetch_row($q);
    mysql_free_result($q);
    $bestUID = ($r === false) ? 0 : (int)$r[0]; // NULL (no rows) becomes 0
    if ($bestUID == 0)
        return false; // no articles use this story

    // One article carrying that update number.
    $q = mysql_query('SELECT aid FROM articles WHERE uid='.$bestUID.' AND sid='.$sid.' LIMIT 1');
    if ($q === false)
        return false; // some problem with the query
    $r = mysql_fetch_row($q);
    mysql_free_result($q);
    if ($r === false)
        return false; // some problem with the query
    $bestAID = (int)$r[0];

    // What the story record currently points at.
    $q = mysql_query('SELECT uid, aid, story FROM updates WHERE sid='.$sid);
    if ($q === false)
        return false; // can't find the referenced story
    $r = mysql_fetch_row($q);
    mysql_free_result($q);
    if ($r === false)
        return false; // can't find the referenced story

    if ((int)$r[0] != $bestUID or (int)$r[1] != $bestAID)
    {
        $bestArticle = getArticleName($bestAID);
        if ($bestArticle === false)
            return false; // without a title the result would be "<uid>/"
        $q = mysql_query('UPDATE updates SET uid='.$bestUID.', aid='.$bestAID.' WHERE sid='.$sid);
        if ($q === false)
            return false; // record not changed, so the change is detected again next run
        return($bestUID.'/'.$bestArticle); // return string to insert into story page
    }
    return false; // no update needed
}
// ---------------------------------------------------------------------------
// Main script: log in, walk the "Updateable Stories" category, record each
// article's story/update number, then rewrite Template:Current/<story> for
// every story whose best update changed.
// ---------------------------------------------------------------------------
if (mysql_connect('localhost','currentBot','currentBot') === false or mysql_select_db('currentBot') === false)
    die("Database connection failed. Giving up.\n\n");
print("Logging in... ");
if (login() === false)
    die("Login failed. Giving up.\n\n");
print("done.\n");
print("Fetching category members... ");
$articleList = fetchCategoryMembers('Updateable Stories', array('ids','title'));
print("done.\n");
$batch = array(); // sid => story name, for stories that need processing
print("Iterating through category members...\n");
// Accept an array as well as any traversable node list (SimpleXMLElement is
// Traversable but never an array).
if ($articleList === false or !(is_array($articleList) or $articleList instanceof Traversable))
{
    print("-* Looks like category fetch failed. Bailing.\n");
    die();
}
foreach($articleList as $article)
{
    // Page attributes arrive as SimpleXML attribute objects; convert once.
    $title = (string)$article['title'];
    $pageid = (int)$article['pageid'];
    $lastrevid = (int)$article['lastrevid'];
    print('-* Fetching '.$title."\n");
    if (notTouched($pageid, $lastrevid))
    {
        print(" * * Content hasn't changed, skipping.\n");
        continue;
    }
    $content = fetchArticleContent($title);
    if ($content === false)
    {
        print('Failed to fetch the contents of: '.$title."\n\n");
        continue;
    }
    print(" * Parsing contents\n");
    $curParams = parseCurrentTemplate($content);
    unset($content);
    if ($curParams === false)
    {
        print('Could not find {{current}} template in: '.$title."\n\n");
        continue;
    }
    print(' * * Story: '.$curParams['story']."\n");
    print(' * * Update: '.$curParams['update']."\n");
    print(" * Finding story information\n");
    $sid = getStoryID($curParams['story']);
    if ($sid === false)
    {
        print('Unable to get story ID for story: '.$curParams['story']."\n\n");
        continue;
    }
    print(' * * Story ID: '.$sid."\n");
    print(" * Updating article data\n");
    $r = updateArticle($title, $pageid, $sid, $curParams['update'], $lastrevid);
    if ($r === false)
    {
        print('Unable to update article information on article: '.$title."\n\n");
        continue;
    }
    print(" * Updating story data\n");
    // Argument order is (sid, article ID, update number).
    if (updateStory($sid, $pageid, $curParams['update']))
        $batch[$sid] = $curParams['story']; // mark story for processing
}
print("...done.\n");
print("Iterating through batched stories...\n");
foreach($batch as $sid=>$story)
{
    print("-* Processing story: $story (sid=$sid)\n");
    $u = bestUpdate($sid);
    if ($u === false)
    {
        print(" * No updates needed.\n");
        continue;
    }
    print(" * Updating wiki... ");
    $r = editWikiArticle('Template:Current/'.$story,$u,BOTNAME.' v'.BOTVERSION.' updating current story');
    if ($r === false)
    {
        print("failed.\n");
    }
    else
    {
        print("done.\n");
    }
}
?>