User:Bawolff/sanbox/leadGenerator.js

From Wikinews, the free news source you can write!
Jump to navigation Jump to search

Note: After saving, you may have to bypass your browser's cache to see the changes. Mozilla / Firefox / Safari: hold down Shift while clicking Reload, or press Ctrl-Shift-R (Cmd-Shift-R on Apple Mac); IE: hold Ctrl while clicking Refresh, or press Ctrl-F5; Konqueror: simply click the Reload button, or press F5; Opera users may need to completely clear their cache in Tools→Preferences. — More skins

importScript('user:Bawolff/mwapilib2.js');
// Create the shared Bawolff namespace unless an earlier script already did.
window.Bawolff = window.Bawolff || {};
// http://en.wikinews.org/w/api.php?action=query&prop=revisions&titles=template:Lead%20article%201|template:Lead%20article%202|template:Lead%20article%203|template:Lead%20article%204|template:Lead%20article%205&rvprop=timestamp|content
//Builds the lead-template wikitext for an article and passes it on to a callback.
//Usage: Bawolff.leadGen(pageName, 1, alert)
// title:         name of the article page to fetch
// leadNumb:      which lead slot to generate (1-5)
// callback:      your callback; it is lifted onto the chain together with title
// summaryMethod: optional numeric summary mode (0-4), forwarded to Bawolff.leadGen.extract
//NOTE(review): api()/lift()/exec() come from mwapilib2.js, which is not visible here;
//presumably each lift() feeds the previous stage's result to the given function as its
//first argument, followed by the extra arguments listed - confirm against that library.
Bawolff.leadGen = function (title, leadNumb, callback, summaryMethod) {
 var fetchStage = api(title).getPage();
 var extractStage = fetchStage.lift(Bawolff.leadGen.extract, title, summaryMethod);
 var createStage = extractStage.lift(Bawolff.leadGen.create, leadNumb);
 var deliverStage = createStage.lift(callback, title);
 deliverStage.exec();
};

/********
This is a list that maps categories/infoboxes to generic images.

Keys are template (infobox) names or category names; values are image file
names without the File:/Image: namespace prefix. Bawolff.leadGen.extractImg
consults this map only when the page itself contains no image, upper-casing
the first letter of the name before looking it up.

Note: the category map only works with categories explicitly included. It does not count categories included by templates.
Note: this does not consider templates with parameters.
Note: "UK" maps to a flag while "United Kingdom" maps to a location map.

*****/
Bawolff.leadGen.imgMap = { "UK" : "Flag of the United Kingdom.svg",
 "United States": "Flag of the United States.svg",
 "Canada": "Flag of Canada.svg",
 "Computing": "Computer-aj aj ashton 01.svg",
 "Google": "Google logo png.png",
 "Obituary": "Wikinews tag obituary.png",
 "Science and technology infobox": "Science-symbol-2.svg",
 "Science and technology": "Science-symbol-2.svg",
"ASEAN": "LocationASEAN.png", "Afghanistan": "LocationAfghanistan.png", "Albania": "LocationAlbania.png", "Algeria": "LocationAlgeria.png", "Andorra": "LocationAndorra.png", "Angola": "LocationAngola.png", "Antarctica": "LocationAntarctica.png", "Argentina": "LocationArgentina.png", "Armenia": "LocationArmenia.png", "Australia": "LocationAustralia.png", "Austria": "LocationAustria.png", "Bahrain": "LocationBahrain.png", "Bahamas": "LocationBahamas.png", "Barbados": "LocationBarbados.png", "Belarus": "LocationBelarus.png", "Belgium": "LocationBelgium.png", "Bangladesh": "LocationBangladesh.png", "Azerbaijan": "LocationAzerbaijan.png", "Belize": "LocationBelize.png", "Benin": "LocationBenin.png", "Bhutan": "LocationBhutan.png", "Brazil": "LocationBrazil.png", "Brunei": "LocationBrunei.png", "Bolivia": "LocationBolivia.png", "Botswana": "LocationBotswana.png", "Burkina Faso": "LocationBurkinaFaso.png", "Burundi": "LocationBurundi.png", "Bulgaria": "LocationBulgaria.png", "Cameroon": "LocationCameroon.png", "Chad": "LocationChad.png", "Cape Verde": "LocationCapeVerde.png", "Central African Republic": "LocationCentralAfricanRepublic.png", "Chile": "LocationChile.png", "China": "LocationChina.png", "Colombia": "LocationColombia.png", "Comoros": "LocationComoros.png", "Croatia": "LocationCroatia.png", "Costa Rica": "LocationCostaRica.png", "Cuba": "LocationCuba.png", "Cyprus": "LocationCyprus.png", "Czech Republic": "LocationCzechRepublic.png", "Denmark": "LocationDenmark.png", "Djibouti": "LocationDjibouti.png", "Dominica": "LocationDominica.png", "Dominican Republic": "LocationDominicanRepublic.png", "Egypt": "LocationEgypt.png", "Ecuador": "LocationEcuador.png", "East Timor": "LocationEastTimor.png", "El Salvador": "LocationElSalvador.png", "Equatorial Guinea": "LocationEquatorialGuinea.png", "Eritrea": "LocationEritrea.png", "Estonia": "LocationEstonia.png", "Fiji": "LocationFiji.png", "Finland": "LocationFinland.png", "Ethiopia": "LocationEthiopia.png", "France": 
"LocationFrance.png", "Gabon": "LocationGabon.png", "Gambia": "LocationGambia.png", "Georgia": "LocationGeorgia.png", "Germany": "LocationGermany.png", "Ghana": "LocationGhana.png", "Greece": "LocationGreece.png", "Grenada": "LocationGrenada.png", "Guatemala": "LocationGuatemala.png", "Guinea": "LocationGuinea.png", "Guyana": "LocationGuyana.png", "Haiti": "LocationHaiti.png", "Honduras": "LocationHonduras.png", "Hungary": "LocationHungary.png", "Iceland": "LocationIceland.png", "Indonesia": "LocationIndonesia.png", "Iran": "LocationIran.png", "Iraq": "LocationIraq.png", "Isle of Man": "LocationIsleofMan.png", "Italy": "LocationItaly.png", "Israel": "LocationIsrael.png", "Japan": "LocationJapan.png", "Jamaica": "LocationJamaica.png", "Jordan": "LocationJordan.png", "Kazakhstan": "LocationKazakhstan.png", "Kuwait": "LocationKuwait.png", "Kyrgyzstan": "LocationKyrgyzstan.png", "Laos": "LocationLaos.png", "Latvia": "LocationLatvia.png", "Lebanon": "LocationLebanon.png", "Kenya": "LocationKenya.png", "Lesotho": "LocationLesotho.png", "Liberia": "LocationLiberia.png", "Libya": "LocationLibya.png", "Liechtenstein": "LocationLiechtenstein.png", "Lithuania": "LocationLithuania.png", "Luxembourg": "LocationLuxembourg.png", "Madagascar": "LocationMadagascar.png", "Malawi": "LocationMalawi.png", "Maldives": "LocationMaldives.png", "Malaysia": "LocationMalaysia.png", "Mali": "LocationMali.png", "Malta": "LocationMalta.png", "Mauritania": "LocationMauritania.png", "Mexico": "LocationMexico.png", "Mauritius": "LocationMauritius.png", "Moldova": "LocationMoldova.png", "Monaco": "LocationMonaco.png", "Montenegro": "LocationMontenegro.png", "Morocco": "LocationMorocco.png", "Mongolia": "LocationMongolia.png", "Mozambique": "LocationMozambique.png", "Myanmar": "LocationMyanmar.png", "Nepal": "LocationNepal.png", "Namibia": "LocationNamibia.png", "Netherlands": "LocationNetherlands.png", "New Zealand": "LocationNewZealand.png", "Nicaragua": "LocationNicaragua.png", "Nigeria": 
"LocationNigeria.png", "North Korea": "LocationNorthKorea.png", "Niger": "LocationNiger.png", "Norway": "LocationNorway.png", "Oman": "LocationOman.png", "Pakistan": "LocationPakistan.png", "Palestine": "LocationPalestine.png", "Papua New Guinea": "LocationPapuaNewGuinea.png", "Panama": "LocationPanama.png", "Paraguay": "LocationParaguay.png", "Peru": "LocationPeru.png", "Philippines": "LocationPhilippines.png", "Portugal": "LocationPortugal.png", "Poland": "LocationPoland.png", "Qatar": "LocationQatar.png", "Romania": "LocationRomania.png", "Russia": "LocationRussia.png", "Rwanda": "LocationRwanda.png", "Saint Lucia": "LocationSaintLucia.png", "San Marino": "LocationSanMarino.png", "Saudi Arabia": "LocationSaudiArabia.png", "Seychelles": "LocationSeychelles.png", "Serbia": "LocationSerbia.png", "Sierra Leone": "LocationSierraLeone.png", "Senegal": "LocationSenegal.png", "Singapore": "LocationSingapore.png", "Slovakia": "LocationSlovakia.png", "Slovenia": "LocationSlovenia.png", "Somalia": "LocationSomalia.png", "Solomon Islands": "LocationSolomonIslands.png", "South Africa": "LocationSouthAfrica.png", "South Korea": "LocationSouthKorea.png", "Spain": "LocationSpain.png", "Sri Lanka": "LocationSriLanka.png", "Suriname": "LocationSuriname.png", "Sudan": "LocationSudan.png", "Swaziland": "LocationSwaziland.png", "Sweden": "LocationSweden.png", "Switzerland": "LocationSwitzerland.png", "Syria": "LocationSyria.png", "Taiwan": "LocationTaiwan.png", "Tajikistan": "LocationTajikistan.png", "Tanzania": "LocationTanzania.png", "Thailand": "LocationThailand.png", "Togo": "LocationTogo.png", "Tonga": "LocationTonga.png", "Tunisia": "LocationTunisia.png", "Turkmenistan": "LocationTurkmenistan.png", "Turkey": "LocationTurkey.png", "Uganda": "LocationUganda.png", "Ukraine": "LocationUkraine.png", "United Arab Emirates": "LocationUnitedArabEmirates.png", "United Kingdom": "LocationUnitedKingdom.png", "Uruguay": "LocationUruguay.png", "Uzbekistan": "LocationUzbekistan.png", 
"Vatican City": "LocationVaticanCity.png", "Venezuela": "LocationVenezuela.png", "Vietnam": "LocationVietnam.png", "Yemen": "LocationYemen.png", "Zambia": "LocationZambia.png", "Western Sahara": "LocationWesternSahara.png", "Zimbabwe": "LocationZimbabwe.png"}

//Takes the source of a wikipage and returns the name of an image for it,
//without the leading File:/Image: namespace.
// 1. The first [[File:...]] / [[Image:...]] link with a png/svg/jpg/jpeg/gif style
//    extension wins. The name must be terminated by "|" or "]", so a name with
//    several dots (e.g. "Map.svg.png") is returned whole rather than cut at the
//    first extension-looking piece.
// 2. Otherwise parameterless templates ({{Name}}), then explicit categories
//    ([[Category:Name]]), are looked up in Bawolff.leadGen.imgMap.
// 3. If nothing matches, the empty string is returned.
//page: wikitext of the article (string).
Bawolff.leadGen.extractImg = function (page) {
 var imgRegex = /\[\[(?:[iI][mM][aA][gG][eE]\:|[fF][Ii][lL][eE]\:)((?:[^\|\]])*?\.[pPsSjJgG][nNvVpPiI][gGeEfF][gG]?)[\|\]]/;
 var img = imgRegex.exec(page);
 if (img && img.length >= 2) {
  return img[1];
 }

 var infoboxRegex = /\{\{([^|}]*)\}\}/g; //note doesn't match infoboxes w/params
 var categoryRegex = /\[\[[cC]ategory:([^|\]]*)(?:\|[^\]]*)?\]\]/g;

 //Returns the generic image mapped to a template/category name, or "" if none.
 var lookup = function (name) {
  //surrounding whitespace is not part of the name
  name = name.replace(/^\s+|\s+$/g, '');
  name = name.charAt(0).toUpperCase() + name.substring(1);
  //own properties only, so names such as "__proto__" cannot hit Object.prototype
  if (Object.prototype.hasOwnProperty.call(Bawolff.leadGen.imgMap, name) && Bawolff.leadGen.imgMap[name]) {
   return Bawolff.leadGen.imgMap[name];
  }
  return "";
 };

 //Runs a global regex over the page and returns the first mapped image, or "".
 var firstMapped = function (regex) {
  var hit, mapped;
  //old engines reuse regex literal objects between calls without resetting lastIndex.
  regex.lastIndex = 0;
  while ((hit = regex.exec(page))) {
   mapped = lookup(hit[1]);
   if (mapped) {
    return mapped;
   }
  }
  return "";
 };

 //return "Wikinews-logo.png"; //default
 return firstMapped(infoboxRegex) || firstMapped(categoryRegex);
};
//Works out the lead "type" from the tags present in the page's wikitext.
//Valid types are: breaking, special, original, exclusive, urgent or none.
//Only breaking, exclusive (interview tag) and original are detected here;
//special and urgent are never returned. Checks run in the order listed, and
//"none" is the default when no tag is found.
Bawolff.leadGen.extractType = function (page) {
 var detectors = [
  ["breaking", /\{\{[bB]reaking(?: [nN]ews)?\}\}/],
  ["exclusive", /\{\{[iI]nterview(?:\|[^}]*)?\}\}/i],
  ["original", /\{\{[oO]riginal(?: reporting)?(?:\|[^}]*)?\}\}/i]
 ];
 for (var k = 0; k < detectors.length; k++) {
  if (page.match(detectors[k][1])) {
   return detectors[k][0];
  }
 }
 return "none";
};

//Produces a plain-text summary of an article from its wikitext.
//pageText: wikitext of the article.
//method: which summary to take. Must be a number (compared strictly, no type
//conversion performed); anything else behaves like 0.
// 0: 1st sentence
// 1: 1st two sentences
// 2: 1st paragraph
// 3: 1st 250 characters (+ a couple so we don't end in middle of word), or paragraph; "..." is appended
// 4: 1st 500 characters-ish, or paragraph; "..." is appended
//Returns the summary string. If the chosen pattern finds nothing (e.g. the text
//has no sentence-ending period followed by whitespace, or no newline after the
//only paragraph), the first non-blank line is returned instead, or "" if the
//stripped text is blank.
//Throws an Error (after an alert) if the page is a redirect.
Bawolff.leadGen.takeIntro = function (pageText, method) {
 //first test for redirects.
 var isRedirect = pageText.match(/^#redirect\s?\[\[([^\]]*)\]\]/i);
 if (isRedirect) {
  alert("It appears you are trying to use make lead on a redirect page. Please use the real page name instead (" + isRedirect[1] + ").");
  throw new Error("Page is redirect. please manually resolve to: " + isRedirect[1] );
 }

 //Unlabelled external links (the ones the wiki renders as [1]) get no special
 //handling; the extLink replacement below leaves nothing behind for them.
 //intentionally doesn't strip ' chars (bold or italic)
 //as this often marks things with periods (E. coli)
 var fixWLink = /\{\{[wW]\|([^\}]+)\}\}/g;
 //matches templates nested up to four levels deep
 var stripTemplates = /\{\{[^\}\{]*(?:\{\{[^\}\{]*(?:\{\{[^\}\{]*(?:\{\{[^\}\{]*\}\})?\}\})?\}\})?\}\}/g;
 //the img regex looks for the start of the image, then checks for nested internal links, external links, and for the ending ]] in the caption.
 //templates should already be stripped at this point.
 var img = /\[\[[IifF][mMIi][aALl][GgEe][eE]?\:(?:\[\[(?:[^\]]*)\]\]|[^\]]|\](?!\]))*]]/g;

 pageText = pageText.replace(fixWLink, '[[$1]]'); //{{w|x}} -> [[x]] before templates are stripped
 pageText = pageText.replace(stripTemplates, '');
 pageText = pageText.replace(img, '');
 var pipedLink = /\[\[[^\]\|]*\|([^\]\|]*)\]\]/g;
 pageText = pageText.replace(pipedLink, '$1'); //keep only the label
 var normLink = /\[\[([^\]\|]*)\]\]/g;
 pageText = pageText.replace(normLink, '$1');
 var extLink = /\[(?:http|ftp|gopher|irc|https)\:[^\]\s]*\s?([^\]]*)]/g;
 pageText = pageText.replace(extLink, '$1');

 var firstPar;
 var suffix = "";
 switch (method) {
  case 1: //1st 2 sentence or 1st paragraph
   firstPar = /[^\n]+?\s*\.(?=\s)(?:[^\n]+?\s[^\s\.]*\.(?=\s)|(?=\s))/;
   break;
  case 2: //1st paragraph
   firstPar = /[^\n]+?(?=\n)/;
   break;
  case 3: // 1st 250 characters-ish
   firstPar = /[^\n]{2,250}.*?\b/;
   suffix = "...";
   break;
  case 4: //1st 500 characters-ish
   firstPar = /[^\n]{2,500}.*?\b/;
   suffix = "...";
   break;
  default: //aka case 0. 1st sentence
   firstPar = /[^\n]+?\s[^\s\.]*\.(?=\s)/;
   break;
 }

 var found = pageText.match(firstPar);
 if (found) {
  return found[0] + suffix;
 }
 //match() returns null when nothing fits; fall back to the first non-blank line
 //instead of failing with a TypeError.
 var firstLine = pageText.match(/[^\n]*\S[^\n]*/);
 return firstLine ? firstLine[0] : "";
};

//Collects everything needed to render a lead from an article's wikitext.
//pageText: wikitext of the article; pageName: its title;
//summaryMethod: passed straight through to Bawolff.leadGen.takeIntro.
//Returns an object with width, image, title, synopsis, order, edit_this and type.
//edit_this is a placeholder (the original note says the edit link is overridden
//later); Bawolff.leadGen.create in this file reads neither edit_this nor order.
Bawolff.leadGen.extract = function(pageText, pageName, summaryMethod) {
 var lead = {};
 lead.width = '100x100px';
 lead.image = Bawolff.leadGen.extractImg(pageText);
 lead.title = pageName;
 lead.synopsis = Bawolff.leadGen.takeIntro(pageText, summaryMethod);
 lead.order = 'right';
 lead.edit_this = 'Wikinews:Sandbox';
 lead.type = Bawolff.leadGen.extractType(pageText);
 return lead;
};


//Renders a lead object (see Bawolff.leadGen.extract) as "Lead 2.0" template wikitext.
//leadObj:  object providing image, width, type, title and synopsis.
//leadNumb: lead slot; written into the id parameter. When it is exactly the
//          number 1, leadObj.width is overwritten with "150x150px" (the passed
//          object itself is modified).
//Returns the wikitext string.
Bawolff.leadGen.create = function (leadObj, leadNumb) {
 if (leadNumb === 1) {
  leadObj.width = "150x150px"; //ugly i know, but whatever.
 }
 //The opening braces and the doc template name are assembled from separate string
 //pieces, as in the original - presumably so this script's own wiki page never
 //contains a literal template call; keep them split.
 var pieces = [
  '{' + '{Lead 2.0',
  "\n |id=" + leadNumb + " <!-- do not change. Each lead must have its own unique ID -->",
  "\n |image=" + leadObj.image,
  "\n |width=" + leadObj.width,
  "\n |type=" + leadObj.type,
  "\n |title=" + leadObj.title,
  "\n |short_title=",
  "\n |summary=" + leadObj.synopsis,
  "\n}}<noinclude>\n{{" + "Lead article doc}}</noinclude>"
 ];
 return pieces.join('');
};
//Requests the latest revision (timestamp + content) of Template:Lead article 1-5
//and lifts `callback` onto the result, an object with meta info about the current
//leads keyed by template title:
//  { "<template title>": { timestamp: <result of Bawolff.mwapi.parseAPIDate>,
//                          title: <text of the template's |title= parameter> } }
//Side effect: Bawolff.leadGen.oldestLead is set to the title of the template with
//the smallest timestamp.
//Pages that come back without a revision (e.g. a missing template) are left out
//of the object instead of aborting the whole request.
//Probably want to use Bawolff.leadGen.makeLeadTable instead.

Bawolff.leadGen.makeLeadMetaObject = function (callback) {
 var leadObj = function (doc) {
  //Pulls the value of |title= (up to the next pipe, trailing whitespace removed)
  //out of the template source. If there is no |title= the text comes back
  //unchanged apart from the trailing-whitespace trim.
  var exTitle = function (text) {
   text = text.replace(/[\s\S]*?\|title=([^\|]*)[\s\S]*/, '$1');
   text = text.replace(/\s*$/, '');
   return text;
  };
  var pages = doc.getElementsByTagName('page');

  var obj = {}, time, time2 = Infinity, rev, pageTitle, content;
  for (var i = 0; i < pages.length; i++) {
   rev = pages[i].getElementsByTagName('rev')[0];
   if (!rev) {
    continue; //no revision returned for this page
   }
   rev.normalize(); //merge adjacent text nodes so firstChild holds all the content
   pageTitle = pages[i].getAttribute('title');
   time = Bawolff.mwapi.parseAPIDate(rev.getAttribute('timestamp'));
   //This is really ugly...
   //put the oldest lead in a global variable.
   if (time < time2) { //this compares milliseconds after epoch
    Bawolff.leadGen.oldestLead = pageTitle;
    time2 = time;
   }
   content = rev.firstChild ? rev.firstChild.data : ''; //empty revision has no text node
   obj[pageTitle] = {timestamp: time, title: exTitle(content)};
  }
  return obj;
 };
 //the original literal listed prop: 'revisions' twice; once is enough.
 api().makeRequest({action: 'query', prop: 'revisions', titles: 'Template:Lead article 1|Template:Lead article 2|Template:Lead article 3|Template:Lead article 4|Template:Lead article 5', rvprop: 'timestamp|content'}, leadObj).lift(callback).exec();
};


//Title of the lead template with the oldest revision; filled in by
//Bawolff.leadGen.makeLeadMetaObject.
Bawolff.leadGen.oldestLead = null;

//Builds an HTML table (class "wikitable") describing the current leads and passes
//the markup string to `callback`. Columns: lead number, position on the page,
//article title, and age of the lead in whole hours (rounded).
//The article title comes from wiki-editable template text, so it is HTML-escaped
//before being placed in the markup.
Bawolff.leadGen.makeLeadTable = function (callback) {
 //Escapes the characters that are significant in HTML text and attribute values.
 var escapeHtml = function (text) {
  return String(text)
   .replace(/&/g, '&amp;')
   .replace(/</g, '&lt;')
   .replace(/>/g, '&gt;')
   .replace(/"/g, '&quot;');
 };

 var wrapper = function(leadObj) {
  var html = '<table class="wikitable"><caption>Current Leads:</caption>';
  html += '<thead><tr><th>#</th><th>Position</th><th>Article</th><th>Age</th></tr></thead><tbody>';
  for (var i in leadObj) {
   if (leadObj.hasOwnProperty(i)) {
    var numb = i.charAt(i.length - 1); //last character of the template title is the lead number
    html += '<tr><th>';
    html += numb;
    html += '</th><td>';
    html += Bawolff.leadGen.leadToPosition(numb);
    html += '</td><td>';
    html += escapeHtml(leadObj[i].title);
    html += '</td><td>';
    var time = leadObj[i].timestamp.getTime(); //convert to milliseconds
    var d = (new Date).getTime();
    var delta = Math.round((d - time) / (1000*60*60)); //age in hours
    var timeStr;
    if (delta === 1) {
     timeStr = delta + ' hour';
    }
    else {
     timeStr = delta + ' hours';
    }
    html += timeStr;
    html += "</td></tr>";
   }
  }
  html += '</tbody></table>';
  callback(html);
 };
 Bawolff.leadGen.makeLeadMetaObject(wrapper);
};

//Maps a lead number (1-5) to the name of its position on the page.
//numb may be a number or a numeric string.
//Throws an Error for anything that does not compare equal to 1-5.
Bawolff.leadGen.leadToPosition = function (numb) {
 var positions = ["Top", "Middle-left", "Middle-right", "Bottom-left", "Bottom-right"];
 for (var slot = 1; slot <= positions.length; slot++) {
  //double equal sign intentional to convert from string.
  if (numb == slot) {
   return positions[slot - 1];
  }
 }
 throw new Error("Invalid lead number (" + numb +") passed to Bawolff.leadGen.leadToPosition");
};