SQL
x
WITH articles AS (
SELECT DISTINCT(Article.page_title), Article.page_id, rev_timestamp, rev_len as "InitialSize", Article.page_len as "CurrentSize"
FROM revision_userindex
JOIN page AS Article ON Article.page_id = rev_page AND Article.page_namespace = 0
JOIN page AS Talk ON Article.page_title = Talk.page_title AND Talk.page_namespace = 1
JOIN categorylinks AS TalkCat ON TalkCat.cl_from = Talk.page_id
WHERE rev_parent_id = 0
AND Article.page_is_redirect = 0
AND rev_actor = 482
AND TalkCat.cl_to = "WikiProject_Biography_articles")
AND Article.page_len < 2500
),
contributions AS (
SELECT articles.page_title, articles.page_id, articles.InitialSize, articles.CurrentSize, articles.rev_timestamp, childRev.rev_len as "ChildRevLen", parentRev.rev_len as "ParentRevLen", childRev.rev_actor
FROM articles
LEFT JOIN revision_userindex AS childRev ON page_id = childRev.rev_page
LEFT JOIN revision AS parentRev ON childRev.rev_parent_id = parentRev.rev_id
LEFT JOIN change_tag ON childRev.rev_id = ct_rev_id AND ct_tag_id IN (590, 1, 539, 582) #reverted or undo
WHERE ct_tag_id IS NULL
),
contribution_count AS (
SELECT page_title, page_id, InitialSize, CurrentSize, CAST(CurrentSize AS SIGNED) - CAST(InitialSize AS SIGNED) as Growth, rev_timestamp, SUM(CASE WHEN CAST(ChildRevLen as SIGNED) - CAST(ParentRevLen as SIGNED) > 200 THEN 1 ELSE 0 END) as ContributorCount
FROM contributions
WHERE rev_actor != 482
GROUP BY page_id
),
reference AS (
SELECT DISTINCT(page_id)
FROM articles
JOIN externallinks ON el_from = page_id
WHERE el_to_domain_index NOT IN ("https://org.wikidata.www.", "https://org.archive.web.", "https://com.sports-reference.www.", "https://org.olympedia.www.", "http://com.espncricinfo.www.", "https://com.espncricinfo.www.", "https://com.olympics.", "http://com.cyclingarchives.www.", "https://org.viaf.", "https://net.worldfootball.www.", "https://com.procyclingstats.www.", "https://com.national-football-teams.www.", "https://com.google.www.", "https://org.iaaf.www.", "https://com.imdb.www.", "https://org.isni.", "https://info.d-nb.", "https://com.worldrowing.", "https://gov.loc.id.", "https://com.cricketarchive.", "https://fr.bnf.data.", "https://fr.bnf.catalogue.", "https://org.rsssf.www.", "https://org.worldathletics.www.", "https://info.eu-football.", "http://net.memoire-du-cyclisme.www.", "https://fr.idref.www.", "https://de.deutsche-biographie.www.", "https://se.sok.", "http://com.procyclingstats.www.", "https://cz.olympijskytym.www.", "https://com.fiba.archive.", "https://org.uci.www.", "https://com.rio2016.www.", "https://org.uww.whatsmat.", "https://org.olympic.www.", "https://org.jstor.www.", "https://com.google.scholar.", "https://com.google.translate.", "https://com.basketball-reference.www.", "https://pl.org.bn.dbn.", "https://com.worldaquatics.www.", "https://cz.nkp.aleph.", "https://net.swimrankings.www.", "https://com.fis-ski.www.", "http://nl.bibliotheken.data.", "http://il.org.nli.uli.", "https://nl.cyclebase.www.", "http://es.bne.catalogo.", "https://au.com.olympics.www.", "http://org.worldcat.id.", "http://be.belgianfootball.static.", "https://no.fotball.www.", "https://au.gov.nla.trove.", "https://eu.footballdatabase.www.", "http://net.memoire-du-cyclisme.", "https://nl.rkd.", "https://com.deepl.", "https://com.cqranking.", "https://com.judoinside.www.", "https://com.teamgb.www.", "https://edu.getty.www.", "http://com.omegatiming.", "https://org.sailing.www.", "https://today.archive.", "http://com.eliteprospects.www.", "http://com.databaseolympics.www.", "https://com.google.books.", "http://hu.olimpia.archiv.", "http://com.speedskatingstats.www.", "https://ca.olympic.", "https://eu.speedskatingbase.www.", "https://com.fina-budapest2017.", "https://com.boxrec.", "https://com.eliteprospects.www.", "https://info.speedskatingnews.www.", "https://org.teamengland.", "https://com.omegatiming.www.", "https://com.biathlonresults.", "https://com.thecgf.", "https://org.fie.", "http://org.iaaf.www.", "https://org.issf-sports.www.", "https://no.bibsys.authority.", "https://it.sbn.opac.", "http://com.espncricinfo.stats.", "https://org.iaaf.aws.media.", "https://com.pyeongchang2018.www.", "https://jp.go.ndl.id.", "https://com.proballers.www.", "https://kr.go.nl.lod.", "https://se.kb.libris.", "https://fr.athle.bases.", "https://com.icc-cricket.www.", "https://com.itftennis.www.", "https://io.matchid.deces.", "http://com.london2012.www.", "http://nl.biografischportaal.www.", "https://com.directvelo.www.", "http://org.olympedia.www.", "https://com.eurohockey.www.", "http://com.eurohockey.www.", "https://jp.ac.nii.ci.", "https://es.coe.web.", "https://it.coni.www.", "https://org.fei.www.", "http://uk.co.espn.en.", "https://be.kbr.opac.", "https://de.dfb.www.", "http://ch.uci.www.", "https://org.cwgc.www.", "https://net.soticcloud.data-isaf.", "https://pl.olimpijski.", "https://com.realgm.basketball.", "https://org.ibsf.www.", "https://de.uni-leipzig.iat.www." "http://cn.sports.star.data.", "https://com.possumbility.athletics.", "https://com.european-athletics.www.", "https://org.archive.", "https://com.legacy.www.", "https://com.findagrave.www.", "https://pk.com.pcb.www.", "https://com.hockeydb.www.", "http://com.espncricinfo.static.", "https://org.ijf.www.", "https://org.englandboxing.www.", "http://pl.strefa.amateur-boxing.", "https://org.worldcat.www.", "https://de.teamdeutschland.www.", "https://com.eurobasket.www.", "http://pl.olimpijski.www.", "http://com.nbcolympics.results.", "https://run.arrs.more.", "https://sport.gymnastics.www.", "http://eu.memoire-du-cyclisme.www.", "http://com.tissottiming.www.", "https://org.triathlon.www.", "https://com.britishfencing.www.", "http://com.rsssf.", "https://hr.hoo.www.", "https://com.gc2018.results.", "http://se.sok.", "http://com.worldrowing.www.", "https://sport.worldarchery.", "http://pt.gov.bnportugal.id.", "http://org.iaaf.aws.media.", "http://com.national-football-teams.www.", "https://au.gov.nla.", "https://org.usawaterpolo.", "https://org.toolforge.wikidata-externalid-url.", "https://info.shorttrackonline.www.", "https://com.twitter.", "https://com.tournamentsoftware.bwf.", "https://com.singaporeolympics.www.")
OR (
el_to_domain_index IN ("https://com.espncricinfo.www.", "http://com.espncricinfo.www.", "https://com.google.books.", "http://com.google.books.", "https://com.google.news./newspaper", "http://com.google.news./newspapers")
AND (LEFT(6, el_to_path) IN ("wisden", "story/", "books?", "newspa")
OR el_to_path LIKE "%/content/story/%")
)
)
SELECT page_title, InitialSize, CurrentSize, (CAST(CurrentSize AS SIGNED) - CAST(InitialSize AS SIGNED)) as Growth, rev_timestamp
FROM contribution_count
WHERE page_id NOT EXISTS (
SELECT 1
FROM reference
WHERE reference.page_id = contribution_count.page_id)
AND ContributorCount = 0
ORDER BY page_title ASC
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.