SQL
x
-- all_cite_temps: every page in namespace 10 or 14 reachable by following
-- category membership downward from the namespace-14 page "Citation_templates".
-- UNION (rather than UNION ALL) de-duplicates rows, so a page that is reachable
-- through several parent categories is only emitted once.
WITH RECURSIVE all_cite_temps (page_title, page_id, page_namespace) AS (
    (
        -- Anchor row: the root category itself.
        SELECT
            root.page_title,
            root.page_id,
            root.page_namespace
        FROM page AS root
        WHERE root.page_namespace = 14
          AND root.page_title = 'Citation_templates'
    )
    UNION
    (
        -- Recursive step: members of any category already collected.
        SELECT
            member.page_title,
            member.page_id,
            member.page_namespace
        FROM all_cite_temps AS ancestor
        INNER JOIN categorylinks AS member_link
            ON member_link.cl_to = ancestor.page_title
        INNER JOIN page AS member
            ON member.page_id = member_link.cl_from
        -- Anti-join: keep only members that carry no "hiddencat" page property.
        LEFT JOIN page_props AS hidden
            ON hidden.pp_page = member_link.cl_from
           AND hidden.pp_propname = 'hiddencat'
        WHERE ancestor.page_namespace = 14
          AND member.page_namespace IN (10, 14)
          AND hidden.pp_propname IS NULL
    )
),
-- articles: candidate pages for the report. One row per namespace-0,
-- non-redirect page shorter than 2500 bytes (page_len) whose namespace-1
-- counterpart with the same title is in "Sports_and_games_work_group_articles",
-- joined to its parent-less revision(s) (rev_parent_id = 0).
-- NOTE(review): rev_parent_id = 0 is presumably the page-creation revision,
-- and actor 482 is excluded without explanation - confirm which account it is.
articles AS (
    SELECT DISTINCT
        art.page_title,
        art.page_id,
        talk.page_id     AS talk_id,
        first_rev.rev_timestamp,
        first_rev.rev_len AS InitialSize,
        art.page_len      AS CurrentSize,
        first_rev.rev_actor
    FROM page AS art
    INNER JOIN revision_userindex AS first_rev
        ON first_rev.rev_page = art.page_id
    INNER JOIN page AS talk
        ON talk.page_title = art.page_title
       AND talk.page_namespace = 1
    INNER JOIN categorylinks AS talk_cat
        ON talk_cat.cl_from = talk.page_id
    WHERE art.page_namespace = 0
      AND art.page_is_redirect = 0
      AND art.page_len < 2500
      AND first_rev.rev_parent_id = 0
      AND first_rev.rev_actor != 482
      AND talk_cat.cl_to = 'Sports_and_games_work_group_articles'
),
-- all_cites: titles of the namespace-10 pages found by all_cite_temps, minus a
-- hand-maintained list of titles that must not count as a citation.
all_cites AS (
    SELECT cite.page_title
    FROM all_cite_temps AS cite
    WHERE cite.page_namespace = 10
      AND cite.page_title NOT IN (
          'Cite_web',
          'Cricinfo',
          'Reflist',
          'Subscription_required',
          'Webarchive',
          'One_source',
          'More_footnotes_needed',
          'More_citations_needed',
          'Link_note',
          'Family_name_explanation/core',
          'Commons_category-inline',
          'Commons_category',
          'Cite_sports-reference',
          'BLP_sources',
          'Cite_news'
      )
),
-- contributions: each candidate article paired with every one of its revisions
-- that does NOT carry one of the listed change tags, together with the length
-- of that revision and of its parent revision.
-- All joins are LEFT joins, so an article without a matching revision still
-- yields one row (with NULL revision columns).
contributions AS (
    SELECT
        art.page_title,
        art.page_id,
        art.talk_id,
        art.rev_actor      AS creator,
        art.InitialSize,
        art.CurrentSize,
        art.rev_timestamp,
        edit_rev.rev_len   AS ChildRevLen,
        prior_rev.rev_len  AS ParentRevLen,
        edit_rev.rev_actor AS editor
    FROM articles AS art
    LEFT JOIN revision_userindex AS edit_rev
        ON edit_rev.rev_page = art.page_id
    LEFT JOIN revision AS prior_rev
        ON prior_rev.rev_id = edit_rev.rev_parent_id
    -- Anti-join: a revision that has any of these tags matches here and is
    -- then discarded by the IS NULL test below.
    LEFT JOIN change_tag AS revert_tag
        ON revert_tag.ct_rev_id = edit_rev.rev_id
       AND revert_tag.ct_tag_id IN (590, 1, 539, 582) -- reverted or undo
    WHERE revert_tag.ct_tag_id IS NULL
),
-- contribution_count: one row per candidate article with
--   Growth           = CurrentSize - InitialSize (signed, may be negative)
--   ContributorCount = number of revisions made by someone other than the
--                      creator that added more than 200 bytes over their
--                      parent revision. This counts revisions, not distinct
--                      editors.
--
-- The "creator != editor" test lives inside the CASE rather than in a WHERE
-- clause. As a WHERE filter it removed every row of an article that had only
-- ever been edited by its creator, so such articles vanished from this CTE
-- and could never be reported with ContributorCount = 0.
--
-- Every non-aggregated column is listed in GROUP BY so the result does not
-- depend on the server running without ONLY_FULL_GROUP_BY. All of these
-- columns come from the same articles row, so when an article has a single
-- parent-less revision this still yields one group per page.
contribution_count AS (
    SELECT
        page_title,
        page_id,
        creator,
        talk_id,
        InitialSize,
        CurrentSize,
        CAST(CurrentSize AS SIGNED) - CAST(InitialSize AS SIGNED) AS Growth,
        rev_timestamp,
        SUM(
            CASE
                -- A NULL editor or NULL parent length makes the condition
                -- UNKNOWN, which falls through to ELSE 0.
                WHEN creator != editor
                 AND CAST(ChildRevLen AS SIGNED) - CAST(ParentRevLen AS SIGNED) > 200
                THEN 1
                ELSE 0
            END
        ) AS ContributorCount
    FROM contributions
    GROUP BY
        page_title,
        page_id,
        creator,
        talk_id,
        InitialSize,
        CurrentSize,
        rev_timestamp
),
-- reference: ids of candidate articles that have at least one external link
-- counting as a reference. A link counts when
--   (a) its el_to_domain_index is not in the exclusion list below, or
--   (b) its domain is one of a few sites and its path matches a pattern.
--
-- Fixes relative to the previous revision of this list:
--   * 'https://com.google.news.' was split across two physical lines, so the
--     literal contained a line break and could never match; it is rejoined.
--   * Duplicate entries for baseball-reference and australianfootball removed
--     (no behavioural effect).
reference AS (
    SELECT DISTINCT art.page_id
    FROM articles AS art
    INNER JOIN externallinks AS el
        ON el.el_from = art.page_id
    WHERE el.el_to_domain_index NOT IN (
              'https://org.archive.web.',
              'https://org.wikidata.www.',
              'https://com.google.www.',
              'https://com.sports-reference.www.',
              'https://org.olympedia.www.',
              'https://com.soccerway.int.',
              'http://com.espncricinfo.www.',
              'https://org.jstor.www.',
              'https://com.google.scholar.',
              'https://com.baseball-reference.www.',
              'https://com.australianfootball.',
              'https://com.olympics.',
              'https://com.national-football-teams.www.',
              'https://net.worldfootball.www.',
              'https://org.iaaf.www.',
              'http://com.afltables.',
              'https://com.cricketarchive.',
              'https://com.pro-football-reference.www.',
              'http://com.cyclingarchives.www.',
              'https://org.viaf.',
              'http://com.nfl.www.',
              'https://org.worldathletics.www.',
              'http://com.eliteprospects.www.',
              'http://com.hockeydb.www.',
              'https://com.procyclingstats.www.',
              'https://com.edwardbetts.',
              'https://today.archive.',
              'https://com.worldrowing.',
              'https://org.isni.',
              'https://com.google.translate.',
              'https://ru.footballfacts.',
              'https://org.rsssf.www.',
              'https://org.olympic.www.',
              'http://com.newcastlefans.neilbrown.www.',
              'https://com.fis-ski.www.',
              'https://eu.footballdatabase.www.',
              'https://com.itftennis.www.',
              'https://com.google.books.',
              'http://com.barryhugmansfootballers.',
              'https://com.rio2016.www.',
              'https://info.d-nb.',
              'https://gov.loc.id.',
              'http://com.procyclingstats.www.',
              'http://com.eurohockey.www.',
              'http://com.london2012.www.',
              'https://com.seamheads.www.',
              'http://com.justsportsstats.www.',
              'https://com.deepl.',
              'https://com.basketball-reference.www.',
              'https://jp.or.j-league.data.',
              'https://info.eu-football.',
              'https://org.uci.www.',
              'https://com.atptour.www.',
              'http://uk.co.enfa.www.',
              'https://org.paralympic.www.',
              'https://org.uww.whatsmat.',
              'https://com.fiba.archive.',
              'https://de.fussballdaten.www.',
              'https://com.boxrec.',
              'http://org.fivb.www.',
              'https://com.chessgames.www.',
              'http://org.rugbyleagueproject.www.',
              'https://org.webcitation.www.',
              'http://org.olimpbase.www.',
              'https://com.google.news.',
              'http://com.gbrathletics.www.',
              'http://com.worldrowing.www.',
              'https://com.findagrave.www.',
              'https://fr.bnf.data.',
              'https://fr.bnf.catalogue.',
              'http://uk.co.espn.en.',
              'https://com.judoinside.www.',
              'https://com.uefa.www.',
              'https://com.fide.ratings.',
              'https://se.sok.',
              'http://com.databaseolympics.www.',
              'https://com.fifa.www.',
              'http://net.memoire-du-cyclisme.www.',
              'https://org.archive.',
              'https://com.365chess.www.',
              'http://com.eurohandball.www.',
              'https://fr.idref.www.',
              'http://com.eliteprospects.',
              'https://com.thecgf.',
              'https://com.worldaquatics.www.',
              'https://cz.olympijskytym.www.',
              'http://de.kicker.www.',
              'http://com.squashinfo.www.',
              'http://pl.90minut.www.',
              'https://ar.com.bdfa.www.',
              'http://com.todor66.www.',
              'https://org.sailing.www.',
              'https://com.soccerbase.www.',
              'http://nl.bibliotheken.data.',
              'http://com.profootballarchives.www.'
          )
       OR (
              -- NOTE(review): every other el_to_domain_index literal in this
              -- CTE ends at the host's trailing dot. The two google.news
              -- entries below also carry a path fragment ("/newspaper",
              -- "/newspapers") and so probably never match - confirm and
              -- move the path part into the el_to_path test if so.
              el.el_to_domain_index IN (
                  'https://com.espncricinfo.www.',
                  'http://com.espncricinfo.www.',
                  'https://com.google.books.',
                  'http://com.google.books.',
                  'https://com.google.news./newspaper',
                  'http://com.google.news./newspapers'
              )
              -- NOTE(review): if el_to_path is stored with a leading "/",
              -- none of the six-character prefixes below can match and they
              -- would need to be '/wisden', '/story/', '/books?', '/newspa'
              -- compared against LEFT(el_to_path, 7) - verify against the
              -- externallinks schema before changing.
          AND (
                  LEFT(el.el_to_path, 6) IN ('wisden', 'story/', 'books?', 'newspa')
               OR el.el_to_path LIKE '%/content/story/%'
              )
          )
),
-- citations: ids of candidate articles that transclude at least one template
-- from all_cites, or the ISBN / Isbn template.
--
-- all_cites only holds titles of namespace-10 pages, so the link target is
-- required to be in namespace 10 as well. Without that test a link target
-- with the same title in a different namespace would be counted as a
-- citation template.
citations AS (
    SELECT DISTINCT art.page_id
    FROM articles AS art
    INNER JOIN templatelinks AS tl
        ON tl.tl_from = art.page_id
    INNER JOIN linktarget AS lt
        ON lt.lt_id = tl.tl_target_id
    WHERE lt.lt_namespace = 10
      AND (
              EXISTS (
                  SELECT 1
                  FROM all_cites
                  WHERE all_cites.page_title = lt.lt_title
              )
           OR lt.lt_title IN ('ISBN', 'Isbn')
          )
)
-- Final report: candidate articles with no qualifying external link
-- (reference), no citation template (citations) and a ContributorCount of 0,
-- listed alphabetically with current size, the timestamp of the parent-less
-- revision and the name of the actor who made it.
SELECT
    cc.page_title,
    cc.CurrentSize,
    cc.rev_timestamp,
    creator_actor.actor_name
FROM contribution_count AS cc
INNER JOIN actor AS creator_actor
    ON creator_actor.actor_id = cc.creator
WHERE cc.ContributorCount = 0
  AND NOT EXISTS (
          SELECT 1
          FROM reference AS ref
          WHERE ref.page_id = cc.page_id
      )
  AND NOT EXISTS (
          SELECT 1
          FROM citations AS cit
          WHERE cit.page_id = cc.page_id
      )
ORDER BY cc.page_title ASC
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.