Fork of Lugnuts underreferenced sports biographies for GSCC by BilledMammal
This query is marked as a draft. This query has been published by BilledMammal.

SQL

x
 
-- all_cite_temps: every page reachable by walking category membership
-- downwards from the category page titled "Citation_templates".
-- Only pages in namespace 10 or 14 are collected (in MediaWiki these are the
-- Template and Category namespaces).
WITH RECURSIVE all_cite_temps (page_title, page_id, page_namespace) AS (
  -- Anchor member: the root category page itself.
  (
    SELECT root.page_title, root.page_id, root.page_namespace
    FROM page AS root
    WHERE root.page_namespace = 14
      AND root.page_title = "Citation_templates"
  )
  -- UNION (rather than UNION ALL) discards rows already produced, so a page
  -- reached through several parents is emitted only once.
  UNION
  -- Recursive member: pages that are categorised under a category found so far.
  (
    SELECT member.page_title, member.page_id, member.page_namespace
    FROM page AS member
    JOIN categorylinks AS membership ON membership.cl_from = member.page_id
    JOIN all_cite_temps AS ancestor ON ancestor.page_title = membership.cl_to
    -- Anti-join: keep only members that carry no "hiddencat" page property.
    LEFT JOIN page_props ON pp_page = membership.cl_from AND pp_propname = "hiddencat"
    WHERE member.page_namespace IN (10, 14)
      AND ancestor.page_namespace = 14
      AND pp_propname IS NULL
  )
),
-- articles: candidate pages for the report. One row per parentless revision
-- (rev_parent_id = 0) of a namespace-0, non-redirect page shorter than 2500
-- bytes, whose same-titled namespace-1 page is in the category
-- "Sports_and_games_work_group_articles".
-- rev_len of that parentless revision is exposed as InitialSize and the page's
-- current page_len as CurrentSize.
articles AS (
  -- DISTINCT applies to the whole row, not just to the first column.
  SELECT DISTINCT
    main_page.page_title,
    main_page.page_id,
    talk_page.page_id AS talk_id,
    rev_timestamp,
    rev_len AS InitialSize,
    main_page.page_len AS CurrentSize,
    rev_actor
  FROM revision_userindex
  JOIN page AS main_page
    ON main_page.page_id = rev_page
   AND main_page.page_namespace = 0
  JOIN page AS talk_page
    ON talk_page.page_title = main_page.page_title
   AND talk_page.page_namespace = 1
  JOIN categorylinks AS talk_cat
    ON talk_cat.cl_from = talk_page.page_id
  WHERE rev_parent_id = 0
    -- NOTE(review): 482 is a hard-coded actor id whose identity is not shown
    -- here; presumably a creator that is deliberately skipped. Confirm.
    AND rev_actor != 482
    AND main_page.page_is_redirect = 0
    AND main_page.page_len < 2500
    AND talk_cat.cl_to = "Sports_and_games_work_group_articles"
),
-- all_cites: titles of the namespace-10 pages found by all_cite_temps, minus a
-- fixed list of titles that must not count as a citation when they appear on
-- an article.
all_cites AS (
  SELECT page_title
  FROM all_cite_temps
  WHERE page_namespace = 10
    AND page_title NOT IN (
      "Cite_web",
      "Cricinfo",
      "Reflist",
      "Subscription_required",
      "Webarchive",
      "One_source",
      "More_footnotes_needed",
      "More_citations_needed",
      "Link_note",
      "Family_name_explanation/core",
      "Commons_category-inline",
      "Commons_category",
      "Cite_sports-reference",
      "BLP_sources",
      "Cite_news"
    )
),
-- contributions: one row per (article, revision of that article), pairing each
-- revision's length with the length of its parent revision so that the size
-- change of every edit can be computed downstream.
-- Revisions carrying any of the listed change tags are dropped.
contributions AS (
  SELECT
    page_title,
    page_id,
    talk_id,
    articles.rev_actor AS creator,
    InitialSize,
    CurrentSize,
    articles.rev_timestamp,
    edit_rev.rev_len AS ChildRevLen,
    prior_rev.rev_len AS ParentRevLen,
    edit_rev.rev_actor AS editor
  FROM articles
  LEFT JOIN revision_userindex AS edit_rev
    ON edit_rev.rev_page = articles.page_id
  -- A revision without a parent finds no match here, leaving ParentRevLen NULL.
  LEFT JOIN revision AS prior_rev
    ON prior_rev.rev_id = edit_rev.rev_parent_id
  -- Anti-join on change_tag: the tag ids were annotated "reverted or undo" by
  -- the query's author. A revision survives only if it has none of them.
  LEFT JOIN change_tag
    ON ct_rev_id = edit_rev.rev_id
   AND ct_tag_id IN (590, 1, 539, 582)
  WHERE ct_tag_id IS NULL
),
-- contribution_count: one row per article with
--   Growth           = CurrentSize - InitialSize (signed, may be negative)
--   ContributorCount = number of revisions made by someone other than the
--                      creator that grew the page by more than 200 bytes
--                      relative to the parent revision. This counts edits,
--                      not distinct editors.
--
-- The "creator != editor" test lives inside the CASE rather than in a WHERE
-- clause. As a WHERE filter it removed every row of an article that only its
-- creator had edited, so that article produced no group at all and could never
-- be reported with ContributorCount = 0.
-- A NULL editor or NULL ParentRevLen makes the CASE condition unknown, which
-- falls through to ELSE 0.
contribution_count AS (
  SELECT
    page_title,
    page_id,
    creator,
    talk_id,
    InitialSize,
    CurrentSize,
    CAST(CurrentSize AS SIGNED) - CAST(InitialSize AS SIGNED) AS Growth,
    rev_timestamp,
    SUM(
      CASE
        WHEN creator != editor
         AND CAST(ChildRevLen AS SIGNED) - CAST(ParentRevLen AS SIGNED) > 200
        THEN 1
        ELSE 0
      END
    ) AS ContributorCount
  FROM contributions
  -- NOTE(review): grouping by page_id alone while selecting other columns
  -- relies on the server not enforcing ONLY_FULL_GROUP_BY; the non-aggregated
  -- columns are taken from an arbitrary row of each group.
  GROUP BY page_id
),
-- reference: ids of articles that have at least one external link counting as
-- a real reference. A link counts when either
--   (a) its domain index is not in the exclusion list below, or
--   (b) its domain is one of a few listed sites and its path matches one of
--       the listed patterns.
--
-- Fix: the literal "https://com.google.news." was split by a raw line break in
-- the middle of the string, so it could never equal a stored value and that
-- domain was not excluded. Two entries that appeared twice in the list
-- ("https://com.baseball-reference.www.", "https://com.australianfootball.")
-- are now listed once; this does not change the result.
reference AS (
  SELECT DISTINCT page_id
  FROM articles
  JOIN externallinks ON el_from = page_id
  WHERE el_to_domain_index NOT IN (
    "https://org.archive.web.",
    "https://org.wikidata.www.",
    "https://com.google.www.",
    "https://com.sports-reference.www.",
    "https://org.olympedia.www.",
    "https://com.soccerway.int.",
    "http://com.espncricinfo.www.",
    "https://org.jstor.www.",
    "https://com.google.scholar.",
    "https://com.baseball-reference.www.",
    "https://com.australianfootball.",
    "https://com.olympics.",
    "https://com.national-football-teams.www.",
    "https://net.worldfootball.www.",
    "https://org.iaaf.www.",
    "http://com.afltables.",
    "https://com.cricketarchive.",
    "https://com.pro-football-reference.www.",
    "http://com.cyclingarchives.www.",
    "https://org.viaf.",
    "http://com.nfl.www.",
    "https://org.worldathletics.www.",
    "http://com.eliteprospects.www.",
    "http://com.hockeydb.www.",
    "https://com.procyclingstats.www.",
    "https://com.edwardbetts.",
    "https://today.archive.",
    "https://com.worldrowing.",
    "https://org.isni.",
    "https://com.google.translate.",
    "https://ru.footballfacts.",
    "https://org.rsssf.www.",
    "https://org.olympic.www.",
    "http://com.newcastlefans.neilbrown.www.",
    "https://com.fis-ski.www.",
    "https://eu.footballdatabase.www.",
    "https://com.itftennis.www.",
    "https://com.google.books.",
    "http://com.barryhugmansfootballers.",
    "https://com.rio2016.www.",
    "https://info.d-nb.",
    "https://gov.loc.id.",
    "http://com.procyclingstats.www.",
    "http://com.eurohockey.www.",
    "http://com.london2012.www.",
    "https://com.seamheads.www.",
    "http://com.justsportsstats.www.",
    "https://com.deepl.",
    "https://com.basketball-reference.www.",
    "https://jp.or.j-league.data.",
    "https://info.eu-football.",
    "https://org.uci.www.",
    "https://com.atptour.www.",
    "http://uk.co.enfa.www.",
    "https://org.paralympic.www.",
    "https://org.uww.whatsmat.",
    "https://com.fiba.archive.",
    "https://de.fussballdaten.www.",
    "https://com.boxrec.",
    "http://org.fivb.www.",
    "https://com.chessgames.www.",
    "http://org.rugbyleagueproject.www.",
    "https://org.webcitation.www.",
    "http://org.olimpbase.www.",
    "https://com.google.news.",
    "http://com.gbrathletics.www.",
    "http://com.worldrowing.www.",
    "https://com.findagrave.www.",
    "https://fr.bnf.data.",
    "https://fr.bnf.catalogue.",
    "http://uk.co.espn.en.",
    "https://com.judoinside.www.",
    "https://com.uefa.www.",
    "https://com.fide.ratings.",
    "https://se.sok.",
    "http://com.databaseolympics.www.",
    "https://com.fifa.www.",
    "http://net.memoire-du-cyclisme.www.",
    "https://org.archive.",
    "https://com.365chess.www.",
    "http://com.eurohandball.www.",
    "https://fr.idref.www.",
    "http://com.eliteprospects.",
    "https://com.thecgf.",
    "https://com.worldaquatics.www.",
    "https://cz.olympijskytym.www.",
    "http://de.kicker.www.",
    "http://com.squashinfo.www.",
    "http://pl.90minut.www.",
    "https://ar.com.bdfa.www.",
    "http://com.todor66.www.",
    "https://org.sailing.www.",
    "https://com.soccerbase.www.",
    "http://nl.bibliotheken.data.",
    "http://com.profootballarchives.www."
  )
  -- Re-admit links on a few otherwise low-value sites when the path looks like
  -- an actual article, book or newspaper scan.
  OR (
    -- NOTE(review): the last two literals contain a path component
    -- ("/newspaper", "/newspapers") and differ from each other; every other
    -- value compared against el_to_domain_index ends at the trailing dot.
    -- They look like they can never match. Confirm against the schema.
    el_to_domain_index IN (
      "https://com.espncricinfo.www.",
      "http://com.espncricinfo.www.",
      "https://com.google.books.",
      "http://com.google.books.",
      "https://com.google.news./newspaper",
      "http://com.google.news./newspapers"
    )
    AND (
      -- NOTE(review): these prefixes have no leading "/". If el_to_path is
      -- stored with one, only the LIKE test below can succeed. Verify.
      LEFT(el_to_path, 6) IN ("wisden", "story/", "books?", "newspa")
      OR el_to_path LIKE "%/content/story/%"
    )
  )
),
-- citations: ids of articles that link (through templatelinks) to a target
-- whose title is one of the all_cites titles, or is "ISBN" / "Isbn".
-- NOTE(review): only lt_title is compared; the link target's namespace is not
-- checked. Confirm that is intended.
citations AS (
  SELECT DISTINCT page_id
  FROM articles
  JOIN templatelinks ON tl_from = page_id
  JOIN linktarget ON lt_id = tl_target_id
  WHERE lt_title IN ("ISBN", "Isbn")
     OR EXISTS (
       SELECT 1
       FROM all_cites
       WHERE all_cites.page_title = linktarget.lt_title
     )
)
-- Final report: articles with no qualifying external link (reference), no
-- qualifying template link (citations) and a ContributorCount of zero, listed
-- alphabetically with their current size, creation timestamp and creator name.
SELECT cc.page_title, cc.CurrentSize, cc.rev_timestamp, actor_name
FROM contribution_count AS cc
JOIN actor ON actor_id = cc.creator
WHERE cc.ContributorCount = 0
  AND NOT EXISTS (SELECT 1 FROM reference WHERE reference.page_id = cc.page_id)
  AND NOT EXISTS (SELECT 1 FROM citations WHERE citations.page_id = cc.page_id)
ORDER BY cc.page_title ASC
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.

Checking query status...