Fork of Lugnuts Olympian Biographies Limited - All database sources by BilledMammal
This query is marked as a draft This query has been published by BilledMammal.

SQL

x
 
WITH RECURSIVE all_citation_templates (page_title, page_id, page_namespace) AS (
  (
    SELECT page_title, page_id, page_namespace
    FROM page
    WHERE page_title = "Citation_templates"
      AND page_namespace = 14
  )
  UNION
  (
    SELECT child.page_title, child.page_id, child.page_namespace
    FROM page AS child
    JOIN categorylinks as childLinks ON child.page_id = childLinks.cl_from
    JOIN all_citation_templates AS parent ON childLinks.cl_to = parent.page_title
    LEFT JOIN page_props ON childLinks.cl_from = pp_page AND pp_propname = "hiddencat"
    WHERE (child.page_namespace = 10
      OR child.page_namespace = 14)
      AND parent.page_namespace = 14
      AND pp_propname IS NULL
  )
),
articles AS (
  SELECT DISTINCT(Article.page_title), Article.page_id, rev_timestamp, rev_len as "InitialSize", Article.page_len as "CurrentSize"
FROM revision_userindex
JOIN page AS Article ON Article.page_id = rev_page AND Article.page_namespace = 0 
JOIN page AS Talk ON Article.page_title = Talk.page_title AND Talk.page_namespace = 1
JOIN categorylinks AS ArticleCat ON ArticleCat.cl_from = Article.page_id
JOIN categorylinks AS TalkCat ON TalkCat.cl_from = Talk.page_id
WHERE rev_parent_id = 0
    AND Article.page_is_redirect = 0
    AND rev_actor = 482
    AND (TalkCat.cl_to LIKE '%iography%')
    AND (ArticleCat.cl_to LIKE '%ricketers')
    AND Article.page_len < 2500
),
#Excludes templates that are part of the group, but don't indicate the possible presence of a relevant source
all_citation_templates_titles AS (
  SELECT page_title
  FROM all_citation_templates
  WHERE page_namespace = 10
  AND page_title NOT IN ("Cite_web", 
                         "Cricinfo", 
                         "Reflist",
                         "Subscription_required",
                         "Webarchive",
                         "One_source",
                         "More_footnotes_needed",
                         "More_citations_needed",
                         "Link_note",
                         "Family_name_explanation/core",
                         "Commons_category-inline",
                         "Commons_category",
                         "Cite_sports-reference",
                         "BLP_sources",
                         "Cite_news")
),
articledaycount AS (
  SELECT LEFT(rev_timestamp, 8) AS date, COUNT(*) as datecount
    FROM revision_userindex
    JOIN page ON page_id = rev_page
    WHERE rev_parent_id = 0
        AND page_is_redirect = 0
        AND rev_actor = 482
        AND page_namespace = 0
    GROUP BY date
),
contributions AS (
  SELECT articles.page_title, articles.page_id, articles.InitialSize, articles.CurrentSize, articles.rev_timestamp, childRev.rev_len as "ChildRevLen", parentRev.rev_len as "ParentRevLen", childRev.rev_actor
  FROM articles
  LEFT JOIN revision_userindex AS childRev ON page_id = childRev.rev_page
  LEFT JOIN revision AS parentRev ON childRev.rev_parent_id = parentRev.rev_id
  LEFT JOIN change_tag ON childRev.rev_id = ct_rev_id AND ct_tag_id IN (590, 1, 539, 582) #reverted or undo
  WHERE ct_tag_id IS NULL
),
contribution_count AS (
  SELECT page_title, page_id, InitialSize, CurrentSize, CAST(CurrentSize AS SIGNED) - CAST(InitialSize AS SIGNED) as Growth, rev_timestamp, SUM(CASE WHEN CAST(ChildRevLen as SIGNED) - CAST(ParentRevLen as SIGNED) > 200 THEN 1 ELSE 0 END) as ContributorCount
  FROM contributions
  WHERE rev_actor != 482
  GROUP BY page_id
),
reference AS (
  SELECT DISTINCT(page_id)
  FROM articles
  JOIN externallinks ON el_from = page_id
  WHERE el_to_domain_index NOT IN (
    "http://com.google.www.",
    "https://com.google.www.",
    "https://org.jstor.www.",
    "https://org.wikidata.www.", 
    "https://org.archive.web.",
    "https://org.archive.",
    "https://com.google.scholar.",
    "https://com.google.translate.",
    "https://com.deepl.",
    "http://com.espncricinfo.static.",
    "http://com.espncricinfo.www.",
    "https://com.espncricinfo.www.",
    "https://com.cricketarchive."
    )
  OR el_to_domain_index IN (
    "https://com.espncricinfo.www.", 
    "http://com.espncricinfo.www.", 
    "https://com.google.books.", 
    "http://com.google.books.", 
    "https://com.google.news./newspaper", 
    "http://com.google.news./newspapers")
    AND (LEFT(6, el_to_path) IN ("wisden", "story/", "books?", "newspa")
    OR el_to_path LIKE "%/content/story/%"
    )
),
potential_pages AS (
  SELECT page_title, page_id, InitialSize, CurrentSize, Growth, rev_timestamp
  FROM contribution_count
  WHERE page_id NOT IN (SELECT * FROM reference)
  AND ContributorCount = 0
),
citations AS (
  SELECT page_id
  FROM potential_pages
  JOIN templatelinks ON tl_from = page_id
  JOIN linktarget ON tl_target_id = lt_id
  WHERE lt_title IN (SELECT * FROM all_citation_templates_titles)
  OR lt_title IN ("ISBN", "Isbn") #ISBN template can be used seperately from a citation template
)
SELECT page_title, InitialSize, CurrentSize, (CAST(CurrentSize AS SIGNED) - CAST(InitialSize AS SIGNED)) as Growth, rev_timestamp, datecount
FROM potential_pages
JOIN articledaycount ON date = LEFT(rev_timestamp, 8)
WHERE page_id NOT IN (SELECT * FROM citations)
ORDER BY page_title ASC
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.

Checking query status...