Fork of Page creation stats, 2024 by Cryptic
This query is marked as a draft This query has been published by Cryptic.

SQL

x
 
-- this doesn't really need a CSE, but it's a handy way to relabel the columns and make the main query more readable
WITH newpages(np_is_user, np_editcount, np_autopatrolled, np_administrator, np_newpagepatroller) AS
(
  SELECT user_id, COALESCE(user_editcount, 0), autopatrolled.ug_user, administrator.ug_user, newpagepatroller.ug_user
  FROM revision
  JOIN actor_revision ON actor_id = rev_actor
  LEFT JOIN user ON user_id = actor_user
  LEFT JOIN user_groups AS autopatrolled ON autopatrolled.ug_user = actor_user AND autopatrolled.ug_group = 'autoreviewer'
  LEFT JOIN user_groups AS administrator ON administrator.ug_user = actor_user AND administrator.ug_group = 'sysop'
  LEFT JOIN user_groups AS newpagepatroller ON newpagepatroller.ug_user = actor_user AND newpagepatroller.ug_group = 'patroller'
  LEFT JOIN user_groups AS bots ON bots.ug_user = actor_user AND bots.ug_group = 'bot'
  JOIN page ON page_id = rev_page
  WHERE rev_timestamp LIKE '2024%'
    AND rev_parent_id = 0
    AND page_namespace = 0
    AND page_is_redirect = 0
    AND bots.ug_group IS NULL
)
-- straight_join so that this uses the index on revision, which is much faster than the one on page despite the query pessimizer estimating
-- 184m rows for this and only 32m for page
SELECT STRAIGHT_JOIN COUNT(*) AS 'total',
       100 AS '% total',
                      SUM(np_is_user IS NULL) AS   'non-user',
       100/COUNT(*) * SUM(np_is_user IS NULL) AS '% non-user',
                      SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NOT NULL) AS   'autopatrolled',
       100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NOT NULL) AS '% autopatrolled',
                      SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_administrator IS NULL) AS   'non-autopatrolled, non-admin',
       100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_administrator IS NULL) AS '% non-autopatrolled, non-admin',
                      SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_administrator IS NOT NULL) AS   'non-autopatrolled, admin',
       100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_administrator IS NOT NULL) AS '% non-autopatrolled, admin',
                      SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_editcount < 10000) AS   'non-autopatrolled, <10k edits',
       100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_editcount < 10000) AS '% non-autopatrolled, <10k edits',
                      SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_editcount >= 10000) AS   'non-autopatrolled, >=10k edits',
       100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_editcount >= 10000) AS '% non-autopatrolled, >=10k edits',
                      SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_newpagepatroller IS NULL) AS   'non-autopatrolled, non-npp',
       100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_newpagepatroller IS NULL) AS '% non-autopatrolled, non-npp',
                      SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_newpagepatroller IS NOT NULL) AS   'non-autopatrolled, npp',
       100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_newpagepatroller IS NOT NULL) AS '% non-autopatrolled, npp'
FROM newpages;
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.

Checking query status...