Fork of
Page creation stats, 2024
by Cryptic
This query is marked as a draft
This query has been published
by Cryptic.
SQL
x
-- this doesn't really need a CSE, but it's a handy way to relabel the columns and make the main query more readable
WITH newpages(np_is_user, np_editcount, np_autopatrolled, np_administrator, np_newpagepatroller) AS
(
SELECT user_id, COALESCE(user_editcount, 0), autopatrolled.ug_user, administrator.ug_user, newpagepatroller.ug_user
FROM revision
JOIN actor_revision ON actor_id = rev_actor
LEFT JOIN user ON user_id = actor_user
LEFT JOIN user_groups AS autopatrolled ON autopatrolled.ug_user = actor_user AND autopatrolled.ug_group = 'autoreviewer'
LEFT JOIN user_groups AS administrator ON administrator.ug_user = actor_user AND administrator.ug_group = 'sysop'
LEFT JOIN user_groups AS newpagepatroller ON newpagepatroller.ug_user = actor_user AND newpagepatroller.ug_group = 'patroller'
LEFT JOIN user_groups AS bots ON bots.ug_user = actor_user AND bots.ug_group = 'bot'
JOIN page ON page_id = rev_page
WHERE rev_timestamp LIKE '2024%'
AND rev_parent_id = 0
AND page_namespace = 0
AND page_is_redirect = 0
AND bots.ug_group IS NULL
)
-- straight_join so that this uses the index on revision, which is much faster than the one on page despite the query pessimizer estimating
-- 184m rows for this and only 32m for page
SELECT STRAIGHT_JOIN COUNT(*) AS 'total',
100 AS '% total',
SUM(np_is_user IS NULL) AS 'non-user',
100/COUNT(*) * SUM(np_is_user IS NULL) AS '% non-user',
SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NOT NULL) AS 'autopatrolled',
100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NOT NULL) AS '% autopatrolled',
SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_administrator IS NULL) AS 'non-autopatrolled, non-admin',
100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_administrator IS NULL) AS '% non-autopatrolled, non-admin',
SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_administrator IS NOT NULL) AS 'non-autopatrolled, admin',
100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_administrator IS NOT NULL) AS '% non-autopatrolled, admin',
SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_editcount < 10000) AS 'non-autopatrolled, <10k edits',
100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_editcount < 10000) AS '% non-autopatrolled, <10k edits',
SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_editcount >= 10000) AS 'non-autopatrolled, >=10k edits',
100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_editcount >= 10000) AS '% non-autopatrolled, >=10k edits',
SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_newpagepatroller IS NULL) AS 'non-autopatrolled, non-npp',
100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_newpagepatroller IS NULL) AS '% non-autopatrolled, non-npp',
SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_newpagepatroller IS NOT NULL) AS 'non-autopatrolled, npp',
100/COUNT(*) * SUM(np_is_user IS NOT NULL AND np_autopatrolled IS NULL AND np_newpagepatroller IS NOT NULL) AS '% non-autopatrolled, npp'
FROM newpages;
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.