Fork of
Wikimedia incubator stats
by KCVelaga
This query is marked as a draft
This query has been published
by CMyrick-WMF.
SQL
x
WITH base_table AS (
SELECT DISTINCT
rev_id,
rev_actor,
rev_timestamp,
rev_len,
rev_parent_id,
page_namespace,
page_id,
actor_id,
REGEXP_SUBSTR(page_title, 'W[a-z]/[a-z]+') AS prefix,
rc_new_len - rc_old_len AS byte_diff
FROM
revision rev
JOIN
page
ON rev.rev_page = page.page_id
JOIN
actor
ON rev.rev_actor = actor.actor_id
LEFT OUTER JOIN
recentchanges rc
ON rev.rev_id = rc.rc_this_oldid
JOIN
user
ON actor.actor_id = user.user_id
WHERE
page_namespace IN (0, 1, 10, 11, 14, 15, 828, 829) AND
-- explicity remove admins who often make edits across multiple incubating projects
NOT user_name IN ('MF-Warburg', 'Jon Harald Søby', 'Minorax')
HAVING
prefix <> ''
ORDER BY
rev_id DESC)
SELECT
prefix,
SUM(CASE WHEN byte_diff < 0 THEN byte_diff ELSE 0 END) AS bytes_removed_30D,
SUM(CASE WHEN byte_diff >= 0 THEN byte_diff ELSE 0 END) AS bytes_added_30D,
COUNT(DISTINCT actor_id) AS actor_count,
COUNT(DISTINCT rev_id) AS edit_count,
COUNT(DISTINCT page_id) AS pages_count
FROM
base_table
GROUP BY
prefix
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.