SQL
AخA
query <- "
SELECT
DATE_FORMAT(mwh.event_timestamp, 'yyyy-MM-dd') as `date`,
mwh.wiki_db AS language,
event_user_text AS user_name,
mwh.revision_id AS revision_id,
topics.topic,
tc.main_topic,
tc.sub_topic,
event_user_registration_timestamp AS user_registration_date,
event_user_revision_count,
CASE WHEN array_contains(revision_tags, 'mobile app edit') THEN 'Mobile app'
WHEN array_contains(revision_tags, 'mobile web edit') THEN 'Mobile web'
ELSE 'Other' END AS platform,
CASE WHEN (ARRAY_CONTAINS(revision_tags,'sectiontranslation') AND revision_parent_id > 0) THEN 'expand an article'
WHEN ((ARRAY_CONTAINS(revision_tags,'contenttranslation') OR
ARRAY_CONTAINS(revision_tags, 'sectiontranslation') OR
ARRAY_CONTAINS(revision_tags,'contenttranslaton-v2')) AND revision_parent_id = 0) THEN 'create new article'
ELSE 'overwrite an article'
END AS translation_activity
FROM
wmf.mediawiki_history mwh
INNER JOIN canonical_data.wikis cd
ON mwh.wiki_db = cd.database_code AND
cd.database_group = 'wikipedia'
INNER JOIN isaacj.article_topics_outlinks topics ON (
cd.database_code = topics.wiki_db
AND mwh.page_id = topics.pageid
-- only include topics with a confidence score above 0.15
AND topics.score >= 0.5)
LEFT JOIN cchen.topic_component tc ON topics.topic = tc.topic
WHERE
mwh.event_entity = 'revision'
AND mwh.event_type = 'create'
-- only content mainspace
AND mwh.page_namespace = 0
AND (ARRAY_CONTAINS(revision_tags, 'sectiontranslation')
OR ARRAY_CONTAINS(revision_tags, 'contenttranslation')
OR ARRAY_CONTAINS(revision_tags, 'contenttranslation-v2'))
-- last snapshot where topic data was available
AND mwh.snapshot = '2022-09'
AND topics.snapshot = '2022-09'
-- last year of available data
AND mwh.event_timestamp BETWEEN '2021-09-01' and '2022-09-31'
"
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.