SQL
x
# Execute `query` through wmfdata::query_hive() and keep the result.
cx_topics_data <- wmfdata::query_hive(query)
# Write the result to a CSV file, omitting the row-name column.
write.csv(
  x = cx_topics_data,
  file = "Data/cx_topics_data.csv",
  row.names = FALSE
)
# Read the saved CSV back in. A header row and a comma separator are the
# read.csv() defaults, so only the factor conversion needs to be switched off:
# text columns stay as character vectors.
cx_topics_data <- read.csv(
  "Data/cx_topics_data.csv",
  stringsAsFactors = FALSE
)
# Convert the date column to class Date, parsing it as year-month-day
# (e.g. "2021-03-15").
cx_topics_data$date <- as.Date(
  x = cx_topics_data$date,
  format = "%Y-%m-%d"
)
# Encode main_topic as a factor with a fixed level order.
cx_topics_data$main_topic <- factor(
  cx_topics_data$main_topic,
  levels = c("Geography", "Culture", "History_and_Society", "STEM")
)
# Display names: swap underscores for spaces, which turns
# "History_and_Society" into "History and Society" and leaves the other
# levels unchanged.
levels(cx_topics_data$main_topic) <- gsub(
  "_", " ", levels(cx_topics_data$main_topic),
  fixed = TRUE
)
# Encode platform as a factor with a fixed level order.
# MediaWiki has no dedicated desktop tag, so rows tagged "Other" are shown as
# "Desktop": most of them are desktop edits, although a few rare API-type
# edit tools may be included as well.
cx_topics_data$platform <- factor(
  cx_topics_data$platform,
  levels = c("Mobile web", "Other")
)
# Display names, given in the same order as the levels above.
levels(cx_topics_data$platform) <- c("Mobile Web", "Desktop")
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.