SQL
x
SET @basecat='Gridiron_football_players';
SET max_recursive_iterations=8; -- current maximum depth for this tree; 2777 categories, 49831 pages
WITH RECURSIVE deepcat (subcat, catpath, depth) AS
(
SELECT @basecat, REPLACE(@basecat, '_', ' '), 0
UNION
SELECT page_title, CONCAT(catpath, ' > ', REPLACE(page_title, '_', ' ')), depth + 1
FROM categorylinks
JOIN page ON page_id = cl_from AND page_namespace = 14
JOIN deepcat ON cl_to = subcat
)
SELECT DISTINCT REPLACE(page_title, '_', ' ') AS title,
page_len,
MIN(depth),
MIN(catpath) OVER (PARTITION BY page_title ORDER BY depth ASC) AS 'category path',
GROUP_CONCAT(DISTINCT els.el_to SEPARATOR ' ') AS 'external links'
FROM page
JOIN categorylinks ON cl_from = page_id
JOIN deepcat ON cl_to = subcat
LEFT JOIN externallinks AS els ON els.el_from = page_id
WHERE page_namespace = 0
AND page_is_redirect = 0
AND page_len <= 1500
GROUP BY page_id
ORDER BY MIN(depth) ASC;
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.