SQL
x
/*
select count(*) from category where cat_subcats = 0;
select count(distinct cl_to) from categorylinks
join page on cl_from = page_id and page_namespace = 14
where page_title in (select cat_title from category where cat_subcats = 0);
*/
/*
with category_2 as (
select distinct cl_to from categorylinks
join page on cl_from = page_id and page_namespace = 14
where page_title in (select cat_title from category where cat_subcats = 0)
),
category_3 as (
select distinct cl_to from categorylinks
join page on cl_from = page_id and page_namespace = 14
where page_title in (select cl_to from category_2)
)
select count(cat_title) from category where cat_subcats > 0
and cat_title not in (select cl_to from category_2)
and cat_title not in (select cl_to from category_3)
*/
/*
select page_title, category.* from categorylinks
join page on cl_from = page_id and page_namespace = 14
join category on cat_title = cl_to and cat_subcats = 0
-- where cl_to in (select cat_title from category where cat_subcats = 0)
*/
/*
select count(*) from categorylinks
join page on cl_from = page_id and page_namespace = 14;
*/
set max_recursive_iterations=2;
with recursive ancestors (an_from, an_to, path, is_cycle) as (
select page_title, cl_to, CONCAT(page_title, ',', cl_to), FIND_IN_SET(cl_to, page_title) from subcategories
-- join category on page_title = cat_title and cat_subcats = 0
union all
select an_to, cl_to, CONCAT(path, ',', cl_to), FIND_IN_SET(cl_to, path) from subcategories
join ancestors on page_title = an_to and not is_cycle
),
subcategories as (
select replace(page_title, ',', '|') as page_title, replace(cl_to, ',', '|') as cl_to from categorylinks
join page on cl_from = page_id and page_namespace = 14
where page_title not in (
"Noindexed_pages", "Hidden_categories", "Tracking_categories", "Wikipedia_template_categories",
"Container_categories", "Wikipedia_categories", "Commons_category_link_is_on_Wikidata"
)
)
select * from ancestors where is_cycle;
/*
with recursive ancestors (an_from, an_to, path) as (
select cat_title, cl_to, CONCAT(cat_title, ',', cl_to) from subcategories
join category on page_title = cat_title and cat_subcats = 0
union distinct
select an_to, cl_to, CONCAT(an_to, ',', cl_to) from subcategories
join ancestors on page_title = an_to
union distinct
select an_from, cl_to, CONCAT(path, ',', cl_to) from subcategories
join ancestors on page_title = an_to
),
subcategories as (
select page_title, cl_to from categorylinks
join page on cl_from = page_id and page_namespace = 14
limit 100
)
select * from ancestors;
*/
-- select count(cat_title) from category where cat_subcats = 0 and rand() < 0.1;
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.