Fork of
Pages where title-with-en-dashes and title-with-ASCII-hypen aren't the same (ignoring fragments)
by Anomie
This query is marked as a draft
This query has been published
by Pppery.
SQL
x
SELECT
CONCAT(CASE p1.page_namespace & ~1 WHEN 0 THEN '' WHEN 4 THEN 'Wikipedia' WHEN 6 THEN 'File' WHEN 8 THEN 'MediaWiki' WHEN 10 THEN 'Template' WHEN 12 THEN 'Help' WHEN 100 THEN 'Portal' WHEN 108 THEN 'Book' WHEN 710 THEN 'TimedText' ELSE '???' END, CASE WHEN p1.page_namespace=0 THEN '' WHEN p1.page_namespace=1 THEN 'Talk:' WHEN p1.page_namespace & 1 THEN ' talk:' ELSE ':' END, REPLACE(p1.page_title,'_',' ')) AS fancyTitle,
CONCAT(CASE r1.rd_namespace & ~1 WHEN 0 THEN '' WHEN 4 THEN 'Wikipedia' WHEN 6 THEN 'File' WHEN 8 THEN 'MediaWiki' WHEN 10 THEN 'Template' WHEN 12 THEN 'Help' WHEN 100 THEN 'Portal' WHEN 108 THEN 'Book' WHEN 710 THEN 'TimedText' ELSE '???' END, CASE WHEN r1.rd_namespace=0 THEN '' WHEN r1.rd_namespace=1 THEN 'Talk:' WHEN r1.rd_namespace & 1 THEN ' talk:' ELSE ':' END, REPLACE(r1.rd_title,'_',' '), IF(r1.rd_fragment!='',CONCAT('#', r1.rd_fragment), '')) AS fancyTarget,
CONCAT(CASE p2.page_namespace & ~1 WHEN 0 THEN '' WHEN 4 THEN 'Wikipedia' WHEN 6 THEN 'File' WHEN 8 THEN 'MediaWiki' WHEN 10 THEN 'Template' WHEN 12 THEN 'Help' WHEN 100 THEN 'Portal' WHEN 108 THEN 'Book' WHEN 710 THEN 'TimedText' ELSE '???' END, CASE WHEN p2.page_namespace=0 THEN '' WHEN p2.page_namespace=1 THEN 'Talk:' WHEN p2.page_namespace & 1 THEN ' talk:' ELSE ':' END, REPLACE(p2.page_title,'_',' ')) AS asciiTitle,
CONCAT(CASE r2.rd_namespace & ~1 WHEN 0 THEN '' WHEN 4 THEN 'Wikipedia' WHEN 6 THEN 'File' WHEN 8 THEN 'MediaWiki' WHEN 10 THEN 'Template' WHEN 12 THEN 'Help' WHEN 100 THEN 'Portal' WHEN 108 THEN 'Book' WHEN 710 THEN 'TimedText' ELSE '???' END, CASE WHEN r2.rd_namespace=0 THEN '' WHEN r2.rd_namespace=1 THEN 'Talk:' WHEN r2.rd_namespace & 1 THEN ' talk:' ELSE ':' END, REPLACE(r2.rd_title,'_',' '), IF(r2.rd_fragment!='',CONCAT('#', r2.rd_fragment), '')) AS asciiTarget
FROM page as p1
JOIN page AS p2 ON ( p1.page_namespace = p2.page_namespace AND REGEXP_REPLACE( CONVERT(p1.page_title USING utf8), '[–]', '-' ) = p2.page_title )
LEFT JOIN redirect AS r1 ON(r1.rd_from=p1.page_id)
LEFT JOIN redirect AS r2 ON(r2.rd_from=p2.page_id)
WHERE p1.page_title LIKE '%–%'
AND p1.page_namespace NOT IN (2,3,14,15,118,119,446,447,828,829,2300,2301,2302,2303,2600,2601)
AND (
COALESCE(r2.rd_namespace,p2.page_namespace) != COALESCE(r1.rd_namespace,p1.page_namespace)
OR COALESCE(r2.rd_title,p2.page_title) != COALESCE(r1.rd_title,p1.page_title)
)
and p1.page_namespace=0
ORDER BY p1.page_namespace, p1.page_title
LIMIT 10000;
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.