Fork of
List of URL not in TLD-list
by ⵓ
This query is marked as a draft
This query has been published
by ⵓ.
SQL
AخA
WITH p as (
SELECT page.page_id, page.page_title
FROM page
WHERE page.page_namespace =0
)
, TLDh as (
SELECT DISTINCT e.el_to_domain_index, SUBSTRING_INDEX(SUBSTRING_INDEX(e.el_to_domain_index,'.',1),'/',-1) as TLD
FROM externallinks as e INNER JOIN p
ON p.page_id = e.el_from
WHERE LEFT(e.el_to_domain_index,4) ='http'
AND SUBSTRING_INDEX(SUBSTRING_INDEX(e.el_to_domain_index,'.',1),'/',-1)
NOT IN ('V4','aaa','abb','abbott','abudhabi','ac','academy','accountants','actor','ad','ae','aero','af','afl','africa','ag','agency','ai','airforce','akdn','al','alsace','am','amazon','amsterdam','an','anz','ao','app','apple','aq','ar','archi','army','art','as','asia','associates','at','attorney','au','auction','audi','audio','auspost','auto','aw','aws','ax','axa','az','ba','band','bank','bar','barcelona','barclaycard','barclays','barefoot','bargains','basketball','bayern','bb','bbc','bd','be','beauty','beer','berlin','best','bet','bf','bg','bh','bi','bible','bid','bike','bingo','bio','biz','bj','black','blog','bloomberg','blue','bm','bmw','bn','bnpparibas','bo','boats','bond','book','boston','bot','boutique','box','br','bradesco','bridgestone','broadway','brother','brussels','bs','bt','build','builders','business','buzz','bw','by','bz','bzh','ca','cab','cafe','cam','camera','camp','canon','capetown','capital','cards','care','careers','cars','casa','cash','casino','cat','catholic','cc','cd','center','ceo','cern','cf','cfa','cg','ch','charity','chat','cheap','christmas','chrome','church','ci','citic','city','ck','cl','claims','click','clinic','clothing','cloud','club','clubmed','cm','cn','co','coach','codes','coffee','college','cologne','com','community','company','computer','condos','construction','consulting','contact','contractors','cooking','cool','coop','corsica','country','coupons','courses','cr','credit','cricket','crs','cruises','cu','cv','cw','cx','cy','cymru','cyou','cz','dance','data','date','day','de','deals','delivery','democrat','dental','dentist','desi','design','dev','dhl','diamonds','digital','direct','directory','dj','dk','dm','do','doctor','dog','domains','download','durban','dvag','dz','earth','ec','eco','edeka','edu','edu*','education','ee','eg','email','emerck','energy','engineer','engineering','enterprises','epson','equipment','er','es','estate','et','eu','eus','events','exchange','expert','exposed','express','fage','fail','faith','family','fan','fans','farm','fashion','fi','film','finance','financial','fish','fishing','fit','fitness','fj','fk','flights','florist','flowers','fm','fo','foo','football','forsale','forum','foundation','fr','free','frl','frogans','fun','fund','furniture','futbol','fyi','ga','gal','gallery','game','games','garden','gay','gb','gd','gdn','ge','gent','genting','gf','gg','gh','gi','gifts','gives','gl','glass','gle','global','globo','gm','gmbh','gn','godaddy','gold','golf','goo','goog','google','gop','got','gov','gp','gq','gr','graphics','gratis','green','group','gs','gt','gu','guardian','guide','guitars','guru','gw','gy','hamburg','haus','health','healthcare','help','helsinki','hiphop','hisamitsu','hitachi','hiv','hk','hm','hn','hockey','holdings','holiday','honda','horse','hospital','host','hosting','house','how','hr','hsbc','ht','hu','icu','id','ie','ikano','il','im','imamat','imdb','immo','immobilien','in','inc','industries','info','ing','ink','institute','insurance','insure','int','international','invalid','investments','io','iq','ir','irish','is','ismaili','ist','istanbul','it','java','jcb','je','jetzt','jll','jm','jo','jobs','joburg','jp','kaufen','ke','kg','kh','ki','kim','kitchen','kiwi','km','kn','koeln','komatsu','kp','kpmg','kpn','kr','krd','kred','kw','ky','kyoto','kz','la','land','lat','latrobe','law','lawyer','lb','lc','leclerc','legal','lgbt','li','lidl','life','lighting','limited','link','live','lk','llc','loans','local','lol','london','love','lr','ls','lt','ltd','ltda','lu','luxury','lv','ly','ma','madrid','management','mango','market','marketing','markets','mba','mc','md','me','med','media','melbourne','memorial','men','menu','mg','mh','miami','microsoft','mil','mini','mit','mk','ml','mm','mn','mo','mobi','moe','mom','monash','money','monster','moscow','movie','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','nagoya','name','navy','nc','ne','nec','net','network','neustar','new','news','nf','ng','ngo','nhk','ni','nico','ninja','nl','no','np','nr','nrw','ntt','nu','nyc','nz','observer','okinawa','om','one','ong','onion','onl','online','ooo','open','org','org*','organic','osaka','ovh','pa','page','panasonic','paris','partners','parts','party','pe','pet','pf','pg','ph','pharmacy','photo','photography','photos','physio','pics','pictet','pictures','pink','pioneer','pizza','pk','pl','place','plumbing','plus','pm','pn','poker','porn','post','pr','press','pro','productions','promo','properties','property','ps','pt','pub','pw','pwc','py','qa','quebec','quest','racing','radio','re','realestate','realtor','recipes','red','rehab','reise','reit','ren','rentals','repair','report','republican','rest','restaurant','review','reviews','ricoh','rio','rip','ro','rocks','rodeo','rs','ru','rugby','ruhr','run','rw','rwe','ryukyu','sa','saarland','sale','salon','sandvik','sanofi','sap','saxo','sb','sbi','sbs','sc','sca','scholarships','school','schule','schwarz','science','scot','sd','se','seat','security','sener','services','sex','sexy','sg','sh','sharp','shell','shiksha','shoes','shop','shopping','show','si','site','sk','ski','sky','sl','sm','smart','sn','sncf','so','soccer','social','softbank','software','solar','solutions','sony','soy','space','sport','sr','ss','st','statebank','statefarm','stockholm','storage','store','stream','studio','study','style','su','sucks','supply','support','surf','surgery','sv','swiss','sx','sy','sydney','systems','sz','taipei','tatar','tattoo','tax','taxi','tc','td','team','tech','technology','tel','tennis','test','tf','tg','th','theater','theatre','tickets','tips','tires','tirol','tj','tk','tl','tm','tn','to','today','tokyo','tools','top','toshiba','total','tours','town','toyota','toys','tr','trade','training','travel','trust','tt','tube','tv','tvs','tw','tz','ua','ug','uk','university','uno','uol','us','uy','uz','va','vc','ve','vegas','ventures','vet','vg','vi','viajes','video','villas','vin','vip','vision','vlaanderen','vn','vodka','volvo','vote','voyage','vu','w*','wales','wang','watch','weather','webcam','weber','website','wed','wedding','weir','wf','whoswho','wien','wiki','williamhill','win','wine','work','works','world','ws','wtf','xin','xxx','xyz','yahoo','yandex','ye','yoga','yokohama','youtube','yt','yu','za','zm','zone','zuerich','zw'
,'бг','бел','мон','москва','онлайн','орг','рус','рф','сайт','срб','укр','қаз','հայ','ישראל','קום','الجزائر','امارات','شبكة','قطر','مصر','भारत',
/*'বাংলা','ไทย','გე',*/
'中信','中国','台灣','政务','移动','삼성','한국')
)
SELECT DISTINCT CONCAT( '# \t '
, SUBSTRING_INDEX(SUBSTRING_INDEX(externallinks.el_to_domain_index,'.',1),'/',-1)
, ' {{Edit|1=',p.page_title ,'|2=',p.page_title,'}} \t~<nowiki>\t'
, externallinks.el_to_domain_index ,'\t~\t'
, externallinks.el_to_path ,'\t~</nowiki>\t' )as output
FROM externallinks INNER JOIN p
ON p.page_id = externallinks.el_from
INNER JOIN TLDh
ON externallinks.el_to_domain_index = TLDh.el_to_domain_index
ORDER BY 1;
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.