Toggle navigation
Home
New Query
Recent Queries
Discuss
Database tables
Database names
MediaWiki
Wikibase
Replicas browser and optimizer
Login
History
Fork
This query is marked as a draft
This query has been published
by
Mdaniels5757
.
Toggle Highlighting
SQL
DELIMITER ; DROP FUNCTION IF EXISTS urlencode; DELIMITER | CREATE FUNCTION URLENCODE(str VARCHAR(4096) CHARSET utf8) RETURNS VARCHAR(4096) CHARSET utf8 DETERMINISTIC CONTAINS SQL BEGIN -- the individual character we are converting in our loop -- NOTE: must be VARCHAR even though it won't vary in length -- CHAR(1), when used with SUBSTRING, made spaces '' instead of ' ' DECLARE sub VARCHAR(1) CHARSET utf8; -- the ordinal value of the character (i.e. ñ becomes 50097) DECLARE val BIGINT DEFAULT 0; -- the substring index we use in our loop (one-based) DECLARE ind INT DEFAULT 1; -- the integer value of the individual octet of a character being encoded -- (which is potentially multi-byte and must be encoded one byte at a time) DECLARE oct INT DEFAULT 0; -- the encoded return string that we build up during execution DECLARE ret VARCHAR(4096) DEFAULT ''; -- our loop index for looping through each octet while encoding DECLARE octind INT DEFAULT 0; IF ISNULL(str) THEN RETURN NULL; ELSE SET ret = ''; -- loop through the input string one character at a time - regardless -- of how many bytes a character consists of WHILE ind <= CHAR_LENGTH(str) DO SET sub = MID(str, ind, 1); SET val = ORD(sub); -- these values are ones that should not be converted -- see http://tools.ietf.org/html/rfc3986 IF NOT (val BETWEEN 48 AND 57 OR -- 48-57 = 0-9 val BETWEEN 65 AND 90 OR -- 65-90 = A-Z val BETWEEN 97 AND 122 OR -- 97-122 = a-z -- 45 = hyphen, 46 = period, 95 = underscore, 126 = tilde val IN (45, 46, 95, 126)) THEN -- This is not an "unreserved" char and must be encoded: -- loop through each octet of the potentially multi-octet character -- and convert each into its hexadecimal value -- we start with the high octect because that is the order that ORD -- returns them in - they need to be encoded with the most significant -- byte first SET octind = OCTET_LENGTH(sub); WHILE octind > 0 DO -- get the actual value of this octet by shifting it to the right -- so that it is at the lowest byte position - in other words, make -- the octet/byte we are working on the entire number (or in even -- other words, oct will no be between zero and 255 inclusive) SET oct = (val >> (8 * (octind - 1))); -- we append this to our return string with a percent sign, and then -- a left-zero-padded (to two characters) string of the hexadecimal -- value of this octet) SET ret = CONCAT(ret, '%', LPAD(HEX(oct), 2, 0)); -- now we need to reset val to essentially zero out the octet that we -- just encoded so that our number decreases and we are only left with -- the lower octets as part of our integer SET val = (val & (POWER(256, (octind - 1)) - 1)); SET octind = (octind - 1); END WHILE; ELSE -- this character was not one that needed to be encoded and can simply be -- added to our return string as-is SET ret = CONCAT(ret, sub); END IF; SET ind = (ind + 1); END WHILE; END IF; RETURN ret; END; | DELIMITER ; SELECT CONCAT("File:", img_name) AS curr_title, CONCAT("File:", REPLACE(img_name, "%_20", "_")) AS suggested_title, urlencode(CONCAT("https://commons.wikimedia.org/w/index.php?title=Special:MovePage", "&wpOldTitle=File:", img_name, "&wpNewTitle=", CONCAT("File:", REPLACE(img_name, "%_20", "_")), "&wpReason=[[Commons:File_renaming#FR6|Criterion 6]]: fixing character handling issue(s)")) FROM image WHERE img_name LIKE "%\%_20%" LIMIT 10;
By running queries you agree to the
Cloud Services Terms of Use
and you irrevocably agree to release your SQL under
CC0 License
.
Submit Query
Stop Query
All SQL code is licensed under
CC0 License
.
Checking query status...