Created
May 12, 2016 20:56
-
-
Save Aminadav/33d90cb99c26298c48f670b8ffac39c3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- test by do | |
-- select extract_json_value('{"a":["a","2"]}','(/a)') | |
-- common schema | |
delimiter // | |
drop function if exists json_to_xml// | |
create function json_to_xml( | |
json_text text charset utf8 | |
) returns text charset utf8 | |
comment 'Transforms JSON to XML' | |
language SQL | |
deterministic | |
modifies sql data | |
sql security invoker | |
begin | |
declare v_from, v_old_from int unsigned; | |
declare v_token text; | |
declare v_level int; | |
declare v_state, expect_state varchar(255); | |
declare _json_tokens_id int unsigned default 0; | |
declare is_lvalue, is_rvalue tinyint unsigned; | |
declare scope_stack text charset ascii; | |
declare xml text charset utf8; | |
declare xml_nodes, xml_node text charset utf8; | |
set json_text := trim_wspace(json_text); | |
set expect_state := 'object_begin'; | |
set is_lvalue := true; | |
set is_rvalue := false; | |
set scope_stack := ''; | |
set xml_nodes := ''; | |
set xml_node := ''; | |
set xml := ''; | |
get_token_loop: repeat | |
set v_old_from = v_from; | |
call _get_json_token(json_text, v_from, v_level, v_token, 1, v_state); | |
set _json_tokens_id := _json_tokens_id + 1; | |
if v_state = 'whitespace' then | |
iterate get_token_loop; | |
end if; | |
if v_level < 0 then | |
return null; | |
-- call throw('Negative nesting level found in _get_json_tokens'); | |
end if; | |
if v_state = 'start' and scope_stack = '' then | |
leave get_token_loop; | |
end if; | |
if FIND_IN_SET(v_state, expect_state) = 0 then | |
return null; | |
-- call throw(CONCAT('Expected ', expect_state, '. Got ', v_state)); | |
end if; | |
if v_state = 'array_end' and left(scope_stack, 1) = 'o' then | |
return null; | |
-- call throw(CONCAT('Missing "}". Found ', v_state)); | |
end if; | |
if v_state = 'object_end' and left(scope_stack, 1) = 'a' then | |
return null; | |
-- call throw(CONCAT('Missing "]". Found ', v_state)); | |
end if; | |
if v_state = 'alpha' and lower(v_token) not in ('true', 'false', 'null') then | |
return null; | |
-- call throw(CONCAT('Unsupported literal: ', v_token)); | |
end if; | |
set is_rvalue := false; | |
case | |
when v_state = 'object_begin' then set expect_state := 'string', scope_stack := concat('o', scope_stack), is_lvalue := true; | |
when v_state = 'array_begin' then set expect_state := 'string,object_begin', scope_stack := concat('a', scope_stack), is_lvalue := false; | |
when v_state = 'string' and is_lvalue then set expect_state := 'colon', xml_node := v_token; | |
when v_state = 'colon' then set expect_state := 'string,number,alpha,object_begin,array_begin', is_lvalue := false; | |
when FIND_IN_SET(v_state, 'string,number,alpha') and not is_lvalue then set expect_state := 'comma,object_end,array_end', is_rvalue := true; | |
when v_state = 'object_end' then set expect_state := 'comma,object_end,array_end', scope_stack := substring(scope_stack, 2); | |
when v_state = 'array_end' then set expect_state := 'comma,object_end,array_end', scope_stack := substring(scope_stack, 2); | |
when v_state = 'comma' and left(scope_stack, 1) = 'o' then set expect_state := 'string', is_lvalue := true; | |
when v_state = 'comma' and left(scope_stack, 1) = 'a' then set expect_state := 'string,object_begin', is_lvalue := false; | |
end case; | |
set xml_node := unquote(xml_node); | |
if v_state = 'object_begin' then | |
if substring_index(xml_nodes, ',', 1) != '' then | |
set xml := concat(xml, '<', substring_index(xml_nodes, ',', 1), '>'); | |
end if; | |
set xml_nodes := concat(',', xml_nodes); | |
end if; | |
if v_state = 'string' and is_lvalue then | |
if left(xml_nodes, 1) = ',' then | |
set xml_nodes := concat(xml_node, xml_nodes); | |
else | |
set xml_nodes := concat(xml_node, substring(xml_nodes, locate(',', xml_nodes))); | |
end if; | |
end if; | |
if is_rvalue then | |
set xml := concat(xml, '<', xml_node, '>', encode_xml(unquote(v_token)), '</', xml_node, '>'); | |
end if; | |
if v_state = 'object_end' then | |
set xml_nodes := substring(xml_nodes, locate(',', xml_nodes) + 1); | |
if substring_index(xml_nodes, ',', 1) != '' then | |
set xml := concat(xml, '</', substring_index(xml_nodes, ',', 1), '>'); | |
end if; | |
end if; | |
until | |
v_old_from = v_from | |
end repeat; | |
return xml; | |
end; | |
// | |
delimiter ; | |
-- | |
-- | |
-- | |
delimiter // | |
drop function if exists extract_json_value// | |
create function extract_json_value( | |
json_text text charset utf8, | |
xpath text charset utf8 | |
) returns text charset utf8 | |
comment 'Extracts JSON value via XPath' | |
language SQL | |
deterministic | |
modifies sql data | |
sql security invoker | |
begin | |
return ExtractValue(json_to_xml(json_text), xpath); | |
end; | |
// | |
delimiter ; | |
-- | |
-- Trim white space characters on both sides of text. | |
-- As opposed to the standard TRIM() function, which only trims | |
-- strict space characters (' '), trim_wspace() also trims new line, | |
-- tab and backspace characters | |
-- | |
-- example: | |
-- | |
-- SELECT trim_wspace('\n a b c \n ') | |
-- Returns: 'a b c' | |
-- | |
DELIMITER $$ | |
DROP FUNCTION IF EXISTS trim_wspace $$ | |
CREATE FUNCTION trim_wspace(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8 | |
DETERMINISTIC | |
NO SQL | |
SQL SECURITY INVOKER | |
COMMENT 'Trim whitespace characters on both sides' | |
begin | |
declare len INT UNSIGNED DEFAULT 0; | |
declare done TINYINT UNSIGNED DEFAULT 0; | |
if txt IS NULL then | |
return txt; | |
end if; | |
while not done do | |
set len := CHAR_LENGTH(txt); | |
set txt = trim(' ' FROM txt); | |
set txt = trim('\r' FROM txt); | |
set txt = trim('\n' FROM txt); | |
set txt = trim('\t' FROM txt); | |
set txt = trim('\b' FROM txt); | |
if CHAR_LENGTH(txt) = len then | |
set done := 1; | |
end if; | |
end while; | |
return txt; | |
end $$ | |
DELIMITER ; | |
delimiter // | |
set names utf8 | |
// | |
drop procedure if exists _get_json_token; | |
// | |
create procedure _get_json_token( | |
in p_text text charset utf8 | |
, inout p_from int unsigned | |
, inout p_level int | |
, out p_token text charset utf8 | |
, in allow_script_tokens int | |
, inout p_state enum( | |
'alpha' | |
, 'alphanum' | |
, 'colon' | |
, 'comma' | |
, 'decimal' | |
, 'error' | |
, 'integer' | |
, 'number' | |
, 'minus' | |
, 'object_begin' | |
, 'object_end' | |
, 'array_begin' | |
, 'array_end' | |
, 'start' | |
, 'string' | |
, 'whitespace' | |
) | |
) | |
comment 'Reads a token according to lexical rules for JSON' | |
language SQL | |
deterministic | |
no sql | |
sql security invoker | |
begin | |
declare v_length int unsigned default character_length(p_text); | |
declare v_char, v_lookahead, v_quote_char varchar(1) charset utf8; | |
declare v_from int unsigned; | |
declare negative_number bool default false; | |
if p_from is null then | |
set p_from = 1; | |
end if; | |
if p_level is null then | |
set p_level = 0; | |
end if; | |
if p_state = 'object_end' then | |
set p_level = p_level - 1; | |
end if; | |
if p_state = 'array_end' and allow_script_tokens then | |
set p_level = p_level - 1; | |
end if; | |
set v_from = p_from; | |
set p_token = '' | |
, p_state = 'start'; | |
my_loop: while v_from <= v_length do | |
set v_char = substr(p_text, v_from, 1) | |
, v_lookahead = substr(p_text, v_from+1, 1) | |
; | |
if v_char = '-' then | |
set negative_number := true, v_from = v_from + 1; | |
iterate my_loop; | |
end if; | |
state_case: begin case p_state | |
when 'error' then | |
set p_from = v_length; | |
leave state_case; | |
when 'start' then | |
case | |
when v_char = '-' then | |
set p_state = 'minus', v_from = v_from + 1; | |
when v_char between '0' and '9' then | |
set p_state = 'integer'; | |
when v_char between 'A' and 'Z' | |
or v_char between 'a' and 'z' | |
or v_char = '_' then | |
set p_state = 'alpha'; | |
when v_char = ' ' then | |
set p_state = 'whitespace' | |
, v_from = v_length - character_length(ltrim(substring(p_text, v_from))) | |
; | |
leave state_case; | |
when v_char in ('\t', '\n', '\r') then | |
set p_state = 'whitespace'; | |
when v_char = '"' then | |
set p_state = 'string', v_quote_char = v_char; | |
when v_char = '.' then | |
if substr(p_text, v_from + 1, 1) between '0' and '9' then | |
set p_state = 'decimal', v_from = v_from + 1; | |
else | |
set p_state = 'error'; | |
leave my_loop; | |
end if; | |
when v_char = ',' then | |
set p_state = 'comma', v_from = v_from + 1; | |
leave my_loop; | |
when v_char = ':' then | |
set p_state = 'colon', v_from = v_from + 1; | |
leave my_loop; | |
when v_char = '{' then | |
set p_state = 'object_begin', v_from = v_from + 1, p_level = p_level + 1; | |
leave my_loop; | |
when v_char = '}' then | |
set p_state = 'object_end', v_from = v_from + 1; | |
leave my_loop; | |
when v_char = '[' then | |
set p_state = 'array_begin', v_from = v_from + 1, p_level = p_level + 1; | |
leave my_loop; | |
when v_char = ']' then | |
set p_state = 'array_end', v_from = v_from + 1; | |
leave my_loop; | |
else | |
set p_state = 'error'; | |
end case; | |
when 'alpha' then | |
case | |
when v_char between 'A' and 'Z' | |
or v_char between 'a' and 'z' | |
or v_char = '_' then | |
leave state_case; | |
when v_char between '0' and '9' then | |
set p_state = 'alphanum'; | |
else | |
leave my_loop; | |
end case; | |
when 'alphanum' then | |
case | |
when v_char between 'A' and 'Z' | |
or v_char between 'a' and 'z' | |
or v_char = '_' | |
or v_char between '0' and '9' then | |
leave state_case; | |
else | |
leave my_loop; | |
end case; | |
when 'integer' then | |
case | |
when v_char between '0' and '9' then | |
leave state_case; | |
when v_char = '.' then | |
set p_state = 'decimal'; | |
else | |
leave my_loop; | |
end case; | |
when 'decimal' then | |
case | |
when v_char between '0' and '9' then | |
leave state_case; | |
else | |
leave my_loop; | |
end case; | |
when 'whitespace' then | |
if v_char not in ('\t', '\n', '\r') then | |
leave my_loop; | |
end if; | |
when 'string' then | |
set v_from = locate(v_quote_char, p_text, v_from); | |
if v_from then | |
if substr(p_text, v_from + 1, 1) = v_quote_char then | |
set v_from = v_from + 1; | |
elseif substr(p_text, v_from - 1, 1) != '\\' then | |
set v_from = v_from + 1; | |
leave my_loop; | |
end if; | |
else | |
set p_state = 'error'; | |
leave my_loop; | |
end if; | |
else | |
leave my_loop; | |
end case; end state_case; | |
set v_from = v_from + 1; | |
end while my_loop; | |
set p_token = substr(p_text, p_from, v_from - p_from) collate utf8_general_ci; | |
set p_from = v_from; | |
if p_state in ('decimal', 'integer') then | |
set p_state := 'number'; | |
end if; | |
if p_state = 'alphanum' then | |
set p_state := 'alpha'; | |
end if; | |
if negative_number and (p_state != 'number') then | |
set p_token := NULL; | |
end if; | |
end; | |
// | |
delimiter ; | |
-- | |
-- Unquotes a given text. | |
-- Removes leading and trailing quoting characters (one of: "'/) | |
-- Unquoting works only if both leading and trailing character are identical. | |
-- There is no nesting or sub-unquoting. | |
-- | |
-- example: | |
-- | |
-- SELECT unquote('\"saying\"') | |
-- Returns: 'saying' | |
-- | |
DELIMITER $$ | |
DROP FUNCTION IF EXISTS unquote $$ | |
CREATE FUNCTION unquote(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8 | |
DETERMINISTIC | |
NO SQL | |
SQL SECURITY INVOKER | |
COMMENT 'Unquotes a given text' | |
begin | |
declare quoting_char VARCHAR(1) CHARSET utf8; | |
declare terminating_quote_escape_char VARCHAR(1) CHARSET utf8; | |
declare current_pos INT UNSIGNED; | |
declare end_quote_pos INT UNSIGNED; | |
if CHAR_LENGTH(txt) < 2 then | |
return txt; | |
end if; | |
set quoting_char := LEFT(txt, 1); | |
if not quoting_char in ('''', '"', '`', '/') then | |
return txt; | |
end if; | |
if txt in ('''''', '""', '``', '//') then | |
return ''; | |
end if; | |
set current_pos := 1; | |
terminating_quote_loop: while current_pos > 0 do | |
set current_pos := LOCATE(quoting_char, txt, current_pos + 1); | |
if current_pos = 0 then | |
-- No terminating quote | |
return txt; | |
end if; | |
if SUBSTRING(txt, current_pos, 2) = REPEAT(quoting_char, 2) then | |
set current_pos := current_pos + 1; | |
iterate terminating_quote_loop; | |
end if; | |
set terminating_quote_escape_char := SUBSTRING(txt, current_pos - 1, 1); | |
if (terminating_quote_escape_char = quoting_char) or (terminating_quote_escape_char = '\\') then | |
-- This isn't really a quote end: the quote is escaped. | |
-- We do nothing; just a trivial assignment. | |
iterate terminating_quote_loop; | |
end if; | |
-- Found terminating quote. | |
leave terminating_quote_loop; | |
end while; | |
if current_pos = CHAR_LENGTH(txt) then | |
return SUBSTRING(txt, 2, CHAR_LENGTH(txt) - 2); | |
end if; | |
return txt; | |
end $$ | |
DELIMITER ; | |
-- | |
-- Encode a given text for XML. | |
-- | |
DELIMITER $$ | |
DROP FUNCTION IF EXISTS encode_xml $$ | |
CREATE FUNCTION encode_xml(txt TEXT CHARSET utf8) RETURNS TEXT CHARSET utf8 | |
DETERMINISTIC | |
NO SQL | |
SQL SECURITY INVOKER | |
COMMENT 'Encode (escape) given text for XML' | |
begin | |
set txt := REPLACE(txt, '&', '&'); | |
set txt := REPLACE(txt, '<', '<'); | |
set txt := REPLACE(txt, '>', '>'); | |
set txt := REPLACE(txt, '"', '"'); | |
set txt := REPLACE(txt, '''', '''); | |
return txt; | |
end $$ | |
DELIMITER ; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment