如何给MySQL添加自定义语法 ?

技术分享 2年前 (2022-08-05) 0 999+

1 背景

MySQL语法解析器用的bison（即yacc）来实现的，而词法解析是自己来实现的，涉及到的token都在文件lex.h里面，然后通过Lex_input_stream 里面相关的函数，解析client的sql字节流（其中会通过提前构造好的hash表帮助快速找到对应symbol，相关代码在sql_lex_hash.cc里面，转换为token，交给bison进行语法解析。

为了给MySQL添加一个新的语法，我们必须添加新的token（如果有新增），以及增加新的语法（sql_yacc.yy）里面。本文以给create table增加一个新的options为例，来演示如何给MySQL新增一个语法。最终的效果如下：

create table t1 (   id int primary key,   name varchar(100) ) global_partition by hash(id) partitions 10; //global_partition by为新增语法，global_partition为新增token

涉及到的修改文件如下：

sql/lex.h //token sql/parse_tree_nodes.cc sql/parse_tree_nodes.h sql/parse_tree_partitions.cc sql/parse_tree_partitions.h sql/parser_yystype.h sql/sql_yacc.yy

2 新增关键词（token）

文件：sql/lex.h

static const SYMBOL symbols[] = {     /*      Insert new SQL keywords after that commentary (by alphabetical order):     */     //省略部分代码     {SYM("GLOBAL_PARTITION", GLOBAL_PARTITION_SYM)}, //注意按照字典序进行添加。     //省略部分代码 };

按照上面的格式添加即可

3 新增语法

文件：sql/sql_yacc.yy

该文件为bison的语法，关于bison语法可以查看这里。下面凡是注释标有###为新增部分，没有标的注释是为了方便理解

%token<lexer.keyword> GLOBAL_PARTITION_SYM 1309            /* seancheer */  //### 声明上一步添加的token，声明了才可以使用，编号1309选择一个未使用的就行 %type <global_partition_clause> global_partition_clause //### 声明新增加的数据结构，后面会介绍  create_table_stmt:           CREATE opt_temporary TABLE_SYM opt_if_not_exists table_ident           '(' table_element_list ')' opt_create_table_options_etc //最后一个标记在YYSTYPE中对应的是create_table_tail， 后面会介绍           {             $$= NEW_PTN PT_create_table_stmt(YYMEM_ROOT, $1, $2, $4, $5,                                              $7,                                              $9.opt_create_table_options,                                              $9.opt_partitioning,                                              $9.opt_global_partitioning, //### 赋值给对应参数，该构造函数需要新增，后面会介绍                                              $9.on_duplicate,                                              $9.opt_query_expression);           }         | CREATE opt_temporary TABLE_SYM opt_if_not_exists table_ident           opt_create_table_options_etc           {             $$= NEW_PTN PT_create_table_stmt(YYMEM_ROOT, $1, $2, $4, $5,                                              NULL,                                              $6.opt_create_table_options,                                              $6.opt_partitioning,                                              $6.opt_global_partitioning, //### 赋值给对应参数，该构造函数需要新增，后面会介绍                                              $6.on_duplicate,                                              $6.opt_query_expression); //partition相关的语法                                              opt_create_partitioning_etc:           partition_clause opt_duplicate_as_qe //这里是原生的partition表语法           {             $$= $2;             $$.opt_partitioning= $1;           }         | global_partition_clause opt_duplicate_as_qe //### 此处是新增的global_partition语法，           {             $$= $2;             $$.opt_global_partitioning= $1;           }         | opt_duplicate_as_qe         ;  //### 下面为重点，新增的global_partition语法，可以看到，用到了新增的token global_partition_clause:           GLOBAL_PARTITION_SYM BY part_type_def opt_num_parts           {             $$= NEW_PTN PT_global_partition($3, @4, $4);           }         ;

4 类似于`PT_partition`添加对应的数据结构`global_partition_clause`

文件：parser_yystype.h：该文件是bison（yacc）运行的一环，代替bison内置的YYSTYPE的，当bison对相关语法解析后，需要构造相关的数据结构，通过对YYSTYPE的自定义，就可以实现构造自定义数据结构的目的了。添加我们自定义的数据结构代码如下：

union YYSTYPE {   PT_sub_partition *opt_sub_part;   PT_part_type_def *part_type_def;   PT_partition *partition_clause;   PT_global_partition *global_partition_clause; //新加数据结构      struct {     Mem_root_array<PT_create_table_option *> *opt_create_table_options;     PT_partition *opt_partitioning;     PT_global_partition *opt_global_partitioning; //同时注意添加到create_table_tail里面，因为create table语法会有该操作     On_duplicate on_duplicate;     PT_query_primary *opt_query_expression;   } create_table_tail; }; static_assert(sizeof(YYSTYPE) <= 40, "YYSTYPE is too big"); //因为struct里面添加了一个成员变量，所以该union需要的空间也会变大，因此注意修改这一行

下面内容介绍PT_global_partition数据结构，为了保持和MySQL习惯一致，新增加的数据结构放在了

sql/parse_tree_nodes.cc sql/parse_tree_nodes.h sql/parse_tree_partitions.cc sql/parse_tree_partitions.h

四个文件里，理论上可以放在任何地方。可根据自身需求添加对应数据结构：

文件：sql/parse_tree_partitions.h sql/parse_tree_partitions.cc

/** 新增数据结构 */ class PT_global_partition : public Parse_tree_node {     typedef Parse_tree_node super;      PT_part_type_def *const part_type_def;     const POS part_defs_pos;     uint num_parts; public:     partition_info part_info; public:     PT_global_partition(PT_part_type_def *part_type_def, const POS &part_defs_pos,                         uint opt_num_parts)             : part_type_def(part_type_def),               part_defs_pos(part_defs_pos),               num_parts(opt_num_parts) {}     bool contextualize(Parse_context *pc) override; };  //模仿其原生的实现方式即可 bool PT_global_partition::contextualize(Parse_context *pc) {     if (super::contextualize(pc)) return true;      Partition_parse_context part_pc(pc->thd, &part_info, false);     if (part_type_def->contextualize(&part_pc)) return true;      if (part_info.part_type != partition_type::HASH) {         //only support hash partition for shard key         my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), "NOT HASH");         return true;     }      uint count_curr_parts = part_info.partitions.elements;      if (part_info.num_parts != 0) {         if (part_info.num_parts != count_curr_parts) {             error(&part_pc, part_defs_pos,                   ER_THD(pc->thd, ER_PARTITION_WRONG_NO_PART_ERROR));             return true;         }     } else if (count_curr_parts > 0)         part_info.num_parts = count_curr_parts;     return false; }

文件：sql/parse_tree_nodes.cc sql/parse_tree_nodes.h

接下来修改create table对应的数据结构，将新增的PT_global_partition添加到create table里面

class PT_create_table_stmt final : public PT_table_ddl_stmt_base {   PT_partition *opt_partitioning;   PT_global_partition *opt_global_partitioning; //添加成员变量   PT_create_table_stmt(     MEM_ROOT *mem_root, PT_hint_list *opt_hints, bool is_temporary,     bool only_if_not_exists, Table_ident *table_name,     const Mem_root_array<PT_table_element *> *opt_table_element_list,     const Mem_root_array<PT_create_table_option *> *opt_create_table_options,     PT_partition *opt_partitioning,     PT_global_partition *opt_global_partitioning, On_duplicate on_duplicate,     PT_query_primary *opt_query_expression)     : PT_table_ddl_stmt_base(mem_root),   m_opt_hints(opt_hints),   is_temporary(is_temporary),   only_if_not_exists(only_if_not_exists),   table_name(table_name),   opt_table_element_list(opt_table_element_list),   opt_create_table_options(opt_create_table_options),   opt_partitioning(opt_partitioning),   opt_global_partitioning(opt_global_partitioning), //添加构造函数，主要是为了增加对PT_global_partition的赋值操作   on_duplicate(on_duplicate),   opt_query_expression(opt_query_expression),   opt_like_clause(nullptr) {}    //在其对应的函数中增加相关逻辑，调用对应的初始化函数contextualize Sql_cmd *PT_create_table_stmt::make_cmd(THD *thd) {     if (opt_global_partitioning){         if (opt_global_partitioning->contextualize(&pc)) return nullptr;         lex->part_info = &opt_global_partitioning->part_info;     } }

发表评论