2022-10-05 mysql-词法解析与语法解析-分析-CFANZ编程社区

摘要:

SQL作为一门语言,mysql去识别和处理的话, 就必须做词法解析和语法解析。

词法解析就是将输入的字节流按照分词规则,分成一个个的TOKEN。

语法分析是将词法解析后的TOKEN, 按照一定的规则进行处理。

mysql自己写了词法解析, 但是语法解析用了bison。

本文分析mysql的词法解析和语法解析。

词法分析flex和语法分析bison练习的项目: https://github.com/adofsauron/yacc-dev

参考:

图解MySQL 8.0优化器查询解析篇-table-derived-setup-云原生关系型数据库 PolarDB MySQL引擎-阿里云

图解MySQL 8.0优化器查询转换篇-查询-gt-join-云原生关系型数据库 PolarDB MySQL引擎-阿里云

MySQL 8.0 Server层最新架构详解-gt-join-MySQL-云原生关系型数据库 PolarDB MySQL引擎-阿里云

MySQL 8.0 新的火山模型执行器 - 知乎

2022-10-05 mysql-词法解析与语法解析-分析_词法

2022-10-05 mysql-词法解析与语法解析-分析_数据库_02

For example for following query:
 554 
 555    select *
 556      from table1
 557      where table1.field IN (select * from table1_1_1 union
 558                             select * from table1_1_2)
 559      union
 560    select *
 561      from table2
 562      where table2.field=(select (select f1 from table2_1_1_1_1
 563                                    where table2_1_1_1_1.f2=table2_1_1.f3)
 564                            from table2_1_1
 565                            where table2_1_1.f1=table2.f2)
 566      union
 567    select * from table3;
 568 
 569    we will have following structure:
 570 
 571    select1: (select * from table1 ...)
 572    select2: (select * from table2 ...)
 573    select3: (select * from table3)
 574    select1.1.1: (select * from table1_1_1)
 575    ...
 576 
 577      main unit
 578      fake0
 579      select1 select2 select3
 580      |^^     |^
 581     s|||     ||master
 582     l|||     |+---------------------------------+
 583     a|||     +---------------------------------+|
 584     v|||master                         slave   ||
 585     e||+-------------------------+             ||
 586      V|            neighbor      |             V|
 587      unit1.1<+==================>unit1.2       unit2.1
 588      fake1.1
 589      select1.1.1 select 1.1.2    select1.2.1   select2.1.1
 590                                                |^
 591                                                ||
 592                                                V|
 593                                                unit2.1.1.1
 594                                                select2.1.1.1.1
 595 
 596 
 597    relation in main unit will be following:
 598    (bigger picture for:
 599       main unit
 600       fake0
 601       select1 select2 select3
 602    in the above picture)
 603 
 604          main unit
 605          |^^^^|fake_query_block
 606          |||||+--------------------------------------------+
 607          ||||+--------------------------------------------+|
 608          |||+------------------------------+              ||
 609          ||+--------------+                |              ||
 610     slave||master         |                |              ||
 611          V|      neighbor |       neighbor |        master|V
 612          select1<========>select2<========>select3        fake0
 613

逻辑追踪:

2022-10-05 mysql-词法解析与语法解析-分析_语法解析_03

词法解析:

词法解析是根据特定的分词规则, 将输入的字符串分解成一个个TOKEN.

文字表达比较抽象，可以看下具体的例子: yacc-dev/wc.l at main · adofsauron/yacc-dev · GitHub

%option noyywrap

%{

#include <stdio.h>
#include "tools.h"

int chars = 0;
int lines = 0;

%}

WORD_SYM    ([a-zA-Z0-9\*]+)

%%

select {
  /* Keyword rule: placed before {WORD_SYM} so it wins a same-length match. */
  printf("select\n");
}

from {
  /* Keyword rule: placed before {WORD_SYM} so it wins a same-length match. */
  printf("from\n");
}

;    {
  /* yylex() returns int, so a bare `return;` is not valid C.
     Returning 0 stops the scan at ';' with the conventional
     end-of-input value. */
  return 0;
}

{WORD_SYM} {
  /* Any other run of letters, digits or '*'. */
  printf("word = [%s]\n", yytext);
}

. {
  /* Any other single character (not newline); blanks are skipped silently. */
  if (yytext[0] != ' ')
  {
    printf(". = [%s]\n", yytext);
  }
}

%%


/*
 * Run the scanner once through yylex(), then print the global `lines`
 * and `chars` counters. Both parameters are unused. Always returns 0.
 *
 * NOTE(review): no rule visible in this snippet updates `lines` or
 * `chars`, so they are printed with their initial values - confirm
 * whether counting was intended.
 */
int start_yacc(int arc, char **argv)
{
  (void)arc;
  (void)argv;

  yylex();
  printf("lines:%d chars:%d\n", lines, chars);

  return 0;
}

语法解析:

根据特定的语法规则对词法解析后的TOKEN做处理, 直接上例子:

词法解析, 将输入切分成token: https://github.com/adofsauron/yacc-dev/blob/main/bison/calc.l

语法解析, 按规则处理token: https://github.com/adofsauron/yacc-dev/blob/main/bison/calc.y

%{
#include <stdio.h>
#include <stdarg.h>

extern int yylineno; /* from lexer */
int yylex();
/*
 * Report a parse error on stderr in the form
 *   "<yylineno>: error: <formatted message>\n".
 *
 * s    printf-style format string
 * ...  arguments consumed by the format string
 */
void yyerror(const char *s, ...)
{
    va_list ap;

    fprintf(stderr, "%d: error: ", yylineno);

    va_start(ap, s);
    vfprintf(stderr, s, ap);
    va_end(ap); /* every va_start must be matched by va_end in the same function */

    fprintf(stderr, "\n");
}

%}

%token T_NUM
%token T_HACK

%left '+' '-'
%left '*' '/'

%%

/* S: zero or more expressions, each terminated by a newline; prints every result. */
S   :   S E '\n'        { printf("ans = %d\n", $2); }
    |   /* empty */     { /* empty */ }
    ;

/* E: arithmetic expression; operator precedence and associativity come
   from the %left declarations. */
E   :   E '+' E         { $$ = $1 + $3; }
    |   E '-' E         { $$ = $1 - $3; }
    |   E '*' E         { $$ = $1 * $3; }
    |   E '/' E         {
                            /* Division by zero is undefined behaviour in C:
                               report it and fail the parse instead. */
                            if ($3 == 0) {
                                yyerror("division by zero");
                                YYERROR;
                            }
                            $$ = $1 / $3;
                        }
    |   T_NUM           { $$ = $1; }
    |   '(' E ')'       { $$ = $2; }
    ;

%%

/* Program entry point: runs the parser and returns its result as the exit status. */
int main()
{
    int status = yyparse();

    return status;
}

mysql的词法解析和语法解析

mysql-词法解析:

参考lex.h文件中对于关键词的定义

mysql-语法解析-语法树

一. IN子查询

2022-10-05 mysql-词法解析与语法解析-分析_语法解析_04

2022-10-05 mysql-词法解析与语法解析-分析_mysql_05

二. 多表多条件

2022-10-05 mysql-词法解析与语法解析-分析_数据库_06

2022-10-05 mysql-词法解析与语法解析-分析_语法解析_07

核心数据结构:

st_lex

/*
  Lex parsing state. An instance of this is saved in the THD struct.
  Members are declared one per line; order and types are unchanged.
*/

typedef struct st_lex {
  /* Simulate lex */
  uint yylineno;
  uint yytoklen;
  LEX_YYSTYPE yylval;
  SELECT_LEX select_lex;
  SELECT_LEX *select;
  uchar *ptr;
  uchar *tok_start;
  uchar *tok_end;
  uchar *end_of_query;
  char *length;
  char *dec;
  char *change;
  char *name;
  char *backup_dir;                     /* For RESTORE/BACKUP */
  char *to_log;                         /* For PURGE MASTER LOGS TO */
  char *x509_subject;
  char *x509_issuer;
  char *ssl_cipher;
  enum SSL_type ssl_type;               /* defined in violite.h */
  String *wild;
  sql_exchange *exchange;

  List<key_part_spec> col_list;
  List<Alter_drop> drop_list;
  List<Alter_column> alter_list;
  List<String> interval_list;
  List<st_lex_user> users_list;
  List<LEX_COLUMN> columns;
  List<Key> key_list;
  List<create_field> create_list;
  List<Item> *insert_list;
  List<Item> field_list;
  List<Item> value_list;
  List<List_item> many_values;
  List<Set_option> option_list;
  SQL_LIST proc_list;
  SQL_LIST auxilliary_table_list;
  TYPELIB *interval;
  create_field *last_field;
  Item *default_value;
  CONVERT *convert_set;
  LEX_USER *grant_user;
  gptr yacc_yyss;
  gptr yacc_yyvs;
  THD *thd;
  udf_func udf;
  HA_CHECK_OPT check_opt;               // check/repair options
  HA_CREATE_INFO create_info;
  LEX_MASTER_INFO mi;                   // used by CHANGE MASTER
  ulong thread_id;
  ulong type;
  enum_sql_command sql_command;
  enum lex_states next_state;
  enum enum_duplicates duplicates;
  enum enum_tx_isolation tx_isolation;
  enum enum_ha_read_modes ha_read_mode;
  enum ha_rkey_function ha_rkey_mode;
  enum enum_enable_or_disable alter_keys_onoff;
  uint grant;
  uint grant_tot_col;
  uint which_columns;
  uint union_option;
  thr_lock_type lock_option;
  bool drop_primary;
  bool drop_if_exists;
  bool local_file;
  bool in_comment;
  bool ignore_space;
  bool verbose;
  bool simple_alter;
  bool option_type;

} LEX;

st_select_lex

/*
  Lex parsing state for a SELECT.
  Members are declared one per line; order and types are unchanged.
*/

typedef struct st_select_lex {
  enum sub_select_type linkage;
  /* For outer join using .. */
  char *db;
  char *db1;
  char *table1;
  char *db2;
  char *table2;
  Item *where;
  Item *having;
  ha_rows select_limit;
  ha_rows offset_limit;
  ulong options;
  List<List_item> expr_list;
  List<List_item> when_list;
  SQL_LIST order_list;
  SQL_LIST table_list;
  SQL_LIST group_list;
  List<Item> item_list;
  List<String> interval_list;
  List<String> use_index;
  List<String> *use_index_ptr;
  List<String> ignore_index;
  List<String> *ignore_index_ptr;
  List<Item_func_match> ftfunc_list;
  uint in_sum_expr;
  uint sort_default;
  bool create_refs;
  bool braces;
  st_select_lex *next;                  /* link to another st_select_lex */
} SELECT_LEX;

st_order

/*
  One element of an ORDER clause list; elements are chained through `next`.
*/

typedef struct st_order {
  struct st_order *next;
  Item **item;                          /* Point at item in select fields */
  bool asc;                             /* true if ascending */
  bool free_me;                         /* true if item isn't shared */
  bool in_field_list;                   /* true if in select field list */
  Field *field;                         /* If tmp-table group */
  char *buff;                           /* If tmp-table group */
  table_map used;
  table_map depend_map;
} ORDER;