摘要:
SQL作为一门语言,mysql去识别和处理的话, 就必须做词法解析和语法解析。
词法解析就是将输入的字节流按照分词规则,分成一个个的TOKEN。
语法分析是将词法解析后的TOKEN, 按照一定的规则进行处理。
mysql自己写了词法解析, 但是语法解析用了bison。
本文分析mysql的词法解析和语法解析。
词法分析flex和语法分析bison练习的项目: https://github.com/adofsauron/yacc-dev
参考:
图解MySQL 8.0优化器查询解析篇-table-derived-setup-云原生关系型数据库 PolarDB MySQL引擎-阿里云
图解MySQL 8.0优化器查询转换篇-查询-gt-join-云原生关系型数据库 PolarDB MySQL引擎-阿里云
MySQL 8.0 Server层最新架构详解-gt-join-MySQL-云原生关系型数据库 PolarDB MySQL引擎-阿里云
MySQL 8.0 新的火山模型执行器 - 知乎
For example for following query:
554
555 select *
556 from table1
557 where table1.field IN (select * from table1_1_1 union
558 select * from table1_1_2)
559 union
560 select *
561 from table2
562 where table2.field=(select (select f1 from table2_1_1_1_1
563 where table2_1_1_1_1.f2=table2_1_1.f3)
564 from table2_1_1
565 where table2_1_1.f1=table2.f2)
566 union
567 select * from table3;
568
569 we will have following structure:
570
571 select1: (select * from table1 ...)
572 select2: (select * from table2 ...)
573 select3: (select * from table3)
574 select1.1.1: (select * from table1_1_1)
575 ...
576
577     main unit
578     fake0
579     select1 select2 select3
580     |^^     |^
581    s|||     ||master
582    l|||     |+---------------------------------+
583    a|||     +---------------------------------+|
584    v|||master                         slave   ||
585    e||+-------------------------+             ||
586     V|            neighbor      |             V|
587     unit1.1<+==================>unit1.2       unit2.1
588     fake1.1
589     select1.1.1 select 1.1.2    select1.2.1   select2.1.1
590                                               |^
591                                               ||
592                                               V|
593                                               unit2.1.1.1
594                                               select2.1.1.1.1
595
596
597 relation in main unit will be following:
598 (bigger picture for:
599 main unit
600 fake0
601 select1 select2 select3
602 in the above picture)
603
604          main unit
605          |^^^^|fake_query_block
606          |||||+--------------------------------------------+
607          ||||+--------------------------------------------+|
608          |||+------------------------------+              ||
609          ||+--------------+                |              ||
610     slave||master         |                |              ||
611          V|      neighbor |       neighbor |        master|V
612          select1<========>select2<========>select3        fake0
613
逻辑追踪:
词法解析:
词法解析是根据特定的字符, 将输入的字符串分解成一个个TOKEN.
文字表达比较抽象,可以看下具体的例子: yacc-dev/wc.l at main · adofsauron/yacc-dev · GitHub
%option noyywrap
%{
#include <stdio.h>
#include "tools.h"
int chars = 0;
int lines = 0;
%}
WORD_SYM ([a-zA-Z0-9\*]+)
%%
select {
    /* Keyword rule: reports that the keyword was seen. */
    printf("select\n");
}
from {
    /* Keyword rule: reports that the keyword was seen. */
    printf("from\n");
}
; {
    /* Stop scanning at the statement terminator. yylex() returns int by
       default, so a bare `return;` is not valid C here; 0 is the value
       yylex() conventionally returns for end of input. */
    return 0;
}
{WORD_SYM} {
    /* A run of letters, digits or '*' (see the WORD_SYM definition). */
    printf("word = [%s]\n", yytext);
}
. {
    /* Fallback for a single character no other rule matched;
       a space is dropped without any output. */
    const char* str = yytext;
    if (' ' != str[0])
    {
        printf(". = [%s]\n", yytext);
    }
}
%%
/*
 * Run the scanner once and then print the two global counters.
 *
 * arc, argv: accepted but not used by this function.
 * Returns 0 unconditionally; the value returned by yylex() is discarded.
 *
 * NOTE(review): no scanner rule visible in this file updates `lines` or
 * `chars`, so the summary presumably always shows zeros -- confirm.
 */
int start_yacc(int arc, char **argv)
{
    (void)arc;
    (void)argv;

    yylex();

    fprintf(stdout, "lines:%d chars:%d\n", lines, chars);

    return 0;
}
语法解析:
根据特定规则对词法解析后的TOKEN做处理, 直接上例子:
词法解析生成token: https://github.com/adofsauron/yacc-dev/blob/main/bison/calc.l
语法解析处理token: https://github.com/adofsauron/yacc-dev/blob/main/bison/calc.y
%{
#include <stdio.h>
#include <stdarg.h>
extern int yylineno; /* from lexer */
int yylex();
/*
 * Report a parse error on stderr in the form "<line>: error: <message>\n".
 *
 * s    printf-style format string for the message
 * ...  arguments consumed by the conversions in s
 *
 * The line number is taken from the lexer's yylineno.
 */
void yyerror(const char *s, ...)
{
    va_list ap;

    va_start(ap, s);
    fprintf(stderr, "%d: error: ", yylineno);
    vfprintf(stderr, s, ap);
    va_end(ap); /* every va_start must be matched by va_end in the same function */
    fprintf(stderr, "\n");
}
%}
%token T_NUM
%token T_HACK
%left '+' '-'
%left '*' '/'
%%
/* Input: zero or more lines, each holding one expression terminated by
   '\n'; the value of a line is printed when the line is reduced. */
S : S E '\n' { printf("ans = %d\n", $2); }
  | /* empty */ { /* empty */ }
  ;

/* Expression over T_NUM values; precedence and associativity of the four
   operators come from the %left declarations. */
E : E '+' E { $$ = $1 + $3; }
  | E '-' E { $$ = $1 - $3; }
  | E '*' E { $$ = $1 * $3; }
  | E '/' E {
        /* Dividing by zero is undefined behavior in C: report it and
           make the parser fail instead of crashing. YYERROR does not
           call yyerror itself, so the message is emitted explicitly. */
        if ($3 == 0) {
            yyerror("division by zero");
            YYERROR;
        }
        $$ = $1 / $3;
    }
  | T_NUM { $$ = $1; }
  | '(' E ')' { $$ = $2; }
  ;
%%
/* Program entry point: runs the parser and uses its result as the exit status. */
int main()
{
    const int parse_status = yyparse();

    return parse_status;
}
mysql的词法解析和语法解析
mysql-词法解析:
参考lex.h文件中对于关键词的定义
mysql-语法解析-语法树
一. IN子查询
二. 多表多条件
核心数据结构:
st_lex
/*
  State of the lex parsing. An instance is saved in the THD struct.
  Members are listed one per declaration, in their original order.
*/
typedef struct st_lex {
  /* Simulate lex */
  uint yylineno;
  uint yytoklen;
  LEX_YYSTYPE yylval;
  SELECT_LEX select_lex;
  SELECT_LEX *select;
  uchar *ptr;
  uchar *tok_start;
  uchar *tok_end;
  uchar *end_of_query;
  char *length;
  char *dec;
  char *change;
  char *name;
  char *backup_dir;                 /* For RESTORE/BACKUP */
  char *to_log;                     /* For PURGE MASTER LOGS TO */
  char *x509_subject;
  char *x509_issuer;
  char *ssl_cipher;
  enum SSL_type ssl_type;           /* defined in violite.h */
  String *wild;
  sql_exchange *exchange;
  List<key_part_spec> col_list;
  List<Alter_drop> drop_list;
  List<Alter_column> alter_list;
  List<String> interval_list;
  List<st_lex_user> users_list;
  List<LEX_COLUMN> columns;
  List<Key> key_list;
  List<create_field> create_list;
  List<Item> *insert_list;          /* pointer, unlike the two lists below */
  List<Item> field_list;
  List<Item> value_list;
  List<List_item> many_values;
  List<Set_option> option_list;
  SQL_LIST proc_list;
  SQL_LIST auxilliary_table_list;
  TYPELIB *interval;
  create_field *last_field;
  Item *default_value;
  CONVERT *convert_set;
  LEX_USER *grant_user;
  gptr yacc_yyss;
  gptr yacc_yyvs;
  THD *thd;
  udf_func udf;
  HA_CHECK_OPT check_opt;           /* check/repair options */
  HA_CREATE_INFO create_info;
  LEX_MASTER_INFO mi;               /* used by CHANGE MASTER */
  ulong thread_id;
  ulong type;
  enum_sql_command sql_command;
  enum lex_states next_state;
  enum enum_duplicates duplicates;
  enum enum_tx_isolation tx_isolation;
  enum enum_ha_read_modes ha_read_mode;
  enum ha_rkey_function ha_rkey_mode;
  enum enum_enable_or_disable alter_keys_onoff;
  uint grant;
  uint grant_tot_col;
  uint which_columns;
  uint union_option;
  thr_lock_type lock_option;
  bool drop_primary;
  bool drop_if_exists;
  bool local_file;
  bool in_comment;
  bool ignore_space;
  bool verbose;
  bool simple_alter;
  bool option_type;
} LEX;
st_select_lex
/*
  State of the lex parsing for selects.
  Instances can be chained through the `next` pointer.
  Members are listed one per declaration, in their original order.
*/
typedef struct st_select_lex {
  enum sub_select_type linkage;
  /* For outer join using .. */
  char *db;
  char *db1;
  char *table1;
  char *db2;
  char *table2;
  Item *where;
  Item *having;
  ha_rows select_limit;
  ha_rows offset_limit;
  ulong options;
  List<List_item> expr_list;
  List<List_item> when_list;
  SQL_LIST order_list;
  SQL_LIST table_list;
  SQL_LIST group_list;
  List<Item> item_list;
  List<String> interval_list;
  List<String> use_index;
  List<String> *use_index_ptr;
  List<String> ignore_index;
  List<String> *ignore_index_ptr;
  List<Item_func_match> ftfunc_list;
  uint in_sum_expr;
  uint sort_default;
  bool create_refs;
  bool braces;
  st_select_lex *next;
} SELECT_LEX;
st_order
/*
  One element of an order clause list; elements are chained through `next`.
*/
typedef struct st_order {
  struct st_order *next;
  Item **item;                /* Points at the item in the select fields */
  bool asc;                   /* true if ascending */
  bool free_me;               /* true if item isn't shared */
  bool in_field_list;         /* true if in select field list */
  Field *field;               /* If tmp-table group */
  char *buff;                 /* If tmp-table group */
  table_map used;
  table_map depend_map;
} ORDER;