cleanup of code, str: str_append_n
This commit is contained in:
parent
3042bd3e26
commit
7fb937d342
75
lex.c
75
lex.c
|
@ -10,11 +10,11 @@
|
|||
#include"err.h"
|
||||
#include"state.h"
|
||||
|
||||
char*lextype_names[]={"LNONE","LIDENTIFIER","LINTEGER","LFLOAT","LSTRING","LOPERATOR","LKEYWORD","LCOMMENT","LMINUS","LFAKE",NULL};
|
||||
char*lextype_colors[]={"\033[0m","\033[0m","\033[36m","\033[35m","\033[32m","\033[0m","\033[33m","\033[34m"};
|
||||
char*lexsubtype_names[]={"LENDSTATEMENT","LASSIGN","LLPAREN","LRPAREN","LLCBRACE","LRCBRACE","LSMINUS","LADD","LSMUL","LSDIV",NULL};
|
||||
static char*operator_chars="-+*/=;(),.{}<>";
|
||||
static char*keywords[]={"do","false","fn","for","if","let","ret","true","while","call",};
|
||||
const char*lextype_names[]={"LNONE","LIDENTIFIER","LINTEGER","LFLOAT","LSTRING","LOPERATOR","LKEYWORD","LCOMMENT","LMINUS","LFAKE",NULL};
|
||||
const char*lextype_colors[]={"\033[0m","\033[0m","\033[36m","\033[35m","\033[32m","\033[0m","\033[33m","\033[34m"};
|
||||
const char*lexsubtype_names[]={"LENDSTATEMENT","LASSIGN","LLPAREN","LRPAREN","LLCBRACE","LRCBRACE","LSMINUS","LADD","LSMUL","LSDIV",NULL};
|
||||
static const char*operator_chars="-+*/=;(),.{}<>";
|
||||
static const char*keywords[]={"do","false","fn","for","if","let","ret","true","while","call",};
|
||||
/* static char*operators[]={";","=","+=","-=","*=","/=","+","-","/","*","(",")","{","}"}; */
|
||||
|
||||
Lexer lex_new(void)
|
||||
|
@ -36,48 +36,47 @@ void lex_free(Lexer*l)
|
|||
}
|
||||
|
||||
// Read string and store tokens
|
||||
void lex_string(Lexer*lex,char*s)
|
||||
void lex_string(Lexer*lex,char*input_string)
|
||||
{
|
||||
//Reg regex=reg_new();
|
||||
Str tmpstr=str_new();
|
||||
char ch[2]={0};
|
||||
Str tmptokstr=str_new();
|
||||
size_t current_line=1;
|
||||
size_t strl;
|
||||
size_t input_string_len;
|
||||
|
||||
if(!lex||!s)return;
|
||||
if(!lex||!input_string)return;
|
||||
|
||||
strl=strlen(s);
|
||||
input_string_len=strlen(input_string);
|
||||
|
||||
// Read each individual byte
|
||||
for(size_t i=0;i<strl+1;++i)
|
||||
for(size_t i=0;i<input_string_len+1;++i)
|
||||
{
|
||||
|
||||
/*****
|
||||
* initmatch(chset,mode,keepch)
|
||||
* - Match initial character and change lexer to corresponding mode,
|
||||
* - clear tmpstr and initialize new token
|
||||
* chset char* set of characters which s[i] must match
|
||||
* - clear tmptokstr and initialize new token
|
||||
* chset char* set of characters which input_string[i] must match
|
||||
* mode uint32_t change lexer mode to this
|
||||
* keepch bool will we retain this character in the token string?
|
||||
*****/
|
||||
#define initmatch(chset,lmode,keepch) if(s[i]&&memchr((chset),s[i],strlen((chset)))){lex->mode=(lmode);if(keepch)--i;Tok _tmptok={.str=str_new(),.type=lex->mode,.line=current_line};vec_push(&lex->tokens,&_tmptok);str_clear(&tmpstr);}
|
||||
#define initmatch(chset,lmode,keepch) if(input_string[i]&&memchr((chset),input_string[i],strlen((chset)))){lex->mode=(lmode);if(keepch)--i;Tok _tmptok={.str=str_new(),.type=lex->mode,.line=current_line};vec_push(&lex->tokens,&_tmptok);str_clear(&tmptokstr);}
|
||||
|
||||
/*****
|
||||
* modeterminate
|
||||
* - Finalize current token lexing and set state to LNONE
|
||||
* keepch bool will we retain this character in the token string?
|
||||
*****/
|
||||
#define modeterminate(keepch) do{lex->mode=LNONE;if(keepch)--i;str_assign(&(vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->str),tmpstr.buffer);vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->line=current_line;ch[0]=s[i];str_append(&tmpstr,ch);}while(0)
|
||||
#define modeterminate(keepch) do{lex->mode=LNONE;if(keepch)--i;str_assign(&(vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->str),tmptokstr.buffer);vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->line=current_line;str_append_n(&tmptokstr,input_string+i,2);}while(0)
|
||||
|
||||
/*****
|
||||
* modematch(chset,logic,keepch)
|
||||
* - Match lexeme characters following initial character,
|
||||
* - set token type and return lexer mode to normal
|
||||
* chset char* set of characters which s[i] must match
|
||||
* logic bool if false, only modify current token when s[i] does NOT match chset
|
||||
* chset char* set of characters which input_string[i] must match
|
||||
* logic bool if false, only modify current token when input_string[i] does NOT match chset
|
||||
* keepch bool will we retain this character in the token string?
|
||||
*****/
|
||||
#define modematch(chset,logic,keepch) do{if(!s[i]||(logic==(!!memchr((chset),s[i],strlen(chset)))) ){modeterminate(keepch);}ch[0]=s[i];str_append(&tmpstr,ch);}while(0)
|
||||
#define modematch(chset,logic,keepch) do{if(!input_string[i]||(logic==(!!memchr((chset),input_string[i],strlen(chset)))) ){modeterminate(keepch);}str_append_n(&tmptokstr,input_string+i,2);}while(0)
|
||||
|
||||
switch(lex->mode)
|
||||
{
|
||||
|
@ -107,10 +106,10 @@ void lex_string(Lexer*lex,char*s)
|
|||
/* !! FALL THROUGH !! */
|
||||
default:
|
||||
|
||||
// We go past the strl by one to
|
||||
// We go past the input_string_len by one to
|
||||
// make sure the fixup stage (above)
|
||||
// always gets called
|
||||
if(i>=strl)break;
|
||||
if(i>=input_string_len)break;
|
||||
|
||||
initmatch("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_",LIDENTIFIER,true) else
|
||||
initmatch("\"",LSTRING,false) else
|
||||
|
@ -118,8 +117,8 @@ void lex_string(Lexer*lex,char*s)
|
|||
initmatch("-",LMINUS,true) else
|
||||
initmatch(operator_chars,LOPERATOR,true) else
|
||||
initmatch("#",LCOMMENT,true) else
|
||||
if(strchr(" \t\n",s[i])){if(s[i]=='\n')++current_line;continue;} else
|
||||
err_log("%u: unrecognized character '%c' (%x)",current_line,((s[i]>32)?(s[i]):(' ')),s[i]);
|
||||
if(strchr(" \t\n",input_string[i])){if(input_string[i]=='\n')++current_line;continue;} else
|
||||
err_log("%u: unrecognized character '%c' (%x)",current_line,((input_string[i]>32)?(input_string[i]):(' ')),input_string[i]);
|
||||
//initmatch(" \t\n",LNONE,false)
|
||||
break;
|
||||
|
||||
|
@ -127,7 +126,7 @@ void lex_string(Lexer*lex,char*s)
|
|||
case LIDENTIFIER:modematch("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789",false,true);break;
|
||||
case LFLOAT:modematch("0123456789",false,true);break;
|
||||
case LINTEGER:modematch("0123456789.",false,true);
|
||||
if(s[i]=='.')
|
||||
if(input_string[i]=='.')
|
||||
{
|
||||
vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->type=LFLOAT;
|
||||
lex->mode=LFLOAT;
|
||||
|
@ -135,10 +134,10 @@ void lex_string(Lexer*lex,char*s)
|
|||
break;
|
||||
case LSTRING:modematch("\"",true,false);break;
|
||||
case LOPERATOR:modematch(operator_chars,false,true);
|
||||
switch(s[i])
|
||||
switch(input_string[i])
|
||||
{
|
||||
#define opmatch(lstype,keepch) do{vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->subtype=lstype;modeterminate(keepch);}while(0)
|
||||
#define opmatch_nodup(lstype,keepch) do{lex->mode=LNONE;vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->line=current_line;ch[0]=s[i];str_append(&tmpstr,ch);}while(0)
|
||||
#define opmatch_nodup(lstype,keepch) do{lex->mode=LNONE;vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->line=current_line;str_append_n(&tmptokstr,input_string+i,2);}while(0)
|
||||
case '+':opmatch(LADD,false);break;
|
||||
case ';':opmatch(LENDSTATEMENT,false);break;
|
||||
case '(':opmatch(LLPAREN,false);break;
|
||||
|
@ -164,14 +163,14 @@ void lex_string(Lexer*lex,char*s)
|
|||
}
|
||||
break;
|
||||
case LMINUS://modematch("0123456789=",true,true);
|
||||
if(s[i]=='-')
|
||||
if(input_string[i]=='-')
|
||||
{
|
||||
vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->type=LOPERATOR;
|
||||
vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->subtype=LSMINUS;
|
||||
ch[0]=s[i];str_append(&tmpstr,ch);
|
||||
str_append_n(&tmptokstr,input_string+i,2);
|
||||
modeterminate(false);
|
||||
}
|
||||
else// if(s[i]=='=')
|
||||
else// if(input_string[i]=='=')
|
||||
{
|
||||
vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->type=LOPERATOR;
|
||||
lex->mode=LOPERATOR;
|
||||
|
@ -186,7 +185,7 @@ void lex_string(Lexer*lex,char*s)
|
|||
#undef modeterminate
|
||||
}
|
||||
|
||||
str_free(&tmpstr);
|
||||
str_free(&tmptokstr);
|
||||
//reg_free(&regex);
|
||||
}
|
||||
|
||||
|
@ -207,11 +206,11 @@ void lex_print(Lexer*l)
|
|||
printf("]\n");
|
||||
}
|
||||
|
||||
/*****
 * lex_strchrcount(str,c)
 *  - Count how many characters in str are equal to c
 *  str     char*       NUL-terminated string to scan; NULL is treated as empty
 *  c       char        character to look for
 *  returns size_t      number of matches (the terminator is never examined,
 *                      so searching for '\0' yields 0)
 *****/
size_t lex_strchrcount(char*str,char c)
{
	size_t count=0;

	// Same defensive stance as the other entry points: NULL input is a no-op
	if(!str)return 0;

	for(const char*p=str;*p;++p)
	{
		if(*p==c)
			++count;
	}
	return count;
}
|
||||
/* size_t lex_strchrcount(char*str,char c) */
|
||||
/* { */
|
||||
/* size_t count=0; */
|
||||
/* for(size_t i=0;str[i];++i) */
|
||||
/* if(str[i]==c) */
|
||||
/* ++count; */
|
||||
/* return count; */
|
||||
/* } */
|
||||
|
|
8
lex.h
8
lex.h
|
@ -14,9 +14,9 @@
|
|||
|
||||
enum LEXTYPE {LNONE=0, LIDENTIFIER, LINTEGER, LFLOAT, LSTRING, LOPERATOR, LKEYWORD, LCOMMENT, LMINUS, LFAKE, };
|
||||
enum LEXSUBTYPE {LENDSTATEMENT=55, LASSIGN, LLPAREN, LRPAREN, LLCBRACE, LRCBRACE, LSMINUS, LADD, LSMUL, LSDIV, };
|
||||
extern char*lextype_names[];
|
||||
extern char*lexsubtype_names[];
|
||||
extern char*lextype_colors[];
|
||||
extern const char*lextype_names[];
|
||||
extern const char*lexsubtype_names[];
|
||||
extern const char*lextype_colors[];
|
||||
|
||||
typedef struct Lexer
|
||||
{
|
||||
|
@ -25,7 +25,7 @@ typedef struct Lexer
|
|||
} Lexer;
|
||||
|
||||
Lexer lex_new();
|
||||
size_t lex_strchrcount(char*str,char c);
|
||||
// size_t lex_strchrcount(char*str,char c);
|
||||
void lex_free(Lexer*l);
|
||||
void lex_print(Lexer*l);
|
||||
void lex_string(Lexer*l,char*s);
|
||||
|
|
2
main.c
2
main.c
|
@ -12,7 +12,7 @@
|
|||
|
||||
int main(int argc,char**argv)
|
||||
{
|
||||
bool generate=true;
|
||||
const bool generate=true;
|
||||
bool setoutfile=false;
|
||||
bool showparsetree=false;
|
||||
bool showparsetreebrief=false;
|
||||
|
|
6
pnode.c
6
pnode.c
|
@ -7,14 +7,13 @@
|
|||
#include"mem.h"
|
||||
#include"state.h"
|
||||
|
||||
char*partype_names[]={"PNONE","PEMPTY","PEXPRESSION","PSTATEMENT","PASSIGNMENT","PIF","PCOMMENT","PBLOCK","PWHILE","PVARDECL","PFUNDECL","PRET","PCALL",NULL};
|
||||
const char*partype_names[]={"PNONE","PEMPTY","PEXPRESSION","PSTATEMENT","PASSIGNMENT","PIF","PCOMMENT","PBLOCK","PWHILE","PVARDECL","PFUNDECL","PRET","PCALL",NULL};
|
||||
|
||||
Parser parser_new(void)
|
||||
{
|
||||
Parser p={
|
||||
.root=pnode_new(),
|
||||
.mode=0,
|
||||
.mem_locations=vec_new(sizeof(Var)),
|
||||
};
|
||||
return p;
|
||||
}
|
||||
|
@ -22,9 +21,6 @@ Parser parser_new(void)
|
|||
/*****
 * parser_free(p)
 *  - Release what the parser holds: the root node, every Var stored in
 *    mem_locations, and finally the mem_locations vector itself
 *  p       Parser*     parser to tear down; NULL is ignored
 *****/
void parser_free(Parser*p)
{
	// Nothing to release; avoids dereferencing NULL below
	if(!p)return;

	pnode_free(&p->root);

	// Free each stored Var before the vector that holds them
	for(size_t i=0;i<p->mem_locations.size;++i)
		var_free(vec_at(&p->mem_locations,i,Var*));
	vec_free(&p->mem_locations);
}
|
||||
|
||||
PNode pnode_new(void)
|
||||
|
|
3
pnode.h
3
pnode.h
|
@ -11,7 +11,7 @@
|
|||
|
||||
typedef enum PARTYPE {PNONE, PEMPTY, PEXPRESSION, PSTATEMENT, PASSIGNMENT, PIF, PCOMMENT, PBLOCK, PWHILE, PVARDECL, PFUNDECL, PRET, PCALL, } PARTYPE;
|
||||
|
||||
extern char*partype_names[];
|
||||
extern const char*partype_names[];
|
||||
|
||||
/******
|
||||
* Grammar:
|
||||
|
@ -41,7 +41,6 @@ typedef struct Parser
|
|||
{
|
||||
PNode root;
|
||||
uint32_t mode;
|
||||
Vec mem_locations;
|
||||
} Parser;
|
||||
|
||||
PNode pnode_new(void);
|
||||
|
|
14
str.c
14
str.c
|
@ -76,6 +76,20 @@ void str_append(Str*s,char*c)
|
|||
|
||||
}
|
||||
|
||||
/*****
 * str_append_n(s,c,n)
 *  - Append at most n-1 characters of c to s and keep s NUL-terminated.
 *    n counts the terminator, so n==2 appends a single character
 *    (this is how the lexer calls it).
 *  s       Str*        destination string; NULL is reported and ignored
 *  c       char*       source characters; copying stops early at a NUL
 *  n       size_t      maximum bytes to add, terminator included; 0 is a no-op
 *****/
void str_append_n(Str*s,char*c,size_t n)
{
	size_t len=0;

	if(!s){if(STRVERBOSE)fprintf(stderr,"str_append_n: NULL Str*\n");return;}
	if(!c){if(STRVERBOSE)fprintf(stderr,"str_append_n: NULL char*\n");return;}

	// With n==0 there is no room even for the terminator; the previous
	// `size+=n-1` wrapped around and shrank the string here
	if(n==0)return;

	// Measure what will really be copied, so size never drifts away from
	// the buffer contents when c is shorter than n-1 characters
	while(len<n-1&&c[len])
		++len;

	// Room is needed for len characters plus the terminating NUL
	if(s->capacity<s->size+len+1)
		str_grow(s,s->size+len+1+STRDEFSIZE);
	if(!s->buffer){if(STRVERBOSE)fprintf(stderr,"str_append_n: buffer is NULL after str_grow\n");return;}

	// Copy exactly len bytes; unlike strncat(...,n) this cannot write one
	// character past the space that was reserved above
	memcpy(s->buffer+s->size,c,len);
	s->size+=len;
	s->buffer[s->size]=0;
}
|
||||
|
||||
void str_tr(Str*s,char a,char b)
|
||||
{
|
||||
if(!s){if(STRVERBOSE)fprintf(stderr,"str_tr: NULL Str*\n");return;}
|
||||
|
|
Loading…
Reference in New Issue
Block a user