cleanup of code, str: str_append_n

This commit is contained in:
corey 2023-11-16 10:43:48 -06:00
parent 3042bd3e26
commit 7fb937d342
7 changed files with 59 additions and 50 deletions

75
lex.c
View File

@ -10,11 +10,11 @@
#include"err.h"
#include"state.h"
char*lextype_names[]={"LNONE","LIDENTIFIER","LINTEGER","LFLOAT","LSTRING","LOPERATOR","LKEYWORD","LCOMMENT","LMINUS","LFAKE",NULL};
char*lextype_colors[]={"\033[0m","\033[0m","\033[36m","\033[35m","\033[32m","\033[0m","\033[33m","\033[34m"};
char*lexsubtype_names[]={"LENDSTATEMENT","LASSIGN","LLPAREN","LRPAREN","LLCBRACE","LRCBRACE","LSMINUS","LADD","LSMUL","LSDIV",NULL};
static char*operator_chars="-+*/=;(),.{}<>";
static char*keywords[]={"do","false","fn","for","if","let","ret","true","while","call",};
// Printable names, listed in the same order as the enum LEXTYPE values
// (LNONE .. LFAKE) and terminated by a NULL entry.
const char*lextype_names[]={"LNONE","LIDENTIFIER","LINTEGER","LFLOAT","LSTRING","LOPERATOR","LKEYWORD","LCOMMENT","LMINUS","LFAKE",NULL};
// ANSI SGR colour escape sequences; 8 entries and no NULL terminator.
// NOTE(review): lextype_names has 10 names but there are only 8 colours here.
// If this table is indexed by token type, LMINUS and LFAKE would read past
// the end - confirm against the code that uses it.
const char*lextype_colors[]={"\033[0m","\033[0m","\033[36m","\033[35m","\033[32m","\033[0m","\033[33m","\033[34m"};
// Printable names, listed in the same order as the enum LEXSUBTYPE values and
// terminated by a NULL entry.
// NOTE(review): LEXSUBTYPE starts at LENDSTATEMENT=55, so a lookup presumably
// has to subtract LENDSTATEMENT first - confirm against callers.
const char*lexsubtype_names[]={"LENDSTATEMENT","LASSIGN","LLPAREN","LRPAREN","LLCBRACE","LRCBRACE","LSMINUS","LADD","LSMUL","LSDIV",NULL};
// Character set passed to initmatch()/modematch() in lex_string() to
// recognise LOPERATOR tokens.
static const char*operator_chars="-+*/=;(),.{}<>";
// Keyword spellings. Unlike the name tables above there is no NULL
// terminator, so a user needs the element count; usage is not visible here.
static const char*keywords[]={"do","false","fn","for","if","let","ret","true","while","call",};
/* static char*operators[]={";","=","+=","-=","*=","/=","+","-","/","*","(",")","{","}"}; */
Lexer lex_new(void)
@ -36,48 +36,47 @@ void lex_free(Lexer*l)
}
// Read string and store tokens
void lex_string(Lexer*lex,char*s)
void lex_string(Lexer*lex,char*input_string)
{
//Reg regex=reg_new();
Str tmpstr=str_new();
char ch[2]={0};
Str tmptokstr=str_new();
size_t current_line=1;
size_t strl;
size_t input_string_len;
if(!lex||!s)return;
if(!lex||!input_string)return;
strl=strlen(s);
input_string_len=strlen(input_string);
// Read each individual byte
for(size_t i=0;i<strl+1;++i)
for(size_t i=0;i<input_string_len+1;++i)
{
/*****
* initmatch(chset,mode,keepch)
* - Match initial character and change lexer to corresponding mode,
* - clear tmpstr and initialize new token
* chset char* set of characters which s[i] must match
* - clear tmptokstr and initialize new token
* chset char* set of characters which input_string[i] must match
* mode uint32_t change lexer mode to this
* keepch bool will we retain this character in the token string?
*****/
#define initmatch(chset,lmode,keepch) if(s[i]&&memchr((chset),s[i],strlen((chset)))){lex->mode=(lmode);if(keepch)--i;Tok _tmptok={.str=str_new(),.type=lex->mode,.line=current_line};vec_push(&lex->tokens,&_tmptok);str_clear(&tmpstr);}
#define initmatch(chset,lmode,keepch) if(input_string[i]&&memchr((chset),input_string[i],strlen((chset)))){lex->mode=(lmode);if(keepch)--i;Tok _tmptok={.str=str_new(),.type=lex->mode,.line=current_line};vec_push(&lex->tokens,&_tmptok);str_clear(&tmptokstr);}
/*****
* modeterminate
* - Finalize current token lexing and set state to LNONE
* keepch bool will we retain this character in the token string?
*****/
#define modeterminate(keepch) do{lex->mode=LNONE;if(keepch)--i;str_assign(&(vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->str),tmpstr.buffer);vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->line=current_line;ch[0]=s[i];str_append(&tmpstr,ch);}while(0)
#define modeterminate(keepch) do{lex->mode=LNONE;if(keepch)--i;str_assign(&(vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->str),tmptokstr.buffer);vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->line=current_line;str_append_n(&tmptokstr,input_string+i,2);}while(0)
/*****
* modematch(chset,logic,keepch)
* - Match lexeme characters following initial character,
* - set token type and return lexer mode to normal
* chset char* set of characters which s[i] must match
* logic bool if false, only modify current token when s[i] does NOT match chset
* chset char* set of characters which input_string[i] must match
* logic bool if false, only modify current token when input_string[i] does NOT match chset
* keepch bool will we retain this character in the token string?
*****/
#define modematch(chset,logic,keepch) do{if(!s[i]||(logic==(!!memchr((chset),s[i],strlen(chset)))) ){modeterminate(keepch);}ch[0]=s[i];str_append(&tmpstr,ch);}while(0)
#define modematch(chset,logic,keepch) do{if(!input_string[i]||(logic==(!!memchr((chset),input_string[i],strlen(chset)))) ){modeterminate(keepch);}str_append_n(&tmptokstr,input_string+i,2);}while(0)
switch(lex->mode)
{
@ -107,10 +106,10 @@ void lex_string(Lexer*lex,char*s)
/* !! FALL THROUGH !! */
default:
// We go past the strl by one to
// We go past the input_string_len by one to
// make sure the fixup stage (above)
// always gets called
if(i>=strl)break;
if(i>=input_string_len)break;
initmatch("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_",LIDENTIFIER,true) else
initmatch("\"",LSTRING,false) else
@ -118,8 +117,8 @@ void lex_string(Lexer*lex,char*s)
initmatch("-",LMINUS,true) else
initmatch(operator_chars,LOPERATOR,true) else
initmatch("#",LCOMMENT,true) else
if(strchr(" \t\n",s[i])){if(s[i]=='\n')++current_line;continue;} else
err_log("%u: unrecognized character '%c' (%x)",current_line,((s[i]>32)?(s[i]):(' ')),s[i]);
if(strchr(" \t\n",input_string[i])){if(input_string[i]=='\n')++current_line;continue;} else
err_log("%u: unrecognized character '%c' (%x)",current_line,((input_string[i]>32)?(input_string[i]):(' ')),input_string[i]);
//initmatch(" \t\n",LNONE,false)
break;
@ -127,7 +126,7 @@ void lex_string(Lexer*lex,char*s)
case LIDENTIFIER:modematch("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789",false,true);break;
case LFLOAT:modematch("0123456789",false,true);break;
case LINTEGER:modematch("0123456789.",false,true);
if(s[i]=='.')
if(input_string[i]=='.')
{
vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->type=LFLOAT;
lex->mode=LFLOAT;
@ -135,10 +134,10 @@ void lex_string(Lexer*lex,char*s)
break;
case LSTRING:modematch("\"",true,false);break;
case LOPERATOR:modematch(operator_chars,false,true);
switch(s[i])
switch(input_string[i])
{
#define opmatch(lstype,keepch) do{vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->subtype=lstype;modeterminate(keepch);}while(0)
#define opmatch_nodup(lstype,keepch) do{lex->mode=LNONE;vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->line=current_line;ch[0]=s[i];str_append(&tmpstr,ch);}while(0)
#define opmatch_nodup(lstype,keepch) do{lex->mode=LNONE;vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->line=current_line;str_append_n(&tmptokstr,input_string+i,2);}while(0)
case '+':opmatch(LADD,false);break;
case ';':opmatch(LENDSTATEMENT,false);break;
case '(':opmatch(LLPAREN,false);break;
@ -164,14 +163,14 @@ void lex_string(Lexer*lex,char*s)
}
break;
case LMINUS://modematch("0123456789=",true,true);
if(s[i]=='-')
if(input_string[i]=='-')
{
vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->type=LOPERATOR;
vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->subtype=LSMINUS;
ch[0]=s[i];str_append(&tmpstr,ch);
str_append_n(&tmptokstr,input_string+i,2);
modeterminate(false);
}
else// if(s[i]=='=')
else// if(input_string[i]=='=')
{
vec_at(&lex->tokens,lex->tokens.size-1,Tok*)->type=LOPERATOR;
lex->mode=LOPERATOR;
@ -186,7 +185,7 @@ void lex_string(Lexer*lex,char*s)
#undef modeterminate
}
str_free(&tmpstr);
str_free(&tmptokstr);
//reg_free(&regex);
}
@ -207,11 +206,11 @@ void lex_print(Lexer*l)
printf("]\n");
}
// Count how many times the character c occurs in the NUL-terminated string
// str.
//   str  string to scan; a NULL pointer is treated as empty and yields 0
//   c    character to count; the scan stops at the terminating NUL, so
//        c=='\0' always yields 0
// Returns the number of occurrences.
size_t lex_strchrcount(char*str,char c)
{
	size_t count=0;

	// Guard like the other entry points do instead of dereferencing NULL
	if(!str)return 0;

	for(const char*p=str;*p;++p)
		if(*p==c)
			++count;
	return count;
}
/* size_t lex_strchrcount(char*str,char c) */
/* { */
/* size_t count=0; */
/* for(size_t i=0;str[i];++i) */
/* if(str[i]==c) */
/* ++count; */
/* return count; */
/* } */

8
lex.h
View File

@ -14,9 +14,9 @@
enum LEXTYPE {LNONE=0, LIDENTIFIER, LINTEGER, LFLOAT, LSTRING, LOPERATOR, LKEYWORD, LCOMMENT, LMINUS, LFAKE, };
enum LEXSUBTYPE {LENDSTATEMENT=55, LASSIGN, LLPAREN, LRPAREN, LLCBRACE, LRCBRACE, LSMINUS, LADD, LSMUL, LSDIV, };
extern char*lextype_names[];
extern char*lexsubtype_names[];
extern char*lextype_colors[];
extern const char*lextype_names[];
extern const char*lexsubtype_names[];
extern const char*lextype_colors[];
typedef struct Lexer
{
@ -25,7 +25,7 @@ typedef struct Lexer
} Lexer;
Lexer lex_new();
size_t lex_strchrcount(char*str,char c);
// size_t lex_strchrcount(char*str,char c);
void lex_free(Lexer*l);
void lex_print(Lexer*l);
void lex_string(Lexer*l,char*s);

2
main.c
View File

@ -12,7 +12,7 @@
int main(int argc,char**argv)
{
bool generate=true;
const bool generate=true;
bool setoutfile=false;
bool showparsetree=false;
bool showparsetreebrief=false;

View File

@ -7,14 +7,13 @@
#include"mem.h"
#include"state.h"
char*partype_names[]={"PNONE","PEMPTY","PEXPRESSION","PSTATEMENT","PASSIGNMENT","PIF","PCOMMENT","PBLOCK","PWHILE","PVARDECL","PFUNDECL","PRET","PCALL",NULL};
const char*partype_names[]={"PNONE","PEMPTY","PEXPRESSION","PSTATEMENT","PASSIGNMENT","PIF","PCOMMENT","PBLOCK","PWHILE","PVARDECL","PFUNDECL","PRET","PCALL",NULL};
// Build a Parser and return it by value.
// The root node comes from pnode_new(), mode starts at 0 and mem_locations
// is a vector created with an element size of sizeof(Var).
// NOTE(review): presumably the result must later be handed to parser_free()
// to release root and mem_locations - confirm ownership with callers.
Parser parser_new(void)
{
Parser p={
.root=pnode_new(),
.mode=0,
.mem_locations=vec_new(sizeof(Var)),
};
return p;
}
@ -22,9 +21,6 @@ Parser parser_new(void)
// Release the resources referenced by a Parser: the root node via
// pnode_free(), every Var stored in mem_locations via var_free(), and then
// the mem_locations vector itself via vec_free().
// p is dereferenced without a NULL check, and the Parser object itself is
// not freed here.
void parser_free(Parser*p)
{
pnode_free(&p->root);
// Free each stored Var before the vector that holds them is released
for(size_t i=0;i<p->mem_locations.size;++i)
var_free(vec_at(&p->mem_locations,i,Var*));
vec_free(&p->mem_locations);
}
PNode pnode_new(void)

View File

@ -11,7 +11,7 @@
typedef enum PARTYPE {PNONE, PEMPTY, PEXPRESSION, PSTATEMENT, PASSIGNMENT, PIF, PCOMMENT, PBLOCK, PWHILE, PVARDECL, PFUNDECL, PRET, PCALL, } PARTYPE;
extern char*partype_names[];
extern const char*partype_names[];
/******
* Grammar:
@ -41,7 +41,6 @@ typedef struct Parser
{
PNode root;
uint32_t mode;
Vec mem_locations;
} Parser;
PNode pnode_new(void);

14
str.c
View File

@ -76,6 +76,20 @@ void str_append(Str*s,char*c)
}
/*****
 * str_append_n(s,c,n)
 * - Append at most n-1 characters of c to s, stopping early if a NUL is
 *   found in c; s stays NUL-terminated and s->size grows by the number
 *   of characters actually copied
 * s	Str*	destination; nothing happens when NULL
 * c	char*	source characters; only the first n-1 bytes are examined,
 *		so it need not be NUL-terminated
 * n	size_t	size of the source window counting room for the terminator,
 *		e.g. n==2 appends a single character; n<2 appends nothing
 *****/
void str_append_n(Str*s,char*c,size_t n)
{
	size_t maxlen;
	size_t len;
	size_t need;
	const char*nul;

	if(!s){if(STRVERBOSE)fprintf(stderr,"str_append_n: NULL Str*\n");return;}
	if(!c){if(STRVERBOSE)fprintf(stderr,"str_append_n: NULL char*\n");return;}

	// n counts the terminator; n==0 must not wrap around to SIZE_MAX
	maxlen=n?n-1:0;

	// Copy only what is really there: a source shorter than n-1 must not
	// advance size past the end of the string
	nul=memchr(c,0,maxlen);
	len=nul?(size_t)(nul-c):maxlen;

	// Room for the existing text, the new characters and the terminator
	need=s->size+len+1;
	if(s->capacity<need)
		str_grow(s,need+STRDEFSIZE);
	if(!s->buffer){if(STRVERBOSE)fprintf(stderr,"str_append_n: buffer is NULL after str_grow\n");return;}

	// Bounded copy: never writes more than len characters plus the NUL,
	// unlike strncat(dst,src,n) which may write n characters plus the NUL
	memcpy(s->buffer+s->size,c,len);
	s->size+=len;
	s->buffer[s->size]=0;
}
void str_tr(Str*s,char a,char b)
{
if(!s){if(STRVERBOSE)fprintf(stderr,"str_tr: NULL Str*\n");return;}

1
str.h
View File

@ -22,6 +22,7 @@ Str str_newa(char*c);
bool str_isfloat(char*s);
bool str_isint(char*s);
void str_append(Str*s,char*c);
void str_append_n(Str*s,char*c,size_t n);
void str_assign(Str*s,char*c);
void str_clear(Str*s);
void str_free(Str*s);