MiniDevil As beautiful as a shell
tokenizer.c
Go to the documentation of this file.
1 /* ************************************************************************** */
2 /* */
3 /* ::: :::::::: */
4 /* tokenizer.c :+: :+: :+: */
5 /* +:+ +:+ +:+ */
6 /* By: baelgadi <baelgadi@student.42.fr> +#+ +:+ +#+ */
7 /* +#+#+#+#+#+ +#+ */
8 /* Created: 2025/12/04 22:33:43 by zotaj-di #+# #+# */
9 /* Updated: 2026/03/04 04:43:17 by baelgadi ### ########.fr */
10 /* */
11 /* ************************************************************************** */
12 
13 #include "token.h"
14 #include "libft.h"
15 
24 static char *extract_unquoted(char *str, int *len, t_quote_type *qtype)
25 {
26  int i;
27 
28  i = 0;
29  while (str[i] && !is_whitespace(str[i]) && !is_operator(str[i])
30  && str[i] != '\'' && str[i] != '"')
31  i++;
32  *len = i;
33  *qtype = QUOTE_NONE;
34  return (ft_substr(str, 0, i));
35 }
36 
47 static char *extract_quoted(char *str, int *len, t_quote_type *qtype)
48 {
49  char quote;
50  int end;
51 
52  quote = str[0];
53  end = 1;
54  while (str[end] && str[end] != quote)
55  end++;
56  if (!str[end])
57  {
58  ft_putstr_fd("minishell: unclosed quote\n", 2);
59  return (NULL);
60  }
61  if (quote == '\'')
62  *qtype = QUOTE_SINGLE;
63  else
64  *qtype = QUOTE_DOUBLE;
65  *len = end + 1;
66  return (ft_substr(str, 1, end - 1));
67 }
68 
80 int process_word_token(char *s, t_token **head)
81 {
82  int i;
83  int len;
84  char *chunk;
85  t_quote_type qtype;
86  t_token *token;
87 
88  i = 0;
89  while (s[i] && !is_whitespace(s[i]) && !is_operator(s[i]))
90  {
91  if (s[i] == '\'' || s[i] == '"')
92  chunk = extract_quoted(s + i, &len, &qtype);
93  else
94  chunk = extract_unquoted(s + i, &len, &qtype);
95  if (!chunk)
96  return (-1);
97  token = create_token(TOKEN_WORD, chunk);
98  free(chunk);
99  if (!token)
100  return (-1);
101  token->quote_type = qtype;
102  i += len;
103  token->connected = (s[i] && !is_whitespace(s[i]) && !is_operator(s[i]));
104  add_token(head, token);
105  }
106  return (i);
107 }
108 
118 int process_operator_token(char *input, t_token **head)
119 {
120  int len;
121  t_token_type type;
122  char *op;
123 
124  type = get_operator_token_type(input, &len);
125  op = ft_substr(input, 0, len);
126  if (!op)
127  return (-1);
128  add_token(head, create_token(type, op));
129  free(op);
130  return (len);
131 }
132 
144 t_token *tokenize(char *input)
145 {
146  t_token *head;
147  int i;
148  int len;
149 
150  head = NULL;
151  i = 0;
152  while (input && input[i])
153  {
154  while (input[i] && is_whitespace(input[i]))
155  i++;
156  if (!input[i])
157  break ;
158  if (is_operator(input[i]))
159  len = process_operator_token(input + i, &head);
160  else
161  len = process_word_token(input + i, &head);
162  if (len < 0)
163  {
164  free_token_list(head);
165  return (NULL);
166  }
167  i += len;
168  }
169  return (head);
170 }
t_token_type
Token types produced by the lexer.
Definition: structs.h:24
@ TOKEN_WORD
Definition: structs.h:25
t_quote_type
Quote context of a token.
Definition: structs.h:41
@ QUOTE_SINGLE
Definition: structs.h:43
@ QUOTE_NONE
Definition: structs.h:42
@ QUOTE_DOUBLE
Definition: structs.h:44
Lexer token (singly linked list)
Definition: structs.h:71
t_quote_type quote_type
Quote context.
Definition: structs.h:74
int connected
1 if the text right after this token continues the same word (no whitespace or operator in between), 0 otherwise
Definition: structs.h:75
void free_token_list(t_token *head)
Free the entire token linked list.
Definition: token.c:64
void add_token(t_token **head, t_token *new_token)
Add a token to the end of a linked list.
Definition: token.c:84
t_token * create_token(t_token_type type, char *value)
Allocate and initialize a new token.
Definition: token.c:26
Lexer, tokenizer, expander, and quote handling prototypes.
static char * extract_quoted(char *str, int *len, t_quote_type *qtype)
Extract a quoted text part, stripping the quotes.
Definition: tokenizer.c:47
int process_word_token(char *s, t_token **head)
Process a word token composed of quoted and unquoted chunks.
Definition: tokenizer.c:80
static char * extract_unquoted(char *str, int *len, t_quote_type *qtype)
Extract an unquoted text part until any delimiter is reached.
Definition: tokenizer.c:24
t_token * tokenize(char *input)
Tokenize the input string into a linked list of tokens.
Definition: tokenizer.c:144
int process_operator_token(char *input, t_token **head)
Process an operator token.
Definition: tokenizer.c:118
int is_whitespace(char c)
Check if a character is whitespace.
t_token_type get_operator_token_type(char *str, int *len)
Determine operator token type and its length in characters.
int is_operator(char c)
Check if a character is a shell operator.