import re

# Token types and their regex patterns, tried in order
# (keywords must come before identifiers).
TOKEN_SPECS = [
    ('KEYWORD', r'\b(int|float|return)\b'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9]*'),
    ('NUMBER', r'\d+'),
    ('OPERATOR', r'[+\-*/=<>!&|]'),
    ('DELIMITER', r'[;,(){}]'),
    ('WHITESPACE', r'[ \t\n]+'),
]

# The lexer matches the patterns at the current position and yields
# (token_type, lexeme) pairs, skipping whitespace.
def lexer(data):
    pos = 0
    while pos < len(data):
        match = None
        for token_type, pattern in TOKEN_SPECS:
            match = re.compile(pattern).match(data, pos)
            if match:
                if token_type != 'WHITESPACE':  # Ignore whitespace
                    yield (token_type, match.group(0))
                pos = match.end()
                break
        if not match:
            raise SyntaxError(f'Illegal character at position {pos}')

# Recursive-descent parser that builds an AST for a single C function.
def parser(tokens):
    token = next(tokens, None)
    if not token:
        raise SyntaxError("Empty input")

    def parse_type():
        nonlocal token
        if token and token[0] == 'KEYWORD' and token[1] in ('int', 'float'):
            type_name = token[1]
            token = next(tokens, None)
            return type_name
        raise SyntaxError("Expected type")

    def parse_identifier():
        nonlocal token
        if token and token[0] == 'IDENTIFIER':
            name = token[1]
            token = next(tokens, None)
            return name
        raise SyntaxError("Expected identifier")

    def parse_expression():
        nonlocal token
        if token and token[0] in ('IDENTIFIER', 'NUMBER'):
            value = token[1]
            token = next(tokens, None)
            return value
        raise SyntaxError("Expected expression")

    def expect_delimiter(delim):
        nonlocal token
        if token and token[0] == 'DELIMITER' and token[1] == delim:
            token = next(tokens, None)
            return
        raise SyntaxError(f"Expected '{delim}'")

    def parse_statement():
        nonlocal token
        # return <expr> ;
        if token[0] == 'KEYWORD' and token[1] == 'return':
            token = next(tokens, None)
            return_expr = parse_expression()
            expect_delimiter(';')
            return ('return', return_expr)
        # <type> <name> [= <expr>] ;   (local declaration)
        if token[0] == 'KEYWORD' and token[1] in ('int', 'float'):
            decl_type = parse_type()
            decl_name = parse_identifier()
            init_expr = None
            if token and token[0] == 'OPERATOR' and token[1] == '=':
                token = next(tokens, None)
                init_expr = parse_expression()
            expect_delimiter(';')
            return ('declare', decl_type, decl_name, init_expr)
        raise SyntaxError(f"Unexpected token {token!r} in function body")

    def parse_function():
        func_type = parse_type()
        func_name = parse_identifier()
        expect_delimiter('(')
        expect_delimiter(')')
        expect_delimiter('{')
        statements = []
        while token and not (token[0] == 'DELIMITER' and token[1] == '}'):
            statements.append(parse_statement())
        expect_delimiter('}')
        return ('function', func_type, func_name, statements)

    return parse_function()

# Intermediate code generation (for simplicity, we just re-emit the function
# and its statements as text).
def code_generation(ast):
    if ast[0] == 'function':
        func_type, func_name, statements = ast[1], ast[2], ast[3]
        print(f"Function: {func_type} {func_name}() {{")
        for stmt in statements:
            if stmt[0] == 'declare':
                decl_type, decl_name, init_expr = stmt[1], stmt[2], stmt[3]
                init = f" = {init_expr}" if init_expr is not None else ""
                print(f"  {decl_type} {decl_name}{init};")
            elif stmt[0] == 'return':
                print(f"  return {stmt[1]};")
        print("}")

# Main compilation process: lex, parse, then generate code.
def compile_c(data):
    print("Starting compilation...\n")
    tokens_gen = lexer(data)
    ast = parser(tokens_gen)
    code_generation(ast)

# Example C program
c_code = '''
int main() {
    int a = 5;
    return a;
}
'''

compile_c(c_code)
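
# Quick sanity check: with the toy grammar above (only 'return' statements and
# simple int/float declarations inside a single parameterless function are
# recognized), running this script should print output along these lines:
#
#   Starting compilation...
#
#   Function: int main() {
#     int a = 5;
#     return a;
#   }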