@ -0,0 +1,22 @@ | |||
# http://releases.llvm.org/6.0.0/tools/clang/docs/ClangFormatStyleOptions.html | |||
BasedOnStyle: Google | |||
AccessModifierOffset: -4 | |||
AllowShortBlocksOnASingleLine: false | |||
AllowShortFunctionsOnASingleLine: None | |||
AllowShortIfStatementsOnASingleLine: false | |||
AllowShortLoopsOnASingleLine: false | |||
BreakBeforeBraces: Allman | |||
BraceWrapping: | |||
AfterNamespace: false | |||
BreakBeforeTernaryOperators: false | |||
ColumnLimit: 100 | |||
ConstructorInitializerIndentWidth: 4 | |||
ContinuationIndentWidth: 4 | |||
IndentWidth: 4 | |||
Standard: Cpp11 | |||
TabWidth: 4 | |||
UseTab: ForIndentation | |||
DerivePointerAlignment: false | |||
PointerAlignment: Left | |||
NamespaceIndentation: None | |||
IndentCaseLabels: true |
@ -0,0 +1,33 @@ | |||
# Prerequisites | |||
*.d | |||
# Compiled Object files | |||
*.slo | |||
*.lo | |||
*.o | |||
#*.obj | |||
# Precompiled Headers | |||
*.gch | |||
*.pch | |||
# Compiled Dynamic libraries | |||
*.so | |||
*.dylib | |||
*.dll | |||
# Fortran module files | |||
*.mod | |||
*.smod | |||
# Compiled Static libraries | |||
*.lai | |||
*.la | |||
*.a | |||
*.lib | |||
# Emacs stuff | |||
TAGS | |||
# Ignore executable | |||
cakelisp |
@ -0,0 +1,2 @@ | |||
SubDir . ; | |||
SubInclude . src ; |
@ -0,0 +1,91 @@ | |||
## | |||
## Compilation | |||
## | |||
C++ = clang++ ; | |||
LINK = clang++ ; | |||
# C++ = g++ ; | |||
# LINK = g++ ; | |||
# If I was building a library, these would be useful | |||
# LINKFLAGS = -shared ; | |||
# if $(UNIX) { SUFSHR = .so ; } | |||
# else if $(NT) { SUFSHR = .dll ; } | |||
## Compiler arguments | |||
# Arguments used on all projects, regardless of any variables | |||
C++FLAGS = -std=c++11 -Wall -Wextra -Wno-unused-parameter | |||
# Only for profiling, i.e. not release builds | |||
# -DTRACY_ENABLE | |||
# BT_USE_DOUBLE_PRECISION solves the Dantzig LCP missing definition | |||
# Disabled now that I'm compiling Bullet in single-precision | |||
# -DBT_USE_DOUBLE_PRECISION | |||
-g ; | |||
# HDRS = src | |||
# # Dependencies/base2.0 | |||
# # Dependencies/glm | |||
# # Dependencies/tracy | |||
# Dependencies/curl/include | |||
# Dependencies/rapidjson/include | |||
# Dependencies/mecab/build/local/include | |||
# Dependencies/parallel-hashmap/parallel_hashmap | |||
# ; | |||
# TODO: Make base hold all this weirdness? | |||
# if $(DEBUG_BUILD) | |||
# { | |||
# SFML_LINKLIBS = -lsfml-audio-d -lsfml-graphics-d -lsfml-window-d -lsfml-system-d ; | |||
# } | |||
# else | |||
# { | |||
# SFML_LINKLIBS = -lsfml-audio -lsfml-graphics -lsfml-window -lsfml-system ; | |||
# } | |||
OPTIM = -O0 ; | |||
## | |||
## Linking | |||
## | |||
LINKLIBS = | |||
# Standard (e.g. for Tracy) | |||
-lpthread -ldl | |||
; | |||
# LINKFLAGS = -Wl,-rpath,. ; | |||
LINKFLAGS = -g ; | |||
# TODO: Copy libs to better directory, or static link? | |||
# -Wl,-rpath,.:Dependencies/curl/local_install/lib:Dependencies/mecab/build/local/lib ; | |||
## | |||
## Jam stuff | |||
## | |||
# Fix for unnecessary rebuilding any Jam project | |||
KEEPOBJS = true ; # This doesn't actually fix anything, though it seems like it should | |||
NOARSCAN = true ; # This actually fixes the problem | |||
#AR = ar rUu ; # I was thinking maybe the AR command was wrong (always outputting deterministically) | |||
# It doesn't seem like this is the problem though | |||
AR = ar cr ; | |||
# Cross compilation | |||
# E.g. | |||
# jam -j4 -q -sCROSS_COMPILE_WINDOWS=true | |||
# if $(CROSS_COMPILE_WINDOWS) | |||
# { | |||
# CC = x86_64-w64-mingw32-gcc ; | |||
# LINK = x86_64-w64-mingw32-gcc ; | |||
# AR = x86_64-w64-mingw32-ar ; | |||
# SUFSHR = .dll ; | |||
# } | |||
# Some helpful Jam commands | |||
# -q : stop on failed target | |||
# -jN : use N cores | |||
# -sVAR=VAL : Set VAR to VAL. Note that setting WINDOWS=false is the same as setting UNREAL=true, | |||
# frustratingly | |||
# -dx : print commands being used | |||
# -n : don't actually run commands |
@ -0,0 +1,21 @@ | |||
#+TITLE:Cakelisp | |||
This is my experiment of writing a Lisp-like language where I [[https://en.wikipedia.org/wiki/You_can%27t_have_your_cake_and_eat_it][can have my cake and eat it (too)]]. | |||
The end goal is a hot-reloadable, non-garbage-collected language ideal for high performance, iteratively-developed programs. | |||
This is the first language project I've embarked on, so it will be riddled with mistakes and naïveté. I wanted to do this after my [[https://macoy.me/code/macoy/LanguageTests][LanguageTests]] experiment revealed just how wacky Common Lisp implementations are in regards to performance. It is more of a learning experience. | |||
* Desired features | |||
- The metaprogramming capabilities of Lisp | |||
- The performance of C | |||
- "Real" types: Types are identical to C types, e.g. ~int~ is 32 bits with no sign bit or anything like other Lisp implementations do | |||
- No garbage collection: I can handle my own memory | |||
- Hot reloading: It should be possible to make modifications to functions *and structures* at runtime to quickly iterate | |||
- Truly seamless C interoperability: No bindings, no wrappers: C types and functions are as easy to call as they are in C | |||
* Plan | |||
- Tokenizer and parser written in C++
- Export AST to C (C++?). Lisp-y constructs shouldn't stray too far from C style | |||
- Compile generated C | |||
- For metaprograms, feed AST into compiled C executables, as long as there are macros to parse |
@ -0,0 +1,5 @@ | |||
Main cakelisp : Main.cpp Tokenizer.cpp ; | |||
MakeLocate cakelisp : ../cakelisp ; | |||
SubDir . src ; |
@ -0,0 +1,53 @@ | |||
#include <stdio.h> | |||
#include <vector> | |||
#include "Tokenizer.hpp" | |||
int main(int argc, char* argv[]) | |||
{ | |||
if (argc != 2) | |||
{ | |||
printf("Need to provide a file to parse\n"); | |||
return 1; | |||
} | |||
FILE* file = nullptr; | |||
const char* filename = argv[1]; | |||
file = fopen(filename, "r"); | |||
if (!file) | |||
{ | |||
printf("Error: Could not open %s\n", filename); | |||
return 1; | |||
} | |||
char lineBuffer[2048] = {0}; | |||
int lineNumber = 1; | |||
std::vector<Token> tokens; | |||
while (fgets(lineBuffer, sizeof(lineBuffer), file)) | |||
{ | |||
printf("%s", lineBuffer); | |||
const char* error = tokenizeLine(lineBuffer, lineNumber, tokens); | |||
if (error != nullptr) | |||
{ | |||
printf("%s:%d: %s\n", filename, lineNumber, error); | |||
return 1; | |||
} | |||
lineNumber++; | |||
} | |||
printf("\nResult:\n"); | |||
for (Token& token : tokens) | |||
{ | |||
printf("%s", tokenTypeToString(token.type)); | |||
if (token.type == TokenType_Symbol && token.contents) | |||
printf("%s\n", token.contents); | |||
else | |||
printf("\n"); | |||
} | |||
fclose(file); | |||
return 0; | |||
} |
@ -0,0 +1,141 @@ | |||
#include "Tokenizer.hpp" | |||
#include <stdio.h> | |||
#include <cctype> | |||
// A semicolon begins a comment that runs to the end of the line (Lisp convention)
static const char commentCharacter = ';';

// Per-character state for the line tokenizer state machine in tokenizeLine()
enum TokenizeState
{
	// Between tokens, scanning for the start of the next one
	TokenizeState_Normal,
	// Accumulating the characters of a symbol
	TokenizeState_Symbol,
	// Accumulating the characters of a double-quoted string
	TokenizeState_InString
};
// Returns nullptr if no errors, else the error text | |||
const char* tokenizeLine(const char* inputLine, unsigned int lineNumber, | |||
std::vector<Token>& tokensOut) | |||
{ | |||
const char* A_OK = nullptr; | |||
TokenizeState tokenizeState = TokenizeState_Normal; | |||
char previousChar = 0; | |||
char contentsBuffer[1024] = {0}; | |||
char* contentsBufferWrite = contentsBuffer; | |||
#define WriteContents(character) \ | |||
{ \ | |||
if (contentsBufferWrite - contentsBuffer < (long)sizeof(contentsBuffer)) \ | |||
{ \ | |||
*contentsBufferWrite = *currentChar; \ | |||
++contentsBufferWrite; \ | |||
} \ | |||
else \ | |||
{ \ | |||
return "String too long!"; \ | |||
} \ | |||
} | |||
#define CopyContentsAndReset(outputString) \ | |||
{ \ | |||
outputString = nullptr; \ | |||
contentsBufferWrite = contentsBuffer; \ | |||
} | |||
for (const char* currentChar = inputLine; *currentChar != '\0'; ++currentChar) | |||
{ | |||
switch (tokenizeState) | |||
{ | |||
case TokenizeState_Normal: | |||
// The whole rest of the line is ignored | |||
if (*currentChar == commentCharacter) | |||
return A_OK; | |||
else if (*currentChar == '(') | |||
{ | |||
Token openParen = {TokenType_OpenParen, nullptr}; | |||
tokensOut.push_back(openParen); | |||
} | |||
else if (*currentChar == ')') | |||
{ | |||
Token closeParen = {TokenType_CloseParen, nullptr}; | |||
tokensOut.push_back(closeParen); | |||
} | |||
else if (std::isalpha(*currentChar)) | |||
{ | |||
tokenizeState = TokenizeState_Symbol; | |||
WriteContents(*currentChar); | |||
} | |||
else if (*currentChar == '"') | |||
{ | |||
tokenizeState = TokenizeState_InString; | |||
} | |||
break; | |||
case TokenizeState_Symbol: | |||
// Finished the symbol | |||
if (*currentChar == ' ' || *currentChar == '\n') | |||
{ | |||
printf("%s\n", contentsBuffer); | |||
Token symbol = {TokenType_Symbol, nullptr}; | |||
CopyContentsAndReset(symbol.contents); | |||
tokensOut.push_back(symbol); | |||
tokenizeState = TokenizeState_Normal; | |||
} | |||
else | |||
{ | |||
WriteContents(*currentChar); | |||
} | |||
break; | |||
case TokenizeState_InString: | |||
if (*currentChar == '"' && previousChar != '\\') | |||
{ | |||
Token string = {TokenType_String, nullptr}; | |||
CopyContentsAndReset(string.contents); | |||
tokensOut.push_back(string); | |||
contentsBufferWrite = contentsBuffer; | |||
tokenizeState = TokenizeState_Normal; | |||
} | |||
else | |||
{ | |||
WriteContents(*currentChar); | |||
} | |||
break; | |||
} | |||
} | |||
if (tokenizeState != TokenizeState_Normal) | |||
{ | |||
switch (tokenizeState) | |||
{ | |||
case TokenizeState_Symbol: | |||
return "Unterminated symbol (code error?)"; | |||
case TokenizeState_InString: | |||
return "Unterminated string"; | |||
default: | |||
return "Unhandled unexpected state"; | |||
} | |||
} | |||
#undef WriteContents | |||
#undef CopyContentsAndReset | |||
return A_OK; | |||
} | |||
// Maps a TokenType to a human-readable name, for debug printing
const char* tokenTypeToString(TokenType type)
{
	const char* typeName = "Unknown type";
	switch (type)
	{
		case TokenType_OpenParen:
			typeName = "OpenParen";
			break;
		case TokenType_CloseParen:
			typeName = "CloseParen";
			break;
		case TokenType_Symbol:
			typeName = "Symbol";
			break;
		case TokenType_Constant:
			typeName = "Constant";
			break;
		case TokenType_String:
			typeName = "String";
			break;
		default:
			break;
	}
	return typeName;
}
@ -0,0 +1,24 @@ | |||
#pragma once
#include <vector>
// The kinds of tokens the tokenizer produces
enum TokenType
{
	TokenType_OpenParen,
	TokenType_CloseParen,
	TokenType_Symbol,
	// NOTE(review): no code in Tokenizer.cpp produces Constant yet — presumably
	// reserved for numeric/literal constants; confirm when implemented
	TokenType_Constant,
	TokenType_String
};
// A single token read from a line of source text
struct Token
{
	TokenType type;
	// Only non-null if type is ambiguous
	// NOTE(review): tokenizeLine currently pushes nullptr contents for every token;
	// presumably Symbol and String are meant to carry their text — confirm, and
	// decide who owns/frees this buffer
	char* contents;
};
// Tokenizes one line, appending tokens to tokensOut.
// Returns nullptr if no errors, else the error text (a static string).
// lineNumber is not read by the current implementation
const char* tokenizeLine(const char* inputLine, unsigned int lineNumber,
                         std::vector<Token>& tokensOut);
// Returns a human-readable name for the given token type
const char* tokenTypeToString(TokenType type);
@ -0,0 +1,4 @@ | |||
(print "This is a test 0") | |||
(print "This is a test 1") | |||
(print "This is a test 2") | |||
(print "This is a test 3") |