Browse Source

Initial commit

ModuleSystem
Macoy Madson 3 years ago
commit
08f0cd81b8
  1. 22
      .clang-format
  2. 33
      .gitignore
  3. 2
      Jamfile
  4. 91
      Jamrules
  5. 21
      ReadMe.org
  6. 5
      src/Jamfile
  7. 53
      src/Main.cpp
  8. 141
      src/Tokenizer.cpp
  9. 24
      src/Tokenizer.hpp
  10. 4
      test/Test.cake

22
.clang-format

@ -0,0 +1,22 @@
# http://releases.llvm.org/6.0.0/tools/clang/docs/ClangFormatStyleOptions.html
BasedOnStyle: Google
AccessModifierOffset: -4
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: None
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
BreakBeforeBraces: Allman
BraceWrapping:
AfterNamespace: false
BreakBeforeTernaryOperators: false
ColumnLimit: 100
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
IndentWidth: 4
Standard: Cpp11
TabWidth: 4
UseTab: ForIndentation
DerivePointerAlignment: false
PointerAlignment: Left
NamespaceIndentation: None
IndentCaseLabels: true

33
.gitignore

@ -0,0 +1,33 @@
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
#*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Emacs stuff
TAGS
# Ignore executable
cakelisp

2
Jamfile

@ -0,0 +1,2 @@
SubDir . ;
SubInclude . src ;

91
Jamrules

@ -0,0 +1,91 @@
##
## Compilation
##
C++ = clang++ ;
LINK = clang++ ;
# C++ = g++ ;
# LINK = g++ ;
# If I was building a library, these would be useful
# LINKFLAGS = -shared ;
# if $(UNIX) { SUFSHR = .so ; }
# else if $(NT) { SUFSHR = .dll ; }
## Compiler arguments
# Arguments used on all projects, regardless of any variables
C++FLAGS = -std=c++11 -Wall -Wextra -Wno-unused-parameter
# Only for profiling, i.e. not release builds
# -DTRACY_ENABLE
# BT_USE_DOUBLE_PRECISION solves the Dantzig LCP missing definition
# Disabled now that I'm compiling Bullet in single-precision
# -DBT_USE_DOUBLE_PRECISION
-g ;
# HDRS = src
# # Dependencies/base2.0
# # Dependencies/glm
# # Dependencies/tracy
# Dependencies/curl/include
# Dependencies/rapidjson/include
# Dependencies/mecab/build/local/include
# Dependencies/parallel-hashmap/parallel_hashmap
# ;
# TODO: Make base hold all this weirdness?
# if $(DEBUG_BUILD)
# {
# SFML_LINKLIBS = -lsfml-audio-d -lsfml-graphics-d -lsfml-window-d -lsfml-system-d ;
# }
# else
# {
# SFML_LINKLIBS = -lsfml-audio -lsfml-graphics -lsfml-window -lsfml-system ;
# }
OPTIM = -O0 ;
##
## Linking
##
LINKLIBS =
# Standard (e.g. for Tracy)
-lpthread -ldl
;
# LINKFLAGS = -Wl,-rpath,. ;
LINKFLAGS = -g ;
# TODO: Copy libs to better directory, or static link?
# -Wl,-rpath,.:Dependencies/curl/local_install/lib:Dependencies/mecab/build/local/lib ;
##
## Jam stuff
##
# Fix for unnecessary rebuilding any Jam project
KEEPOBJS = true ; # This doesn't actually fix anything, though it seems like it should
NOARSCAN = true ; # This actually fixes the problem
#AR = ar rUu ; # I was thinking maybe the AR command was wrong (always outputting deterministically)
# It doesn't seem like this is the problem though
AR = ar cr ;
# Cross compilation
# E.g.
# jam -j4 -q -sCROSS_COMPILE_WINDOWS=true
# if $(CROSS_COMPILE_WINDOWS)
# {
# CC = x86_64-w64-mingw32-gcc ;
# LINK = x86_64-w64-mingw32-gcc ;
# AR = x86_64-w64-mingw32-ar ;
# SUFSHR = .dll ;
# }
# Some helpful Jam commands
# -q : stop on failed target
# -jN : use N cores
# -sVAR=VAL : Set VAR to VAL. Note that setting WINDOWS=false is the same as setting UNREAL=true,
# frustratingly
# -dx : print commands being used
# -n : don't actually run commands

21
ReadMe.org

@ -0,0 +1,21 @@
#+TITLE:Cakelisp
This is my experiment in writing a Lisp-like language where I [[https://en.wikipedia.org/wiki/You_can%27t_have_your_cake_and_eat_it][can have my cake and eat it (too)]].
The end goal is a hot-reloadable, non-garbage-collected language ideal for high performance, iteratively-developed programs.
This is the first language project I've embarked on, so it will be riddled with mistakes and naïveté. I wanted to do this after my [[https://macoy.me/code/macoy/LanguageTests][LanguageTests]] experiment revealed just how wacky Common Lisp implementations are with regard to performance. It is more of a learning experience.
* Desired features
- The metaprogramming capabilities of Lisp
- The performance of C
- "Real" types: Types are identical to C types, e.g. ~int~ is 32 bits with no sign bit or anything like other Lisp implementations do
- No garbage collection: I can handle my own memory
- Hot reloading: It should be possible to make modifications to functions *and structures* at runtime to quickly iterate
- Truly seamless C interoperability: No bindings, no wrappers: C types and functions are as easy to call as they are in C
* Plan
- Tokenizer and parser written in C++
- Export AST to C (C++?). Lisp-y constructs shouldn't stray too far from C style
- Compile generated C
- For metaprograms, feed AST into compiled C executables, as long as there are macros to parse

5
src/Jamfile

@ -0,0 +1,5 @@
Main cakelisp : Main.cpp Tokenizer.cpp ;
MakeLocate cakelisp : ../cakelisp ;
SubDir . src ;

53
src/Main.cpp

@ -0,0 +1,53 @@
#include <stdio.h>
#include <vector>
#include "Tokenizer.hpp"
int main(int argc, char* argv[])
{
if (argc != 2)
{
printf("Need to provide a file to parse\n");
return 1;
}
FILE* file = nullptr;
const char* filename = argv[1];
file = fopen(filename, "r");
if (!file)
{
printf("Error: Could not open %s\n", filename);
return 1;
}
char lineBuffer[2048] = {0};
int lineNumber = 1;
std::vector<Token> tokens;
while (fgets(lineBuffer, sizeof(lineBuffer), file))
{
printf("%s", lineBuffer);
const char* error = tokenizeLine(lineBuffer, lineNumber, tokens);
if (error != nullptr)
{
printf("%s:%d: %s\n", filename, lineNumber, error);
return 1;
}
lineNumber++;
}
printf("\nResult:\n");
for (Token& token : tokens)
{
printf("%s", tokenTypeToString(token.type));
if (token.type == TokenType_Symbol && token.contents)
printf("%s\n", token.contents);
else
printf("\n");
}
fclose(file);
return 0;
}

141
src/Tokenizer.cpp

@ -0,0 +1,141 @@
#include "Tokenizer.hpp"
#include <stdio.h>
#include <cctype>
#include <cstring>
// Character which starts a comment; tokenizeLine() stops reading the line when it sees this
// outside of a symbol or string
static const char commentCharacter = ';';
// What tokenizeLine() is currently in the middle of reading
enum TokenizeState
{
// Between tokens: looking for parentheses, comments, or the start of a symbol or string
TokenizeState_Normal,
// Accumulating the characters of a symbol
TokenizeState_Symbol,
// Accumulating the characters between a pair of double quotes
TokenizeState_InString
};
// Returns nullptr if no errors, else the error text
const char* tokenizeLine(const char* inputLine, unsigned int lineNumber,
std::vector<Token>& tokensOut)
{
const char* A_OK = nullptr;
TokenizeState tokenizeState = TokenizeState_Normal;
char previousChar = 0;
char contentsBuffer[1024] = {0};
char* contentsBufferWrite = contentsBuffer;
#define WriteContents(character) \
{ \
if (contentsBufferWrite - contentsBuffer < (long)sizeof(contentsBuffer)) \
{ \
*contentsBufferWrite = *currentChar; \
++contentsBufferWrite; \
} \
else \
{ \
return "String too long!"; \
} \
}
#define CopyContentsAndReset(outputString) \
{ \
outputString = nullptr; \
contentsBufferWrite = contentsBuffer; \
}
for (const char* currentChar = inputLine; *currentChar != '\0'; ++currentChar)
{
switch (tokenizeState)
{
case TokenizeState_Normal:
// The whole rest of the line is ignored
if (*currentChar == commentCharacter)
return A_OK;
else if (*currentChar == '(')
{
Token openParen = {TokenType_OpenParen, nullptr};
tokensOut.push_back(openParen);
}
else if (*currentChar == ')')
{
Token closeParen = {TokenType_CloseParen, nullptr};
tokensOut.push_back(closeParen);
}
else if (std::isalpha(*currentChar))
{
tokenizeState = TokenizeState_Symbol;
WriteContents(*currentChar);
}
else if (*currentChar == '"')
{
tokenizeState = TokenizeState_InString;
}
break;
case TokenizeState_Symbol:
// Finished the symbol
if (*currentChar == ' ' || *currentChar == '\n')
{
printf("%s\n", contentsBuffer);
Token symbol = {TokenType_Symbol, nullptr};
CopyContentsAndReset(symbol.contents);
tokensOut.push_back(symbol);
tokenizeState = TokenizeState_Normal;
}
else
{
WriteContents(*currentChar);
}
break;
case TokenizeState_InString:
if (*currentChar == '"' && previousChar != '\\')
{
Token string = {TokenType_String, nullptr};
CopyContentsAndReset(string.contents);
tokensOut.push_back(string);
contentsBufferWrite = contentsBuffer;
tokenizeState = TokenizeState_Normal;
}
else
{
WriteContents(*currentChar);
}
break;
}
}
if (tokenizeState != TokenizeState_Normal)
{
switch (tokenizeState)
{
case TokenizeState_Symbol:
return "Unterminated symbol (code error?)";
case TokenizeState_InString:
return "Unterminated string";
default:
return "Unhandled unexpected state";
}
}
#undef WriteContents
#undef CopyContentsAndReset
return A_OK;
}
// Gives the human-readable name of a token type, for printing.
// The returned string is a literal and must not be freed. Values which do not match any known
// TokenType produce "Unknown type".
const char* tokenTypeToString(TokenType type)
{
	struct TypeName
	{
		TokenType type;
		const char* name;
	};
	static const TypeName typeNames[] = {
	    {TokenType_OpenParen, "OpenParen"},
	    {TokenType_CloseParen, "CloseParen"},
	    {TokenType_Symbol, "Symbol"},
	    {TokenType_Constant, "Constant"},
	    {TokenType_String, "String"},
	};

	for (const TypeName& entry : typeNames)
	{
		if (entry.type == type)
			return entry.name;
	}

	return "Unknown type";
}

24
src/Tokenizer.hpp

@ -0,0 +1,24 @@
#pragma once
#include <vector>
// The kinds of token tokenizeLine() can report
enum TokenType
{
// A '(' character
TokenType_OpenParen,
// A ')' character
TokenType_CloseParen,
// A name, which begins with an alphabetic character
TokenType_Symbol,
// NOTE(review): tokenizeLine() does not produce this type as of this commit; presumably
// intended for literal values such as numbers -- confirm
TokenType_Constant,
// Text which was enclosed in double quotes
TokenType_String
};
// One lexical unit of the input, as appended to the output list by tokenizeLine()
struct Token
{
// Which kind of token this is
TokenType type;
// Only non-null if type is ambiguous, i.e. the type alone does not identify the token (a
// parenthesis needs nothing more, so it always has null contents).
// NOTE(review): this is a raw pointer, so the struct does not express who owns the text;
// check tokenizeLine() before freeing or copying it
char* contents;
};
// Tokenizes a single null-terminated line of text, appending the tokens found to tokensOut.
// Returns nullptr if no errors, else the error text (a string literal; do not free it)
const char* tokenizeLine(const char* inputLine, unsigned int lineNumber,
std::vector<Token>& tokensOut);
// Returns a printable name for the given token type, or "Unknown type" if it is not recognized.
// The result is a string literal; do not free it
const char* tokenTypeToString(TokenType type);

4
test/Test.cake

@ -0,0 +1,4 @@
(print "This is a test 0")
(print "This is a test 1")
(print "This is a test 2")
(print "This is a test 3")
Loading…
Cancel
Save