Browse Source

Initial exploration of module system

It turns out I do need a proper dependency resolution system.
I'm still feeling it out, so the code is still in a half-done state.

I also did some work on the RunProcess interface
ModuleSystem
Macoy Madson 10 months ago
parent
commit
5598f4c3b3
9 changed files with 334 additions and 40 deletions
  1. +98
    -17
      src/Evaluator.cpp
  2. +46
    -2
      src/Evaluator.hpp
  3. +9
    -0
      src/EvaluatorEnums.hpp
  4. +90
    -0
      src/Generators.cpp
  5. +2
    -1
      src/Jamfile
  6. +53
    -3
      src/Main.cpp
  7. +21
    -17
      src/RunProcess.cpp
  8. +9
    -0
      src/RunProcess.hpp
  9. +6
    -0
      test/Basic.cake

+ 98
- 17
src/Evaluator.cpp View File

@ -118,22 +118,56 @@ bool HandleInvocation_Recursive(EvaluatorEnvironment& environment, const Evaluat
{
return invokedGenerator(environment, context, tokens, invocationStartIndex, output);
}
else if (context.scope == EvaluatorScope_Module)
{
ErrorAtTokenf(invocationStart,
"Unknown function %s. Only macros and generators may be "
"invoked at top level",
invocationName.contents.c_str());
return false;
}
else
{
// The only hard-coded generator: basic function invocations. We must hard-code this
// because we don't interpret any C/C++ in order to determine which functions are valid
// to call (we just let the C/C++ compiler determine that for us)
return FunctionInvocationGenerator(environment, context, tokens, invocationStartIndex,
output);
// We don't know what this is. We cannot guess it is a C/C++ function yet, because it
// could be a generator or macro invocation that hasn't been defined yet. Leave a note
// for the evaluator to come back to this token once a satisfying answer is found
UnknownReference unknownInvocation = {};
unknownInvocation.tokens = &tokens;
unknownInvocation.startIndex = invocationStartIndex;
unknownInvocation.symbolReference = &invocationName;
unknownInvocation.context = context;
unknownInvocation.type = UnknownReferenceType_Invocation;
// Prepare our output splice, which is where output should go once the symbol is
// resolved. Rather than actually inserting and causing potentially massive shifts in
// the output list, the splice will redirect to an external output list. It is the
// responsibility of the Writer to watch for splices
unknownInvocation.output = &output.source;
// We push in a StringOutMod_Splice as a sentinel that the splice list needs to be
// checked. Otherwise, it will be a no-op to Writer. It's useful to have this sentinel
// so that multiple splices take up space and will then maintain sequential order
output.source.push_back(
{EmptyString, StringOutMod_Splice, &invocationStart, &invocationStart});
unknownInvocation.spliceOutputIndex = output.source.size() - 1;
if (context.isMacroOrGeneratorDefinition)
environment.unknownReferencesForCompileTime.push_back(unknownInvocation);
else
environment.unknownReferences.push_back(unknownInvocation);
// We're going to return true even though evaluation isn't yet done. The topmost
// evaluate call should handle resolving all unknown references
}
// TODO Relocate to symbol resolver
// else if (context.scope == EvaluatorScope_Module)
// {
// ErrorAtTokenf(invocationStart,
// "Unknown function %s. Only macros and generators may be "
// "invoked at top level",
// invocationName.contents.c_str());
// return false;
// }
// else
// {
// // The only hard-coded generator: basic function invocations. We must hard-code this
// // because we don't interpret any C/C++ in order to determine which functions are valid
// // to call (we just let the C/C++ compiler determine that for us)
// return FunctionInvocationGenerator(environment, context, tokens, invocationStartIndex,
// output);
// }
}
return true;
@ -252,20 +286,67 @@ int EvaluateGenerateAll_Recursive(EvaluatorEnvironment& environment,
return numErrors;
}
// Attempts to match every reference queued in environment.unknownReferencesForCompileTime against
// the macros and generators currently known to the environment.
//
// Passes over the list are repeated until a pass matches nothing new. Each reference is counted
// as progress at most once; without that, a single match would keep the loop running forever.
//
// Returns whether all references were resolved successfully. Evaluation of matched references and
// the C/C++ function call fallback are still TODO, so this currently always returns false and
// leaves both reference lists untouched; the caller decides what to report based on those lists.
bool EvaluateResolveReferences(EvaluatorEnvironment& environment)
{
	const std::vector<UnknownReference>& references = environment.unknownReferencesForCompileTime;
	// One flag per reference: set once the reference has been matched to a macro or generator
	std::vector<bool> isResolved(references.size(), false);

	// Stop as soon as we do a loop which made no progress
	int numResolvedThisPass = 0;
	do
	{
		numResolvedThisPass = 0;

		// Evaluating a reference (once implemented) may queue further references. Index-based
		// iteration and growing the flags keep this loop valid if the list gets longer
		if (isResolved.size() < references.size())
			isResolved.resize(references.size(), false);

		for (size_t i = 0; i < references.size(); ++i)
		{
			if (isResolved[i])
				continue;

			const std::string& referenceName = references[i].symbolReference->contents;
			MacroFunc macro = findMacro(environment, referenceName.c_str());
			GeneratorFunc generator = findGenerator(environment, referenceName.c_str());
			if (!macro && !generator)
				continue;

			// TODO Evaluate
			isResolved[i] = true;
			++numResolvedThisPass;
		}
	} while (numResolvedThisPass);

	// No more macros/generators are resolvable. Guess that the remaining references are C/C++
	// function calls
	// TODO: backtrack if it turns out they aren't? e.g. in the case of a macro which writes another
	// macro, which a generator uses
	for (size_t i = 0; i < references.size(); ++i)
	{
		if (i < isResolved.size() && isResolved[i])
			continue;

		// The only hard-coded generator: basic function invocations. We must hard-code this
		// because we don't interpret any C/C++ in order to determine which functions are valid
		// to call (we just let the C/C++ compiler determine that for us)
		// if (FunctionInvocationGenerator(environment, reference.context, *reference.tokens,
		// reference.startIndex, output))
		{
		}
	}

	return false;
}
// This serves only as a warning. I want to be very explicit with the lifetime of tokens
// The destructor releases nothing itself: it only checks whether macroExpansions still holds
// entries at destruction time and, if so, prints a reminder that the explicit cleanup function was
// never called.
EvaluatorEnvironment::~EvaluatorEnvironment()
{
if (!macroExpansions.empty())
{
printf(
"Warning: environmentDestroyMacroExpansionsInvalidateTokens() has not been called. "
"This will leak memory.\n Call it once you are certain no tokens in any expansions "
"will be referenced.\n");
"Warning: environmentDestroyInvalidateTokens() has not been called. This will leak "
"memory.\n Call it once you are certain no tokens in any expansions will be "
"referenced.\n");
}
}
void environmentDestroyMacroExpansionsInvalidateTokens(EvaluatorEnvironment& environment)
void environmentDestroyInvalidateTokens(EvaluatorEnvironment& environment)
{
for (CompileTimeFunctionDefiniton& function : environment.compileTimeFunctions)
{
delete function.output;
}
environment.compileTimeFunctions.clear();
for (const std::vector<Token>* macroExpansion : environment.macroExpansions)
delete macroExpansion;
environment.macroExpansions.clear();


+ 46
- 2
src/Evaluator.hpp View File

@ -62,9 +62,31 @@ struct GeneratorOutput
std::vector<ImportMetadata> imports;
};
// This is frequently copied, so keep it small
// Evaluation state handed by const reference to generators and invocation handlers. Code that
// needs different settings for nested tokens copies the context and edits the copy (see how
// DefMacroGenerator builds the context for a macro body).
struct EvaluatorContext
{
// Kind of code currently being evaluated (e.g. EvaluatorScope_Module, EvaluatorScope_Body,
// EvaluatorScope_ExpressionsOnly)
EvaluatorScope scope;
// Macro and generator definitions need to be resolved first
// When set, unknown invocations met in this context are queued in
// EvaluatorEnvironment::unknownReferencesForCompileTime instead of unknownReferences
bool isMacroOrGeneratorDefinition;
};
// Records an invocation the evaluator could not classify when it was encountered (neither a known
// macro nor a known generator), so that it can be revisited later. The field comments below read
// as one sentence. All pointers are non-owning; whatever they point to must outlive this record.
struct UnknownReference
{
// In this list of tokens
const std::vector<Token>* tokens;
// ...at this token
int startIndex;
// (shortcut to symbol with name reference)
const Token* symbolReference;
// ...and in this context
EvaluatorContext context;
// ...there is an unknown reference of type
UnknownReferenceType type;
// Once resolved, output to this list
// (HandleInvocation_Recursive stores the address of the current GeneratorOutput's source list)
std::vector<StringOutput>* output;
// ...at this splice
// (index within *output of the StringOutMod_Splice placeholder pushed for this reference)
int spliceOutputIndex;
};
struct EvaluatorEnvironment;
@ -87,6 +109,16 @@ typedef std::unordered_map<std::string, GeneratorFunc> GeneratorTable;
typedef MacroTable::iterator MacroIterator;
typedef GeneratorTable::iterator GeneratorIterator;
// A macro/generator definition whose generated code has been produced but which is not yet
// available in the environment's macro/generator tables. DefMacroGenerator appends these to
// EvaluatorEnvironment::compileTimeFunctions.
struct CompileTimeFunctionDefiniton
{
// Token that opened the defining invocation (non-owning)
const Token* startInvocation;
// Token holding the function's name (non-owning)
const Token* name;
// Note that these don't need headers or metadata because they are found via dynamic linking.
// GeneratorOutput is (somewhat wastefully) used in order to make the API consistent for
// compile-time vs. runtime code generation
// Heap-allocated; deleted when the environment's explicit destroy function walks
// compileTimeFunctions
GeneratorOutput* output;
};
// Unlike context, which can't be changed, environment can be changed.
// Use care when modifying the environment. Only add things once you know things have succeeded.
// Keep in mind that calling functions which can change the environment may invalidate your pointers
@ -96,12 +128,21 @@ struct EvaluatorEnvironment
MacroTable macros;
GeneratorTable generators;
// Once compiled, these will move into macros and generators lists
std::vector<CompileTimeFunctionDefiniton> compileTimeFunctions;
// We need to keep the tokens macros create around so they can be referenced by StringOperations
// Token vectors must not be changed after they are created or pointers to Tokens will become
// invalid. The const here is to protect from that. You can change the token contents, however
std::vector<const std::vector<Token>*> macroExpansions;
// Will NOT clean up macroExpansions! Use environmentDestroyMacroExpansionsInvalidateTokens()
std::vector<UnknownReference> unknownReferences;
// Macros and generators need their references resolved before any other references can be
// inferred to be C/C++ function calls. This is because macros and generators aren't added to
// the environment until they have been completely resolved, built, and dynamically loaded
std::vector<UnknownReference> unknownReferencesForCompileTime;
// Will NOT clean up macroExpansions! Use environmentDestroyInvalidateTokens()
~EvaluatorEnvironment();
};
@ -109,7 +150,7 @@ struct EvaluatorEnvironment
// Essentially, this means don't call this function unless you will NOT follow any Token pointers in
// GeneratorOutput, StringOutput, etc., because any of them could be pointers to macro-created
// tokens. Essentially, call this as late as possible
void environmentDestroyMacroExpansionsInvalidateTokens(EvaluatorEnvironment& environment);
void environmentDestroyInvalidateTokens(EvaluatorEnvironment& environment);
int EvaluateGenerate_Recursive(EvaluatorEnvironment& environment, const EvaluatorContext& context,
const std::vector<Token>& tokens, int startTokenIndex,
@ -121,4 +162,7 @@ int EvaluateGenerateAll_Recursive(EvaluatorEnvironment& environment,
int startTokenIndex, const StringOutput& delimiterTemplate,
GeneratorOutput& output);
// Returns whether all references were resolved successfully
bool EvaluateResolveReferences(EvaluatorEnvironment& environment);
const char* evaluatorScopeToString(EvaluatorScope expectedScope);

+ 9
- 0
src/EvaluatorEnums.hpp View File

@ -31,6 +31,9 @@ enum StringOutputModifierFlags
// Uses {} for initializer lists etc.
StringOutMod_OpenList = 1 << 12,
StringOutMod_CloseList = 1 << 13,
// Signals the Writer that it needs to splice in another output list
StringOutMod_Splice = 1 << 14,
};
enum ImportLanguage
@ -51,3 +54,9 @@ enum EvaluatorScope
// For example, a C function call cannot have an if statement in its arguments
EvaluatorScope_ExpressionsOnly
};
// Classifies what kind of thing an UnknownReference is waiting to have resolved
enum UnknownReferenceType
{
// NOTE(review): no use visible in this change; presumably a bare symbol rather than a call --
// confirm against callers
UnknownReferenceType_Symbol,
// An invocation whose name was neither a known macro nor a known generator when evaluated
UnknownReferenceType_Invocation
};

+ 90
- 0
src/Generators.cpp View File

@ -831,6 +831,94 @@ bool ArrayAccessGenerator(EvaluatorEnvironment& environment, const EvaluatorCont
return true;
}
// Generator for (defmacro name (args) body...). Only valid at module scope.
//
// The macro is emitted as a C++ function "bool name(<fixed macro arguments>) { body }" into a
// freshly allocated GeneratorOutput, which is then registered in environment.compileTimeFunctions.
// Nothing is written to the `output` argument; it exists to satisfy the generator signature.
//
// Returns false (after reporting through the Expect* helpers or the body evaluation) when the
// invocation is malformed or the body fails to evaluate; in that case the environment is left
// without any trace of the macro.
bool DefMacroGenerator(EvaluatorEnvironment& environment, const EvaluatorContext& context,
                       const std::vector<Token>& tokens, int startTokenIndex,
                       GeneratorOutput& output)
{
	if (!ExpectEvaluatorScope("defmacro", tokens[startTokenIndex], context, EvaluatorScope_Module))
		return false;

	int endDefunTokenIndex = FindCloseParenTokenIndex(tokens, startTokenIndex);
	int endTokenIndex = endDefunTokenIndex;
	int startNameTokenIndex = startTokenIndex;
	StripInvocation(startNameTokenIndex, endTokenIndex);

	int nameIndex = startNameTokenIndex;
	const Token& nameToken = tokens[nameIndex];
	if (!ExpectTokenType("defmacro", nameToken, TokenType_Symbol))
		return false;

	int argsIndex = nameIndex + 1;
	if (!ExpectInInvocation("defmacro expected arguments", tokens, argsIndex, endDefunTokenIndex))
		return false;
	const Token& argsStart = tokens[argsIndex];
	if (!ExpectTokenType("defmacro", argsStart, TokenType_OpenParen))
		return false;

	CompileTimeFunctionDefiniton newFunction = {};
	// Owned by the environment once pushed to compileTimeFunctions (freed by the environment's
	// explicit destroy function); owned by this function until then
	GeneratorOutput* compTimeOutput = new GeneratorOutput;
	newFunction.output = compTimeOutput;
	newFunction.startInvocation = &tokens[startTokenIndex];
	newFunction.name = &nameToken;

	// TODO: It would be nice to support global vs. local macros
	// This only really needs to be an environment distinction, not a code output distinction
	// Macros will be found without headers thanks to dynamic linking
	// bool isModuleLocal = tokens[startTokenIndex + 1].contents.compare("defmacro-local") == 0;

	// Macros must return success or failure
	compTimeOutput->source.push_back(
	    {"bool", StringOutMod_SpaceAfter, &tokens[startTokenIndex], &tokens[startTokenIndex]});
	compTimeOutput->source.push_back(
	    {nameToken.contents, StringOutMod_ConvertFunctionName, &nameToken, &nameToken});
	compTimeOutput->source.push_back({EmptyString, StringOutMod_OpenParen, &argsStart, &argsStart});

	// Macros always receive the same arguments
	// TODO: Output macro arguments with proper output calls
	compTimeOutput->source.push_back(
	    {"EvaluatorEnvironment& environment, const EvaluatorContext& context, const "
	     "std::vector<Token>& tokens, int startTokenIndex, std::vector<Token>& output",
	     StringOutMod_None, &argsStart, &argsStart});

	int endArgsIndex = FindCloseParenTokenIndex(tokens, argsIndex);
	compTimeOutput->source.push_back(
	    {EmptyString, StringOutMod_CloseParen, &tokens[endArgsIndex], &tokens[endArgsIndex]});

	int startBodyIndex = endArgsIndex + 1;
	compTimeOutput->source.push_back(
	    {EmptyString, StringOutMod_OpenBlock, &tokens[startBodyIndex], &tokens[startBodyIndex]});

	// Evaluate our body!
	EvaluatorContext macroBodyContext = context;
	macroBodyContext.scope = EvaluatorScope_Body;
	// Unknown invocations inside the body must be resolved before ordinary ones
	macroBodyContext.isMacroOrGeneratorDefinition = true;
	// TODO Remove this, we don't need it any more
	StringOutput bodyDelimiterTemplate = {EmptyString, StringOutMod_None, nullptr, nullptr};
	int numErrors =
	    EvaluateGenerateAll_Recursive(environment, macroBodyContext, tokens, startBodyIndex,
	                                  bodyDelimiterTemplate, *compTimeOutput);
	if (numErrors)
	{
		// Evaluating the body may already have queued unknown references whose output pointer
		// targets compTimeOutput->source. Remove those before freeing the output so the
		// environment is not left holding dangling pointers
		const std::vector<StringOutput>* doomedOutput = &compTimeOutput->source;
		std::vector<UnknownReference>* referenceLists[] = {
		    &environment.unknownReferences, &environment.unknownReferencesForCompileTime};
		for (std::vector<UnknownReference>* references : referenceLists)
		{
			// Compact in place, preserving the order of the surviving references
			size_t numKept = 0;
			for (size_t i = 0; i < references->size(); ++i)
			{
				if ((*references)[i].output == doomedOutput)
					continue;
				if (numKept != i)
					(*references)[numKept] = (*references)[i];
				++numKept;
			}
			references->erase(references->begin() + numKept, references->end());
		}

		delete compTimeOutput;
		return false;
	}

	compTimeOutput->source.push_back(
	    {EmptyString, StringOutMod_CloseBlock, &tokens[endTokenIndex], &tokens[endTokenIndex]});

	// Takes ownership of output
	environment.compileTimeFunctions.push_back(newFunction);

	return true;
}
//
// C Statement generation
//
enum CStatementOperationType
{
// Insert keywordOrSymbol between each thing
@ -1209,6 +1297,8 @@ void importFundamentalGenerators(EvaluatorEnvironment& environment)
environment.generators["defun"] = DefunGenerator;
environment.generators["defun-local"] = DefunGenerator;
environment.generators["defmacro"] = DefMacroGenerator;
environment.generators["var"] = VariableDeclarationGenerator;
environment.generators["global-var"] = VariableDeclarationGenerator;
environment.generators["static-var"] = VariableDeclarationGenerator;


+ 2
- 1
src/Jamfile View File

@ -6,6 +6,7 @@ Converters.cpp
Writer.cpp
Generators.cpp
GeneratorHelpers.cpp
RunProcess.cpp
;
MakeLocate cakelisp : ../cakelisp ;
@ -14,7 +15,7 @@ Main Square$(SUFSHR) : Square.cpp ;
Main dynamicLoadTest : DynamicLoader.cpp ;
Main runProcessTest : RunProcess.cpp ;
# Main runProcessTest : RunProcess.cpp ;
# C++FLAGS on src/RunProcess.o = -DUNIX ;
SubDir . src ;

+ 53
- 3
src/Main.cpp View File

@ -1,10 +1,12 @@
#include <stdio.h>
#include <string.h>
#include <vector>
#include "Converters.hpp"
#include "Evaluator.hpp"
#include "Generators.hpp"
#include "RunProcess.hpp"
#include "Tokenizer.hpp"
#include "Utilities.hpp"
#include "Writer.hpp"
@ -108,6 +110,30 @@ int main(int argc, char* argv[])
fclose(file);
// TODO Move
{
char sourceOutputName[MAX_PATH_LENGTH] = {0};
PrintfBuffer(sourceOutputName, "%s.cpp", filename);
char fileToExec[MAX_PATH_LENGTH] = {0};
PrintBuffer(fileToExec, "/usr/bin/clang++");
// PrintBuffer(arguments.fileToExecute, "/usr/bin/ls");
// char arg0[64] = {0};
// PrintBuffer(arg0, "--version");
// If not null terminated, the call will fail
// char* arguments[] = {arguments.fileToExecute, strdup("--version"), nullptr};
char* arguments[] = {fileToExec, strdup("-c"), sourceOutputName, nullptr};
CompilationArguments compilationArguments = {};
compilationArguments.fileToExecute = fileToExec;
compilationArguments.arguments = arguments;
if (compileFile(compilationArguments) != 0)
{
delete tokens;
return 1;
}
}
printf("\nParsing and code generation:\n");
EvaluatorEnvironment environment;
@ -123,11 +149,35 @@ int main(int argc, char* argv[])
generatedOutput);
if (numErrors)
{
environmentDestroyMacroExpansionsInvalidateTokens(environment);
environmentDestroyInvalidateTokens(environment);
delete tokens;
return 1;
}
if (!EvaluateResolveReferences(environment))
{
// TODO Add compile time too
if (!environment.unknownReferences.empty() ||
!environment.unknownReferencesForCompileTime.empty())
{
for (const UnknownReference& reference : environment.unknownReferences)
{
ErrorAtTokenf(*reference.symbolReference, "reference to undefined symbol '%s'",
reference.symbolReference->contents.c_str());
}
for (const UnknownReference& reference : environment.unknownReferencesForCompileTime)
{
ErrorAtTokenf(*reference.symbolReference, "reference to undefined symbol '%s'",
reference.symbolReference->contents.c_str());
}
environmentDestroyInvalidateTokens(environment);
delete tokens;
return 1;
}
}
{
NameStyleSettings nameSettings;
WriterFormatSettings formatSettings;
@ -138,13 +188,13 @@ int main(int argc, char* argv[])
if (!writeGeneratorOutput(generatedOutput, nameSettings, formatSettings, outputSettings))
{
environmentDestroyMacroExpansionsInvalidateTokens(environment);
environmentDestroyInvalidateTokens(environment);
delete tokens;
return 1;
}
}
environmentDestroyMacroExpansionsInvalidateTokens(environment);
environmentDestroyInvalidateTokens(environment);
delete tokens;
return 0;
}

+ 21
- 17
src/RunProcess.cpp View File

@ -1,3 +1,5 @@
#include "RunProcess.hpp"
#include <stdio.h>
#ifdef UNIX
@ -10,35 +12,37 @@
#include "Utilities.hpp"
// Never returns, if success
// Replaces the current process image via execvp(). Only has an effect when UNIX is defined;
// otherwise the body is empty and the function returns immediately.
void systemExecute()
void systemExecuteCompile(const CompilationArguments& arguments)
{
#ifdef UNIX
// pid_t pid;
char fileToExec[MAX_PATH_LENGTH] = {0};
PrintBuffer(fileToExec, "/usr/bin/clang++");
// PrintBuffer(fileToExec, "/usr/bin/ls");
// char arg0[64] = {0};
// PrintBuffer(arg0, "--version");
// If not null terminated, the call will fail
// char* arguments[] = {fileToExec, strdup("--version"), nullptr};
char* arguments[] = {fileToExec, strdup("-c"), strdup("test/Hello.cake.cpp"), nullptr};
printf("Running %s\n", fileToExec);
execvp(fileToExec, arguments);
execvp(arguments.fileToExecute, arguments.arguments);
// execvp() only comes back to the caller when it failed, so everything below is the error path
perror("RunProcess execvp() error: ");
printf("Failed to execute %s\n", fileToExec);
printf("Failed to execute %s\n", arguments.fileToExecute);
#endif
}
// Echoes a chunk of text to this process's stdout.
// NOTE(review): presumably called with output read from the child process's pipe -- the call site
// is not visible here. processOutputBuffer must be a null-terminated string.
void subprocessReceiveStdOut(const char* processOutputBuffer)
{
printf("From process: %s", processOutputBuffer);
printf("%s", processOutputBuffer);
}
int main()
// TODO: Make separate pipe for std err?
// void subprocessReceiveStdErr(const char* processOutputBuffer)
// {
// printf("%s", processOutputBuffer);
// }
int compileFile(const CompilationArguments& arguments)
{
#ifdef UNIX
printf("Compiling file with command:\n");
for (char** arg = arguments.arguments; *arg != nullptr; ++arg)
{
printf("%s ", *arg);
}
printf("\n");
int pipeFileDescriptors[2] = {0};
const int PipeRead = 0;
const int PipeWrite = 1;
@ -65,7 +69,7 @@ int main()
}
// Only write
close(pipeFileDescriptors[PipeRead]);
systemExecute();
systemExecuteCompile(arguments);
// A failed child should not flush parent files
_exit(EXIT_FAILURE); /* */
}


+ 9
- 0
src/RunProcess.hpp View File

@ -0,0 +1,9 @@
#pragma once
// Describes the command compileFile() runs. Both members are handed straight to execvp(); the
// struct does not own or free either of them.
struct CompilationArguments
{
// Program to execute (first argument to execvp())
const char* fileToExecute;
// Argument vector for the program. Must end with a nullptr entry: compileFile() walks it until
// it reaches nullptr, and the exec call fails without the terminator
char** arguments;
};
int compileFile(const CompilationArguments& arguments);

+ 6
- 0
test/Basic.cake View File

@ -73,3 +73,9 @@
(return false))
(var int name-token-index (+ start-token-index 1))
(return true))
(defun test-macro-magic (&return int)
(return (the-answer)))
(defmacro the-answer ()
(return 42))

Loading…
Cancel
Save