pbrain Language Compiler
The pbrain programming language is an extension I made to the Brainf**k programming language that adds the ability to define and call procedures. If you’re wondering about the asterisks in the name of the programming language, it’s because the name is rather rude. That’s why I decided to use a name for my extension that is a good bit tamer. After all, my Mom and my kids still read my site from time to time.
After making a rather simplistic interpreter in C++, I decided to go ahead and make a .NET compiler as well, using C#.
Thanks to rdragon on the Undernet #c++ channel and to Daniel Cristofani for their comments and input.
A Nice IDE
There’s a nice integrated development environment written by Tim Rohlfs that includes support for pbrain. I was delighted when he added support. Go download it, try it out, and let him know how much you like it.
Some pbrain Programs
This is the pbrain program referred to in the comment header of the interpreter source.
+([-])
+(-:<<[>>+<<-]>[>+<-]>)
+([-]>++++++++++[<++++>-]<++++++++>[-]++:.)
>+++>+++++>++:
>+++:
This pbrain program initializes a memory location to 65, the ASCII value of the letter ‘A’. It then calls a function for subsequent memory locations to copy the previous location and add one to it. Once a few cells are initialized, it prints all the cells to standard output.
+([-]<[-]<[>+>+<<-]>>[<<+>>-])
+([-]>[-]+:<+)
>>+++++++++++++[<+++++>-]
++:
>++:
>++:
>++:
<<<<.>.>.>.>.
The .NET Compiler
Here is the C# source code for the .NET compiler, which compiles both pbrain and traditional Brainf**k code. It performs some simple optimizations as well:
- A sequence of + and - instructions will be concatenated into one operation sequence, so that a sequence like ++++ will add 4 to the current memory location instead of adding 1 four times.
- Likewise, a sequence of > and < instructions will be concatenated into one operation sequence, so that a sequence like >>>> will move the pointer forward four locations rather than 1 location at a time.
- The null loop, [-], will be compiled to set the current memory location to zero, since that is the practical effect of the loop anyway. This will allow for efficiently zeroing out a memory location whether it holds the value 1 or 10001.
using System;
using System.Collections;
using System.IO;
using System.Reflection;
using System.Reflection.Emit;
using System.Text;
using System.Threading;
namespace ParksComputing.Pbrain {
/// <summary>
/// Compiler implements the pbrain compiler.
/// </summary>
class Compiler {
/// <summary>
/// The main entry point for the application. Compiles the pbrain source
/// file named by the first command-line argument into a .NET executable.
/// </summary>
/// <param name="args">Command-line arguments; args[0] is the source file path.</param>
static void Main(string[] args) {
    // Without a source file there is nothing to compile; say so instead of
    // exiting silently, and signal failure to the calling shell.
    if (args.Length == 0) {
        Console.Error.WriteLine("Usage: specify the pbrain source file to compile as the first argument.");
        Environment.ExitCode = 1;
        return;
    }
    Compiler compiler = new Compiler(args[0]);
    // The created Type is not needed here; Compile() saves the assembly itself.
    compiler.Compile();
}
// Path of the pbrain source file to compile.
private String fileName;
// Source file name without extension; used as the assembly name and the generated type name.
private String asmName;
// Source file name with its extension changed to ".exe"; the output file name.
private String asmFileName;
// Builder for the dynamic assembly that is saved to asmFileName.
private AssemblyBuilder myAsmBldr;
// Static field "mem" of the generated type: the cell array.
private FieldBuilder mem;
// Static field "mp" of the generated type: index of the current cell.
private FieldBuilder mp;
// Static field "tmp" of the generated type: scratch value used by Plus and Minus.
private FieldBuilder tmp;
// Static field "vtbl" of the generated type: holds a Hashtable of procedures.
// Only defined by Parse when the source contains at least one ':'; otherwise null.
private FieldBuilder vtbl;
// Builder for the generated type that receives the fields and methods.
private TypeBuilder myTypeBldr;
// Console.Read, used for the ',' instruction.
private MethodInfo readMI;
// Console.Write(char), used for the '.' instruction.
private MethodInfo writeMI;
// Hashtable.Add(object, object), used when a procedure is defined.
private MethodInfo hashAddMI;
// Hashtable.get_Item(object), used when a procedure is called.
private MethodInfo hashGetMI;
// Counter used to build the "pb_N" names of generated procedure methods.
private int methodCount;
// Number of ':' characters seen in the source.
private int callCount;
/// <summary>
/// Emits an instruction that pushes the int32 constant <paramref name="count"/>,
/// using the one-byte ldc.i4.N form for 0 through 8 and the general
/// ldc.i4 form with an operand for every other value.
/// </summary>
void Ldc(ILGenerator il, int count) {
    // Index N holds the dedicated short opcode for the constant N.
    OpCode[] shortForms = {
        OpCodes.Ldc_I4_0,
        OpCodes.Ldc_I4_1,
        OpCodes.Ldc_I4_2,
        OpCodes.Ldc_I4_3,
        OpCodes.Ldc_I4_4,
        OpCodes.Ldc_I4_5,
        OpCodes.Ldc_I4_6,
        OpCodes.Ldc_I4_7,
        OpCodes.Ldc_I4_8
    };
    if (count >= 0 && count < shortForms.Length) {
        il.Emit(shortForms[count]);
    }
    else {
        il.Emit(OpCodes.Ldc_I4, count);
    }
}
// Emits IL that moves the generated program's cell pointer forward:
// mp = mp + count. Interpret passes the length of a run of '>' as count.
// No bounds check against the cell array is emitted here.
void Forward(ILGenerator il, int count) {
//ldsfld int32 pbout.mp
il.Emit(OpCodes.Ldsfld, mp);
//ldc.i4 count
Ldc(il, count);
//add
il.Emit(OpCodes.Add);
//stsfld int32 pbout.mp
il.Emit(OpCodes.Stsfld, mp);
}
// Emits IL that moves the generated program's cell pointer backward:
// mp = mp - count. Interpret passes the length of a run of '<' as count.
// No check for the pointer going below zero is emitted here.
void Back(ILGenerator il, int count) {
//ldsfld int32 pbout.mp
il.Emit(OpCodes.Ldsfld, mp);
//ldc.i4 count
Ldc(il, count);
//sub
il.Emit(OpCodes.Sub);
//stsfld int32 pbout.mp
il.Emit(OpCodes.Stsfld, mp);
}
/// <summary>
/// Emits IL that adds <paramref name="count"/> to the current cell:
/// mem[mp] = mem[mp] + count.
/// </summary>
/// <param name="il">Generator of the method being compiled.</param>
/// <param name="count">Amount to add (the length of a run of '+').</param>
void Plus(ILGenerator il, int count) {
    // Push the store target (array, index) first so the computed sum lands
    // directly on top of it; this avoids parking the sum in a static
    // scratch field just to reorder the evaluation stack.
    //ldsfld int32[] pbout.mem
    il.Emit(OpCodes.Ldsfld, mem);
    //ldsfld int32 pbout.mp
    il.Emit(OpCodes.Ldsfld, mp);
    // Load the current cell value.
    //ldsfld int32[] pbout.mem
    il.Emit(OpCodes.Ldsfld, mem);
    //ldsfld int32 pbout.mp
    il.Emit(OpCodes.Ldsfld, mp);
    //ldelem.i4
    il.Emit(OpCodes.Ldelem_I4);
    //ldc.i4 count
    Ldc(il, count);
    //add
    il.Emit(OpCodes.Add);
    // Stack is now: mem, mp, sum.
    //stelem.i4
    il.Emit(OpCodes.Stelem_I4);
}
/// <summary>
/// Emits IL that subtracts <paramref name="count"/> from the current cell:
/// mem[mp] = mem[mp] - count.
/// </summary>
/// <param name="il">Generator of the method being compiled.</param>
/// <param name="count">Amount to subtract (the length of a run of '-').</param>
void Minus(ILGenerator il, int count) {
    // Push the store target (array, index) first so the computed difference
    // lands directly on top of it; this avoids parking the value in a static
    // scratch field just to reorder the evaluation stack.
    //ldsfld int32[] pbout.mem
    il.Emit(OpCodes.Ldsfld, mem);
    //ldsfld int32 pbout.mp
    il.Emit(OpCodes.Ldsfld, mp);
    // Load the current cell value.
    //ldsfld int32[] pbout.mem
    il.Emit(OpCodes.Ldsfld, mem);
    //ldsfld int32 pbout.mp
    il.Emit(OpCodes.Ldsfld, mp);
    //ldelem.i4
    il.Emit(OpCodes.Ldelem_I4);
    //ldc.i4 count
    Ldc(il, count);
    //sub
    il.Emit(OpCodes.Sub);
    // Stack is now: mem, mp, difference.
    //stelem.i4
    il.Emit(OpCodes.Stelem_I4);
}
// Emits IL for the ',' instruction: mem[mp] = Console.Read().
// The int returned by Console.Read is stored in the cell unchanged.
void Read(ILGenerator il) {
//ldsfld int32[] pbout.mem
il.Emit(OpCodes.Ldsfld, mem);
//ldsfld int32 pbout.mp
il.Emit(OpCodes.Ldsfld, mp);
//call int32 [mscorlib]System.Console.Read()
il.EmitCall(OpCodes.Call, readMI, null);
//stelem.i4
il.Emit(OpCodes.Stelem_I4);
}
// Emits IL for the '.' instruction: Console.Write(mem[mp]).
// The int32 cell value is passed directly as the char argument.
void Write(ILGenerator il) {
//ldsfld int32[] pbout.mem
il.Emit(OpCodes.Ldsfld, mem);
//ldsfld int32 pbout.mp
il.Emit(OpCodes.Ldsfld, mp);
//ldelem.i4
il.Emit(OpCodes.Ldelem_I4);
//call void [mscorlib]System.Console.Write(char)
il.EmitCall(OpCodes.Call, writeMI, null);
}
// Emits the loop test for '[': loads mem[mp] and branches to endLabel when
// it is zero. Interpret marks the loop's begin label just before calling this,
// so the test is re-evaluated on every iteration.
void LoopBegin(ILGenerator il, Label endLabel) {
//ldsfld int32[] pbout.mem
il.Emit(OpCodes.Ldsfld, mem);
//ldsfld int32 pbout.mp
il.Emit(OpCodes.Ldsfld, mp);
//ldelem.i4
il.Emit(OpCodes.Ldelem_I4);
//brfalse loop_1_end
il.Emit(OpCodes.Brfalse, endLabel);
}
// Emits the back edge for ']': an unconditional branch to beginLabel, where
// the loop test emitted by LoopBegin runs again.
void LoopEnd(ILGenerator il, Label beginLabel) {
//br loop_1_start
il.Emit(OpCodes.Br, beginLabel);
}
// Emits IL for the ':' instruction: looks up vtbl[mem[mp]] (the cell value is
// boxed to serve as the Hashtable key) and invokes the result through calli.
// NOTE(review): the '(' handling stores the raw ldftn function pointer as the
// Hashtable value without boxing it, and the calli below is emitted with the
// unmanaged StdCall convention against a managed method pointer. This looks
// unverifiable and runtime-dependent — confirm on the target runtime.
// NOTE(review): no null check is emitted on the lookup result, so calling an
// undefined procedure ID presumably fails at run time — verify.
void Call(ILGenerator il) {
//ldsfld object pbout.vtbl
il.Emit(OpCodes.Ldsfld, vtbl);
//ldsfld int32[] pbout.mem
il.Emit(OpCodes.Ldsfld, mem);
//ldsfld int32 pbout.mp
il.Emit(OpCodes.Ldsfld, mp);
//ldelem.i4
il.Emit(OpCodes.Ldelem_I4);
//box int32
il.Emit(OpCodes.Box, typeof(int));
//call instance object [mscorlib]System.Collections.Hashtable.get_Item(object)
il.EmitCall(OpCodes.Call, hashGetMI, null);
//calli void()
il.EmitCalli(OpCodes.Calli, System.Runtime.InteropServices.CallingConvention.StdCall, null, null);
}
// Emits IL that stores zero in the current cell: mem[mp] = 0.
// Interpret uses this in place of a compiled loop for the null loop "[-]".
void Zero(ILGenerator il) {
//ldsfld int32[] pbout.mem
il.Emit(OpCodes.Ldsfld, mem);
//ldsfld int32 pbout.mp
il.Emit(OpCodes.Ldsfld, mp);
//ldc.i4.0
il.Emit(OpCodes.Ldc_I4_0);
//stelem.i4
il.Emit(OpCodes.Stelem_I4);
}
/// <summary>
/// Compiles the source file into the dynamic assembly: defines the static
/// fields and the "main" entry point, translates the program body via
/// Parse, then bakes the type and saves the assembly to asmFileName.
/// </summary>
/// <returns>The created type that contains the compiled program.</returns>
Type Compile() {
    FieldAttributes privateStatic = FieldAttributes.Private | FieldAttributes.Static;
    // .field private static int32 mp
    mp = myTypeBldr.DefineField("mp", typeof(int), privateStatic);
    // .field private static int32[] mem
    // Declared with its real type, int32[], rather than System.Array: the
    // field stores the result of newarr int32 and is read with ldelem.i4 /
    // stelem.i4, which expect an int32[] on the evaluation stack.
    mem = myTypeBldr.DefineField("mem", typeof(int[]), privateStatic);
    // .field private static int32 tmp
    tmp = myTypeBldr.DefineField("tmp", typeof(int), privateStatic);
    // .method private static int32 main() cil managed
    MethodBuilder mainBldr = myTypeBldr.DefineMethod(
        "main",
        MethodAttributes.Private | MethodAttributes.Static,
        typeof(int),
        null
    );
    ILGenerator il = mainBldr.GetILGenerator();
    // Prologue: mem = new int[30000]; mp = 0;
    // ldc.i4 30000
    il.Emit(OpCodes.Ldc_I4, 30000);
    // newarr [mscorlib]System.Int32
    il.Emit(OpCodes.Newarr, typeof(int));
    // stsfld int32[] pbout.mem
    il.Emit(OpCodes.Stsfld, mem);
    // ldc.i4 0
    il.Emit(OpCodes.Ldc_I4_0);
    // stsfld int32 pbout.mp
    il.Emit(OpCodes.Stsfld, mp);
    // Translate the program body into main.
    Parse(il);
    // Epilogue: return mem[mp], so the value of the current cell becomes
    // the return value of the generated entry point.
    // ldsfld int32[] pbout.mem
    il.Emit(OpCodes.Ldsfld, mem);
    // ldsfld int32 pbout.mp
    il.Emit(OpCodes.Ldsfld, mp);
    // ldelem.i4
    il.Emit(OpCodes.Ldelem_I4);
    // ret
    il.Emit(OpCodes.Ret);
    Type pboutType = myTypeBldr.CreateType();
    myAsmBldr.SetEntryPoint(mainBldr);
    myAsmBldr.Save(asmFileName);
    Console.WriteLine("Assembly saved as '{0}'.", asmFileName);
    return pboutType;
}
/// <summary>
/// Reads the source file into a queue of characters (one per byte), counts
/// the ':' call instructions, emits the procedure-table setup when the
/// program makes at least one call, and then translates the queued
/// instructions into <paramref name="il"/>.
/// </summary>
/// <param name="il">Generator of the entry-point method being compiled.</param>
void Parse(ILGenerator il) {
    Queue q = new Queue();
    // Only the read needs the file; close it before code generation starts.
    using (FileStream fs = File.OpenRead(fileName)) {
        int n;
        while ((n = fs.ReadByte()) != -1) {
            char c = (char)n;
            q.Enqueue(c);
            if (c == ':') {
                ++callCount;
            }
        }
    }
    if (callCount > 0) {
        // .field private static class [mscorlib]System.Collections.Hashtable vtbl
        // Typed as Hashtable (not Object) because the generated code calls
        // Hashtable instance methods directly on the loaded field value.
        vtbl = myTypeBldr.DefineField("vtbl", typeof(Hashtable), FieldAttributes.Private | FieldAttributes.Static);
        //newobj instance void [mscorlib]System.Collections.Hashtable..ctor()
        ConstructorInfo constructorInfo = typeof(Hashtable).GetConstructor(System.Type.EmptyTypes);
        il.Emit(OpCodes.Newobj, constructorInfo);
        //stsfld class [mscorlib]System.Collections.Hashtable pbout.vtbl
        il.Emit(OpCodes.Stsfld, vtbl);
    }
    Interpret(q, il);
}
/// <summary>
/// Compiles the instructions in <paramref name="q"/> into a new private
/// static void method named "pb_N" on the generated type.
/// </summary>
/// <param name="q">Queue of instruction characters forming the procedure body.</param>
/// <returns>The builder of the generated procedure method.</returns>
MethodBuilder Procedure(Queue q) {
    // Claim the number before compiling the body: the body may itself define
    // procedures, and each generated method needs a distinct name. Names only
    // have to be unique, so gaps in the numbering are harmless.
    String name = "pb_" + methodCount++;
    MethodBuilder procBldr = myTypeBldr.DefineMethod(
        name,
        (MethodAttributes.Private | MethodAttributes.Static),
        null,
        System.Type.EmptyTypes
    );
    ILGenerator il = procBldr.GetILGenerator();
    Interpret(q, il);
    // ret
    il.Emit(OpCodes.Ret);
    return procBldr;
}
/// <summary>
/// Measures a run of identical instructions. <paramref name="c"/> has
/// already been removed from the queue by the caller; every immediately
/// following copy of it is removed here as well.
/// </summary>
/// <returns>The run length, counting the caller's character (so at least 1).</returns>
int CountDuplicates(Queue q, char c) {
    int run = 1;
    // Consume the head of the queue for as long as it repeats c.
    while (q.Count > 0 && (char)q.Peek() == c) {
        q.Dequeue();
        ++run;
    }
    return run;
}
/// <summary>
/// Translates the instruction characters in <paramref name="q"/> into IL,
/// consuming the queue. Runs of '+', '-', '&gt;' and '&lt;' are folded into
/// a single operation; characters that are not instructions are ignored.
/// </summary>
/// <param name="q">Queue of source characters to translate.</param>
/// <param name="il">Generator of the method receiving the code.</param>
void Interpret(Queue q, ILGenerator il) {
    while (q.Count > 0) {
        char c = (char)q.Dequeue();
        switch (c) {
            case '+':
                Plus(il, CountDuplicates(q, c));
                break;
            case '-':
                Minus(il, CountDuplicates(q, c));
                break;
            case '>':
                Forward(il, CountDuplicates(q, c));
                break;
            case '<':
                Back(il, CountDuplicates(q, c));
                break;
            case ',':
                Read(il);
                break;
            case '.':
                Write(il);
                break;
            case '[':
                CompileLoop(q, il);
                break;
            case '(':
                CompileProcedure(q, il);
                break;
            case ':':
                Call(il);
                break;
            default:
                break;
        }
    }
}

// Moves characters from q into body up to (and consuming, but not copying)
// the closer that matches an already-consumed opener, tracking nesting.
// Returns false when q runs out before the matching closer is found.
private static bool ExtractBlock(Queue q, Queue body, char open, char close) {
    int nest = 0;
    while (q.Count > 0) {
        char c = (char)q.Dequeue();
        if (c == open) {
            ++nest;
        }
        else if (c == close) {
            if (nest == 0) {
                return true;
            }
            --nest;
        }
        body.Enqueue(c);
    }
    return false;
}

// Compiles a loop whose '[' has just been consumed from q.
private void CompileLoop(Queue q, ILGenerator il) {
    // A '[' that is the very last character has no body; emit nothing.
    if (q.Count == 0) {
        return;
    }
    Queue body = new Queue();
    // Check for the null loop, [-], which sets the current cell to zero.
    // There's no need to loop. Just store a zero and move on.
    if ((char)q.Peek() == '-') {
        q.Dequeue();
        // Guard the look-ahead: the source may end right after "[-".
        if (q.Count > 0 && (char)q.Peek() == ']') {
            // Eat the ] and skip the loop entirely.
            q.Dequeue();
            Zero(il);
            return;
        }
        // Not a null loop; the '-' is an ordinary first body instruction.
        body.Enqueue('-');
    }
    if (!ExtractBlock(q, body, '[', ']')) {
        // The loop is still compiled from whatever followed the '[', but
        // the problem is reported instead of being silently ignored.
        Console.Error.WriteLine("Warning: no matching ']' found for '['.");
    }
    Label beginLabel = il.DefineLabel();
    Label endLabel = il.DefineLabel();
    il.MarkLabel(beginLabel);
    LoopBegin(il, endLabel);
    Interpret(body, il);
    LoopEnd(il, beginLabel);
    il.MarkLabel(endLabel);
}

// Compiles a procedure definition whose '(' has just been consumed from q,
// and emits the code that registers it under the current cell's value.
private void CompileProcedure(Queue q, ILGenerator il) {
    if (q.Count > 0) {
        Queue body = new Queue();
        if (!ExtractBlock(q, body, '(', ')')) {
            Console.Error.WriteLine("Warning: no matching ')' found for '('.");
        }
        MethodBuilder procBldr = Procedure(body);
        // The procedure table only exists when the program contains a ':'
        // call. Without one, nothing can invoke the procedure, so there is
        // nothing to register (and no field to load).
        if (vtbl != null) {
            //ldsfld object pbout.vtbl
            il.Emit(OpCodes.Ldsfld, vtbl);
            //ldsfld int32[] pbout.mem
            il.Emit(OpCodes.Ldsfld, mem);
            //ldsfld int32 pbout.mp
            il.Emit(OpCodes.Ldsfld, mp);
            //ldelem.i4
            il.Emit(OpCodes.Ldelem_I4);
            //box int32
            il.Emit(OpCodes.Box, typeof(int));
            //ldftn void pbout.pb_0()
            il.Emit(OpCodes.Ldftn, procBldr);
            //call instance void [mscorlib]System.Collections.Hashtable.Add(object,object)
            il.EmitCall(OpCodes.Call, hashAddMI, null);
        }
    }
    ++methodCount;
}
/// <summary>
/// Prepares a compiler for one source file: derives the assembly and output
/// file names from the source path, looks up the Console and Hashtable
/// methods the generated code calls, and creates the dynamic assembly,
/// module and type builders.
/// </summary>
/// <param name="fileNameInit">Path of the pbrain source file.</param>
Compiler(String fileNameInit) {
    this.fileName = fileNameInit;
    this.methodCount = 0;
    this.callCount = 0;

    // Both names are derived from the file name only, without its directory.
    this.asmName = Path.GetFileNameWithoutExtension(this.fileName);
    String exePath = Path.ChangeExtension(this.fileName, ".exe");
    this.asmFileName = Path.GetFileName(exePath);

    AssemblyName myAsmName = new AssemblyName();
    myAsmName.Name = this.asmName;
    this.myAsmBldr = Thread.GetDomain().DefineDynamicAssembly(myAsmName, AssemblyBuilderAccess.RunAndSave);

    // Runtime library methods used by the emitted code.
    this.writeMI = typeof(Console).GetMethod("Write", new Type[] { typeof(Char) });
    this.readMI = typeof(Console).GetMethod("Read");
    Type hashtableType = typeof(System.Collections.Hashtable);
    this.hashAddMI = hashtableType.GetMethod("Add", new Type[] { typeof(Object), typeof(Object) });
    this.hashGetMI = hashtableType.GetMethod("get_Item", new Type[] { typeof(Object) });

    // .class private auto ansi pbout extends [mscorlib]System.Object
    ModuleBuilder myModuleBldr = this.myAsmBldr.DefineDynamicModule(this.asmFileName, this.asmFileName);
    this.myTypeBldr = myModuleBldr.DefineType(this.asmName);
}
};
}
The Interpreter
Here is the C++ source code for the interpreter. If you want to compile it with g++, make sure you get version 3.4 or later so it won’t choke on the wchar_t bits.
I don’t claim that this is a particularly efficient interpreter. I thought of it, designed it, and wrote it one evening/morning between the hours of 9:45 PM and 6:15 AM, in a single session.
This interpreter will also work with traditional Brainf**k code.
/*
Interpreter for the pbrain programming language (procedural Brainf**k)
Copyright(C) Paul M. Parks
All Rights Reserved.
v1.4.3
2004/07/15 12:10
paul@parkscomputing.com
https://www.parkscomputing.com/
The syntax is the same as traditional Brainf**k, with the following
symbols added:
    (   Begin procedure
    )   End procedure
    :   Call procedure identified by the value at the current location
Procedures are identified by numeric ID:
+([-])
Assuming the current location is zero, defines a procedure number 1 that
sets the current location to zero when called.
++(<<[>>+<<-]>[>+<-]>)
Assuming the current location is zero, defines a procedure number 2 that
accepts two parameters. It adds parameter 1 and parameter 2 and places
the result in the location that was current when the procedure was
called, zeroing out parameters 1 and 2 in the process.
+++([-]>++++++++++[<++++>-]<++++++++>[-]++:.)
Assuming the current location is zero, defines a procedure 3 that prints
the ASCII equivalent of the numeral at the current location, between 0
and 9.
+++>+++++>++:
Calls procedure 2, passing in parameters 3 and 5.
All of the above examples may be combined into the program below. Note that
the procedures are numbered 1, 2, and 3 because the current location is
incremented prior to each procedure definition.
+([-])
+(-:<<[>>+<<-]>[>+<-]>)
+([-]>++++++++++[<++++>-]<++++++++>[-]++:.)
>+++>+++++>++:
>+++:
An error condition is reported with a short diagnostic to stderr and an
error number returned from the executable. Errors reported by the
interpreter are as follows:
1 - Out of memory
2 - Unknown procedure
3 - Memory address out of range
4 - Cannot find matching ] for beginning [
999 - Unknown exception
*/
#include <vector>
#include <iostream>
#include <fstream>
#include <iterator>
#include <map>
#if defined(_MSC_VER)
#pragma warning(disable: 4571)
#endif
// Define the type contained in the memory array
#ifndef PBRAIN_MEM_TYPE
#define PBRAIN_MEM_TYPE int
#endif
// Define the character input/output type.
#ifndef PBRAIN_CHARACTER_TYPE
#define PBRAIN_CHARACTER_TYPE wchar_t
#endif
// Type aliases for the two configurable types. The rest of the interpreter
// refers to the cell and character types by these lowercase names, so they
// must be declared here, right after the macros they are derived from.
typedef PBRAIN_MEM_TYPE pbrain_mem_type;
typedef PBRAIN_CHARACTER_TYPE pbrain_character_type;
// Set the initial size of the memory array, if not defined externally.
#ifndef PBRAIN_INIT_MEM_SIZE
#define PBRAIN_INIT_MEM_SIZE 30000
#endif
// By default, use a dynamic array to store memory locations.
#ifndef PBRAIN_STATIC_MEMORY
typedef std::vector<pbrain_mem_type> Mem;
// The memory cells, value-initialized to zero.
Mem mem(PBRAIN_INIT_MEM_SIZE);
// Index of the current memory cell.
Mem::size_type mp = 0;
#else
// Fixed-size memory; zero-initialized because it has static storage.
PBRAIN_MEM_TYPE mem[PBRAIN_INIT_MEM_SIZE];
// Index of the current memory cell.
size_t mp = 0;
#endif
// Primary template, intentionally empty: only the specializations below
// carry any members.
template<typename Ch> struct io_types {};

// Narrow-character flavour: standard streams, file stream type and stream
// iterator type for programs read and written as char.
template<> struct io_types<char> {
    typedef std::ifstream ifstream;
    typedef std::istream_iterator<char> istream_iterator;
    static std::istream& cin;
    static std::ostream& cout;
};
std::istream& io_types<char>::cin = std::cin;
std::ostream& io_types<char>::cout = std::cout;

// Wide-character flavour: the same set of names bound to the wchar_t
// streams and iterator.
template<> struct io_types<wchar_t> {
    typedef std::wifstream ifstream;
    typedef std::istream_iterator<wchar_t, wchar_t> istream_iterator;
    static std::wistream& cin;
    static std::wostream& cout;
};
std::wistream& io_types<wchar_t>::cin = std::wcin;
std::wostream& io_types<wchar_t>::cout = std::wcout;
// Useful type that chooses the appropriate typedefs for the character width
typedef io_types<pbrain_character_type> io;
// Type for storing a string of instructions; used for procedures and loops
typedef std::vector<pbrain_character_type> SourceBlock;
// Type for storing procedures indexed by number
typedef std::map<pbrain_mem_type, std::vector<PBRAIN_CHARACTER_TYPE> > Procedures;
// Map of procedure IDs to procedures
Procedures procedures;
// Interpret a container of instructions
template<typename It> void interpret(It ii, It eos)
{
while (ii != eos) {
switch (*ii) {
case '+':
++mem[mp];
break;
case '-':
--mem[mp];
break;
case '>':
++mp;
#ifndef PBRAIN_STATIC_MEMORY
// If memory is kept in a dynamic array, the array will grow as
// needed.
try {
if (mp == mem.size()) {
mem.resize(mem.size() * 2);
}
}
catch (...) {
// Ostensibly an out-of-memory condition.
throw 1;
}
#else
// Static memory cannot grow, so throw when limit reached
if (mp == sizeof(mem) / sizeof(PBRAIN_MEM_TYPE)) {
throw 1;
}
#endif
break;
case '<':
--mp;
// Throw out-of-range error if cell location is decremented below 0
if (static_cast<int>(mp) < 0) {
throw 3;
}
break;
case '.':
io::cout.put(static_cast<PBRAIN_CHARACTER_TYPE>(mem[mp]));
break;
case ',':
mem[mp] = static_cast<pbrain_mem_type>(io::cin.get());
break;
case '[':
// Move to first instruction in the loop
++ii;
{
int nest = 0;
It begin = ii;
// Find the matching ]
while (ii != eos) {
if (*ii == '[') {
++nest;
}
else if (*ii == ']') {
if (nest) {
--nest;
}
else {
break;
}
}
++ii;
}
// If no matching ] is found in source block, report error.
if (ii == eos) {
throw 4;
}
// At this point the iterator will point at the matching ]
// character, which is one instruction past the end of the range
// of instructions to be processed in a loop.
loop(begin, ii);
}
break;
case '(':
++ii;
{
SourceBlock sourceBlock;
while (ii != eos && *ii != ')') {
sourceBlock.push_back(*ii);
++ii;
}
procedures.insert(std::make_pair(mem[mp], sourceBlock));
}
break;
case ':':
{
// Look up the source block that matches the value at the current
// location. If found, execute it.
Procedures::iterator i = procedures.find(mem[mp]);
if (i != procedures.end()) {
interpret(i->second.begin(), i->second.end());
}
else {
throw 2;
}
}
break;
default:
break;
}
++ii;
}
}
// Run the loop body [ii, eos) repeatedly. The current cell is tested before
// every pass, so the body is skipped entirely when the cell is already zero.
template<typename It> void loop(It ii, It eos)
{
    for (;;) {
        if (mem[mp] == 0) {
            return;
        }
        interpret(ii, eos);
    }
}
// Read a whole program from the stream c and execute it.
template<typename C> void parse(C& c)
{
    // Stream iterators over the input; a default-constructed iterator marks
    // end-of-stream.
    io::istream_iterator first(c);
    io::istream_iterator last;
    // Pull every extracted character into a source block in one step.
    SourceBlock sourceBlock(first, last);
    // Execute the instructions in the source block
    interpret(sourceBlock.begin(), sourceBlock.end());
}
int main(int argc, char** argv)
try {
// Read from a file if a filename is provided as an argument.
if (argc > 1) {
io::ifstream source(argv[1]);
if (source.is_open()) {
parse(source);
}
}
// Otherwise interpret code from stdin
else {
parse(io::cin);
}
}
catch (int e) {
std::cerr << "Error " << e << ", cell " << unsigned int(mp) << "\n";
exit(e);
}
catch (...) {
std::cerr << "Error " << 999 << ", cell " << unsigned int(mp) << "\n";
exit(999);
}