-
Simulation of 8-bit increment
08/17/2018 at 06:16 • 0 comments
This is the test3.cpp program:
#include <TRCMath.hpp>
#undef DEBUG
#include <time.h>
#include <stdio.h>
#include <stdlib.h>

using namespace std;
using namespace TRC;

/**
 * Generic half adder: samples inputs A and B and drives sum S and carry C.
 * The generic unit attaches to nothing; each concrete instance (created in
 * main() below) assigns iA, iB, iS and iC in its own constructor.
 */
class HalfAdder : public Entity
{
protected:
    // indices of the connection points used by step():
    int iA, iB, iS, iC;
    // inputs:
    Signal A, B;
    // outputs:
    Signal S, C;

public:
    HalfAdder(const char* s) : Entity(s)
    {
        // empty constructor for generic unit
    }

    /**
     * One simulation step. S is TRUE only when exactly one of A/B is TRUE
     * and the other is FALSE; C is TRUE only when both are TRUE. Any other
     * input state yields S = FALSE and C = FALSE.
     */
    void step()
    {
        A = io(iA).read();
        B = io(iB).read();
        if (A == TRUE)
        {
            if (B == TRUE)       { S = FALSE; C = TRUE;  }
            else if (B == FALSE) { S = TRUE;  C = FALSE; }
            else                 { S = FALSE; C = FALSE; }
        }
        else if (A == FALSE && B == TRUE)
        {
            S = TRUE;
            C = FALSE;
        }
        else
        {
            S = FALSE;
            C = FALSE;
        }
//      cout << name() << ":" << A << B << "->" << C << S << endl;
        io(iS) << S;
        io(iC) << C;
    }
};

/**
 * Test bench for the 8-bit incrementer. It drives INC and I[0..7] and
 * samples O[0..7] and COUT.
 */
class World : public Entity
{
    enum
    {
        DATA_BITS  = 8, // width of the I and O vectors
        HOLD_SHIFT = 4  // inputs follow counter bits 4..11, i.e. change every 16 steps
    };

    // indices of the connection points:
    int i_increment, i_input, i_output, i_carry;

public:
    // internal counter (number of step() calls so far; read by main()):
    long long counter;

    World() : Entity("World")
    {
        i_increment = at("INC");
        i_input = at("I", DATA_BITS);
        i_output = at("O", DATA_BITS);
        i_carry = at("COUT");
        counter = 0;
    }

    void step() // World works backwards - read outputs and write inputs
    {
        Wire<DATA_BITS + 1> vec; // temporary vector: vec[0] = COUT, vec[8 - k] = O[k]
        for (int bit = 0; bit < DATA_BITS; bit++)
            vec[DATA_BITS - bit] = io(i_output + bit).read();
        vec[0] = io(i_carry).read();
#ifdef DEBUG
        cout << "Case " << counter << " output=" << vec.binarize() << endl;
#endif
        io(i_increment) << TRUE;
        /* below we change inputs every 16 steps */
        for (int bit = 0; bit < DATA_BITS; bit++)
            io(i_input + bit) << ((counter & (1LL << (bit + HOLD_SHIFT))) ? TRUE : FALSE);
        counter++;
    }
};

unsigned char BYTE = 0;

/**
 * Builds a chain of eight half adders (the carry of each stage feeds input A
 * of the next), times a native unsigned char increment as a baseline, then
 * times 0x100000 simulation steps of the chain.
 */
int main()
{
    System *sys = System::getInstance();
    World world; // created before the adders, so the I/O vectors are attached first

    // INSTANCE(x,y) ... NAMED(z): the statements in between become the
    // constructor body of a class x##y derived from x, and z is an object of it.
    INSTANCE(HalfAdder,0);
        iA = at("INC"); iB = at("I[0]"); iS = at("O[0]"); iC = at("C0");
    NAMED(ha0);
    INSTANCE(HalfAdder,1);
        iA = at("C0"); iB = at("I[1]"); iS = at("O[1]"); iC = at("C1");
    NAMED(ha1);
    INSTANCE(HalfAdder,2);
        iA = at("C1"); iB = at("I[2]"); iS = at("O[2]"); iC = at("C2");
    NAMED(ha2);
    INSTANCE(HalfAdder,3);
        iA = at("C2"); iB = at("I[3]"); iS = at("O[3]"); iC = at("C3");
    NAMED(ha3);
    INSTANCE(HalfAdder,4);
        iA = at("C3"); iB = at("I[4]"); iS = at("O[4]"); iC = at("C4");
    NAMED(ha4);
    INSTANCE(HalfAdder,5);
        iA = at("C4"); iB = at("I[5]"); iS = at("O[5]"); iC = at("C5");
    NAMED(ha5);
    INSTANCE(HalfAdder,6);
        iA = at("C5"); iB = at("I[6]"); iS = at("O[6]"); iC = at("C6");
    NAMED(ha6);
    INSTANCE(HalfAdder,7);
        iA = at("C6"); iB = at("I[7]"); iS = at("O[7]"); iC = at("COUT");
    NAMED(ha7);

    clock_t t1, t2; // clock() returns clock_t
    int i, n = 100000000;

    // Baseline: ten native increments per iteration.
    // NOTE(review): BYTE is not volatile, so an optimizing build is presumably
    // free to fold these increments; treat this figure as an unoptimized
    // baseline - confirm before comparing against -O1..-O3 numbers.
    t1 = clock();
    for(i=0;i<n;i++)
    {
        BYTE++; BYTE++; BYTE++; BYTE++; BYTE++;
        BYTE++; BYTE++; BYTE++; BYTE++; BYTE++;
    }
    t2 = clock();
    printf("BYTE=0x%2.2X (%6.6fs or %2.2fns per increment)\n",BYTE,
        (double)(t2-t1)/CLOCKS_PER_SEC,
        (double)(t2-t1)/(n/1e8)/CLOCKS_PER_SEC /* 1e8 because we have 10 increments per iteration */
        );

    // Simulation: one prepare() plus one step() of every entity per iteration,
    // until the World has executed n steps.
    n = 0x100000;
    t1 = clock();
    while(world.counter!=n)
    {
        sys->prepare();
        ha0.step();
        ha1.step();
        ha2.step();
        ha3.step();
        ha4.step();
        ha5.step();
        ha6.step();
        ha7.step();
        world.step();
    }
    t2 = clock();
    printf("%4.4fs or %2.2fns per step\n",
        (double)(t2-t1)/CLOCKS_PER_SEC,
        (double)(t2-t1)/(n/1e9)/CLOCKS_PER_SEC
        );
}
Output:
INC <- World (idx=0) I[0] <- World (idx=1) I[1] <- World (idx=2) I[2] <- World (idx=3) I[3] <- World (idx=4) I[4] <- World (idx=5) I[5] <- World (idx=6) I[6] <- World (idx=7) I[7] <- World (idx=8) O[0] <- World (idx=9) O[1] <- World (idx=10) O[2] <- World (idx=11) O[3] <- World (idx=12) O[4] <- World (idx=13) O[5] <- World (idx=14) O[6] <- World (idx=15) O[7] <- World (idx=16) COUT <- World (idx=17) INC <- HalfAdder0 (idx=0) I[0] <- HalfAdder0 (idx=1) O[0] <- HalfAdder0 (idx=9) C0 <- HalfAdder0 (idx=18) C0 <- HalfAdder1 (idx=18) I[1] <- HalfAdder1 (idx=2) O[1] <- HalfAdder1 (idx=10) C1 <- HalfAdder1 (idx=19) C1 <- HalfAdder2 (idx=19) I[2] <- HalfAdder2 (idx=3) O[2] <- HalfAdder2 (idx=11) C2 <- HalfAdder2 (idx=20) C2 <- HalfAdder3 (idx=20) I[3] <- HalfAdder3 (idx=4) O[3] <- HalfAdder3 (idx=12) C3 <- HalfAdder3 (idx=21) C3 <- HalfAdder4 (idx=21) I[4] <- HalfAdder4 (idx=5) O[4] <- HalfAdder4 (idx=13) C4 <- HalfAdder4 (idx=22) C4 <- HalfAdder5 (idx=22) I[5] <- HalfAdder5 (idx=6) O[5] <- HalfAdder5 (idx=14) C5 <- HalfAdder5 (idx=23) C5 <- HalfAdder6 (idx=23) I[6] <- HalfAdder6 (idx=7) O[6] <- HalfAdder6 (idx=15) C6 <- HalfAdder6 (idx=24) C6 <- HalfAdder7 (idx=24) I[7] <- HalfAdder7 (idx=8) O[7] <- HalfAdder7 (idx=16) COUT <- HalfAdder7 (idx=17) BYTE=0x00 (2.308623s or 2.31ns per increment) 1.7257s or 1645.74ns per step
The program also measures performance, to compare with a native increment of BYTE (unsigned char), which took 2.31ns per increment on my Linux AMD64 machine. Because in the worst case the carry may take up to 8 simulation steps to propagate (below is partial DEBUG output for the switch from binary 011111111 to 100000000, i.e. 255+1=256, where the 9th bit is the overflow bit):
... Case 1048561 output=011111111 Case 1048562 output=011111110 Case 1048563 output=011111100 Case 1048564 output=011111000 Case 1048565 output=011110000 Case 1048566 output=011100000 Case 1048567 output=011000000 Case 1048568 output=010000000 Case 1048569 output=100000000 ...
we can say that 1 increment takes 8 simulation steps, so 1646ns per step means 13168ns per byte increment, or 5700 times slower than the native BYTE increment. But if we try the compiler's optimization options we get:
-O1: 0.2054s or 195.87ns per step -O2: 0.2087s or 199.00ns per step -O3: 0.1822s or 173.73ns per step
This is a little better - 600 times slower :)
From another point of view, that is about 720 thousand 8-bit increments per second! Fully emulated bit by bit ;)
Or 5.78 million simulation steps per second over 18 connection points (so about 100 million connection points per second per core, and it could potentially be parallelized easily to use all the cores of your PC)...
-
Different instances of the same unit
08/15/2018 at 07:24 • 0 comments
OK, it may look a little tricky - first we should create a generic unit class that we want to clone multiple times:
class HalfAdder : public Entity { protected: // indecies: int iA,iB,iS,iC; // inputs: Signal A,B; // outputs: Signal S,C; public: HalfAdder(const char* s) : Entity(s) { // empty constructor for generic unit } void step() { A = io(iA).read(); B = io(iB).read(); if(A==TRUE && B==TRUE) S = FALSE; else if(A==FALSE && B==TRUE) S = TRUE; else if(A==TRUE && B==FALSE) S = TRUE; else // if(A==FALSE && B==FALSE) S = FALSE; if(A==TRUE && B==TRUE) C = TRUE; else C = FALSE; // cout << name() << ":" << A << B << "->" << C << S << endl; io(iS) << S; io(iC) << C; } };
Then in the program (inside function main) we will create actual implementations (3 instances):
// Stage 0: A = INC, B = I[0]; drives sum O[0] and carry C0.
class HalfAdder0 : public HalfAdder
{
public:
    HalfAdder0() : HalfAdder("HalfAdder0")
    {
        iA = at("INC");
        iB = at("I[0]");
        iS = at("O[0]");
        iC = at("C0");
    }
} ha0;

// Stage 1: A = C0 (carry of stage 0), B = I[1]; drives sum O[1] and carry C1.
class HalfAdder1 : public HalfAdder
{
public:
    HalfAdder1() : HalfAdder("HalfAdder1")
    {
        iA = at("C0");
        iB = at("I[1]");
        iS = at("O[1]");
        iC = at("C1");
    }
} ha1;

// Stage 2: A = C1 (carry of stage 1), B = I[2]; drives sum O[2] and carry C2.
class HalfAdder2 : public HalfAdder
{
public:
    HalfAdder2() : HalfAdder("HalfAdder2")
    {
        iA = at("C1");
        iB = at("I[2]");
        iS = at("O[2]");
        iC = at("C2");
    }
} ha2;
As you can see, the 3 half-adders are connected to perform an optional increment (INC=1) of a 3-bit number.
Control is handled by a special unit, World, that simulates the outside world (so all inputs are outputs and all outputs are inputs from the world's point of view):
class World : public Entity { // indecies: int i_increment,i_input,i_output,i_carry; // internal counter: int counter; public: World() : Entity("World") { i_increment = at("INC"); i_input = at("I",3); i_output = at("O",3); i_carry = at("C2"); counter = 0; } void step() // test cases { Wire<4> vec; // temporary vector vec[0] = io(i_output+0).read(); vec[1] = io(i_output+1).read(); vec[2] = io(i_output+2).read(); vec[3] = io(i_carry).read(); cout << "Case " << counter << " output=" << vec << endl; switch(counter++) { case 0: case 1: case 2: case 3: io(i_increment) << FALSE; io(i_input+0) << TRUE; io(i_input+1) << TRUE; io(i_input+2) << TRUE; break; case 4: case 5: case 6: case 7: io(i_increment) << TRUE; io(i_input+0) << TRUE; io(i_input+1) << TRUE; io(i_input+2) << TRUE; break; case 8: case 9: case 10: case 11: io(i_increment) << TRUE; io(i_input+0) << FALSE; io(i_input+1) << FALSE; io(i_input+2) << FALSE; break; } } };
An object of class World ( World world; ) should be created before the HalfAdders, to make sure that the input/output vectors are attached to the global circuits before the units that attach themselves to separate bits of those vectors (otherwise the indices will not be connected properly). The main simulation loop will look like this:
for(int i=0;i<=12;i++) { sys->prepare(); ha0.step(); ha1.step(); ha2.step(); world.step(); }
The order in which the step methods of the objects are called does not actually matter, because all changes on outputs are made in a different "dimension" that becomes available to inputs only after the call to sys->prepare(), which shifts the "time axis" one step forward into the future.
And this is output:
INC <- World (idx=0) I[0] <- World (idx=1) I[1] <- World (idx=2) I[2] <- World (idx=3) O[0] <- World (idx=4) O[1] <- World (idx=5) O[2] <- World (idx=6) C2 <- World (idx=7) INC <- HalfAdder0 (idx=0) I[0] <- HalfAdder0 (idx=1) O[0] <- HalfAdder0 (idx=4) C0 <- HalfAdder0 (idx=8) C0 <- HalfAdder1 (idx=8) I[1] <- HalfAdder1 (idx=2) O[1] <- HalfAdder1 (idx=5) C1 <- HalfAdder1 (idx=9) C1 <- HalfAdder2 (idx=9) I[2] <- HalfAdder2 (idx=3) O[2] <- HalfAdder2 (idx=6) C2 <- HalfAdder2 (idx=7) Case 0 output=ZZZZ Case 1 output=NNNN Case 2 output=PPPN Case 3 output=PPPN Case 4 output=PPPN Case 5 output=PPPN Case 6 output=NPPN Case 7 output=NNPN Case 8 output=NNNP Case 9 output=NNNP Case 10 output=PPPN Case 11 output=PNNN Case 12 output=PNNN
It's interesting that going from 0111 to 1000 took 3 cycles to calculate 0111+1, because the half adders are connected sequentially and the carry passes through all 3 of them in 3 simulation cycles :)
Source code of this test program: https://gitlab.com/ternary/trcm/blob/master/tests/test2.cpp
P.S. Some idea how to make life easier - we can hide some repeating things behind macros so instead:
// Hand-written form: a class derived from HalfAdder whose constructor
// attaches the four connection points, plus one object of it (ha0).
class HalfAdder0 : public HalfAdder
{
public:
    HalfAdder0() : HalfAdder("HalfAdder0")
    {
        iA = at("INC");
        iB = at("I[0]");
        iS = at("O[0]");
        iC = at("C0");
    }
} ha0;
it might be
// Macro form: INSTANCE opens the derived class and its constructor body,
// the assignments in between fill that body, and NAMED closes both and
// declares the object ha0.
INSTANCE(HalfAdder,0);
    iA = at("INC");
    iB = at("I[0]");
    iS = at("O[0]");
    iC = at("C0");
NAMED(ha0);
which uses two pre-defined macros:
// Turns its argument into a string literal.
#define STRING(s) #s
// Opens "class <x><y> : public x" and its default constructor, which passes
// the pasted name "<x><y>" to the base constructor. Both braces are left
// open on purpose; NAMED() closes them.
#define INSTANCE(x,y) class x##y : public x { public: x##y() : x(STRING(x##y)) {
// Closes the constructor body and the class opened by INSTANCE(), and
// declares an object named z of that class.
#define NAMED(z) }}z
but it may look like C hack above C++ :)
-
Creating Entities
08/12/2018 at 20:58 • 0 comments
In order to make independent blocks that communicate with each other, the user should create a class derived from class Entity for every such block:
class Unit1 : public Entity { protected: // indecies: int i0,i1,i2; // inputs: Wire<8> net; Uint<8> bus; // outputs: Signal sig; public: Unit1() { // attach your entity to global wires i0 = at("bus",8,PULLUP); i1 = at("network",8); i2 = at("single"); } void step() { // read your inputs for(int i=0;i<8;i++) bus[i] = io(i0+i).read(); for(int i=0;i<8;i++) net[i] = io(i1+i).read(); // do something sig[0] = TRUE; // apply your outputs io(i0) << sig[0]; io(i0+1) << sig[0]; io(i2) << sig[0]; // additional trigger logic if(posedge(i1+5)) { // here we can have logic that should work on positive edge of network[5] } } };
In the future, the step() method of each user entity (which executes 1 step of simulation) will run in a separate thread...
Source code is available on GitLab: https://gitlab.com/ternary/trcm
-
Data Types
08/12/2018 at 05:09 • 0 comments
I started with a few template classes:
Wire<N> - generic wires (base class for everything else)
Uint<N> - unsigned N-bit integer
Sint<N> - signed N-bit integer
Tint<N> - balanced ternary N-trit integer
Also there is class Signal that is the same thing as Wire<1> (a single wire)
Every wire may be in one of the states listed below:
NC - not connected (internally represented by character 'Z')
TRUE - connected to positive power line (internally represented by character 'P')
FALSE - connected to negative power line (internally represented by character 'N')
MAYBE - intermediate state to simulate ternary logic (internally represented by character 'O')
PULLUP - weak pull-up to positive (character '1')
PULLDOWN - weak pull-down to negative (character '0')
INVALID - invalid state that prevents the simulated circuit from working properly (character '?')
In future I want to support fixed point and floating point numbers (but not right now).
Sample code to test basic types:
#include "TRCMath.hpp"

using namespace std;
using namespace TRC;

// Smoke test of the basic types: declares one object of each kind, sets a
// few individual wires, combines objects with && and &, and prints them.
int main()
{
    Wire<5> a;
    Wire<32> b;
    Wire<1> signal;
    Wire<1> out;
    Signal signal2;
    Uint<32> u;
    Sint<32> s;

    // Set single wires, then overwrite s with the && combination of b and u.
    s[0] = TRUE;
    b[31] = FALSE;
    s = b && u;

    // Combine two single-wire objects with &.
    signal = MAYBE;
    signal2 = TRUE;
    out = signal & signal2;

    cout << "signal=" << signal << endl;
    cout << "signal2=" << signal2 << endl;
    cout << "out=" << out << endl;
    cout << "a=" << a << endl;
    cout << "b=" << b << endl;
    cout << "u=" << u << endl;
    cout << "s=" << s << endl;
}
Wire<N> objects will support only logical operations, while all simulated integer types will also support arithmetic. Above you can see how operator && (bitwise AND) was applied to two 32-bit objects (b, a Wire<32>, and u, a Uint<32>) and how operator & (logical AND) was applied to two Signal objects.
Source code is available under GPLv3 on GitLab (it is still work in progress):
https://gitlab.com/ternary/trcm/blob/master/TRCMath.hpp
Now I'm thinking about the way to support reliable multiple state machine simulation (with ability to run simulation concurrently to occupy all available cores of host PC).