Asynchronous Processor
From Lyra
Contents |
Introduction
This example models a simple MIPS-like processor in an asynchronous design style. The dataflow of the processor, shown below, is exactly the same as that of the previous synchronous processor.
Modeling
The asynchronous model has a half-buffered 5-stage pipeline. Every stage is modeled as a two-state process (the EX stage adds two extra states to model the multi-cycle multiply). The rendezvous used to communicate with the previous stage and the rendezvous used to communicate with the next stage are decoupled into two separate transitions. Data hazards and control hazards are handled in exactly the same way as in the synchronous processor.
The following graphical representation of the processes can help readers visualize the code. Some of the transition edges are omitted in the figure for easy visualization and description.
CPU
/*
 * cpu: five-stage pipelined processor (IF, ID, EX, MEM, WB), one fsm per stage.
 *
 * Ports:
 *   fetch - instruction memory channel; the cpu writes a 10-bit address (pc)
 *           and reads back a 32-bit instruction word.
 *   load  - data memory read channel; the cpu writes a 10-bit address and
 *           reads back a 32-bit word.
 *   store - data memory write channel; the cpu writes (address, data).
 *
 * Instruction word layout, as decoded in ID_stage:
 *   {31:26} opcode, {25:21} rs, {20:16} rt, {15:11} rd, {9:0} immediate.
 *
 * Opcodes, as handled in EX_stage / MEM_stage / WB_stage:
 *   1 add, 2 sub, 3 load, 4 store, 5 beq, 6 multiply; anything else
 *   (including 0) produces a zero result and no register write.
 */
module cpu (oi < uint<10>, uint<32> > fetch,
            oi < uint<10>, uint<32> > load,
            out <(uint<10>, uint<32>)> store)
{
    /* Inter-stage hand-off channels, one per pipeline boundary. */
    rendv decode;
    rendv execute;
    rendv access_mem;
    rendv write_back;

    /* Register file read channels (ID_stage <-> reg_file). */
    rendv rs, rt;

    /* Write-back channel: written by WB_stage, read by reg_file and by
       ID_stage (as a bypass). */
    barrier rd;

    /* Branch redirect: written by EX_stage, read by IF_stage and ID_stage. */
    barrier jump(10);

    /* IF state: program counter and the fetched instruction. */
    reg uint<10> pc;
    reg uint<32> instr;

    /* ID output: (opcode, opa, opb, rs, rt, rd, imm). */
    reg (uint<6>, uint<32>, uint<32>, uint<5>, uint<5>, uint<5>, uint<10>) dec_reg;

    /* EX output: (opcode, result, rd, imm). Also read by EX_stage itself
       as a forwarding source for the following instruction. */
    reg (uint<6>, uint<32>, uint<5>, uint<10>) exe_reg;

    /* Source register addresses of the instruction last executed; written
       by EX_stage and not read anywhere else in this module. */
    reg uint<5> exe_rs, exe_rt;

    /* MEM output: (opcode, write-back data, rd). Also read by EX_stage as
       a forwarding source. */
    reg (uint<6>, uint<32>, uint<5>) mem_reg;

    /* WB state: the fields of the instruction being written back. */
    reg uint<6> wb_opcode;
    reg uint<32> wb_data;
    reg uint<5> wb_rd_addr;

    init
    {
        pc = 0;
    }

    /*
     * IF_stage: fetches the instruction at pc and hands it to ID_stage.
     *   S0 - fetch from memory.
     *   S1 - offer the fetched instruction on decode.
     * A jump in either state clears instr, loads pc from the jump channel
     * and returns to S0.
     */
    fsm IF_stage(out < uint<32> > decode, in < uint<10> > jump)
    {
        init S0:
        {
            when (fetch)
            {
                fetch.write(pc);
                // Jump in the same transition: drop the fetched word and redirect.
                when (jump)
                {
                    instr = 0;
                    pc = jump.read();
                    goto S0;
                }
                instr = fetch.read();
                pc = pc + 1;
                goto S1;
            }
        }
        state S1:
        {
            when (decode)
            {
                decode.write(instr);
                // Jump while handing off: squash and redirect.
                when (jump)
                {
                    instr = 0;
                    pc = jump.read();
                    goto S0;
                }
                goto S0;
            }
            // Jump while still waiting for ID_stage: squash and redirect.
            when (jump)
            {
                instr = 0;
                pc = jump.read();
                goto S0;
            }
        }
    }

    /*
     * ID_stage: decodes the instruction and reads its source operands.
     *   S0 - take an instruction from decode, read rs/rt from the register
     *        file, and latch the decoded tuple into dec_reg.
     *   S1 - offer dec_reg on execute.
     * A jump clears dec_reg to all zeros (opcode 0) and returns to S0.
     */
    fsm ID_stage(in < uint<32> > decode, in <uint<10> > jump, in <(uint<5>, uint<32>) >rd,
                 out <(uint<6>, uint<32>, uint<32>, uint<5>, uint<5>, uint<5>, uint<10>)> execute,
                 oi <uint<5>, uint<32> > rs, oi <uint<5>, uint<32> > rt)
    {
        init S0:
        {
            val (uint<5>, uint<32>) bypass;
            val uint<32> opa, opb;
            when (decode)
            {
                when (jump)
                {
                    dec_reg = (0, 0, 0, 0, 0, 0, 0);
                    goto S0;
                }
                when (rs, rt)
                {
                    val uint<6> opcode;
                    val uint<32> inst;
                    inst = decode.read();
                    opcode = inst{31:26};
                    rs.write(inst{25:21});
                    rt.write(inst{20:16});
                    // A write-back happening in this same transition is
                    // bypassed: take its data when its address matches
                    // rs or rt, otherwise use the register file value.
                    when (rd)
                    {
                        bypass = rd.read();
                        opa = (bypass.$1 == inst{25:21}) ? bypass.$2 : rs.read();
                        opb = (bypass.$1 == inst{20:16}) ? bypass.$2 : rt.read();
                        dec_reg = (opcode, opa, opb, inst{25:21}, inst{20:16}, inst{15:11}, inst{9:0});
                        goto S1;
                    }
                    // No concurrent write-back: use the register file values.
                    dec_reg = (opcode, rs.read(), rt.read(), inst{25:21}, inst{20:16},
                               inst{15:11}, inst{9:0});
                    goto S1;
                }
            }
        }
        state S1:
        {
            when(execute)
            {
                execute.write(dec_reg);
                when (jump)
                {
                    dec_reg = (0, 0, 0, 0, 0, 0, 0);
                    goto S0;
                }
                goto S0;
            }
        }
    }

    /*
     * EX_stage: resolves operand forwarding, computes the result and
     * resolves branches.
     *   S0     - take a decoded instruction from execute and compute res.
     *   S1     - offer exe_reg on access_mem.
     *   S2, S3 - two extra transitions taken only by multiply before S1.
     */
    fsm EX_stage (in <(uint<6>, uint<32>, uint<32>, uint<5>, uint<5>, uint<5>, uint<10>)> execute,
                  out <(uint<6>, uint<32>, uint<5>, uint<10>)> access_mem, out <uint<10> > jump)
    {
        val uint<6> opcode;
        val uint<32> opa, opb;
        val uint<32> va, vb, res;
        val uint<5> rs_addr, rt_addr, rd_addr;
        val uint<10> imm;
        init S0:
        {
            when (execute)
            {
                opcode = execute.read().$1;
                rs_addr = execute.read().$4;
                rt_addr = execute.read().$5;
                rd_addr = execute.read().$6;
                imm = execute.read().$7;
                //data forwarding
                // From EX: use the previous result when that instruction
                // writes a register (opcode is not store 4, beq 5 or nop 0)
                // and its rd matches the source register.
                // NOTE(review): load (3) is not excluded here although its
                // EX result is 0 (see case 3 below) - confirm that a
                // dependent instruction can never directly follow a load.
                opa = (exe_reg.$1 !=4 && exe_reg.$1 != 5 && exe_reg.$1 != 0 &&
                       exe_reg.$3 == rs_addr) ? exe_reg.$2 : execute.read().$2;
                opb = (exe_reg.$1 !=4 && exe_reg.$1 != 5 && exe_reg.$1 != 0 &&
                       exe_reg.$3 == rt_addr) ? exe_reg.$2 : execute.read().$3;
                // From MEM: same producer test, applied to mem_reg. The
                // nop check previously tested exe_reg.$1, so a nop held in
                // mem_reg could be forwarded and a valid MEM result could
                // be dropped whenever the EX instruction was a nop.
                // The exe_reg.$3 != addr term gives the newer EX entry priority.
                va = (mem_reg.$1 != 4 && mem_reg.$1 != 5 && mem_reg.$1 != 0 &&
                      mem_reg.$3 == rs_addr && exe_reg.$3 != rs_addr) ? mem_reg.$2 : opa;
                vb = (mem_reg.$1 != 4 && mem_reg.$1 != 5 && mem_reg.$1 != 0 &&
                      mem_reg.$3 == rt_addr && exe_reg.$3 != rt_addr) ? mem_reg.$2 : opb;
                // res is assigned by the opcode switch below.
                exe_reg = (opcode, res, rd_addr, imm);
                exe_rs = rs_addr;
                exe_rt = rt_addr;
                switch (opcode)
                {
                    case 1://add
                    {
                        res = va + vb;
                        goto S1;
                    }
                    case 2: //sub
                    {
                        res = va - vb;
                        goto S1;
                    }
                    case 3: //load
                    {
                        // The loaded value is produced later, in MEM_stage.
                        res = 0;
                        goto S1;
                    }
                    case 4: //store
                    {
                        // The value to store travels in the result field.
                        res = vb;
                        goto S1;
                    }
                    case 5: //beq
                    {
                        // Taken: publish the target on jump.
                        when (jump, va == vb)
                        {
                            jump.write(imm);
                            res = 0;
                            goto S1;
                        }
                        // Not taken.
                        when (va != vb)
                        {
                            res = 0;
                            goto S1;
                        }
                    }
                    case 6: //multiply
                    {
                        // Goes through S2 and S3 before reaching S1.
                        res = va * vb;
                        goto S2;
                    }
                    default:
                    {
                        res = 0;
                        goto S1;
                    }
                }
            }
        }
        state S1:
        {
            when (access_mem)
            {
                access_mem.write(exe_reg);
                goto S0;
            }
        }
        state S2:
        {
            goto S3;
        }
        state S3:
        {
            goto S1;
        }
    }

    /*
     * MEM_stage: performs the data memory access.
     *   S0 - take an instruction from access_mem; loads read memory at imm,
     *        stores write (imm, res), everything else passes res through.
     *   S1 - offer mem_reg on write_back.
     */
    fsm MEM_stage (in <(uint<6>, uint<32>, uint<5>, uint<10>)> access_mem,
                   out <(uint<6>, uint<32>, uint<5>)> write_back)
    {
        val uint<6> opcode;
        val uint<32> res;
        val uint<32> wbdata;
        val uint<5> rd_addr;
        val uint<10> imm;
        init S0:
        {
            when (access_mem)
            {
                (opcode, res, rd_addr, imm) = access_mem.read();
                // wbdata is assigned by the opcode switch below.
                mem_reg = (opcode, wbdata, rd_addr);
                switch (opcode)
                {
                    case 3: //load
                    when (load)
                    {
                        load.write(imm);
                        wbdata = load.read();
                        goto S1;
                    }
                    case 4: //store
                    when (store)
                    {
                        wbdata = 0;
                        store.write(imm, res);
                        goto S1;
                    }
                    default:
                    {
                        wbdata = res;
                        goto S1;
                    }
                }
            }
        }
        state S1:
        {
            when (write_back)
            {
                write_back.write(mem_reg);
                goto S0;
            }
        }
    }

    /*
     * WB_stage: writes results back to the register file.
     *   S0 - take an instruction from write_back into the wb_* registers.
     *   S1 - for add (1), sub (2), load (3) and multiply (6), write
     *        (wb_rd_addr, wb_data) on rd; other opcodes write nothing.
     */
    fsm WB_stage (in <(uint<6>, uint<32>, uint<5>)> write_back, out <(uint<5>, uint<32>)> rd)
    {
        init S0:
        {
            when (write_back)
            {
                (wb_opcode, wb_data, wb_rd_addr) = write_back.read();
                goto S1;
            }
        }
        state S1:
        {
            switch (wb_opcode)
            {
                case 1:
                when (rd)
                {
                    rd.write(wb_rd_addr, wb_data);
                    goto S0;
                }
                case 2:
                when (rd)
                {
                    rd.write(wb_rd_addr, wb_data);
                    goto S0;
                }
                case 3:
                when (rd)
                {
                    rd.write(wb_rd_addr, wb_data);
                    goto S0;
                }
                case 6:
                when (rd)
                {
                    rd.write(wb_rd_addr, wb_data);
                    goto S0;
                }
                default: goto S0;
            }
        }
    }

    /* Register file instance, connected to the read and write-back channels. */
    regfile reg_file(rs, rt, rd);
}
/*
 * regfile: 32 x 32-bit register file with two read channels and one
 * write channel, each served by its own single-state fsm.
 *
 *   rs, rt - read channels: a 5-bit register index comes in, the 32-bit
 *            register contents go back out.
 *   rd     - write channel: carries (register index, data).
 */
module regfile
    (io <uint<5>, uint<32> > rs,
     io <uint<5>, uint<32> > rt,
     in <(uint<5>, uint<32>)> rd)
{
    /* Storage: 32 registers of 32 bits each. */
    reg uint<32>[32] regfile;

    /* All registers start at zero. */
    init
    {
        regfile = {0, 0, 0, 0, 0, 0, 0, 0,
                   0, 0, 0, 0, 0, 0, 0, 0,
                   0, 0, 0, 0, 0, 0, 0, 0,
                   0, 0, 0, 0, 0, 0, 0, 0};
    }

    /* Serves reads on the rs channel. */
    fsm rs_read
    {
        val uint<5> index;
        init idle:
        when (rs)
        {
            index = rs.read();
            rs.write(regfile[index]);
            goto idle;
        }
    }

    /* Serves reads on the rt channel. */
    fsm rt_read
    {
        val uint<5> index;
        init idle:
        when (rt)
        {
            index = rt.read();
            rt.write(regfile[index]);
            goto idle;
        }
    }

    /* Applies writes arriving on the rd channel. */
    fsm rd_write
    {
        val uint<5> index;
        val uint<32> data;
        init idle:
        when (rd)
        {
            (index, data) = rd.read();
            regfile[index] = data;
            goto idle;
        }
    }
}
Memory
/*
 * memory: 1024 x 32-bit RAM shared by instructions and data.
 *
 *   fetch - instruction read channel: 10-bit address in, 32-bit word out.
 *   load  - data read channel: 10-bit address in, 32-bit word out.
 *   store - data write channel: carries (address, data).
 */
module memory ( io <uint<10>, uint<32> > fetch,
                io <uint<10>, uint<32> > load,
                in <(uint<10>,uint<32>)> store)
{
    reg uint<32>[1024] ram;

    /*
     * Preloads the demo program at addresses 0..10 and its data at
     * 128..130. Word layout: opcode << 26 | rs << 21 | rt << 16 |
     * rd << 11 | immediate.
     *
     *          sub  r0, r0, r0
     *          load r1, [128]      ; 1
     *          load r2, [129]      ; 11
     *          load r3, [130]      ; 1
     *  loop:   add  r0, r0, r1
     *          add  r1, r1, r3
     *          mul  r4, r3, r3
     *          beq  r1, r2, finish
     *          beq  r0, r0, loop
     *  finish: store r0, [131]
     *          beq  r0, r0, finish
     */
    init
    {
        ram[0] = 32'd2 << 26;                                                      // sub  r0, r0, r0
        ram[1] = (32'd3 << 26) | (32'd1 << 11) | 128;                              // load r1, [128]
        ram[2] = (32'd3 << 26) | (32'd2 << 11) | 129;                              // load r2, [129]
        ram[3] = (32'd3 << 26) | (32'd3 << 11) | 130;                              // load r3, [130]
        ram[4] = (32'd1 << 26) | (32'd1 << 16);                                    // loop: add r0, r0, r1
        ram[5] = (32'd1 << 26) | (32'd1 << 21) | (32'd3 << 16) | (32'd1 << 11);    // add  r1, r1, r3
        ram[6] = (32'd6 << 26) | (32'd3 << 21) | (32'd3 << 16) | (32'd4 << 11);    // mul  r4, r3, r3
        ram[7] = (32'd5 << 26) | (32'd1 << 21) | (32'd2 << 16) | 9;                // beq  r1, r2, finish
        ram[8] = (32'd5 << 26) | 4;                                                // beq  r0, r0, loop
        ram[9] = (32'd4 << 26) | 131;                                              // finish: store r0, [131]
        ram[10] = (32'd5 << 26) | 9;                                               // beq  r0, r0, finish

        ram[128] = 1;
        ram[129] = 11;
        ram[130] = 1;
    }

    /* Instruction port: answers each fetch with the word at the given address. */
    fsm inst
    {
        init ready:
        when (fetch)
        {
            fetch.write(ram[fetch.read()]);
            goto ready;
        }
    }

    /* Data port: answers loads and applies stores. */
    fsm data
    {
        init ready:
        /* Read: return the word at the requested address. */
        when (load)
        {
            load.write(ram[load.read()]);
            goto ready;
        }
        /* Write: store the data word at the given address. */
        when (store)
        {
            val (uint<10>, uint<32>) request;
            request = store.read();
            ram[request.$1] = request.$2;
            goto ready;
        }
    }
}
toplevel
#include "librfsm.rfm"
#include "cpu.rfm"
#include "memory.rfm"
/*
 * toplevel: connects one cpu instance and one memory instance through
 * three shared rendezvous channels.
 */
module toplevel
{
    /* Channels shared by the cpu and the memory. */
    rendv fetch;
    rendv load;
    rendv store;

    cpu mycpu(fetch, load, store);
    memory mymem(fetch, load, store);
}



