Skip to content

Commit

Permalink
fixed reordering pcie dma
Browse files Browse the repository at this point in the history
  • Loading branch information
sangwoojun committed Jul 8, 2020
1 parent 3c1f984 commit ebce948
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 65 deletions.
33 changes: 13 additions & 20 deletions examples/streaming/HwMain.bsv
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ module mkHwMain#(PcieUserIfc pcie)

FIFO#(DMAWord) inputQ <- mkSizedBRAMFIFO(512); // 8KBs
FIFO#(DMAWord) outputQ <- mkSizedBRAMFIFO(512); // 8KBs
Reg#(Bit#(10)) outputCntUp <- mkReg(0);
Reg#(Bit#(10)) outputCntDn <- mkReg(0);
Reg#(Bit#(16)) outputCntUp <- mkReg(0);
Reg#(Bit#(16)) outputCntDn <- mkReg(0);

StreamKernelIfc kernel <- mkStreamKernelTest;
DeSerializerIfc#(128, 2) des <- mkDeSerializer;
Expand Down Expand Up @@ -99,42 +99,35 @@ module mkHwMain#(PcieUserIfc pcie)
pcie.dataSend(req,truncate(r));
endrule

Reg#(Bit#(16)) readWordsLeft <- mkReg(0);
rule dmaReadReq ( readWordsLeft == 0 );
rule dmaReadReq;
streamReadQ.deq;
let poff = streamReadQ.first;
pcie.dmaReadReq( (zeroExtend(poff)<<10), 64); // offset, words
readWordsLeft <= 64;
streamReadCnt <= streamReadCnt + (1<<24);
//$write("DMA Read req\n" );
endrule
//Reg#(Bit#(6)) offset <- mkReg(0);
rule dmaReadData (readWordsLeft != 0 );
rule dmaReadDatal;
DMAWord rd <- pcie.dmaReadWord;
//$write("+++ %x\n", rd.word);
//offset <= offset + 1;
page.portA.request.put(BRAMRequest{write:True,responseOnWrite:False,address:truncate(streamReadCnt),datain:rd});
readWordsLeft <= readWordsLeft - 1;
inputQ.enq(rd);
//$write("DMA Read\n" );
streamReadCnt <= streamReadCnt + 1;

inputQ.enq(rd);
endrule

Reg#(Bit#(10)) curOutLeft <- mkReg(0);
rule dmaWriteReq (outputCntUp - outputCntDn >= 64 && curOutLeft == 0);
Reg#(Bit#(16)) curOutLeftUp <- mkReg(0);
Reg#(Bit#(16)) curOutLeftDn <- mkReg(0);
rule dmaWriteReq (outputCntUp - outputCntDn >= 64 && curOutLeftUp-curOutLeftDn < 128);
streamWriteQ.deq;
let woff = streamWriteQ.first;
pcie.dmaWriteReq((zeroExtend(woff)<<10), 64);

////outputQ.deq;
////pcie.dmaWriteData(outputQ.first);
curOutLeft <= 64;
curOutLeftUp <= curOutLeftUp + 64;
outputCntDn <= outputCntDn + 64;
$write("Starting DMA Write\n" );
streamWriteCnt <= streamWriteCnt + (1<<24);
endrule
rule dmaWriteData(curOutLeft != 0);
curOutLeft <= curOutLeft - 1;
rule dmaWriteData(curOutLeftUp != curOutLeftDn);
curOutLeftDn <= curOutLeftDn + 1;

//outputCntDn <= outputCntDn + 1;

outputQ.deq;
Expand Down
28 changes: 11 additions & 17 deletions examples/streaming/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ int main(int argc, char** argv) {

//uint8_t* dmabuf = (uint8_t*)dma->dmaBuffer();
uint8_t* dmabuf = (uint8_t*)pcie->dmaBuffer();
for ( uint32_t i = 0; i < 8*1024/4; i++ ) {
((uint32_t*)dmabuf)[i] = 0;
for ( uint32_t i = 0; i < 32*1024/4; i++ ) {
((uint32_t*)dmabuf)[i] = 0xcc;
//dmabuf[i] = (char)i;
}
for ( uint32_t i = 0; i < 4*1024/4; i++ ) {
Expand All @@ -51,32 +51,26 @@ int main(int argc, char** argv) {
for ( int i = 0; i < 32; i++ ) {
printf( "++ %d %x\n", i, ((uint32_t*)dmabuf)[i] );
}
pcie->userWriteWord(0,0);
pcie->userWriteWord(0,1);
/*
for ( int i = 0; i < 8; i++ ) {
printf( "r %x\n", pcie->userReadWord(0) );
printf( "w %x\n", pcie->userReadWord(4) );
pcie->userWriteWord(1*4,4+i);
}
for ( int i = 0; i < 8; i++ ) {
pcie->userWriteWord(0,i);
}
*/
pcie->userWriteWord(0,2);
pcie->userWriteWord(0,3);
//pcie->userWriteWord(0,8);
//pcie->userWriteWord(0,9);
//pcie->userWriteWord(0,10);
//pcie->userWriteWord(0,11);
sleep(1);
for ( int i = 0; i < 16; i++ ) {
pcie->userWriteWord(1*4,4+i);
pcie->userWriteWord(0,i);
}
//sleep(1);
/*
for ( int i = 0; i < 8; i++ ) {
printf( "r %x\n", pcie->userReadWord(0) );
printf( "w %x\n", pcie->userReadWord(4) );
}
*/
printf( "----\n" );
pcie->userWriteWord(1*4,4);
pcie->userWriteWord(1*4,5);
pcie->userWriteWord(1*4,6);
pcie->userWriteWord(1*4,7);
/*
for ( int i = 0; i < 8; i++ ) {
printf( "r %x\n", pcie->userReadWord(0) );
Expand Down
75 changes: 47 additions & 28 deletions src/PcieCtrl.bsv
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,9 @@ module mkPcieCtrl#(PcieImportUser user) (PcieCtrlIfc);

//FIFO#(Tuple2#(Bit#(8),Bit#(10))) readBurstQ <- mkSizedFIFO(4);
FIFO#(Tuple2#(Bit#(8),Bit#(10))) readBurstQ <- mkFIFO;
FIFO#(Tuple2#(Bit#(8),Bit#(10))) readBurst2Q <- mkFIFO;
BRAM2Port#(Bit#(8),Tuple2#(Bit#(10),Bit#(10))) tagMap <- mkBRAM2Server(defaultValue); // tag, total words,words recv
BRAM2Port#(Bit#(13), Bit#(128)) readReorder <- mkBRAM2Server(defaultValue);
BRAM2Port#(Bit#(10), Bit#(128)) readReorder <- mkBRAM2Server(defaultValue); // 7 bit tag, 3 bit burst offset (max 8 words per burst)
ScoreboardIfc#(4,Bit#(8)) readCompletionsb <- mkScoreboard;
Reg#(Bit#(8)) freeTagCnt <- mkReg(0);
FIFO#(Bit#(8)) freeReadTagQ <- mkSizedBRAMFIFO(128);
Expand All @@ -169,19 +170,25 @@ module mkPcieCtrl#(PcieImportUser user) (PcieCtrlIfc);
freeWriteTagQ.enq(freeTagCnt+128);
endrule

// Relay stage: forwards each (tag, length) burst descriptor from readBurstQ
// into readBurst2Q unchanged, one entry per firing. The rule can only fire
// when readBurstQ has an entry and readBurst2Q has room (implicit FIFO guards).
// NOTE(review): presumably inserted to decouple the producer of readBurstQ
// from the consumer of readBurst2Q -- confirm the intent with the author.
rule relayReadBurst;
    let burstDesc = readBurstQ.first;
    readBurst2Q.enq(burstDesc);
    readBurstQ.deq;
endrule


FIFO#(DMAWordTagged) dmaReadWordQ <- mkSizedFIFO(16);
FIFO#(DMAWordTagged) dmaReadWordQ <- mkSizedBRAMFIFO(128);
FIFO#(DMAWordTagged) dmaReadWordRQ <- mkFIFO;
FIFO#(Tuple2#(Bit#(8),Bit#(10))) burstUpdReqQ <-mkFIFO;
FIFO#(Tuple2#(Bit#(8),Bit#(10))) readDoneTagQ <- mkSizedFIFO(4); //TODO
Reg#(Tuple4#(Bit#(8),Bit#(10),Bit#(10),Bit#(10))) tagWordsLeft <- mkReg(tuple4(0,0,0,0));
Reg#(Tuple5#(Bit#(8),Bit#(10),Bit#(10),Bit#(10),Bit#(10))) tagWordsLeft <- mkReg(tuple5(0,0,0,0,0));
rule updateReadBurst1 ( freeTagCnt == 128 );
let burst = readBurstQ.first;
let burst = readBurst2Q.first;
let tag = tpl_1(burst);
let words = tpl_2(burst);

if ( !readCompletionsb.search1(tag) ) begin
tagMap.portA.request.put(BRAMRequest{write:False, responseOnWrite:False, address:tag, datain:?});
readBurstQ.deq;
readBurst2Q.deq;
readCompletionsb.enq(tpl_1(burst));
burstUpdReqQ.enq(burst);
end
Expand All @@ -208,41 +215,51 @@ module mkPcieCtrl#(PcieImportUser user) (PcieCtrlIfc);
let burst = burstUpdReqQ.first;
let tag = tpl_1(burst);
let words = tpl_2(burst);
//debugCode <= debugCode + (zeroExtend(words/4)<<16) + (zeroExtend(done/4)<<8);

let newdone = done;
if ( done + words > req ) begin // read v should never be 0!
let newwords = words;
if ( done + words >= req ) begin // read v should never be 0!
newdone = req;
words = req - done;
//debugCode <= debugCode + (1<<16);
//freeReadTagFQ.enq(tag);
newwords = req - done;
end
else newdone = done + words;

if ( done != 0 ) debugCode <= debugCode + ((zeroExtend(done)/4)<<16);

else begin
newdone = done + words;
end


readCompletionsb.deq;
tagWordsLeft <= tuple4(tag,done,words,0);
//debugCode <= debugCode + (zeroExtend(words)<<16);

tagWordsLeft <= tuple5(tag,done,newwords,0,req);
debugCode <= debugCode + (zeroExtend(newwords)<<16);

tagMap.portB.request.put(BRAMRequest{write:True,responseOnWrite:False,address:tag,datain:tuple2(req,newdone)});
endrule
// Relay stage: moves each tagged DMA read word from dmaReadWordQ into
// dmaReadWordRQ unchanged, one word per firing. The rule can only fire when
// dmaReadWordQ has an entry and dmaReadWordRQ has room (implicit FIFO guards).
// NOTE(review): presumably adds a register stage after the BRAM-backed FIFO
// so downstream rules read from a plain FIFO -- confirm the intent.
rule relayDmaReadrQ;
    let taggedWord = dmaReadWordQ.first;
    dmaReadWordRQ.enq(taggedWord);
    dmaReadWordQ.deq;
endrule
rule writeReadBuffer (tpl_3(tagWordsLeft) > 0);
let tag = tpl_1(tagWordsLeft);
let off = tpl_2(tagWordsLeft);
let words = tpl_3(tagWordsLeft);
let ioff = tpl_4(tagWordsLeft);
if ( words <= 4 ) begin
let req = tpl_5(tagWordsLeft);


if ( words <= 4 && off+ioff+4 >= req ) begin
words = 0;
readDoneTagQ.enq(tuple2(tag, off+ioff+4));
end
else words = words - 4;

tagWordsLeft <= tuple5(tag,off,words,ioff+4, req);
dmaReadWordRQ.deq;
let word = dmaReadWordRQ.first;

tagWordsLeft <= tuple4(tag,off,words,ioff+4);
dmaReadWordQ.deq;
let word = dmaReadWordQ.first;

Bit#(13) writeoff = (zeroExtend(tag)<<5)|((zeroExtend(off)+zeroExtend(ioff))>>2);

Bit#(10) writeoff = (zeroExtend(tag)<<3)|((zeroExtend(off)+zeroExtend(ioff))>>2);
readReorder.portA.request.put(BRAMRequest{write:True,responseOnWrite:False,address:writeoff,datain:word.word});
endrule
Reg#(Tuple3#(Bit#(8),Bit#(10),Bit#(10))) readFlushTag <- mkReg(tuple3(0,0,0)); //tag, req, curword
Expand All @@ -257,11 +274,16 @@ module mkPcieCtrl#(PcieImportUser user) (PcieCtrlIfc);
let r_ = readDoneTagQ.first;
let tag = tpl_1(r_);
let words = tpl_2(r_);
Bit#(13) readoff = (zeroExtend(tag)<<5);

debugCode <= debugCode + zeroExtend(words);


Bit#(10) readoff = (zeroExtend(tag)<<3);
readReorder.portB.request.put(BRAMRequest{write:False,responseOnWrite:False,address:readoff,datain:?});
let wordsleft = 0;
if ( words > 4 ) wordsleft = words - 4;
readFlushTag <= tuple3(tag,words, wordsleft);

dmaReadOutCntUp <= dmaReadOutCntUp + 1;

freeReadTagFQ.enq(tag);
Expand All @@ -273,8 +295,10 @@ module mkPcieCtrl#(PcieImportUser user) (PcieCtrlIfc);

if ( wleft > 4 ) wordsleft = wleft - 4;

//debugCode <= debugCode + 1;

readFlushTag <= tuple3(tag,words, wordsleft);
Bit#(13) readoff = (zeroExtend(tag)<<5)|((zeroExtend(words-wleft))>>2);
Bit#(10) readoff = (zeroExtend(tag)<<3)|((zeroExtend(words-wleft))>>2);
readReorder.portB.request.put(BRAMRequest{write:False,responseOnWrite:False,address:readoff,datain:?});
dmaReadOutCntUp <= dmaReadOutCntUp + 1;
end
Expand Down Expand Up @@ -379,8 +403,6 @@ module mkPcieCtrl#(PcieImportUser user) (PcieCtrlIfc);

dmaReadWordQ.enq(DMAWordTagged{word:{data2,data1,data0,dmaReadBuffer}, tag:completionRecvTag});

debugCode <= debugCode + 1;


if ( completionRecvLength >= 4 ) begin
completionRecvLength <= completionRecvLength - 4;
Expand Down Expand Up @@ -471,7 +493,6 @@ module mkPcieCtrl#(PcieImportUser user) (PcieCtrlIfc);
completionRecvTag <= tag;
dmaReadBuffer <= reverseEndian(data);
readBurstQ.enq(tuple2(tag, length));
//debugCode <= debugCode + ((zeroExtend(length)>>2)<<16);
end
else begin
tlp2Q.enq(tlp);
Expand Down Expand Up @@ -694,8 +715,6 @@ module mkPcieCtrl#(PcieImportUser user) (PcieCtrlIfc);
//FIXME maybe this needs to be in bytes?
Bit#(10) dmaWords = req.words;

//debugCode <= debugCode + (zeroExtend(dmaWords)<<24);

Bit#(32) cdw0 = {
1'b0,
2'b00, //read
Expand Down

0 comments on commit ebce948

Please sign in to comment.