#include <stdio.h>
#include <stdlib.h>

// This program generates the ucode of M16. This ucode contains the u-program
// that interprets the M16 instruction set.  It is stored in a ROM
// inside the control unit (cu2 module) and holds up to 64 u-instructions with
// the control signals to execute the M16 instruction set.
//
// This program also generates the inst.h header file
// defining the opcodes of M16 instructions.

// Each u-instruction of the ucode ROM is 32 bit wide with the following format:
//
// bits 31-7: control signals for the M16 datapath (cpu module)
// bit 6: control signal sethi needed to implement the prefix sethi instruction
// bit 5: control signal decode. If active, the address of the next
//        u-instruction is computed from the opcode or s1 fields in IR.
// bits 4-0: For any u-instruction not decoding IR, it is the address of the
//        u-instruction to be executed in the next cycle
//
// The map of M16 instruction implementation is:
// u-instructions 24-31: 2 operand instructions (mov, neg, cmp, call, jmp).
//   They require fields d and s2. 3 opcodes are available for extension
// u-instructions 32-59: other instructions (add, ldw, etc.).
// u-instruction 60: unsigned conditional branch instruction blu, bgeu, ...
// u-instruction 61: signed conditional branch instruction ba, be, bl, bg, ...
// u-instructions 62-63: reserved for prefix instruction sethi

// Control signals

// OPALU: define the operations computed by the ALU (alu module)
// 4-bit field stored in bits 31-28 of the u-instruction (see OPALUMASK).
// Codes that set bit 31 are written with an unsigned constant so that the
// shift does not overflow a signed int.
#define CSADD     (8U<<28)
#define CSSUB     (1<<28)
#define CSSHL     (2<<28)
#define CSSHR     (3<<28)
#define CSSAR     (4<<28)
#define CSAND     (5<<28)
#define CSOR      (6<<28)
#define CSXOR     (7<<28)
#define CSMUL     (11U<<28)
#define CSDIV     (13U<<28)
#define CSREM     (14U<<28)

// OPYSEL: define the ways of selecting the ALU Y operand
// 3-bit field stored in bits 25-23 of the u-instruction (see OPYSELMASK).
#define CSY0      (0<<23)
#define CSY2      (1<<23)
#define CSIDX     (2<<23)
#define CSIDXx2   (4<<23)
#define CSINST    (7<<23)

// OPDBI: define the operations needed by the data bus interface (dbi module)
// 3-bit field stored in bits 21-19 of the u-instruction (see OPDBIMASK).
#define CSLDW     (1<<19)
#define CSLDUB    (2<<19)
#define CSLDSB    (3<<19)
#define CSSTW     (5<<19)
#define CSSTB     (7<<19)

// Single bit control signals needed by the CPU datapath (cpu module)
// CSSELS1 is defined as 0, so it sets no bit: the listing printed by
// dumpUcode() reports "selrs1" when neither CSSELPC nor CSX0 is set and an
// ALU operation is present.
#define CSSELPC   (1<<27)
#define CSX0      (1<<26)
#define CSSELS1   0
#define CSWRRD    (1<<18)
#define CSSWAPDS1 (1<<16)
#define CSWRPC    (1<<15)
#define CSBRANCH  (1<<14)
#define CSWRIR    (1<<13)
#define CSSELT    (1<<11)
#define CSSELD    (1<<10)
#define CSWRAR    (1<<9)
#define CSRD      (1<<8)
#define CSWR      (1<<7)

// Control signals needed internally by the control unit (cu2 module)
// CSCLRHI shares bit 0 with the u-address field (bits 4-0): it is only used
// together with CSDECODE, where the next-address field is not needed.
#define CSDECODE  (1<<5)
#define CSSETHI   (1<<6)
#define CSCLRHI   (1<<0)

// Masks for the multi-bit fields of a u-instruction:
//   UADDRMASK  - u-address of the next u-instruction (bits 4-0)
//   OPYSELMASK - ALU Y operand selection (bits 25-23)
//   OPALUMASK  - ALU operation (bits 31-28)
//   OPDBIMASK  - data bus interface operation (bits 21-19)
// OPALUMASK is built from an unsigned constant: shifting the signed value
// 0xf left by 28 does not fit in an int, which is undefined behavior in C.
#define UADDRMASK 0x1F
#define OPYSELMASK  (7<<23)
#define OPALUMASK   (0xfU<<28)
#define OPDBIMASK   (7<<19)

#define SIZEROM 64

// A uInst is a u-instruction in the ucode ROM
typedef unsigned int uInst;

// A uAddress is an address of a u-instruction in the ucode ROM
typedef int uAddress;

// A uLabel tags the specific u-address of a u-instruction
// Needed by u-instructions that are the target of a u-jump.
typedef int uLabel;

void dumpUcode(uAddress i);

// The microcode ROM content, indexed by u-address.  An entry equal to 0 is
// treated as unused by sig() and dumpUcode().
uInst ucode[SIZEROM];        // The microcode ROM content

// opnames[a] is the name of the M16 instruction whose first u-instruction is
// at u-address a, or NULL when no instruction starts there.
char *opnames[SIZEROM];      // Names of M16 instructions (for inst.h)
// Entry points of the instructions selected by the opcode field:
// u-addresses SIZEROM/2 to SIZEROM-1 (32 to 63)
#define OPCODEBEGIN (SIZEROM/2)
#define OPCODEEND (SIZEROM-1)

// Entry points of the unary instructions: the 8 u-addresses just below
// OPCODEBEGIN (24 to 31)
#define UNARYBEGIN (OPCODEBEGIN-8)
#define UNARYEND   (OPCODEBEGIN-1)

// Allocation cursors used while the ucode is being generated
uAddress upc= 0; // The u-address where sig() writes the next u-instruction
uAddress nextInstAddr= OPCODEBEGIN+1; // opcode 0 is for 2 operand instructions
uAddress nextUnaryInstAddr= UNARYBEGIN; // Next entry point given by defUnaryInst()
uAddress spareAddress= 0; // Next free u-address in the spare area (0 to UNARYBEGIN-1)

// u-address of the fetch u-instruction
#define LBFETCH 1

// Defines the M16 instruction opname (not 2 operand)
void defInst(char *opname) {
  upc= nextInstAddr++;
  if (upc>OPCODEEND) {
    fprintf(stderr, "Opcode overflow for %s\n", opname);
    exit(1);
  }
  if (opnames[upc]!=NULL) {
    fprintf(stderr, "Opcode for %s already used\n", opname);
    exit(1);
  }
  opnames[upc]= opname;
}

// Define instruction for unary operation: primary opcode is 0 and
// secondary opcode uses field for first source register s1 (bits 10:8)
// Takes the next free entry point in the unary area (UNARYBEGIN to UNARYEND,
// both included), makes upc point to it and records opname for the
// generation of inst.h.
// The program is terminated with an error message if the unary area is
// exhausted or if the entry point already has a name.
void defUnaryInst(char *opname) {
  upc= nextUnaryInstAddr++;
  // UNARYEND itself is a valid entry point: only addresses past it overflow
  if (upc>UNARYEND) {
    fprintf(stderr, "Opcode overflow for %s\n", opname);
    exit(1);
  }
  if (opnames[upc]!=NULL) {
    fprintf(stderr, "Opcode for %s already used\n", opname);
    exit(1);
  }
  opnames[upc]= opname;
}

// Add a new u-instruction at address upc and advance upc.
// Parameter sig is a bit mask with the control signals (bits 31-5) and,
// optionally, the address of the next u-instruction to be executed
// (bits 4-0).
// When the address field is 0 and CSDECODE is not set, the next address is
// chosen automatically: upc+1 if upc is in the spare area (below UNARYBEGIN),
// or the first free spare address if upc is an instruction entry point.
// After the u-instruction is stored, upc is moved to that same place, so
// consecutive calls build a chain of u-instructions.
// The program is terminated with an error message if the spare area is not
// filled sequentially, if the next address is an instruction entry point,
// if upc is outside the ROM or if the ROM entry is already in use.
void sig(uInst sig) {
  uAddress next= sig & UADDRMASK;

  if (next==0 && !(sig & CSDECODE))
    next= upc>=UNARYBEGIN ? spareAddress : upc+1;

  // u-instructions in the spare area must be allocated one after another
  if (upc<UNARYBEGIN) {
    if (upc!=spareAddress) {
      fprintf(stderr, "Fatal error: consistency check\n");
      exit(1);
    }
    spareAddress++;
  }

  // Entry points are reached only through the decode u-instruction
  if (UNARYBEGIN<=next && next<=UNARYEND) {
    fprintf(stderr, "Can't jump to a unary u-instruction\n");
    exit(1);
  }
  if (OPCODEBEGIN<=next && next<=OPCODEEND) {
    fprintf(stderr, "Can't jump to an opcode u-instruction\n");
    exit(1);
  }
  if (upc>=SIZEROM) {
    fprintf(stderr, "uROM size overflow\n");
    exit(1);
  }
  // A ROM entry equal to 0 means that the entry is still free
  if (ucode[upc]!=0) {
    fprintf(stderr, "ROM index %d already used\n", upc);
    exit(1);
  }

  ucode[upc]= sig | next;

  upc= upc>=UNARYBEGIN ? spareAddress : upc+1;
}

// Builds the whole ucode in the ucode array and then writes:
//   - ucode.rom: the ROM image, one u-instruction per line in hexadecimal
//     after the header line "v2.0 raw"
//   - stdout: a human readable listing of the u-instructions in use
//   - inst.h: the opcodes of the M16 instructions defined here
// Returns 0 on success.  The program is terminated with exit status 1 if an
// output file cannot be created or written.
int main() {
  // First u-instruction with u-address 0 (upc is 0)
  // Bootstrap: address of first M16 instruction must be at ROM address 0xfffe
  sig(CSX0 | CSY2 | CSSUB | CSWRAR | CSLDW | CSRD | CSSELD | CSWRPC);

  // Fetch cycle
  sig(CSSELPC | CSY2 | CSADD | CSWRPC | CSWRAR | CSLDW | CSRD | CSWRIR);

  // Decode cycle
  // For unary instructions, next ucode address (upc) is 24 + the 3 bits in the
  // s1 field in IR
  // Otherwise, next ucode address (upc) is 32 + the 5 bit opcode field in IR
  // There is a trick here: next ucode address field in uIR is not needed,
  // so that field is used to store control signal CSCLRHI
  sig(CSDECODE | CSCLRHI);

  // Logic and arithmetic instructions (require one cycle)

  defInst("add");
  sig(CSINST | CSADD | CSWRRD | LBFETCH);

  defInst("sub");
  sig(CSINST | CSSUB | CSWRRD | LBFETCH);

  defInst("shl");
  sig(CSINST | CSSHL | CSWRRD | LBFETCH);

  defInst("shr");
  sig(CSINST | CSSHR | CSWRRD | LBFETCH);

  defInst("sar");
  sig(CSINST | CSSAR | CSWRRD | LBFETCH);

  defInst("and");
  sig(CSINST | CSAND | CSWRRD | LBFETCH);

  defInst("or");
  sig(CSINST | CSOR | CSWRRD | LBFETCH);

  defInst("xor");
  sig(CSINST | CSXOR | CSWRRD | LBFETCH);

  defInst("mul");
  sig(CSINST | CSMUL | CSWRRD | LBFETCH);

  defInst("div");
  sig(CSINST | CSDIV | CSWRRD | LBFETCH);

  defInst("rem");
  sig(CSINST | CSREM | CSWRRD | LBFETCH);

  // Load from memory (require one cycle)
  // This is unrealistic but works for M16, making it faster in logisim
  defInst("ldw");
  sig(CSIDXx2 | CSADD | CSWRAR | CSRD | CSLDW | CSSELD | CSWRRD | LBFETCH);

  defInst("ldub");
  sig(CSIDX | CSADD | CSWRAR | CSRD | CSLDUB | CSSELD | CSWRRD | LBFETCH);

  defInst("ldsb");
  sig(CSIDX | CSADD | CSWRAR | CSRD | CSLDSB | CSSELD | CSWRRD | LBFETCH);

  // Store in memory (require 2 cycles)
  defInst("stw");
  sig(CSIDXx2 | CSADD | CSWRAR);
  sig(CSSWAPDS1 | CSY0 | CSOR | CSWR | CSSTW | LBFETCH);

  defInst("stb");
  sig(CSIDX | CSADD | CSWRAR);
  sig(CSSWAPDS1 | CSY0 | CSOR | CSWR | CSSTB | LBFETCH);

  // Add here experimental binary instructions

  defInst("swap");
  sig(CSIDXx2 | CSADD | CSWRAR | CSRD | CSLDW | CSSELD | CSSELT | CSWRPC);
  sig(CSSWAPDS1 | CSY0 | CSOR | CSWR | CSSTW);
  sig(CSSELPC | CSSELT | CSY0 | CSOR | CSWRRD | LBFETCH);

  // defInst("blabla");
  // sig( ... );
  // ...
  // sig( ... | LBFETCH); Last u-instruction must jump to fetch cycle!

  defInst("ldwpp");
  sig(CSY0 | CSOR | CSWRAR | CSRD | CSLDW | CSSELD | CSWRRD);
  sig(CSIDXx2 | CSADD | CSWRPC | CSSELT );
  sig(CSSWAPDS1 | CSSELT | CSSELPC | CSY0 | CSOR | CSWRRD | LBFETCH);

  // Instructions for unary operations

  defUnaryInst("mov"); // One cycle
  sig(CSX0 | CSINST | CSOR | CSWRRD | LBFETCH);

  defUnaryInst("neg"); // One cycle
  sig(CSX0 | CSINST | CSSUB | CSWRRD | LBFETCH);

  defUnaryInst("jmpl"); // This is a call to a function pointer, 2 cycles
  sig(CSSELPC | CSY0 | CSOR | CSWRRD);
  sig(CSX0 | CSINST | CSOR | CSWRPC | LBFETCH);

  defUnaryInst("callr"); // callr fun, R0 is call instruction, 2 cycles
  sig(CSSELPC | CSY2 | CSADD | CSWRRD | CSWRAR);
  sig(CSLDW | CSRD | CSSELD | CSWRPC | LBFETCH);

  defUnaryInst("jmpr"); // jmpr R0 is ret instruction, 1 cycle
  sig(CSX0 | CSINST | CSOR | CSWRPC | LBFETCH);

  // Add here experimental unary instructions

  // defUnaryInst("blabla");
  // sig( ... );
  // ...
  // sig( ... | LBFETCH); Last u-instruction must jump to fetch cycle!

  // =====================================
  // Beware: do not modify following lines
  // =====================================
  // These instructions need be placed in special opcodes

  nextInstAddr= OPCODEEND-3; // 60 (for opcode 28)
  // Conditional branch instructions: branchu must be at an even opcode and
  // branchs at the odd opcode that follows it
  defInst("branchu"); // opcode 28:
  sig(CSINST | CSSUB);
  uLabel LBBRANCH2= upc;
  sig(CSSELPC | CSY2 | CSADD | CSWRAR | CSLDW | CSRD | CSBRANCH | CSWRPC |
      LBFETCH);

  defInst("branchs"); // opcode 29
  sig(CSINST | CSSUB | LBBRANCH2);
  
  // sethi must be at an even opcode and sethi_dummy at the odd opcode that
  // follows it.  To ensure it they use the last 2 opcodes: 30 and 31
  // (u-addresses 62 and 63)
  // These instructions just set the HI register in the control unit.
  // As to set HI only CSSETHI is needed, the remaining signal are used
  // to fetch the next instruction
  defInst("sethi"); // opcode 30
  sig(CSSETHI | CSSELPC | CSY2 | CSADD | CSWRPC | CSWRAR | CSLDW | CSRD |
      CSWRIR);
  uLabel LBSETHI2= upc; // Second cycle for sethi: just decode instruction
  sig(CSDECODE);        // Same as decode cycle but without clearing hi register

  defInst("sethi_dummy"); // opcode 31
  sig(CSSETHI | CSSELPC | CSY2 | CSADD | CSWRPC | CSWRAR | CSLDW | CSRD |
      CSWRIR | LBSETHI2);

  // Write opcodes
  FILE *ucodeROM= fopen("ucode.rom", "w");
  if (ucodeROM==NULL) {
    perror("ucode.rom");
    exit(1);
  }

  printf("Content of ucode.rom.  Each line corresponds to a u-instruction.\n");
  printf("The fields in the listing are:\n");
  printf("  <u-address>:\n");
  printf("  <u-instruction content>\n");
  printf("  (<address of next u-instruction to be executed>)\n");
  printf("  <control signals>\n");
  printf("  / Inst:<name of implemented instruction> (opt. field)\n");
  printf("(all numbers are in hexadecimal notation)\n\n");
  fprintf(ucodeROM,"v2.0 raw\n");
  for (int i= 0; i<SIZEROM; i++) {
    dumpUcode(i);
    fprintf(ucodeROM,"%x\n", ucode[i]);
  }
  // A truncated ROM image must not go unnoticed: check both the stream error
  // flag and the final flush done by fclose
  int romFailed= ferror(ucodeROM);
  if (fclose(ucodeROM)!=0)
    romFailed= 1;
  if (romFailed) {
    fprintf(stderr, "ucode.rom: write error\n");
    exit(1);
  }

  FILE *instHdr= fopen("inst.h", "w");
  if (instHdr==NULL) {
    perror("inst.h");
    exit(1);
  }

  // Opcodes are relative to the first entry point of their area
  for (int i= OPCODEBEGIN; i<=OPCODEEND; i++) {
    if (opnames[i]!=NULL)
      fprintf(instHdr, "#define OP%s %d\n", opnames[i], i-OPCODEBEGIN);
  }

  for (int i= UNARYBEGIN; i<=UNARYEND; i++) {
    if (opnames[i]!=NULL)
      fprintf(instHdr, "#define UNARY%s %d\n", opnames[i], i-UNARYBEGIN);
  }

  int hdrFailed= ferror(instHdr);
  if (fclose(instHdr)!=0)
    hdrFailed= 1;
  if (hdrFailed) {
    fprintf(stderr, "inst.h: write error\n");
    exit(1);
  }

  return 0;
}

// Writes the control signal name cs on stdout, preceded by one blank.
void prtCs(char *cs) {
  fputc(' ', stdout);
  fputs(cs, stdout);
}

// Prints on stdout one listing line for the u-instruction at u-address i:
// the u-address, the 32-bit content, the next u-address field (all in
// hexadecimal) and the names of the active control signals, followed by the
// name of the M16 instruction when i is the entry point of an instruction.
// Nothing is printed for unused ROM entries (content 0).
void dumpUcode(uAddress i) {
  uInst sig= ucode[i];
  if (sig==0)
    return;

  printf("%02x: %08x (%02x)", (unsigned)i, sig, sig & UADDRMASK);

  uInst opysel= OPYSELMASK & sig;
  uInst opalu= OPALUMASK & sig;

  // Selection of the ALU X operand.  "selrs1" sets no bit, so it is reported
  // only when the u-instruction performs an ALU operation.
  if (CSSELPC & sig) prtCs("selpc");
  else if (CSX0 & sig) prtCs("x0");
  else if (opalu!=0) prtCs("selrs1");

  // Selection of the ALU Y operand.  CSY0 is code 0, so "y0" is reported
  // only when the u-instruction performs an ALU operation.
  switch (opysel) {
    case CSY0: if (opalu!=0) prtCs("y0"); break;
    case CSY2: prtCs("y2"); break;
    case CSIDX: prtCs("idx"); break;
    case CSIDXx2: prtCs("idx*2"); break;
    case CSINST: prtCs("inst"); break;
  }

  switch(opalu) {
    case CSADD: prtCs("add"); break;
    case CSSUB: prtCs("sub"); break;
    case CSMUL: prtCs("mul"); break;
    case CSDIV: prtCs("div"); break;
    case CSREM: prtCs("rem"); break;
    case CSSHL: prtCs("shl"); break;
    case CSSHR: prtCs("shr"); break;
    case CSSAR: prtCs("sar"); break;
    case CSAND: prtCs("and"); break;
    case CSOR: prtCs("or"); break;
    case CSXOR: prtCs("xor"); break;
  }

  uInst opdbi= OPDBIMASK & sig;
  switch (opdbi) {
    case CSLDW: prtCs("ldw"); break;
    case CSLDUB: prtCs("ldub"); break;
    case CSLDSB: prtCs("ldsb"); break;
    case CSSTW: prtCs("stw"); break;
    case CSSTB: prtCs("stb"); break;
  }

  if (CSWRRD & sig) prtCs("wrrd");
  if (CSWRPC & sig) prtCs("wrpc");
  if (CSWRIR & sig) prtCs("wrir");
  if (CSWRAR & sig) prtCs("wrar");
  if (CSSELD & sig) prtCs("seld");
  if (CSSELT & sig) prtCs("selt");
  if (CSSWAPDS1 & sig) prtCs("swapds1");
  if (CSBRANCH & sig) prtCs("branch");
  if (CSRD & sig) prtCs("RD");
  if (CSWR & sig) prtCs("WR");
  if (CSDECODE & sig) prtCs("dec");
  if (CSSETHI & sig) prtCs("sethi");
  // Bit 0 means clrhi only in a decode u-instruction; otherwise it is part
  // of the next u-address field
  if ((CSDECODE & sig) && (CSCLRHI & sig)) prtCs("clrhi");

  if (opnames[i]!=NULL)
    printf(" / Inst: %s", opnames[i]);
  printf("\n");
}
