CPU, die die Fibonacci-Reihe berechnet

Die im Folgenden beschriebene CPU ist so programmiert, dass sie eine Fibonacci-Reihe berechnet.

Die CPU verfügt über folgende Register:

reg [7:0] 	DATA_ADR;	// Data ROM address register
reg [7:0] 	PROG_ADR;	// Program ROM address register (PC)
reg [15:0]      CMD;		// Instruction register
reg [31:0]      ARG1;		// Argument 1
reg [15:0]      ARG2;		// Argument 2
reg [31:0] 	RESULT;		// Result (accumulator)
reg [15:0]	AUX1;		// Auxiliary registers 1-4
reg [15:0]	AUX2;
reg [15:0]	AUX3;
reg [15:0]	AUX4;

Ein Steuerregister kontrolliert die sechsstufige Abarbeitung der Befehle:

reg [5:0]   STATE;
always @(posedge SYS_CLOCK)
	begin
		case (STATE)
			6'b000000:	STATE <= 6'b000001;
			// 1. Load program counter
			6'b000001:
...			// 2. Load command from program memory
			6'b000010:
...			// 3. Load Argument 1
			6'b000100:
...			// 4. Load Argument 2
			6'b001000:
...			// 5. Load Result
			6'b010000:
...			// 6. Store Result
			6'b100000:
		endcase
	end

Ein Assembler-Befehl besteht aus 4 Nibbles und ist wie folgt strukturiert:

[Argument 1] [Argument 2] [Operator] [Ziel]

Folgende Werte sind hier möglich:

Wert  Arg.1/2   Ziel      Operator
0     RESULTAT  -         NOP (tu nichts)
1     DATA_IN   -         ADD (+, Addition)
2     DATA_ADR  DATA_ADR  SUB (-, Subtraktion)
3     PROG_ADR  PROG_ADR  MUL (*, Multiplikation)
4     AUX1      AUX1      DIV (/, Division)
5     AUX2      AUX2      LEFTSHIFT
6     AUX3      AUX3      RIGHTSHIFT
7     AUX4      AUX4      -

Das Programm für die Fibonacci-Reihe sieht wie folgt aus:

Adr Bytecode  Mnemonic                     Kommentar
1   0114      RESULTAT + DATA_IN => AUX1   AUX1 = 1
2   4516      AUX1 + AUX2 => AUX3
3   5614      AUX2 + AUX3 => AUX1
4   6415      AUX3 + AUX1 => AUX2
5   7113      AUX4 + DATA_IN => PROG_ADR   Springe zu Adr. 2 (PROG_ADR = 0 + 1 = 1, wird vor dem nächsten Befehl um 1 erhöht)

Das Programm liefert folgende Resultate, die ich mit Zahlen aus einer Tabellenkalkulation verglichen habe:

OO Calc      CPU
 1     0001
 1     0001    1
 2     0002    2
 3     0003    3
 5     0005    5
 8     0008    8
 13    000D    D
 21    0015   15
 34    0022   22
 55    0037   37
 89    0059   59
 144   0090   90
 233   00E9   E9
 377   0179  179
 610   0262  262
 987   03DB  3DB
 1597  063D  63D
 2584  0A18  A18
 4181  1055 1055
 6765  1A6D 1A6D
 10946 2AC2 2AC2

Quartus Screenshot:

Das Projekt zum Download: ex2 _fibonacci.zip

EX2_CPU.v:

//
//  CPU Core
//
module EX2_CPU
(SYS_CLOCK, DATA_IN, PROG_IN,
DATA_ADR, PROG_ADR, CMD, ARG1, ARG2, RESULT, AUX1, AUX2, AUX3, AUX4, STATE );
//
// Small multi-cycle CPU with separate program and data inputs.
//
// Every instruction is a 16-bit word made of four nibbles:
//
//   CMD[15:12]  source of argument 1
//   CMD[11:8]   source of argument 2
//   CMD[7:4]    operator
//   CMD[3:0]    destination of the result
//
//   value  argument source  destination  operator
//   0      RESULT           -            NOP (RESULT keeps its value)
//   1      DATA_IN          -            ADD
//   2      DATA_ADR         DATA_ADR     SUB
//   3      PROG_ADR         PROG_ADR     MUL
//   4      AUX1             AUX1         DIV
//   5      AUX2             AUX2         left shift
//   6      AUX3             AUX3         right shift
//   7      AUX4             AUX4         -
//
// Selector values that are not listed leave the affected register unchanged.
//
// An instruction takes six clock cycles; STATE is one-hot and walks through
//   000001 increment PROG_ADR -> 000010 fetch CMD -> 000100 load ARG1 ->
//   001000 load ARG2 -> 010000 compute RESULT -> 100000 store RESULT.
// Because PROG_ADR is incremented at the start of every instruction, writing
// the value N to PROG_ADR continues execution at address N+1.
//
// Ports:
//   SYS_CLOCK  clock; all registers update on its rising edge
//   DATA_IN    16-bit word from the data memory
//   PROG_IN    16-bit instruction word from the program memory
//   all other ports expose the internal registers (addresses and debug view)
//
input		SYS_CLOCK;
input  [15:0]	DATA_IN;	// width declared on the port itself so that
input  [15:0]	PROG_IN;	// port and net declarations agree
//
output [7:0] 	DATA_ADR;
output [7:0] 	PROG_ADR;
output [15:0] 	CMD;
output [31:0] 	ARG1;
output [15:0] 	ARG2;
output [31:0] 	RESULT;
output [15:0] 	AUX1;
output [15:0] 	AUX2;
output [15:0] 	AUX3;
output [15:0] 	AUX4;
output [5:0]    STATE;
//
wire [15:0] DATA_IN;
wire [15:0] PROG_IN;
//
reg [7:0] 	DATA_ADR;	// data memory address register
reg [7:0] 	PROG_ADR;	// program memory address register (PC)
reg [15:0]  CMD;		// instruction register
reg [31:0]  ARG1;		// argument 1
reg [15:0]  ARG2;		// argument 2 (only 16 bits: wider sources are truncated)
reg [31:0] 	RESULT;		// result (accumulator)
reg [15:0]	AUX1;		// auxiliary registers 1-4
reg [15:0]	AUX2;
reg [15:0]	AUX3;
reg [15:0]	AUX4;
//
reg [5:0]   STATE;		// one-hot instruction step
//
// Defined power-up values. The module has no reset input, so without these
// every register starts as X in simulation, no STATE case item matches and
// the CPU never leaves the unknown state.
initial
	begin
		STATE    = 6'b000000;
		DATA_ADR = 8'd0;
		PROG_ADR = 8'd0;
		CMD      = 16'd0;
		ARG1     = 32'd0;
		ARG2     = 16'd0;
		RESULT   = 32'd0;
		AUX1     = 16'd0;
		AUX2     = 16'd0;
		AUX3     = 16'd0;
		AUX4     = 16'd0;
	end
//
always @(posedge SYS_CLOCK)
	begin
		case (STATE)
			// Power-up state: start the first instruction cycle
			6'b000000:	STATE <= 6'b000001;
			// 1. Advance the program counter
			6'b000001:
					begin
						STATE <= 6'b000010;
						PROG_ADR <= PROG_ADR+1;
					end
			// 2. Load command from program memory
			6'b000010:
					begin
						STATE <= 6'b000100;
						CMD <= PROG_IN;
					end
			// 3. Load Argument 1 (narrower sources are zero-extended to 32 bits)
			6'b000100:
					begin
						STATE <= 6'b001000;
						case (CMD[15:12])
							4'b0000:	ARG1 <= RESULT;
							4'b0001:	ARG1 <= DATA_IN;
							4'b0010:	ARG1 <= DATA_ADR;
							4'b0011:	ARG1 <= PROG_ADR;
							4'b0100:	ARG1 <= AUX1;
							4'b0101:	ARG1 <= AUX2;
							4'b0110:	ARG1 <= AUX3;
							4'b0111:	ARG1 <= AUX4;
							default:	ARG1 <= ARG1;	// unused selector: keep value
						endcase
					end
			// 4. Load Argument 2 (RESULT is truncated to its lower 16 bits)
			6'b001000:
					begin
						STATE <= 6'b010000;
						case (CMD[11:8])
							4'b0000:	ARG2 <= RESULT[15:0];
							4'b0001:	ARG2 <= DATA_IN;
							4'b0010:	ARG2 <= DATA_ADR;
							4'b0011:	ARG2 <= PROG_ADR;
							4'b0100:	ARG2 <= AUX1;
							4'b0101:	ARG2 <= AUX2;
							4'b0110:	ARG2 <= AUX3;
							4'b0111:	ARG2 <= AUX4;
							default:	ARG2 <= ARG2;	// unused selector: keep value
						endcase
					end
			// 5. Compute the result
			6'b010000:
					begin
						STATE <= 6'b100000;
						case (CMD[7:4])
							4'b0001:	RESULT <= ARG1+ARG2;   // ADD
							4'b0010:	RESULT <= ARG1-ARG2;   // SUB
							4'b0011:	RESULT <= ARG1*ARG2;   // MUL
							// NOTE(review): ARG2 == 0 is not guarded; the
							// quotient is then unknown (X in simulation).
							4'b0100:	RESULT <= ARG1/ARG2;   // DIV
							4'b0101:	RESULT <= ARG1<<ARG2;  // left shift
							4'b0110:	RESULT <= ARG1>>ARG2;  // right shift
							default:	RESULT <= RESULT;      // NOP / unused operator
						endcase
					end
			// 6. Store the result, then start the next instruction
			6'b100000:
					begin
						STATE <= 6'b000001;
						case (CMD[3:0])
							4'b0010:	DATA_ADR <= RESULT[7:0];
							4'b0011:	PROG_ADR <= RESULT[7:0];	// jump: next fetch is at RESULT+1
							4'b0100:	AUX1 <= RESULT[15:0];
							4'b0101:	AUX2 <= RESULT[15:0];
							4'b0110:	AUX3 <= RESULT[15:0];
							4'b0111:	AUX4 <= RESULT[15:0];
							default:	;	// no destination: result stays in RESULT only
						endcase
					end
			// Any non-one-hot state (e.g. after a glitch) restarts the
			// instruction cycle instead of locking up forever.
			default:	STATE <= 6'b000001;
		endcase
	end
endmodule

12 Cores in amd64 Linux 3.0

vmk@vmk-12cc:~/makehuman/render/renderman_output/ribFiles$ cat /proc/cpuinfo
processor	: 0
vendor_id	: GenuineIntel
cpu family	: 6
model		: 44
model name	: Intel(R) Xeon(R) CPU           E5645  @ 2.40GHz
stepping	: 2
cpu MHz		: 1600.000
cache size	: 12288 KB
physical id	: 0
siblings	: 12
core id		: 0
cpu cores	: 6
apicid		: 0
initial apicid	: 0
fpu		: yes
fpu_exception	: yes
cpuid level	: 11
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm dca sse4_1 sse4_2 popcnt aes lahf_lm ida arat epb dts tpr_shadow vnmi flexpriority ept vpid
bogomips	: 4800.21
clflush size	: 64
cache_alignment	: 64
address sizes	: 40 bits physical, 48 bits virtual
power management:
 Continue reading 

Einführung VERILOG

Projekt IR eines HARVARD-Mikroprozessors

             Clock, Synchronisation

Programm  <-> uP <-> Daten

            State Machine 

            Vektor Tabelle
  1. Synchronisierter Reset initialisiert State Machine
  2. StateMachine holt Reset Vektor aus Tabelle (Fix festgelegt, kann z.T. verschoben werden)
  3. Befehl an Reset Vektor kommt in Pipeline, Prefetch Register
Was ist Verilog:
VHDL vs. Verilog: verschiedene Sprachansätze:
Verilog: up to date, letzte Version von 2007, entstand aus C und VHDL, hat Funktionen, die in VHDL nicht vorhanden sind.
Schreibweise unterschiedlich, keine Deckungsgleichheit, da die Befehle unterschiedlich sind
Internal Error: Sub-system: AMERGE, File: /quartus/atm/amerge/amerge_kpt_op.cpp, Line: 220
cmp_merge_kpt_db
Stack Trace:
0x3DD57   : amerge_mini_merge + 0x3A977 (atm_amerge)

End-trace

Quartus II Version 9.0 Build 235 06/17/2009 SJ Web Edition
Service Pack Installed:  2
Internal Error: Sub-system: AMERGE, File: /quartus/atm/amerge/amerge_kpt_op.cpp, Line: 220
cmp_merge_kpt_db
Stack Trace:
0x3DD57   : amerge_mini_merge + 0x3A977 (atm_amerge)
0x30D5    : cfg_force_qexe_mode_off + 0x1E15 (ccl_cfg_ini)
0x3D4F2   : amerge_mini_merge + 0x3A112 (atm_amerge)
0x444DD   : amerge_mini_merge + 0x410FD (atm_amerge)

0x33F1    : amerge_mini_merge + 0x11 (atm_amerge)

0x4B0D    : MEM_SEGMENT_INTERNAL::splay_heap + 0x14D (ccl_mem)
0xA128    : mem_realloc_wrapper + 0x188 (ccl_mem)

0x94F2    : MEM_SEGMENT_INTERNAL::locked_allocate + 0x62 (ccl_mem)
0x2931    : RDB_WAVEFORM_OBJECT::RDB_WAVEFORM_OBJECT + 0x211 (db_rdb)
0x20A6    : QCU_ACF_SETTING::~QCU_ACF_SETTING + 0x126 (comp_qcu)
0x5A66    : QCU_ACF_SETTING::operator= + 0x3366 (comp_qcu)

0xCE32    : qexe_get_command_line + 0x3A2 (comp_qexe)
0x38E3    : QCU_ACF_SETTING::operator= + 0x11E3 (comp_qcu)
0x7C29    : QCU_FRAMEWORK::check_license + 0x139 (comp_qcu)
0xFBBD    : qexe_process_cmdline_arguments + 0x34D (comp_qexe)

End-trace

Quartus II Version 9.0 Build 235 06/17/2009 SJ Web Edition
Service Pack Installed:  2