/*
 * To: Timothy Liu who wrote (150881)
 * 12/3/2001 9:03:45 PM
 * From: kapkan4u
 * Read Replies (1) | Respond to of 186894
 *
 * <The difference is IMO just due to processor stalls.>
 *
 * You can see that the P4/P3 IPC disadvantage increased from 1.375 to 1.93
 * when the number of cases went from 2000 to 3000, which apparently
 * coincides with reaching the limit of the trace cache. Only the 1.375
 * ratio (but not 1.93) can be explained by longer pipeline.
 *
 *    cases   ratio
 *     1000    45/32  == 1.4
 *     2000   132/69  == 1.375
 *     3000   199/103 == 1.93
 *     5000   349/180 == 1.93
 *    10000   716/369 == 1.94
 *
 * NOTE(review): 132/69 is about 1.91, not 1.375, so one of the figures in
 * the 2000-case row appears to be mistyped in the original post -- confirm
 * against the original measurements.
 *
 * Kap
 *
 * BTW, there was a cut and paste mangle. The correct program to generate
 * test cases is:
 */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Parse a base-10 integer from `text` into `*out`.
 *
 * Returns 1 on success. Returns 0 (leaving `*out` untouched) when `text` is
 * not a complete decimal number, is out of range for `int`, or is smaller
 * than `min_value`.
 */
static int parse_int(const char *text, int min_value, int *out)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE) {
        return 0;
    }
    if (value < min_value || value > INT_MAX) {
        return 0;
    }
    *out = (int)value;
    return 1;
}

/*
 * Write to stdout the source of a benchmark program: a loop that runs
 * `iterations` times over a switch statement with `max_index` cases, timed
 * with RDTSC (the emitted code uses `__asm` inline-assembly syntax).
 *
 * Usage: p4_id.exe iterations max_index
 *
 * Returns 0 on success and 1 on a usage error, an invalid argument, or a
 * failure writing to stdout. Diagnostics go to stderr so they never end up
 * inside a redirected output file.
 */
int main(int argc, char *argv[])
{
    int i;
    int iterations;
    int max_index;

    if (argc != 3) {
        fprintf(stderr, "invocation error: p4_id.exe iterations max_index\n");
        fprintf(stderr, "iterations: number of times the switch statement is executed in the loop\n");
        fprintf(stderr, "max_index: number of cases the program will generate in the switch statement\n");
        return 1;
    }

    if (!parse_int(argv[1], 0, &iterations)) {
        fprintf(stderr, "invalid iterations '%s': expected a non-negative integer\n", argv[1]);
        return 1;
    }
    /* max_index is the modulus in the emitted `rand() % max_index`, so it
     * must be at least 1 to avoid generating a division by zero. */
    if (!parse_int(argv[2], 1, &max_index)) {
        fprintf(stderr, "invalid max_index '%s': expected a positive integer\n", argv[2]);
        return 1;
    }

    /* Preamble of the generated program. */
    printf("#include <stdlib.h>\n");
    printf("#include <stdio.h>\n");
    printf("unsigned long x;\n");
    printf("#define get_stamp __asm RDTSC __asm mov [x], eax\n");
    printf("#define get_count __asm RDTSC __asm sub eax, [x] __asm mov [x], eax\n");
    printf("int i, sum;\n");
    printf("int main() {\n");
    printf("srand( 123456 );\n");
    printf("get_stamp;\n");

    /* The timed loop: one switch with max_index cases. */
    printf("for( i = 0; i < %d; i++ )\n", iterations);
    printf("switch ( rand() %% %d ){\n", max_index);
    for (i = 0; i < max_index; i++) {
        printf("case %d: sum += %d;\n", i, i + 1);
    }
    printf("default: sum = sum; }\n");
    printf("get_count;\n");

    /* x is declared unsigned long in the generated program, hence %lu. */
    printf("printf(\"%%d, time=%%lu\\n\", sum, x);\n");
    printf("return 0;\n}\n");

    if (fflush(stdout) != 0 || ferror(stdout)) {
        fprintf(stderr, "error writing generated program to stdout\n");
        return 1;
    }
    return 0;
}