D issues are now tracked on GitHub. This Bugzilla instance remains as a read-only archive.
Issue 4447 - order of functions greatly affects execution time
Summary: order of functions greatly affects execution time
Status: RESOLVED INVALID
Alias: None
Product: D
Classification: Unclassified
Component: dmd (show other issues)
Version: D2
Hardware: Other Linux
Importance: P3 normal
Assignee: No Owner
URL:
Keywords: bootcamp, performance
Depends on:
Blocks:
 
Reported: 2010-07-11 09:03 UTC by Brad Roberts
Modified: 2023-01-30 15:36 UTC (History)
3 users (show)

See Also:


Attachments

Note You need to log in before you can comment on or make changes to this issue.
Description Brad Roberts 2010-07-11 09:03:17 UTC
(split from bug 859)

=========== code ============
extern(C) int printf(const char*, ...);
extern(C) uint sleep(uint);

// Plain three-component float vector used as input to the dot-product benchmarks.
struct vector3 {
  float x, y, z;
}

// Measures elapsed time-stamp-counter ticks between construction and destruction.
// The constructor records the start reading; the destructor takes a second reading
// and prints the difference (preceded by the label, if one was given).
class Timer {
  // Returns the current reading of the CPU time-stamp counter.
  // The function is `naked`, so no prologue/epilogue is generated and whatever
  // rdtsc leaves in the registers is returned as-is.
  // NOTE(review): this relies on the 32-bit x86 convention of returning a long in
  // EDX:EAX (where rdtsc writes its result); on x86-64 the value returned in RAX
  // would presumably contain only the low 32 bits -- confirm before reusing there.
  static long getTime() {
    asm {
      naked;
      rdtsc;
      ret;
    }
  }

  long starttime;  // counter reading taken in the constructor
  string label;    // optional prefix printed before the elapsed time; null if unset

  // Starts an unlabeled timer.
  this() {
    starttime = getTime();
  }

  // Starts a timer whose report is prefixed with `label`.
  this(string label) {
    starttime = getTime();
    this.label = label;
  }

  // Takes the end reading and prints "time: <ticks>" followed by a newline.
  ~this() {
    long endTime = getTime();
    if (label !is null) {
      // A D string is a (length, pointer) slice and is not NUL-terminated, so it
      // must be printed with an explicit precision: "%.*s" takes an int length
      // followed by the char pointer. ("%*s" would only set a minimum field width
      // and keep reading until a NUL byte.)
      printf("%.*s ", cast(int) label.length, label.ptr);
    }
    // D's long is always 64 bits, which corresponds to C's long long; "%ld" would
    // read only 32 bits on targets where C long is 32-bit.
    printf("time: %lld\n", endTime - starttime);
  }
}

// Returns the dot product of A and B: the sum of the products of their
// x, y and z fields. Both vectors are taken by ref and are only read here.
float DOT(ref vector3 A, ref vector3 B) {
  return A.x * B.x + A.y * B.y + A.z * B.z;
}

// Computes the dot product of a and b by calling DOT, prints the result, and
// reports the elapsed ticks through a scoped Timer.
// The Timer is created first and, being `scope`, is destroyed when the function
// exits, so the reported time covers both the DOT call and the printf below.
void fooCompiler(ref vector3 a, ref vector3 b)
{
  scope Timer t = new Timer();

  float d = DOT(a, b);

  // Prints the value and the address of d. There is no trailing newline, so the
  // Timer's "time:" output ends up on the same line.
  printf("compiler: d = %f, %p    ", d, &d);
}

// Computes the dot product of a and b with the expression written out inline
// (no call to DOT), prints the result, and reports the elapsed ticks through a
// scoped Timer.
// The Timer is created first and, being `scope`, is destroyed when the function
// exits, so the reported time covers both the computation and the printf below.
void fooManual(ref vector3 a, ref vector3 b)
{
  scope Timer t = new Timer();

  float d = a.x * b.x + a.y * b.y + a.z * b.z;

  // Prints the value and the address of d. There is no trailing newline, so the
  // Timer's "time:" output ends up on the same line.
  printf("manual:   d = %f, %p    ", d, &d);
}

// Benchmark driver. Builds two fixed vectors and runs the timed functions in an
// order chosen at compile time with -version=one or -version=two; -version=warm
// additionally runs a timed float printf before either of them.
void main() {
  vector3 a = { 1, 2, 3 };
  vector3 b = { 4, 5, 6 };

  // Optional warm-up: times a single printf of a float before the real runs.
  // The inner braces open a nested scope so this Timer is destroyed (and its
  // time printed) before the measurements below start.
  version(warm)
  {{
    scope t = new Timer();
    float d = 1.0;
    printf("float rewarm: %f\n", d);
  }}

  // Order "one": manual expression first, then the DOT-based variant.
  version(one)
  {
    fooManual(a, b);
    fooCompiler(a, b);
  }
  // Order "two": DOT-based variant first, then the manual expression.
  version(two)
  {
    fooCompiler(a, b);
    fooManual(a, b);
  }
}
=====================

$ dmd -inline -O -release -version=one odd.d 
$ ./odd
manual:   d = 32.000000, 0xbf96e2e0    time: 218169
compiler: d = 32.000000, 0xbf96e2e0    time: 11543

$ dmd -inline -O -release -version=two odd.d 
$ ./odd
compiler: d = 32.000000, 0xbf8e98e0    time: 217847
manual:   d = 32.000000, 0xbf8e98e0    time: 11452

$ dmd -inline -O -release -version=one -version=warm odd.d 
$ ./odd
float rewarm: 1.000000
time: 227647
manual:   d = 32.000000, 0xbf9e86b0    time: 27762
compiler: d = 32.000000, 0xbf9e86b0    time: 8316

$ dmd -inline -O -release -version=two -version=warm odd.d 
$ ./odd 
float rewarm: 1.000000
time: 229782
compiler: d = 32.000000, 0xbf9ed650    time: 27664
manual:   d = 32.000000, 0xbf9ed650    time: 7987
Comment 1 Heywood Floyd 2010-07-12 08:58:39 UTC
// - - 8< - -
  version(one)
  {
    fooManual(a, b);
    fooCompiler(a, b);
    fooManual(a, b); 
    fooCompiler(a, b);
    fooManual(a, b); 
    fooCompiler(a, b);
 }
 version(two)
  {
    fooCompiler(a, b);
    fooManual(a, b); 
    fooCompiler(a, b);
    fooManual(a, b); 
    fooCompiler(a, b);
    fooManual(a, b);
 }
// - - 8< - -

$ dmd -inline -O -release -version=one -run odd
manual:   d = 32.000000, BFFFF578    time: 126120
compiler: d = 32.000000, BFFFF578    time: 8200
manual:   d = 32.000000, BFFFF578    time: 5920
compiler: d = 32.000000, BFFFF578    time: 5960
manual:   d = 32.000000, BFFFF578    time: 5690
compiler: d = 32.000000, BFFFF578    time: 5620
$ dmd -inline -O -release -version=two -run odd
compiler: d = 32.000000, BFFFF578    time: 130200
manual:   d = 32.000000, BFFFF578    time: 8140
compiler: d = 32.000000, BFFFF578    time: 5800
manual:   d = 32.000000, BFFFF578    time: 5730
compiler: d = 32.000000, BFFFF578    time: 5590
manual:   d = 32.000000, BFFFF578    time: 5600
$ _

I can't see that the order of the functions has any impact; rather, for the first couple of hundred thousand CPU ticks the program is not running at 100% of its potential performance. This could be anything: maybe the OS must set some things up for the first write to stdout, or the D runtime must do some things, like expanding the heap for the first allocation. I have no idea; I'm just guessing.