ECDL2K/MMX on FreeBSD

From: Jacques Garrigue (garrigue@pauillac.inria.fr)
Date: Thu Dec 09 1999 - 15:11:43 MET

Next message: Rene Marc Dolhen: "Compilation problem on IRIX 6.5"
Previous message: Alan Schmitt: "Re: Speed..."
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

If you are running ECDL2K with MMX code on FreeBSD,
you may have noticed that the speed depends on the length of the
command line...

This seems to be due to a lack of double-word (8-byte) alignment in the
stack. Here is a patch for ecdl2k version 1.0.1, which declares all mmx64
variables to be static, so that they will appear in the data segment,
and be correctly aligned. It improves speed by about 5% (lucky case,
most data was already aligned) to 50% (bad case, essential data was
not aligned).

By the way, this does not hurt on other architectures, so even if you
are not using FreeBSD you may give it a try.

Regards,

Jacques
------------------------------------------------------
Jacques Garrigue, visiting INRIA from Kyoto University
                          Jacques.Garrigue at inria.fr
------------------------------------------------------
*** ecdl2K-108.32bit.c.orig Tue Dec 7 15:55:44 1999
--- ecdl2K-108.32bit.c Thu Dec 9 15:02:44 1999
***************
*** 1434,1440 ****

    uint i;
    u32 s, t;
! mmx64 yhi, ylo;
    const mmx128 *p;

    p = &matrix[0];
--- 1434,1440 ----

    uint i;
    u32 s, t;
! static mmx64 yhi, ylo;
    const mmx128 *p;

    p = &matrix[0];
***************
*** 1897,1903 ****

    /* Two at a time. */
    for ( ; n >= 2; n -= 2) {
! mmx64 yh, yl;

      LOD(mm_T, tab4[xt>> 8 /* & 31 */ ]);
      LOD(mm_YH, tab4[xh>>24]);
--- 1897,1903 ----

    /* Two at a time. */
    for ( ; n >= 2; n -= 2) {
! static mmx64 yh, yl;

      LOD(mm_T, tab4[xt>> 8 /* & 31 */ ]);
      LOD(mm_YH, tab4[xh>>24]);
***************
*** 1961,1967 ****

    /* Last one (if n was odd). */
    if (n) {
! mmx64 yh,yl;

      LOD(mm_U, tab2[xt>> 8 /* & 31 */ ]);
      LOD(mm_T, tab2[xh>>24]);
--- 1961,1967 ----

    /* Last one (if n was odd). */
    if (n) {
! static mmx64 yh,yl;

      LOD(mm_U, tab2[xt>> 8 /* & 31 */ ]);
      LOD(mm_T, tab2[xh>>24]);
***************
*** 2276,2282 ****
    ( u32 xh, u32 xl, const mmx64 *py, mmx64 *ph, mmx64 *pl
    ) {
    u32 w;
! mmx64 tab[16];

    /* Note: No START_MMX() here. It is done at start of product(). */

--- 2276,2282 ----
    ( u32 xh, u32 xl, const mmx64 *py, mmx64 *ph, mmx64 *pl
    ) {
    u32 w;
! static mmx64 tab[16];

    /* Note: No START_MMX() here. It is done at start of product(). */

***************
*** 2620,2632 ****
  /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */
  #ifdef MMX

! mmx64 hh,hl, lh,ll, mh,ml;

    START_MMX();

    { const u32 mask = (1UL<<24)-1;
      u32 xt,xh, yt,yh;
! mmx64 t;

      xt = x.t<<8 | x.h>>24; xh = x.h<<8 | x.m>>24;
      yt = y.t<<8 | y.h>>24; yh = y.h<<8 | y.m>>24;
--- 2620,2632 ----
  /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */
  #ifdef MMX

! static mmx64 hh,hl, lh,ll, mh,ml;

    START_MMX();

    { const u32 mask = (1UL<<24)-1;
      u32 xt,xh, yt,yh;
! static mmx64 t;

      xt = x.t<<8 | x.h>>24; xh = x.h<<8 | x.m>>24;
      yt = y.t<<8 | y.h>>24; yh = y.h<<8 | y.m>>24;
***************
*** 2691,2697 ****
    SLL(mm_TMP, 1); XOR(mm_T0, mm_TMP);
    SLL(mm_TMP, 7); XOR(mm_T0, mm_TMP);

! { mmx64 rh, rl;
      poly128 r;

      STR(rh, mm_T1); STR(rl, mm_T0);
--- 2691,2697 ----
    SLL(mm_TMP, 1); XOR(mm_T0, mm_TMP);
    SLL(mm_TMP, 7); XOR(mm_T0, mm_TMP);

! { static mmx64 rh, rl;
      poly128 r;

      STR(rh, mm_T1); STR(rl, mm_T0);

Next message: Rene Marc Dolhen: "Compilation problem on IRIX 6.5"
Previous message: Alan Schmitt: "Re: Speed..."
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

This archive was generated by hypermail 2b29 : Sat Jan 01 2000 - 15:26:57 MET