ECDL2K/MMX on FreeBSD

From: Jacques Garrigue (garrigue@pauillac.inria.fr)
Date: Thu Dec 09 1999 - 15:11:43 MET


If you are running ECDL2K with MMX code on FreeBSD,
you may have noticed that the speed depends on the length of the
command line...

This seems to be due to a lack of double-word (64-bit) alignment on the
stack. Here is a patch for ecdl2k version 1.0.1, which declares all mmx64
variables to be static, so that they appear in the data segment
and are correctly aligned. It improves speed by about 5% (lucky case,
most data was already aligned) to 50% (bad case, essential data was
not aligned).

By the way, this does not hurt on other architectures, so even if you
are not using FreeBSD you may give it a try.

Regards,

Jacques
------------------------------------------------------
Jacques Garrigue, visiting INRIA from Kyoto University
                          Jacques.Garrigue at inria.fr
------------------------------------------------------
*** ecdl2K-108.32bit.c.orig Tue Dec 7 15:55:44 1999
--- ecdl2K-108.32bit.c Thu Dec 9 15:02:44 1999
***************
*** 1434,1440 ****
  
    uint i;
    u32 s, t;
! mmx64 yhi, ylo;
    const mmx128 *p;
  
    p = &matrix[0];
--- 1434,1440 ----
  
    uint i;
    u32 s, t;
! static mmx64 yhi, ylo;
    const mmx128 *p;
  
    p = &matrix[0];
***************
*** 1897,1903 ****
  
    /* Two at a time. */
    for ( ; n >= 2; n -= 2) {
! mmx64 yh, yl;
  
      LOD(mm_T, tab4[xt>> 8 /* & 31 */ ]);
      LOD(mm_YH, tab4[xh>>24]);
--- 1897,1903 ----
  
    /* Two at a time. */
    for ( ; n >= 2; n -= 2) {
! static mmx64 yh, yl;
  
      LOD(mm_T, tab4[xt>> 8 /* & 31 */ ]);
      LOD(mm_YH, tab4[xh>>24]);
***************
*** 1961,1967 ****
  
    /* Last one (if n was odd). */
    if (n) {
! mmx64 yh,yl;
  
      LOD(mm_U, tab2[xt>> 8 /* & 31 */ ]);
      LOD(mm_T, tab2[xh>>24]);
--- 1961,1967 ----
  
    /* Last one (if n was odd). */
    if (n) {
! static mmx64 yh,yl;
  
      LOD(mm_U, tab2[xt>> 8 /* & 31 */ ]);
      LOD(mm_T, tab2[xh>>24]);
***************
*** 2276,2282 ****
    ( u32 xh, u32 xl, const mmx64 *py, mmx64 *ph, mmx64 *pl
    ) {
    u32 w;
! mmx64 tab[16];
  
    /* Note: No START_MMX() here. It is done at start of product(). */
  
--- 2276,2282 ----
    ( u32 xh, u32 xl, const mmx64 *py, mmx64 *ph, mmx64 *pl
    ) {
    u32 w;
! static mmx64 tab[16];
  
    /* Note: No START_MMX() here. It is done at start of product(). */
  
***************
*** 2620,2632 ****
  /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */
  #ifdef MMX
  
! mmx64 hh,hl, lh,ll, mh,ml;
  
    START_MMX();
  
    { const u32 mask = (1UL<<24)-1;
      u32 xt,xh, yt,yh;
! mmx64 t;
  
      xt = x.t<<8 | x.h>>24; xh = x.h<<8 | x.m>>24;
      yt = y.t<<8 | y.h>>24; yh = y.h<<8 | y.m>>24;
--- 2620,2632 ----
  /*# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # */
  #ifdef MMX
  
! static mmx64 hh,hl, lh,ll, mh,ml;
  
    START_MMX();
  
    { const u32 mask = (1UL<<24)-1;
      u32 xt,xh, yt,yh;
! static mmx64 t;
  
      xt = x.t<<8 | x.h>>24; xh = x.h<<8 | x.m>>24;
      yt = y.t<<8 | y.h>>24; yh = y.h<<8 | y.m>>24;
***************
*** 2691,2697 ****
    SLL(mm_TMP, 1); XOR(mm_T0, mm_TMP);
    SLL(mm_TMP, 7); XOR(mm_T0, mm_TMP);
  
! { mmx64 rh, rl;
      poly128 r;
  
      STR(rh, mm_T1); STR(rl, mm_T0);
--- 2691,2697 ----
    SLL(mm_TMP, 1); XOR(mm_T0, mm_TMP);
    SLL(mm_TMP, 7); XOR(mm_T0, mm_TMP);
  
! { static mmx64 rh, rl;
      poly128 r;
  
      STR(rh, mm_T1); STR(rl, mm_T0);



This archive was generated by hypermail 2b29 : Sat Jan 01 2000 - 15:26:57 MET