1
0
forked from ports/contrib
contrib/openblas/openblas-0.3.21-clang16.patch

582 lines
17 KiB
Diff

https://github.com/xianyi/OpenBLAS/commit/f703846ad9400a8ea175cb8dd43e18c152aeab93
https://github.com/xianyi/OpenBLAS/commit/515cf269291bec0d43651fe7bf99a71fb074a0ad
https://github.com/xianyi/OpenBLAS/commit/91110f92d218492d0efbdc1fdf34277ca45f4b36
https://github.com/xianyi/OpenBLAS/commit/9402df5604e69f86f58953e3883f33f98c930baf
https://github.com/xianyi/OpenBLAS/commit/101a2c77c3f3610933f450cefca3e312edab2186
https://src.fedoraproject.org/rpms/openblas/c/5f27d51cebe1c1bb6598d38326ece8dc0ac71ec7?branch=rawhide
From f703846ad9400a8ea175cb8dd43e18c152aeab93 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Sat, 13 Aug 2022 11:38:27 +0200
Subject: [PATCH] Add function prototypes
--- a/exports/gensymbol
+++ b/exports/gensymbol
@@ -4000,6 +4000,22 @@ case "$p1" in
no_underscore_objs="$no_underscore_objs $misc_common_objs"
printf 'int main(void){\n'
+ for obj in $underscore_objs; do
+ [ "$obj" != "xerbla" ] && printf 'extern void %s%s%s%s();\n' \
+ "$symbolprefix" "$obj" "$bu" "$symbolsuffix"
+ done
+
+ for obj in $need_2underscore_objs; do
+ printf 'extern void %s%s%s%s%s();\n' \
+ "$symbolprefix" "$obj" "$bu" "$bu" "$symbolsuffix"
+ done
+
+ for obj in $no_underscore_objs; do
+ printf 'extern void %s%s%s();\n' \
+ "$symbolprefix" "$obj" "$symbolsuffix"
+ done
+
+ printf '\n'
for obj in $underscore_objs; do
[ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \
"$symbolprefix" "$obj" "$bu" "$symbolsuffix"
--- a/exports/gensymbol.pl
+++ b/exports/gensymbol.pl
@@ -3955,6 +3955,18 @@
@no_underscore_objs = (@no_underscore_objs, @misc_common_objs);
print "int main(void){\n";
+ foreach $objs (@underscore_objs) {
+ print "extern void ", $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
+ }
+
+ foreach $objs (@need_2underscore_objs) {
+ print "extern void ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n";
+ }
+
+ foreach $objs (@no_underscore_objs) {
+ print "extern void ", $symbolprefix, $objs, $symbolsuffix, "();\n";
+ }
+
foreach $objs (@underscore_objs) {
print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla";
}
From 515cf269291bec0d43651fe7bf99a71fb074a0ad Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed, 14 Sep 2022 11:48:36 +0200
Subject: [PATCH] Fix pointer/integer argument mismatch in calls to pow()
--- a/lapack-netlib/SRC/claed0.c
+++ b/lapack-netlib/SRC/claed0.c
@@ -796,10 +796,10 @@ f"> */
temp = log((real) (*n)) / log(2.f);
lgn = (integer) temp;
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
iprmpt = indxq + *n + 1;
--- a/lapack-netlib/SRC/claed7.c
+++ b/lapack-netlib/SRC/claed7.c
@@ -864,11 +864,11 @@ f"> */
/* Form the z-vector which consists of the last row of Q_1 and the */
/* first row of Q_2. */
- ptr = pow_ii(&c__2, tlvls) + 1;
+ ptr = pow_ii(c__2, *tlvls) + 1;
i__1 = *curlvl - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *tlvls - i__;
- ptr += pow_ii(&c__2, &i__2);
+ ptr += pow_ii(c__2, i__2);
/* L10: */
}
curr = ptr + *curpbm;
--- a/lapack-netlib/SRC/clalsa.c
+++ b/lapack-netlib/SRC/clalsa.c
@@ -1051,7 +1051,7 @@ f"> */
/* Finally go through the left singular vector matrices of all */
/* the other subproblems bottom-up on the tree. */
- j = pow_ii(&c__2, &nlvl);
+ j = pow_ii(c__2, nlvl);
sqre = 0;
for (lvl = nlvl; lvl >= 1; --lvl) {
@@ -1065,7 +1065,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
- lf = pow_ii(&c__2, &i__1);
+ lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;
@@ -1110,7 +1110,7 @@ f"> */
ll = 1;
} else {
i__2 = lvl - 1;
- lf = pow_ii(&c__2, &i__2);
+ lf = pow_ii(c__2, i__2);
ll = (lf << 1) - 1;
}
i__2 = lf;
--- a/lapack-netlib/SRC/cstedc.c
+++ b/lapack-netlib/SRC/cstedc.c
@@ -836,10 +836,10 @@ f"> */
lrwmin = *n - 1 << 1;
} else if (icompz == 1) {
lgn = (integer) (log((real) (*n)) / log(2.f));
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
lwmin = *n * *n;
--- a/lapack-netlib/SRC/dlaed0.c
+++ b/lapack-netlib/SRC/dlaed0.c
@@ -827,10 +827,10 @@ f"> */
temp = log((doublereal) (*n)) / log(2.);
lgn = (integer) temp;
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
iprmpt = indxq + *n + 1;
--- a/lapack-netlib/SRC/dlaed7.c
+++ b/lapack-netlib/SRC/dlaed7.c
@@ -885,11 +885,11 @@ f"> */
/* Form the z-vector which consists of the last row of Q_1 and the */
/* first row of Q_2. */
- ptr = pow_ii(&c__2, tlvls) + 1;
+ ptr = pow_ii(c__2, *tlvls) + 1;
i__1 = *curlvl - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *tlvls - i__;
- ptr += pow_ii(&c__2, &i__2);
+ ptr += pow_ii(c__2, i__2);
/* L10: */
}
curr = ptr + *curpbm;
--- a/lapack-netlib/SRC/dlaeda.c
+++ b/lapack-netlib/SRC/dlaeda.c
@@ -754,7 +754,7 @@ f"> */
/* scheme */
i__1 = *curlvl - 1;
- curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
+ curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1;
/* Determine size of these matrices. We add HALF to the value of */
/* the SQRT in case the machine underestimates one of these square */
@@ -781,12 +781,12 @@ f"> */
/* rotations and permutation and then multiplying the center matrices */
/* against the current Z. */
- ptr = pow_ii(&c__2, tlvls) + 1;
+ ptr = pow_ii(c__2, *tlvls) + 1;
i__1 = *curlvl - 1;
for (k = 1; k <= i__1; ++k) {
i__2 = *curlvl - k;
i__3 = *curlvl - k - 1;
- curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
+ curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) -
1;
psiz1 = prmptr[curr + 1] - prmptr[curr];
psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
@@ -847,7 +847,7 @@ f"> */
c__1);
i__2 = *tlvls - k;
- ptr += pow_ii(&c__2, &i__2);
+ ptr += pow_ii(c__2, i__2);
/* L70: */
}
--- a/lapack-netlib/SRC/dlalsa.c
+++ b/lapack-netlib/SRC/dlalsa.c
@@ -951,7 +951,7 @@ f"> */
/* Finally go through the left singular vector matrices of all */
/* the other subproblems bottom-up on the tree. */
- j = pow_ii(&c__2, &nlvl);
+ j = pow_ii(c__2, nlvl);
sqre = 0;
for (lvl = nlvl; lvl >= 1; --lvl) {
@@ -965,7 +965,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
- lf = pow_ii(&c__2, &i__1);
+ lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;
@@ -1010,7 +1010,7 @@ f"> */
ll = 1;
} else {
i__2 = lvl - 1;
- lf = pow_ii(&c__2, &i__2);
+ lf = pow_ii(c__2, i__2);
ll = (lf << 1) - 1;
}
i__2 = lf;
--- a/lapack-netlib/SRC/dlasd0.c
+++ b/lapack-netlib/SRC/dlasd0.c
@@ -824,7 +824,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
- lf = pow_ii(&c__2, &i__1);
+ lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;
--- a/lapack-netlib/SRC/dlasda.c
+++ b/lapack-netlib/SRC/dlasda.c
@@ -1027,7 +1027,7 @@ f"> */
/* Now conquer each subproblem bottom-up. */
- j = pow_ii(&c__2, &nlvl);
+ j = pow_ii(c__2, nlvl);
for (lvl = nlvl; lvl >= 1; --lvl) {
lvl2 = (lvl << 1) - 1;
@@ -1039,7 +1039,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
- lf = pow_ii(&c__2, &i__1);
+ lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;
--- a/lapack-netlib/SRC/dstedc.c
+++ b/lapack-netlib/SRC/dstedc.c
@@ -806,10 +806,10 @@ f"> */
lwmin = *n - 1 << 1;
} else {
lgn = (integer) (log((doublereal) (*n)) / log(2.));
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
if (icompz == 1) {
--- a/lapack-netlib/SRC/slaed0.c
+++ b/lapack-netlib/SRC/slaed0.c
@@ -823,10 +823,10 @@ f"> */
temp = log((real) (*n)) / log(2.f);
lgn = (integer) temp;
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
iprmpt = indxq + *n + 1;
--- a/lapack-netlib/SRC/slaed7.c
+++ b/lapack-netlib/SRC/slaed7.c
@@ -883,11 +883,11 @@ f"> */
/* Form the z-vector which consists of the last row of Q_1 and the */
/* first row of Q_2. */
- ptr = pow_ii(&c__2, tlvls) + 1;
+ ptr = pow_ii(c__2, *tlvls) + 1;
i__1 = *curlvl - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *tlvls - i__;
- ptr += pow_ii(&c__2, &i__2);
+ ptr += pow_ii(c__2, i__2);
/* L10: */
}
curr = ptr + *curpbm;
--- a/lapack-netlib/SRC/slaeda.c
+++ b/lapack-netlib/SRC/slaeda.c
@@ -753,7 +753,7 @@ f"> */
/* scheme */
i__1 = *curlvl - 1;
- curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
+ curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1;
/* Determine size of these matrices. We add HALF to the value of */
/* the SQRT in case the machine underestimates one of these square */
@@ -779,12 +779,12 @@ f"> */
/* rotations and permutation and then multiplying the center matrices */
/* against the current Z. */
- ptr = pow_ii(&c__2, tlvls) + 1;
+ ptr = pow_ii(c__2, *tlvls) + 1;
i__1 = *curlvl - 1;
for (k = 1; k <= i__1; ++k) {
i__2 = *curlvl - k;
i__3 = *curlvl - k - 1;
- curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
+ curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) -
1;
psiz1 = prmptr[curr + 1] - prmptr[curr];
psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
@@ -844,7 +844,7 @@ f"> */
c__1);
i__2 = *tlvls - k;
- ptr += pow_ii(&c__2, &i__2);
+ ptr += pow_ii(c__2, i__2);
/* L70: */
}
--- a/lapack-netlib/SRC/slalsa.c
+++ b/lapack-netlib/SRC/slalsa.c
@@ -946,7 +946,7 @@ f"> */
/* Finally go through the left singular vector matrices of all */
/* the other subproblems bottom-up on the tree. */
- j = pow_ii(&c__2, &nlvl);
+ j = pow_ii(c__2, nlvl);
sqre = 0;
for (lvl = nlvl; lvl >= 1; --lvl) {
@@ -960,7 +960,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
- lf = pow_ii(&c__2, &i__1);
+ lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;
@@ -1005,7 +1005,7 @@ f"> */
ll = 1;
} else {
i__2 = lvl - 1;
- lf = pow_ii(&c__2, &i__2);
+ lf = pow_ii(c__2, i__2);
ll = (lf << 1) - 1;
}
i__2 = lf;
--- a/lapack-netlib/SRC/slasd0.c
+++ b/lapack-netlib/SRC/slasd0.c
@@ -821,7 +821,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
- lf = pow_ii(&c__2, &i__1);
+ lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;
--- a/lapack-netlib/SRC/slasda.c
+++ b/lapack-netlib/SRC/slasda.c
@@ -1023,7 +1023,7 @@ f"> */
/* Now conquer each subproblem bottom-up. */
- j = pow_ii(&c__2, &nlvl);
+ j = pow_ii(c__2, nlvl);
for (lvl = nlvl; lvl >= 1; --lvl) {
lvl2 = (lvl << 1) - 1;
@@ -1035,7 +1035,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
- lf = pow_ii(&c__2, &i__1);
+ lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;
--- a/lapack-netlib/SRC/sstedc.c
+++ b/lapack-netlib/SRC/sstedc.c
@@ -804,10 +804,10 @@ f"> */
lwmin = *n - 1 << 1;
} else {
lgn = (integer) (log((real) (*n)) / log(2.f));
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
if (icompz == 1) {
--- a/lapack-netlib/SRC/zlaed0.c
+++ b/lapack-netlib/SRC/zlaed0.c
@@ -793,10 +793,10 @@ f"> */
temp = log((doublereal) (*n)) / log(2.);
lgn = (integer) temp;
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
iprmpt = indxq + *n + 1;
--- a/lapack-netlib/SRC/zlaed7.c
+++ b/lapack-netlib/SRC/zlaed7.c
@@ -864,11 +864,11 @@ f"> */
/* Form the z-vector which consists of the last row of Q_1 and the */
/* first row of Q_2. */
- ptr = pow_ii(&c__2, tlvls) + 1;
+ ptr = pow_ii(c__2, *tlvls) + 1;
i__1 = *curlvl - 1;
for (i__ = 1; i__ <= i__1; ++i__) {
i__2 = *tlvls - i__;
- ptr += pow_ii(&c__2, &i__2);
+ ptr += pow_ii(c__2, i__2);
/* L10: */
}
curr = ptr + *curpbm;
--- a/lapack-netlib/SRC/zlalsa.c
+++ b/lapack-netlib/SRC/zlalsa.c
@@ -1051,7 +1051,7 @@ f"> */
/* Finally go through the left singular vector matrices of all */
/* the other subproblems bottom-up on the tree. */
- j = pow_ii(&c__2, &nlvl);
+ j = pow_ii(c__2, nlvl);
sqre = 0;
for (lvl = nlvl; lvl >= 1; --lvl) {
@@ -1065,7 +1065,7 @@ f"> */
ll = 1;
} else {
i__1 = lvl - 1;
- lf = pow_ii(&c__2, &i__1);
+ lf = pow_ii(c__2, i__1);
ll = (lf << 1) - 1;
}
i__1 = ll;
@@ -1110,7 +1110,7 @@ f"> */
ll = 1;
} else {
i__2 = lvl - 1;
- lf = pow_ii(&c__2, &i__2);
+ lf = pow_ii(c__2, i__2);
ll = (lf << 1) - 1;
}
i__2 = lf;
--- a/lapack-netlib/SRC/zstedc.c
+++ b/lapack-netlib/SRC/zstedc.c
@@ -836,10 +836,10 @@ f"> */
lrwmin = *n - 1 << 1;
} else if (icompz == 1) {
lgn = (integer) (log((doublereal) (*n)) / log(2.));
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
- if (pow_ii(&c__2, &lgn) < *n) {
+ if (pow_ii(c__2, lgn) < *n) {
++lgn;
}
lwmin = *n * *n;
From 91110f92d218492d0efbdc1fdf34277ca45f4b36 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed, 14 Sep 2022 14:03:31 +0200
Subject: [PATCH] fix missing return type in function declaration
--- a/ctest/c_sblat1c.c
+++ b/ctest/c_sblat1c.c
@@ -969,7 +969,7 @@ real *sfac;
1.17 };
/* Local variables */
- extern /* Subroutine */ srottest_();
+ extern /* Subroutine */ void srottest_();
static integer i__, k, ksize;
extern /* Subroutine */ int stest_(), srotmtest_();
static integer ki, kn;
From 9402df5604e69f86f58953e3883f33f98c930baf Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed, 14 Sep 2022 21:44:34 +0200
Subject: [PATCH] Fix missing external declaration
--- a/driver/others/blas_server_omp.c
+++ b/driver/others/blas_server_omp.c
@@ -69,6 +69,8 @@
int blas_server_avail = 0;
+extern int openblas_omp_adaptive_env();
+
static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
#ifdef HAVE_C11
static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
From 101a2c77c3f3610933f450cefca3e312edab2186 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Thu, 15 Sep 2022 09:19:19 +0200
Subject: [PATCH] Fix warnings
--- a/kernel/x86_64/dgemm_ncopy_8_skylakex.c
+++ b/kernel/x86_64/dgemm_ncopy_8_skylakex.c
@@ -52,18 +52,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT * __restrict a, BLASLONG lda, FLOAT * __
FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
- FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
- FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
- FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
- FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
- FLOAT ctemp33, ctemp34, ctemp35, ctemp36;
- FLOAT ctemp37, ctemp38, ctemp39, ctemp40;
- FLOAT ctemp41, ctemp42, ctemp43, ctemp44;
- FLOAT ctemp45, ctemp46, ctemp47, ctemp48;
- FLOAT ctemp49, ctemp50, ctemp51, ctemp52;
- FLOAT ctemp53, ctemp54, ctemp55, ctemp56;
- FLOAT ctemp57, ctemp58, ctemp59, ctemp60;
- FLOAT ctemp61, ctemp62, ctemp63, ctemp64;
+ FLOAT ctemp17 /*, ctemp18, ctemp19, ctemp20*/ ;
+ FLOAT /*ctemp21, ctemp22,*/ ctemp23, ctemp24;
+ FLOAT ctemp25 /*, ctemp26, ctemp27, ctemp28*/ ;
+ FLOAT /*ctemp29, ctemp30,*/ ctemp31, ctemp32;
+ FLOAT ctemp33 /*, ctemp34, ctemp35, ctemp36*/ ;
+ FLOAT /*ctemp37, ctemp38,*/ ctemp39, ctemp40;
+ FLOAT ctemp41 /*, ctemp42, ctemp43, ctemp44*/ ;
+ FLOAT /*ctemp45, ctemp46,*/ ctemp47, ctemp48;
+ FLOAT ctemp49 /*, ctemp50, ctemp51, ctemp52*/ ;
+ FLOAT /*ctemp53, ctemp54,*/ ctemp55, ctemp56;
+ FLOAT ctemp57 /*, ctemp58, ctemp59, ctemp60*/ ;
+ FLOAT /*ctemp61, ctemp62,*/ ctemp63, ctemp64;
aoffset = a;
--- a/kernel/x86_64/omatcopy_rt.c
+++ b/kernel/x86_64/omatcopy_rt.c
@@ -142,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
,"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15");\
}
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb){
- float *src, *dst, *dst_tmp, *src_base, *dst_base;
+ float *src, *dst, *dst_tmp=0, *src_base, *dst_base;
uint64_t src_ld_bytes = (uint64_t)lda * sizeof(float), dst_ld_bytes = (uint64_t)ldb * sizeof(float), num_rows = 0;
BLASLONG cols_left, rows_done; float ALPHA = alpha;
if(ALPHA==0.0){