// gcc-14 -c -O2 -w -march=rv64gcv test.c
#include <riscv_vector.h>
typedef int npy_intp;
// Forward declaration only: no definition of vaddq_f32 is visible in this
// chunk. The parameter list is empty (not a prototype), so argument count and
// types are unchecked; this file calls it with one argument in
// npyv_load2_tillz_f32() and with two arguments in CFLOAT_add().
// The attribute list is empty.
static inline __attribute__(()) int vaddq_f32();
// Produces a vint32m1_t from local vector temporaries and returns whatever
// vaddq_f32() returns when given that vector as its only argument.
//
// nlane: only compared against 1; it selects whether the vslideup intrinsic
//        is executed before the reinterpret.
//
// NOTE(review): the nested bare blocks and __trans_tmp_N names look like the
// output of an automatic reducer/inliner; presumably the exact statement shape
// matters for the compiler behaviour being reproduced -- confirm before
// restructuring.
static inline __attribute__(()) int npyv_load2_tillz_f32(int nlane) {
vint32m1_t __trans_tmp_9;
{
int __trans_tmp_10 = nlane;
{
vint32m1_t __trans_tmp_8;
vint64m1_t __trans_tmp_6;
vint64m1_t __trans_tmp_4;
vint64m1_t __trans_tmp_3;
vint64m1_t __trans_tmp_2;
vint64m1_t __trans_tmp_1;
if (__trans_tmp_10 == 1) {
{
// __trans_tmp_1 and __trans_tmp_2 are passed here without ever having been
// assigned; the trailing arguments are the constants 1 and 2.
__trans_tmp_3 =
__riscv_vslideup_vx_i64m1(__trans_tmp_1, __trans_tmp_2, 1, 2);
}
// Stored but never read afterwards.
__trans_tmp_4 = __trans_tmp_3;
}
// When nlane != 1, __trans_tmp_3 has not been assigned at this point, so the
// reads below use an uninitialised value.
// __trans_tmp_6 is stored but never read afterwards.
__trans_tmp_6 = __trans_tmp_3;
// Reinterpret the 64-bit-element vector as a 32-bit-element vector.
__trans_tmp_8 = __riscv_vreinterpret_v_i64m1_i32m1(__trans_tmp_3);
__trans_tmp_9 = __trans_tmp_8;
}
}
// Single-argument call through the non-prototype declaration of vaddq_f32.
return vaddq_f32(__trans_tmp_9);
}
// File-scope objects used by CFLOAT_add(). None of them has an initialiser in
// this chunk.
// Single chars whose addresses CFLOAT_add() takes as b_dst, b_src1 and b_src0
// respectively.
char CFLOAT_add_args_2;
char CFLOAT_add_args_1;
char CFLOAT_add_args_0;
// Indexed at [0] by CFLOAT_add() to obtain the initial loop count.
const npy_intp *CFLOAT_add_dimensions;
// Indexed at [1] and [2] by CFLOAT_add() for the src1 and dst steps.
const npy_intp *CFLOAT_add_steps;
// const object with no initialiser; used as the b_src0 increment in
// CFLOAT_add()'s second loop.
const npy_intp CFLOAT_add_steps_0;
// Loop driver that reads its count and steps from the file-scope CFLOAT_add_*
// objects, calls npyv_load2_tillz_f32() and vaddq_f32() once per iteration of
// the first loop, and discards the results. Nothing is written through dst or
// any other pointer in the visible code.
//
// Takes no arguments and returns nothing; the attribute list is empty.
//
// NOTE(review): this looks like a reduced compiler test case rather than a
// working kernel -- confirm before "fixing" the oddities noted below.
__attribute__(()) void CFLOAT_add() {
// Initial iteration count, read through the global pointer.
npy_intp len = CFLOAT_add_dimensions[0];
char *b_src0 = &CFLOAT_add_args_0, *b_src1 = &CFLOAT_add_args_1,
*b_dst = &CFLOAT_add_args_2;
// float views of the single-char globals; advanced in the first loop but
// never dereferenced in this function.
const float *src1 = (float *)b_src1;
float *dst = (float *)b_dst;
// Steps divided by sizeof(float) and stored back into npy_intp (int).
const npy_intp ssrc1 = CFLOAT_add_steps[1] / sizeof(float);
const npy_intp sdst = CFLOAT_add_steps[2] / sizeof(float);
// Evaluates to 2; used as the per-iteration decrement of len.
const int hstep = 4 / 2;
{
// Never assigned before it is read in the loop body.
vfloat32m1x2_t a;
// Expression statement: the comparison result is discarded, so this has no
// effect.
ssrc1 == 2 && sdst == ssrc1;
for (; len > 0; len -= hstep, src1 += 4, dst += 4) {
int b = npyv_load2_tillz_f32(len);
// Two-argument call through the non-prototype declaration of vaddq_f32;
// r is not used afterwards.
// NOTE(review): whether vfloat32m1x2_t exposes a __val member is not
// established by anything in this file -- confirm against the compiler.
int r = vaddq_f32(a.__val[0], b);
}
}
// The first loop only exits once len <= 0 and has no break, so this loop's
// condition is already false on entry; its body is empty in any case.
for (; len > 0; --len, b_src0 += CFLOAT_add_steps_0,
b_src1 += CFLOAT_add_steps[1], b_dst += CFLOAT_add_steps[2])
;
}