// gcc-14 -c -O2 -w -march=rv64gcv test.c

#include <riscv_vector.h>
// Index/size type used by the code below; in this file it is plain `int`.
typedef int npy_intp;
// Forward declaration only: the parameter list is empty, so under pre-C23
// rules the compiler has no parameter information and accepts calls with any
// number or type of arguments. The empty `__attribute__(())` adds nothing.
// NOTE(review): no definition of vaddq_f32 is visible in this view -- confirm
// whether one is expected elsewhere.
static inline __attribute__(()) int vaddq_f32();
// Builds a vint32m1_t from a chain of vector temporaries and returns the int
// result of passing it to vaddq_f32().
//
// nlane: only compared against 1; it selects whether the slide-up below runs.
//
// NOTE(review): the `__trans_tmp_*` names and redundant nested scopes look
// like the output of an automated inlining/reduction tool. The statement
// sequence may matter to whatever this file is meant to reproduce -- confirm
// before restructuring.
static inline __attribute__(()) int npyv_load2_tillz_f32(int nlane) {
  vint32m1_t __trans_tmp_9;
  {
    int __trans_tmp_10 = nlane;
    {
      // None of these vector temporaries has an initializer.
      vint32m1_t __trans_tmp_8;
      vint64m1_t __trans_tmp_6;
      vint64m1_t __trans_tmp_4;
      vint64m1_t __trans_tmp_3;
      vint64m1_t __trans_tmp_2;
      vint64m1_t __trans_tmp_1;
      if (__trans_tmp_10 == 1) {
        {
          // The only assignment to __trans_tmp_3. Both vector operands
          // (__trans_tmp_1 and __trans_tmp_2) are still uninitialized here;
          // the trailing scalar arguments are the constants 1 and 2.
          __trans_tmp_3 =
              __riscv_vslideup_vx_i64m1(__trans_tmp_1, __trans_tmp_2, 1, 2);
        }
        // Stored but never read afterwards in this function.
        __trans_tmp_4 = __trans_tmp_3;
      }
      // When nlane != 1 the branch above is skipped, so __trans_tmp_3 is read
      // here and on the next line without ever having been assigned.
      // __trans_tmp_6 itself is never read afterwards.
      __trans_tmp_6 = __trans_tmp_3;
      // Reinterpret the 64-bit-element vector as a 32-bit-element vector.
      __trans_tmp_8 = __riscv_vreinterpret_v_i64m1_i32m1(__trans_tmp_3);
      __trans_tmp_9 = __trans_tmp_8;
    }
  }
  // vaddq_f32 is declared without a prototype; it is called here with a single
  // vector argument.
  return vaddq_f32(__trans_tmp_9);
}
// File-scope objects read by CFLOAT_add(). None has an initializer in this
// file.
// Single-char objects; CFLOAT_add() takes their addresses as byte pointers.
char CFLOAT_add_args_2;
char CFLOAT_add_args_1;
char CFLOAT_add_args_0;
// CFLOAT_add() reads element [0] of this pointer as the element count.
// NOTE(review): nothing in this view assigns the pointer -- confirm where it
// is set.
const npy_intp *CFLOAT_add_dimensions;
// CFLOAT_add() reads elements [1] and [2] of this pointer as step values.
// NOTE(review): nothing in this view assigns the pointer -- confirm where it
// is set.
const npy_intp *CFLOAT_add_steps;
// Const-qualified and declared without an initializer; used as the b_src0
// increment in CFLOAT_add()'s final loop.
const npy_intp CFLOAT_add_steps_0;
// Loop skeleton driven by the CFLOAT_add_* globals: reads a length and step
// values, then calls npyv_load2_tillz_f32() and vaddq_f32() once per iteration
// of the first loop. No result is stored through dst or any other pointer.
//
// NOTE(review): this looks like a reduced reproducer rather than working
// arithmetic (uninitialized `a`, discarded results) -- confirm intent before
// "fixing" any of it.
__attribute__(()) void CFLOAT_add() {
  npy_intp len = CFLOAT_add_dimensions[0];
  // Byte pointers to the three single-char globals.
  char *b_src0 = &CFLOAT_add_args_0, *b_src1 = &CFLOAT_add_args_1,
       *b_dst = &CFLOAT_add_args_2;
  // Float views of the same addresses; they are advanced in the loop below but
  // never dereferenced in this function.
  const float *src1 = (float *)b_src1;
  float *dst = (float *)b_dst;
  // sizeof yields size_t, so each division is carried out in an unsigned type
  // before the quotient is converted back to npy_intp (int).
  const npy_intp ssrc1 = CFLOAT_add_steps[1] / sizeof(float);
  const npy_intp sdst = CFLOAT_add_steps[2] / sizeof(float);
  const int hstep = 4 / 2;  // evaluates to 2
  {
    // Declared without an initializer and never assigned before it is read.
    vfloat32m1x2_t a;
    // Expression statement: the comparison result is discarded.
    ssrc1 == 2 && sdst == ssrc1;
    // len drops by hstep (2) per iteration while src1 and dst each advance by
    // 4 floats.
    for (; len > 0; len -= hstep, src1 += 4, dst += 4) {
      int b = npyv_load2_tillz_f32(len);
      // Passes the first element of a's __val member and b to the
      // unprototyped vaddq_f32; r is never used.
      // NOTE(review): whether vfloat32m1x2_t exposes a `__val` member depends
      // on the compiler's riscv_vector.h -- confirm.
      int r = vaddq_f32(a.__val[0], b);
    }
  }
  // The loop above only exits once `len > 0` is false and nothing changes len
  // in between, so this loop's condition is false on entry and its increment
  // expressions never run. The body is empty.
  for (; len > 0; --len, b_src0 += CFLOAT_add_steps_0,
                  b_src1 += CFLOAT_add_steps[1], b_dst += CFLOAT_add_steps[2])
    ;
}