7.13.14.4 Directly-mapped SX and ASX Conversion Builtin Functions

For convenience, the lsxintrin.h file is included by lasxintrin.h, and 18 new interface functions for conversions between 128-bit and 256-bit vectors have been added. These functions are available when the -mlasx option is used.

__m256 __lasx_cast_128_s (__m128);
__m256d __lasx_cast_128_d (__m128d);
__m256i __lasx_cast_128 (__m128i);
__m256 __lasx_concat_128_s (__m128, __m128);
__m256d __lasx_concat_128_d (__m128d, __m128d);
__m256i __lasx_concat_128 (__m128i, __m128i);
__m128 __lasx_extract_128_lo_s (__m256);
__m128 __lasx_extract_128_hi_s (__m256);
__m128d __lasx_extract_128_lo_d (__m256d);
__m128d __lasx_extract_128_hi_d (__m256d);
__m128i __lasx_extract_128_lo (__m256i);
__m128i __lasx_extract_128_hi (__m256i);
__m256 __lasx_insert_128_lo_s (__m256, __m128);
__m256 __lasx_insert_128_hi_s (__m256, __m128);
__m256d __lasx_insert_128_lo_d (__m256d, __m128d);
__m256d __lasx_insert_128_hi_d (__m256d, __m128d);
__m256i __lasx_insert_128_lo (__m256i, __m128i);
__m256i __lasx_insert_128_hi (__m256i, __m128i);

When GCC does not support the interfaces for 128-bit and 256-bit vector conversions (that is, when the macro __loongarch_asx_sx_conv is not defined), the following code can be used as an equivalent substitute.


  #ifndef __loongarch_asx_sx_conv

  #include <lasxintrin.h>
  #include <lsxintrin.h>
  /* Reinterpret a 128-bit single-precision vector as a 256-bit vector.
     The asm template is empty, so no instruction is emitted; the "0"
     matching constraint only places SRC in the register chosen for the
     result.  Nothing here initializes the upper 128 bits of the result.  */
  __m256 inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_cast_128_s (__m128 src)
  {
    __m256 widened;
    __asm__ ("" : "=f" (widened) : "0" (src));
    return widened;
  }

  /* Reinterpret a 128-bit double-precision vector as a 256-bit vector.
     The asm template is empty, so no instruction is emitted; the "0"
     matching constraint only places SRC in the register chosen for the
     result.  Nothing here initializes the upper 128 bits of the result.  */
  __m256d inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_cast_128_d (__m128d src)
  {
    __m256d widened;
    __asm__ ("" : "=f" (widened) : "0" (src));
    return widened;
  }

  /* Reinterpret a 128-bit integer vector as a 256-bit vector.
     The asm template is empty, so no instruction is emitted; the "0"
     matching constraint only places SRC in the register chosen for the
     result.  Nothing here initializes the upper 128 bits of the result.  */
  __m256i inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_cast_128 (__m128i src)
  {
    __m256i widened;
    __asm__ ("" : "=f" (widened) : "0" (src));
    return widened;
  }

  /* Build a 256-bit single-precision vector with SRC1 in the low 128 bits
     and SRC2 in the high 128 bits.  SRC1 shares the destination register
     (the "0" constraint); xvpermi.q with immediate 0x02 keeps the
     destination's low half and writes the low half of SRC2 into the
     high half.  */
  __m256 inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_concat_128_s (__m128 src1, __m128 src2)
  {
    __m256 joined;
    __asm__ ("xvpermi.q %u0,%u2,0x02\n"
             : "=f" (joined)
             : "0" (src1), "f" (src2));
    return joined;
  }

  /* Build a 256-bit double-precision vector with SRC1 in the low 128 bits
     and SRC2 in the high 128 bits.  SRC1 shares the destination register
     (the "0" constraint); xvpermi.q with immediate 0x02 keeps the
     destination's low half and writes the low half of SRC2 into the
     high half.  */
  __m256d inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_concat_128_d (__m128d src1, __m128d src2)
  {
    __m256d joined;
    __asm__ ("xvpermi.q %u0,%u2,0x02\n"
             : "=f" (joined)
             : "0" (src1), "f" (src2));
    return joined;
  }

  /* Build a 256-bit integer vector with SRC1 in the low 128 bits and SRC2
     in the high 128 bits.  SRC1 shares the destination register (the "0"
     constraint); xvpermi.q with immediate 0x02 keeps the destination's
     low half and writes the low half of SRC2 into the high half.  */
  __m256i inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_concat_128 (__m128i src1, __m128i src2)
  {
    __m256i joined;
    __asm__ ("xvpermi.q %u0,%u2,0x02\n"
             : "=f" (joined)
             : "0" (src1), "f" (src2));
    return joined;
  }

  /* Return the low 128 bits of a 256-bit single-precision vector.
     The asm template is empty, so no instruction is emitted; the "0"
     matching constraint makes the result use the same register as SRC,
     and the result is read back at 128-bit width.  */
  __m128 inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_extract_128_lo_s (__m256 src)
  {
    __m128 low_half;
    __asm__ ("" : "=f" (low_half) : "0" (src));
    return low_half;
  }

  /* Return the low 128 bits of a 256-bit double-precision vector.
     The asm template is empty, so no instruction is emitted; the "0"
     matching constraint makes the result use the same register as SRC,
     and the result is read back at 128-bit width.  */
  __m128d inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_extract_128_lo_d (__m256d src)
  {
    __m128d low_half;
    __asm__ ("" : "=f" (low_half) : "0" (src));
    return low_half;
  }

  /* Return the low 128 bits of a 256-bit integer vector.
     The asm template is empty, so no instruction is emitted; the "0"
     matching constraint makes the result use the same register as SRC,
     and the result is read back at 128-bit width.  */
  __m128i inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_extract_128_lo (__m256i src)
  {
    __m128i low_half;
    __asm__ ("" : "=f" (low_half) : "0" (src));
    return low_half;
  }

  /* Return the high 128 bits of a 256-bit single-precision vector.
     xvpermi.d with immediate 0xe copies 64-bit elements 2 and 3 of SRC
     into elements 0 and 1 of the result register; those low 128 bits
     are what is read back as the result.  */
  __m128 inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_extract_128_hi_s (__m256 src)
  {
    __m128 high_half;
    __asm__ ("xvpermi.d %u0,%u1,0xe\n"
             : "=f" (high_half)
             : "f" (src));
    return high_half;
  }

  /* Return the high 128 bits of a 256-bit double-precision vector.
     xvpermi.d with immediate 0xe copies 64-bit elements 2 and 3 of SRC
     into elements 0 and 1 of the result register; those low 128 bits
     are what is read back as the result.  */
  __m128d inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_extract_128_hi_d (__m256d src)
  {
    __m128d high_half;
    __asm__ ("xvpermi.d %u0,%u1,0xe\n"
             : "=f" (high_half)
             : "f" (src));
    return high_half;
  }

  /* Return the high 128 bits of a 256-bit integer vector.
     xvpermi.d with immediate 0xe copies 64-bit elements 2 and 3 of SRC
     into elements 0 and 1 of the result register; those low 128 bits
     are what is read back as the result.  */
  __m128i inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_extract_128_hi (__m256i src)
  {
    __m128i high_half;
    __asm__ ("xvpermi.d %u0,%u1,0xe\n"
             : "=f" (high_half)
             : "f" (src));
    return high_half;
  }

  /* Return SRC1 with its low 128 bits replaced by SRC2 (single precision).
     SRC1 shares the destination register (the "0" constraint); xvpermi.q
     with immediate 0x30 takes the low half from SRC2 and keeps the
     destination's own high half.  */
  __m256 inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_insert_128_lo_s (__m256 src1, __m128 src2)
  {
    __m256 merged;
    __asm__ ("xvpermi.q %u0,%u2,0x30\n"
             : "=f" (merged)
             : "0" (src1), "f" (src2));
    return merged;
  }

  /* Return A with its low 128 bits replaced by B (double precision).
     A shares the destination register (the "0" constraint); xvpermi.q
     with immediate 0x30 takes the low half from B and keeps the
     destination's own high half.  */
  __m256d inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_insert_128_lo_d (__m256d a, __m128d b)
  {
    __m256d merged;
    /* The operands must name this function's parameters, A and B.  */
    __asm__ ("xvpermi.q %u0,%u2,0x30\n"
             : "=f" (merged)
             : "0" (a), "f" (b));
    return merged;
  }

  /* Return SRC1 with its low 128 bits replaced by SRC2 (integer).
     SRC1 shares the destination register (the "0" constraint); xvpermi.q
     with immediate 0x30 takes the low half from SRC2 and keeps the
     destination's own high half.  */
  __m256i inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_insert_128_lo (__m256i src1, __m128i src2)
  {
    __m256i merged;
    __asm__ ("xvpermi.q %u0,%u2,0x30\n"
             : "=f" (merged)
             : "0" (src1), "f" (src2));
    return merged;
  }

  /* Return SRC1 with its high 128 bits replaced by SRC2 (single precision).
     SRC1 shares the destination register (the "0" constraint); xvpermi.q
     with immediate 0x02 keeps the destination's low half and writes the
     low half of SRC2 into the high half.  */
  __m256 inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_insert_128_hi_s (__m256 src1, __m128 src2)
  {
    __m256 merged;
    __asm__ ("xvpermi.q %u0,%u2,0x02\n"
             : "=f" (merged)
             : "0" (src1), "f" (src2));
    return merged;
  }

  /* Return SRC1 with its high 128 bits replaced by SRC2 (double precision).
     SRC1 shares the destination register (the "0" constraint); xvpermi.q
     with immediate 0x02 keeps the destination's low half and writes the
     low half of SRC2 into the high half.  */
  __m256d inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_insert_128_hi_d (__m256d src1, __m128d src2)
  {
    __m256d merged;
    __asm__ ("xvpermi.q %u0,%u2,0x02\n"
             : "=f" (merged)
             : "0" (src1), "f" (src2));
    return merged;
  }

  /* Return SRC1 with its high 128 bits replaced by SRC2 (integer).
     SRC1 shares the destination register (the "0" constraint); xvpermi.q
     with immediate 0x02 keeps the destination's low half and writes the
     low half of SRC2 into the high half.  */
  __m256i inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
  __lasx_insert_128_hi (__m256i src1, __m128i src2)
  {
    __m256i merged;
    __asm__ ("xvpermi.q %u0,%u2,0x02\n"
             : "=f" (merged)
             : "0" (src1), "f" (src2));
    return merged;
  }
  #endif