TYPE TYPE_load(const unsigned char *S) {
  /* Builds the result from consecutive bytes starting at S, least
     significant byte first: the byte read at step K is placed at bit
     offset K, for K = 0, 8, ... while K < N.
     NOTE(review): N is not defined in this block; presumably it is the
     bit width of the type, filled in by the template expansion. */
  TYPE Z = 0;
  int K;
  for (K = 0;K < N;K += 8)
    Z |= ((TYPE) (*S++)) << K;
  return Z;
}

TYPE TYPE_load_bigendian(const unsigned char *S) {
  /* Builds the result from consecutive bytes starting at S, most
     significant byte first: the first byte read is placed at bit offset
     N-8 and each later byte 8 bits lower, down to offset 0.
     NOTE(review): N is not defined in this block; presumably it is the
     bit width of the type, filled in by the template expansion. */
  TYPE Z = 0;
  int K;
  for (K = N - 8;K >= 0;K -= 8)
    Z |= ((TYPE) (*S++)) << K;
  return Z;
}

void TYPE_store(unsigned char *S,TYPE X) {
  /* Writes X to consecutive bytes starting at S, least significant byte
     first.  Each step stores X >> K into an unsigned char, so only the
     low-order byte of the shifted value is kept. */
  int K;
  for (K = 0;K < N;K += 8)
    *S++ = X >> K;
}

void TYPE_store_bigendian(unsigned char *S,TYPE X) {
  /* Writes X to consecutive bytes starting at S, most significant byte
     first: the shift amount K starts at N-8 and decreases by 8 down to
     0.  Each step stores X >> K into an unsigned char, so only the
     low-order byte of the shifted value is kept. */
  int K;
  for (K = N - 8;K >= 0;K -= 8)
    *S++ = X >> K;
}

SIGNED SIGNED_negative_mask(SIGNED X) {
  /* Returns -1 (all bits set) when X is negative and 0 otherwise, i.e.
     the top bit of X replicated across the whole value.
     NOTE(review): the readasm strings in the architecture branches are
     expanded elsewhere (presumably into inline assembly, one line per
     bit width selected by the numeric prefix) -- confirm against the
     generator. */
#if amd64
 8:  readasm("amd64; int8 X; X signed>>= 7");
16:  readasm("amd64; int16 X; X signed>>= 15");
32:  readasm("amd64; int32 X; X signed>>= 31");
64:  readasm("amd64; int64 X; X signed>>= 63");
  return X;
#elif arm64
  SIGNED Y;
 8:  readasm("arm64; int8 X Y; Y = -(1 & (X unsigned>> 7))");
16:  readasm("arm64; int16 X Y; Y = -(1 & (X unsigned>> 15))");
32:  readasm("arm64; int32 X Y; Y = X signed>> 31");
64:  readasm("arm64; int64 X Y; Y = X signed>> 63");
  return Y;
#elif arm32
  SIGNED Y;
 8:  readasm("arm32; int8 X Y; Y = (int8) X; Y = Y signed>> 31");
16:  readasm("arm32; int16 X Y; Y = (int16) X; Y = Y signed>> 31");
32:  readasm("arm32; int32 X Y; Y = X signed>> 31");
64:  readasm("arm32; int64 X Y; Y.lo = X.hi signed>> 31; Y.hi = Y.lo");
  return Y;
#elif sparc32
  SIGNED Y;
 8:  readasm("sparc32; int8 X Y; Y = X << 24; Y = Y signed>> 31");
16:  readasm("sparc32; int16 X Y; Y = X << 16; Y = Y signed>> 31");
32:  readasm("sparc32; int32 X Y; Y = X signed>> 31");
64:  readasm("sparc32; int64 X Y; Y.lo = X.hi signed>> 31; Y.hi = Y.lo");
  return Y;
#else
  /* Portable path: right shift by N-1 in total, split into N-6 and 5
     with SIGNED_optblocker added in between.  Relies on >> of negative
     signed values behaving as an arithmetic shift.
     NOTE(review): SIGNED_optblocker is defined elsewhere; presumably it
     is zero at run time and only stops the compiler from merging the
     two shifts -- confirm. */
  X >>= N-6;
  X += SIGNED_optblocker;
  X >>= 5;
  return X;
#endif
}

UNSIGNED UNSIGNED_topbit_01(UNSIGNED X) {
  /* Returns 1 when the top bit of X is set and 0 otherwise.
     NOTE(review): the readasm strings in the architecture branches are
     expanded elsewhere (presumably into inline assembly, one line per
     bit width selected by the numeric prefix) -- confirm against the
     generator. */
#if amd64
 8:  readasm("amd64; int8 X; X unsigned>>= 7");
16:  readasm("amd64; int16 X; X unsigned>>= 15");
32:  readasm("amd64; int32 X; X unsigned>>= 31");
64:  readasm("amd64; int64 X; X unsigned>>= 63");
  return X;
#elif arm64
  UNSIGNED Y;
 8:  readasm("arm64; int8 X Y; Y = 1 & (X unsigned>> 7)");
16:  readasm("arm64; int16 X Y; Y = 1 & (X unsigned>> 15)");
32:  readasm("arm64; int32 X Y; Y = X unsigned>> 31");
64:  readasm("arm64; int64 X Y; Y = X unsigned>> 63");
  return Y;
#elif arm32
  /* The temporary uses the unsigned type, matching the return type and
     the other branches; the value it holds is only ever 0 or 1. */
  UNSIGNED Y;
 8:  readasm("arm32; int8 X Y; Y = (uint8) X; Y = Y unsigned>> 7");
16:  readasm("arm32; int16 X Y; Y = (uint16) X; Y = Y unsigned>> 15");
32:  readasm("arm32; int32 X Y; Y = X unsigned>> 31");
64:  readasm("arm32; int64 X Y; Y.lo = X.hi unsigned>> 31; Y.hi = 0");
  return Y;
#elif sparc32
  UNSIGNED Y;
 8:  readasm("sparc32; int8 X Y; Y = (uint8) X; Y = Y unsigned>> 7");
16:  readasm("sparc32; int16 X Y; Y = (uint16) X; Y = Y unsigned>> 15");
32:  readasm("sparc32; int32 X Y; Y = X unsigned>> 31");
64:  readasm("sparc32; int64 X Y; Y.lo = X.hi unsigned>> 31; Y.hi = 0");
  return Y;
#else
  /* Portable path: logical right shift by N-1 in total, split into N-6
     and 5 with SIGNED_optblocker added in between.
     NOTE(review): SIGNED_optblocker is defined elsewhere; presumably it
     is zero at run time and only stops the compiler from merging the
     two shifts -- confirm. */
  X >>= N-6;
  X += SIGNED_optblocker;
  X >>= 5;
  return X;
#endif
}

SIGNED:
TYPE TYPE_negative_01(TYPE X) {
  /* Returns 1 when X is negative and 0 otherwise, by passing X to the
     unsigned top-bit test (the sign bit is the top bit). */
  return UNSIGNED_topbit_01(X);
}

SIGNED:
TYPE TYPE_topbit_mask(TYPE X) {
  /* Returns -1 when the top bit of X is set and 0 otherwise; for the
     signed type this is exactly the negative mask. */
  return SIGNED_negative_mask(X);
}

SIGNED:
TYPE TYPE_topbit_01(TYPE X) {
  /* Returns 1 when the top bit of X is set and 0 otherwise, delegating
     to the unsigned top-bit test. */
  return UNSIGNED_topbit_01(X);
}

UNSIGNED:
TYPE TYPE_topbit_mask(TYPE X) {
  /* Returns an all-ones value when the top bit of X is set and 0
     otherwise.  X is handed to the signed negative-mask helper and the
     -1/0 result is converted back to the unsigned return type. */
  return SIGNED_negative_mask(X);
}

SIGNED:
TYPE TYPE_bottombit_mask(TYPE X) {
  /* Returns -1 (all bits set) when bit 0 of X is set and 0 otherwise:
     every branch isolates bit 0 and negates it.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into inline assembly) -- confirm against the generator. */
#if amd64
 8:  readasm("amd64; int8 X; X &= 1");
16:  readasm("amd64; int16 X; X &= 1");
32:  readasm("amd64; int32 X; X &= 1");
64:  readasm("amd64; int64 X; X &= 1");
  return -X;
#elif arm64
  TYPE Y;
 8:  readasm("arm64; int8 X Y; Y = -(1 & (X unsigned>> 0))");
16:  readasm("arm64; int16 X Y; Y = -(1 & (X unsigned>> 0))");
32:  readasm("arm64; int32 X Y; Y = -(1 & (X unsigned>> 0))");
64:  readasm("arm64; int64 X Y; Y = -(1 & (X unsigned>> 0))");
  return Y;
#elif arm32
  TYPE Y;
 8:  readasm("arm32; int8 X Y; Y = X & 1; Y = -Y");
16:  readasm("arm32; int16 X Y; Y = X & 1; Y = -Y");
32:  readasm("arm32; int32 X Y; Y = X & 1; Y = -Y");
64:  readasm("arm32; int64 X Y; Y.lo = X.lo & 1; Y.lo = -Y.lo; Y.hi = Y.lo");
  return Y;
#elif sparc32
  TYPE Y;
 8:  readasm("sparc32; int8 X Y; Y = X & 1; Y = -Y");
16:  readasm("sparc32; int16 X Y; Y = X & 1; Y = -Y");
32:  readasm("sparc32; int32 X Y; Y = X & 1; Y = -Y");
64:  readasm("sparc32; int64 X Y; Y.lo = X.lo & 1; Y.lo = -Y.lo; Y.hi = Y.lo");
  return Y;
#else
  /* Portable path.  NOTE(review): SIGNED_optblocker is defined
     elsewhere; presumably it is zero at run time, making the mask
     constant equal to 1 while hiding that from the compiler --
     confirm. */
  X &= 1 + SIGNED_optblocker;
  return -X;
#endif
}

UNSIGNED:
TYPE TYPE_bottombit_mask(TYPE X) {
  /* Returns an all-ones value when bit 0 of X is set and 0 otherwise:
     every branch isolates bit 0 and negates it.  The 8- and 16-bit
     lines of the two 32-bit architecture branches additionally
     zero-extend the negated result back to the narrow width.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into inline assembly) -- confirm against the generator. */
#if amd64
 8:  readasm("amd64; int8 X; X &= 1");
16:  readasm("amd64; int16 X; X &= 1");
32:  readasm("amd64; int32 X; X &= 1");
64:  readasm("amd64; int64 X; X &= 1");
  return -X;
#elif arm64
  TYPE Y;
 8:  readasm("arm64; int8 X Y; Y = -(1 & (X unsigned>> 0))");
16:  readasm("arm64; int16 X Y; Y = -(1 & (X unsigned>> 0))");
32:  readasm("arm64; int32 X Y; Y = -(1 & (X unsigned>> 0))");
64:  readasm("arm64; int64 X Y; Y = -(1 & (X unsigned>> 0))");
  return Y;
#elif arm32
  TYPE Y;
 8:  readasm("arm32; int8 X Y; Y = X & 1; Y = -Y; Y = (uint8) Y");
16:  readasm("arm32; int16 X Y; Y = X & 1; Y = -Y; Y = (uint16) Y");
32:  readasm("arm32; int32 X Y; Y = X & 1; Y = -Y");
64:  readasm("arm32; int64 X Y; Y.lo = X.lo & 1; Y.lo = -Y.lo; Y.hi = Y.lo");
  return Y;
#elif sparc32
  TYPE Y;
 8:  readasm("sparc32; int8 X Y; Y = X & 1; Y = -Y; Y = (uint8) Y");
16:  readasm("sparc32; int16 X Y; Y = X & 1; Y = -Y; Y = (uint16) Y");
32:  readasm("sparc32; int32 X Y; Y = X & 1; Y = -Y");
64:  readasm("sparc32; int64 X Y; Y.lo = X.lo & 1; Y.lo = -Y.lo; Y.hi = Y.lo");
  return Y;
#else
  /* Portable path.  NOTE(review): SIGNED_optblocker is defined
     elsewhere; presumably it is zero at run time, making the mask
     constant equal to 1 while hiding that from the compiler --
     confirm. */
  X &= 1 + SIGNED_optblocker;
  return -X;
#endif
}

TYPE TYPE_bottombit_01(TYPE X) {
  /* Returns bit 0 of X as the value 1 or 0.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into inline assembly) -- confirm against the generator. */
#if amd64
 8:  readasm("amd64; int8 X; X &= 1");
16:  readasm("amd64; int16 X; X &= 1");
32:  readasm("amd64; int32 X; X &= 1");
64:  readasm("amd64; int64 X; X &= 1");
  return X;
#elif arm64
  TYPE Y;
 8:  readasm("arm64; int8 X Y; Y = 1 & (X unsigned>> 0)");
16:  readasm("arm64; int16 X Y; Y = 1 & (X unsigned>> 0)");
32:  readasm("arm64; int32 X Y; Y = 1 & (X unsigned>> 0)");
64:  readasm("arm64; int64 X Y; Y = 1 & (X unsigned>> 0)");
  return Y;
#elif arm32
  TYPE Y;
 8:  readasm("arm32; int8 X Y; Y = X & 1");
16:  readasm("arm32; int16 X Y; Y = X & 1");
32:  readasm("arm32; int32 X Y; Y = X & 1");
64:  readasm("arm32; int64 X Y; Y.lo = X.lo & 1; Y.hi = 0");
  return Y;
#elif sparc32
  TYPE Y;
 8:  readasm("sparc32; int8 X Y; Y = X & 1");
16:  readasm("sparc32; int16 X Y; Y = X & 1");
32:  readasm("sparc32; int32 X Y; Y = X & 1");
64:  readasm("sparc32; int64 X Y; Y.lo = X.lo & 1; Y.hi = 0");
  return Y;
#else
  /* Portable path.  NOTE(review): SIGNED_optblocker is defined
     elsewhere; presumably it is zero at run time, making the mask
     constant equal to 1 while hiding that from the compiler --
     confirm. */
  X &= 1 + SIGNED_optblocker;
  return X;
#endif
}

SIGNED:
TYPE TYPE_bitinrangepublicpos_mask(TYPE X,TYPE S) {
  /* Shifts X right by S and returns the bottom-bit mask of the result,
     i.e. -1 when bit S of X is set and 0 otherwise.
     NOTE(review): the name suggests S is expected to be a public
     (non-secret) position within 0..N-1; only some branches reduce S
     explicitly, so out-of-range S is presumably unsupported -- confirm.
     The readasm strings are expanded elsewhere. */
#if amd64
 8:  readasm("amd64; int8 X S; X signed>>= S");
16:  readasm("amd64; int16 X S; X signed>>= S");
32:  readasm("amd64; int32 X S; X signed>>= S");
64:  readasm("amd64; int64 X S; X signed>>= S");
#elif arm64
 8:  readasm("arm64; int8 X S; X = (int8) X; X = X signed>> S");
16:  readasm("arm64; int16 X S; X = (int16) X; X = X signed>> S");
32:  readasm("arm64; int32 X S; X = X signed>> S");
64:  readasm("arm64; int64 X S; X = X signed>> S");
#elif arm32
 8:  readasm("arm32; int8 X S; S = S & 7; X = (int8) X; X = X signed>> S");
16:  readasm("arm32; int16 X S; S = S & 15; X = (int16) X; X = X signed>> S");
32:  readasm("arm32; int32 X S; S = S & 31; X = X signed>> S");
64:  readasm("arm32; int64 X S; S.lo = S.lo & 63; X.lo = X.lo unsigned>> S.lo; S.hi = 32 - S.lo; X.lo = X.lo | (X.hi << S.hi); flags, S.hi = S.lo - 32; X.lo = X.lo | (X.hi signed>> S.hi) if unsigned>=; X.hi = X.hi signed>> S.lo");
#elif sparc32
 8:  readasm("sparc32; int8 X S; S = S & 7; X = X signed>> S");
16:  readasm("sparc32; int16 X S; S = S & 15; X = X signed>> S");
32:  readasm("sparc32; int32 X S; X = X signed>> S");
64:  TYPE Y, Z;
64:  readasm("sparc32; int64 X Y Z S; S.hi = ~S.lo; Z.hi = S.lo << 26; Z.lo = X.hi << 1; Y.lo = X.lo unsigned>> S.lo; Z.lo = Z.lo << S.hi; Y.hi = X.hi signed>> S.lo; Z.hi = Z.hi signed>> 31; Y.lo = Y.lo | Z.lo; Z.lo = Y.lo ^ Y.hi; Z.lo = Z.hi & Z.lo; X.hi = Y.hi signed>> Z.hi; X.lo = Y.lo ^ Z.lo");
#else
  /* Portable path.  NOTE(review): SIGNED_optblocker is defined
     elsewhere; presumably zero at run time, so the shift count is
     just S -- confirm. */
  X >>= S ^ SIGNED_optblocker;
#endif
  /* All branches fall through to here with the shifted value in X. */
  return TYPE_bottombit_mask(X);
}

SIGNED:
TYPE TYPE_bitinrangepublicpos_01(TYPE X,TYPE S) {
  /* Shifts X right by S and returns bit 0 of the result, i.e. 1 when
     bit S of X is set and 0 otherwise.
     NOTE(review): the name suggests S is expected to be a public
     (non-secret) position within 0..N-1; only some branches reduce S
     explicitly, so out-of-range S is presumably unsupported -- confirm.
     The readasm strings are expanded elsewhere. */
#if amd64
 8:  readasm("amd64; int8 X S; X signed>>= S");
16:  readasm("amd64; int16 X S; X signed>>= S");
32:  readasm("amd64; int32 X S; X signed>>= S");
64:  readasm("amd64; int64 X S; X signed>>= S");
#elif arm64
 8:  readasm("arm64; int8 X S; X = (int8) X; X = X signed>> S");
16:  readasm("arm64; int16 X S; X = (int16) X; X = X signed>> S");
32:  readasm("arm64; int32 X S; X = X signed>> S");
64:  readasm("arm64; int64 X S; X = X signed>> S");
#elif arm32
 8:  readasm("arm32; int8 X S; S = S & 7; X = (int8) X; X = X signed>> S");
16:  readasm("arm32; int16 X S; S = S & 15; X = (int16) X; X = X signed>> S");
32:  readasm("arm32; int32 X S; S = S & 31; X = X signed>> S");
64:  readasm("arm32; int64 X S; S.lo = S.lo & 63; X.lo = X.lo unsigned>> S.lo; S.hi = 32 - S.lo; X.lo = X.lo | (X.hi << S.hi); flags, S.hi = S.lo - 32; X.lo = X.lo | (X.hi signed>> S.hi) if unsigned>=; X.hi = X.hi signed>> S.lo");
#elif sparc32
 8:  readasm("sparc32; int8 X S; S = S & 7; X = X signed>> S");
16:  readasm("sparc32; int16 X S; S = S & 15; X = X signed>> S");
32:  readasm("sparc32; int32 X S; X = X signed>> S");
64:  TYPE Y, Z;
64:  readasm("sparc32; int64 X Y Z S; S.hi = ~S.lo; Z.hi = S.lo << 26; Z.lo = X.hi << 1; Y.lo = X.lo unsigned>> S.lo; Z.lo = Z.lo << S.hi; Y.hi = X.hi signed>> S.lo; Z.hi = Z.hi signed>> 31; Y.lo = Y.lo | Z.lo; Z.lo = Y.lo ^ Y.hi; Z.lo = Z.hi & Z.lo; X.hi = Y.hi signed>> Z.hi; X.lo = Y.lo ^ Z.lo");
#else
  /* Portable path.  NOTE(review): SIGNED_optblocker is defined
     elsewhere; presumably zero at run time, so the shift count is
     just S -- confirm. */
  X >>= S ^ SIGNED_optblocker;
#endif
  /* All branches fall through to here with the shifted value in X. */
  return TYPE_bottombit_01(X);
}

UNSIGNED:
TYPE TYPE_bitinrangepublicpos_mask(TYPE X,TYPE S) {
  /* Shifts X right (logical shift) by S and returns the bottom-bit mask
     of the result, i.e. all ones when bit S of X is set and 0
     otherwise.
     NOTE(review): the name suggests S is expected to be a public
     (non-secret) position within 0..N-1; only some branches reduce S
     explicitly, so out-of-range S is presumably unsupported -- confirm.
     The readasm strings are expanded elsewhere. */
#if amd64
 8:  readasm("amd64; int8 X S; X unsigned>>= S");
16:  readasm("amd64; int16 X S; X unsigned>>= S");
32:  readasm("amd64; int32 X S; X unsigned>>= S");
64:  readasm("amd64; int64 X S; X unsigned>>= S");
#elif arm64
 8:  readasm("arm64; int8 X S; X = (uint8) X; X = X unsigned>> S");
16:  readasm("arm64; int16 X S; X = (uint16) X; X = X unsigned>> S");
32:  readasm("arm64; int32 X S; X = X unsigned>> S");
64:  readasm("arm64; int64 X S; X = X unsigned>> S");
#elif arm32
 8:  readasm("arm32; int8 X S; S = S & 7; X = (uint8) X; X = X unsigned>> S");
16:  readasm("arm32; int16 X S; S = S & 15; X = (uint16) X; X = X unsigned>> S");
32:  readasm("arm32; int32 X S; S = S & 31; X = X unsigned>> S");
64:  readasm("arm32; int64 X S; S.lo = S.lo & 63; X.lo = X.lo unsigned>> S.lo; S.hi = 32 - S.lo; X.lo = X.lo | (X.hi << S.hi); S.hi = S.lo - 32; X.lo = X.lo | (X.hi unsigned>> S.hi); X.hi = X.hi unsigned>> S.lo");
#elif sparc32
 8:  readasm("sparc32; int8 X S; S = S & 7; X = X unsigned>> S");
16:  readasm("sparc32; int16 X S; S = S & 15; X = X unsigned>> S");
32:  readasm("sparc32; int32 X S; X = X unsigned>> S");
64:  TYPE Y, Z;
64:  readasm("sparc32; int64 X Y Z S; S.hi = ~S.lo; Z.hi = S.lo << 26; Z.lo = X.hi << 1; Y.lo = X.lo unsigned>> S.lo; Z.lo = Z.lo << S.hi; Y.hi = X.hi unsigned>> S.lo; Z.hi = Z.hi signed>> 31; Y.lo = Y.lo | Z.lo; Z.lo = Y.lo ^ Y.hi; X.hi = Z.hi & Y.hi; Z.lo = Z.hi & Z.lo; X.hi = X.hi ^ Y.hi; X.lo = Y.lo ^ Z.lo");
#else
  /* Portable path.  NOTE(review): SIGNED_optblocker is defined
     elsewhere; presumably zero at run time, so the shift count is
     just S -- confirm. */
  X >>= S ^ SIGNED_optblocker;
#endif
  /* All branches fall through to here with the shifted value in X. */
  return TYPE_bottombit_mask(X);
}

UNSIGNED:
TYPE TYPE_bitinrangepublicpos_01(TYPE X,TYPE S) {
  /* Shifts X right (logical shift) by S and returns bit 0 of the
     result, i.e. 1 when bit S of X is set and 0 otherwise.
     NOTE(review): the name suggests S is expected to be a public
     (non-secret) position within 0..N-1; only some branches reduce S
     explicitly, so out-of-range S is presumably unsupported -- confirm.
     The readasm strings are expanded elsewhere. */
#if amd64
 8:  readasm("amd64; int8 X S; X unsigned>>= S");
16:  readasm("amd64; int16 X S; X unsigned>>= S");
32:  readasm("amd64; int32 X S; X unsigned>>= S");
64:  readasm("amd64; int64 X S; X unsigned>>= S");
#elif arm64
 8:  readasm("arm64; int8 X S; X = (uint8) X; X = X unsigned>> S");
16:  readasm("arm64; int16 X S; X = (uint16) X; X = X unsigned>> S");
32:  readasm("arm64; int32 X S; X = X unsigned>> S");
64:  readasm("arm64; int64 X S; X = X unsigned>> S");
#elif arm32
 8:  readasm("arm32; int8 X S; S = S & 7; X = (uint8) X; X = X unsigned>> S");
16:  readasm("arm32; int16 X S; S = S & 15; X = (uint16) X; X = X unsigned>> S");
32:  readasm("arm32; int32 X S; S = S & 31; X = X unsigned>> S");
64:  readasm("arm32; int64 X S; S.lo = S.lo & 63; X.lo = X.lo unsigned>> S.lo; S.hi = 32 - S.lo; X.lo = X.lo | (X.hi << S.hi); S.hi = S.lo - 32; X.lo = X.lo | (X.hi unsigned>> S.hi); X.hi = X.hi unsigned>> S.lo");
#elif sparc32
 8:  readasm("sparc32; int8 X S; S = S & 7; X = X unsigned>> S");
16:  readasm("sparc32; int16 X S; S = S & 15; X = X unsigned>> S");
32:  readasm("sparc32; int32 X S; X = X unsigned>> S");
64:  TYPE Y, Z;
64:  readasm("sparc32; int64 X Y Z S; S.hi = ~S.lo; Z.hi = S.lo << 26; Z.lo = X.hi << 1; Y.lo = X.lo unsigned>> S.lo; Z.lo = Z.lo << S.hi; Y.hi = X.hi unsigned>> S.lo; Z.hi = Z.hi signed>> 31; Y.lo = Y.lo | Z.lo; Z.lo = Y.lo ^ Y.hi; X.hi = Z.hi & Y.hi; Z.lo = Z.hi & Z.lo; X.hi = X.hi ^ Y.hi; X.lo = Y.lo ^ Z.lo");
#else
  /* Portable path.  NOTE(review): SIGNED_optblocker is defined
     elsewhere; presumably zero at run time, so the shift count is
     just S -- confirm. */
  X >>= S ^ SIGNED_optblocker;
#endif
  /* All branches fall through to here with the shifted value in X. */
  return TYPE_bottombit_01(X);
}

SIGNED:
TYPE TYPE_shlmod(TYPE X,TYPE S) {
  /* Returns X shifted left by S reduced modulo the bit width.  The 8-
     and 16-bit lines mask S explicitly (7 or 15) before shifting.
     NOTE(review): several 32- and 64-bit lines carry no explicit mask,
     presumably because the underlying shift instruction already reduces
     the count -- confirm per architecture.  The readasm strings are
     expanded elsewhere. */
#if amd64
 8:  S &= 7;
 8:  readasm("amd64; int8 X S; X <<= S");
16:  S &= 15;
16:  readasm("amd64; int16 X S; X <<= S");
32:  readasm("amd64; int32 X S; X <<= S");
64:  readasm("amd64; int64 X S; X <<= S");
#elif arm64
 8:  S &= 7;
 8:  readasm("arm64; int8 X S; X = (uint8) X; X = X << S");
16:  S &= 15;
16:  readasm("arm64; int16 X S; X = (uint16) X; X = X << S");
32:  readasm("arm64; int32 X S; X = X << S");
64:  readasm("arm64; int64 X S; X = X << S");
#elif arm32
 8:  S &= 7;
 8:  readasm("arm32; int8 X S; X = X << S; X = (int8) X");
16:  S &= 15;
16:  readasm("arm32; int16 X S; X = X << S; X = (int16) X");
32:  S &= 31;
32:  readasm("arm32; int32 X S; X = X << S");
64:  readasm("arm32; int64 X S; S.lo = S.lo & 63; X.hi = X.hi << S.lo; S.hi = S.lo - 32; X.hi = X.hi | (X.lo << S.hi); S.hi = 32 - S.lo; X.hi = X.hi | (X.lo unsigned>> S.hi); X.lo = X.lo << S.lo");
#elif sparc32
 8:  S &= 7; S += 24;
 8:  readasm("sparc32; int8 X S; X = X << S; X = X signed>> 24");
16:  S &= 15; S += 16;
16:  readasm("sparc32; int16 X S; X = X << S; X = X signed>> 16");
32:  readasm("sparc32; int32 X S; X = X << S");
64:  TYPE Y, Z;
64:  readasm("sparc32; int64 X Y Z S; S.hi = ~S.lo; Z.hi = S.lo << 26; Z.lo = X.lo unsigned>> 1; Y.lo = X.lo << S.lo; Z.lo = Z.lo unsigned>> S.hi; Y.hi = X.hi << S.lo; Z.hi = Z.hi signed>> 31; Y.hi = Y.hi | Z.lo; Z.lo = Y.lo ^ Y.hi; X.lo = Z.hi & Y.lo; Z.lo = Z.hi & Z.lo; X.lo = X.lo ^ Y.lo; X.hi = Y.hi ^ Z.lo");
#else
  /* Portable path: for each L with K = 2^L below N, bit L of S (turned
     into an all-ones or zero mask) selects whether X is replaced by
     X << K.  Composing these steps shifts by the low bits of S without
     a data-dependent branch in this code. */
  int K, L;
  for (L = 0,K = 1;K < N;++L,K *= 2)
    X ^= (X ^ (X << K)) & TYPE_bitinrangepublicpos_mask(S,L);
#endif
  return X;
}

UNSIGNED:
TYPE TYPE_shlmod(TYPE X,TYPE S) {
  /* Returns X shifted left by S reduced modulo the bit width.  The 8-
     and 16-bit lines mask S explicitly (7 or 15) before shifting.
     NOTE(review): several 32- and 64-bit lines carry no explicit mask,
     presumably because the underlying shift instruction already reduces
     the count -- confirm per architecture.  The readasm strings are
     expanded elsewhere. */
#if amd64
 8:  S &= 7;
 8:  readasm("amd64; int8 X S; X <<= S");
16:  S &= 15;
16:  readasm("amd64; int16 X S; X <<= S");
32:  readasm("amd64; int32 X S; X <<= S");
64:  readasm("amd64; int64 X S; X <<= S");
#elif arm64
 8:  S &= 7;
 8:  readasm("arm64; int8 X S; X = (uint8) X; X = X << S");
16:  S &= 15;
16:  readasm("arm64; int16 X S; X = (uint16) X; X = X << S");
32:  readasm("arm64; int32 X S; X = X << S");
64:  readasm("arm64; int64 X S; X = X << S");
#elif arm32
 8:  S &= 7;
 8:  readasm("arm32; int8 X S; X = X << S; X = (uint8) X");
16:  S &= 15;
16:  readasm("arm32; int16 X S; X = X << S; X = (uint16) X");
32:  S &= 31;
32:  readasm("arm32; int32 X S; X = X << S");
64:  readasm("arm32; int64 X S; S.lo = S.lo & 63; X.hi = X.hi << S.lo; S.hi = S.lo - 32; X.hi = X.hi | (X.lo << S.hi); S.hi = 32 - S.lo; X.hi = X.hi | (X.lo unsigned>> S.hi); X.lo = X.lo << S.lo");
#elif sparc32
 8:  S &= 7;
 8:  readasm("sparc32; int8 X S; X = X << S; X = (uint8) X");
16:  S &= 15;
16:  readasm("sparc32; int16 X S; X = X << S; X = (uint16) X");
32:  readasm("sparc32; int32 X S; X = X << S");
64:  TYPE Y, Z;
64:  readasm("sparc32; int64 X Y Z S; S.hi = ~S.lo; Z.hi = S.lo << 26; Z.lo = X.lo unsigned>> 1; Y.lo = X.lo << S.lo; Z.lo = Z.lo unsigned>> S.hi; Y.hi = X.hi << S.lo; Z.hi = Z.hi signed>> 31; Y.hi = Y.hi | Z.lo; Z.lo = Y.lo ^ Y.hi; X.lo = Z.hi & Y.lo; Z.lo = Z.hi & Z.lo; X.lo = X.lo ^ Y.lo; X.hi = Y.hi ^ Z.lo");
#else
  /* Portable path: for each L with K = 2^L below N, bit L of S (turned
     into an all-ones or zero mask) selects whether X is replaced by
     X << K.  Composing these steps shifts by the low bits of S without
     a data-dependent branch in this code. */
  int K, L;
  for (L = 0,K = 1;K < N;++L,K *= 2)
    X ^= (X ^ (X << K)) & TYPE_bitinrangepublicpos_mask(S,L);
#endif
  return X;
}

SIGNED:
TYPE TYPE_shrmod(TYPE X,TYPE S) {
  /* Returns X shifted right (sign-propagating shift) by S reduced
     modulo the bit width.  The 8- and 16-bit lines mask S explicitly
     (7 or 15) before shifting.
     NOTE(review): several 32- and 64-bit lines carry no explicit mask,
     presumably because the underlying shift instruction already reduces
     the count -- confirm per architecture.  The readasm strings are
     expanded elsewhere. */
#if amd64
 8:  S &= 7;
 8:  readasm("amd64; int8 X S; X signed>>= S");
16:  S &= 15;
16:  readasm("amd64; int16 X S; X signed>>= S");
32:  readasm("amd64; int32 X S; X signed>>= S");
64:  readasm("amd64; int64 X S; X signed>>= S");
#elif arm64
 8:  S &= 7;
 8:  readasm("arm64; int8 X S; X = (int8) X; X = X signed>> S");
16:  S &= 15;
16:  readasm("arm64; int16 X S; X = (int16) X; X = X signed>> S");
32:  readasm("arm64; int32 X S; X = X signed>> S");
64:  readasm("arm64; int64 X S; X = X signed>> S");
#elif arm32
 8:  S &= 7;
 8:  readasm("arm32; int8 X S; X = (int8) X; X = X signed>> S");
16:  S &= 15;
16:  readasm("arm32; int16 X S; X = (int16) X; X = X signed>> S");
32:  S &= 31;
32:  readasm("arm32; int32 X S; X = X signed>> S");
64:  readasm("arm32; int64 X S; S.lo = S.lo & 63; X.lo = X.lo unsigned>> S.lo; S.hi = 32 - S.lo; X.lo = X.lo | (X.hi << S.hi); flags, S.hi = S.lo - 32; X.lo = X.lo | (X.hi signed>> S.hi) if unsigned>=; X.hi = X.hi signed>> S.lo");
#elif sparc32
 8:  S &= 7;
 8:  readasm("sparc32; int8 X S; X = X signed>> S");
16:  S &= 15;
16:  readasm("sparc32; int16 X S; X = X signed>> S");
32:  readasm("sparc32; int32 X S; X = X signed>> S");
64:  TYPE Y, Z;
64:  readasm("sparc32; int64 X Y Z S; S.hi = ~S.lo; Z.hi = S.lo << 26; Z.lo = X.hi << 1; Y.lo = X.lo unsigned>> S.lo; Z.lo = Z.lo << S.hi; Y.hi = X.hi signed>> S.lo; Z.hi = Z.hi signed>> 31; Y.lo = Y.lo | Z.lo; Z.lo = Y.lo ^ Y.hi; Z.lo = Z.hi & Z.lo; X.hi = Y.hi signed>> Z.hi; X.lo = Y.lo ^ Z.lo");
#else
  /* Portable path: for each L with K = 2^L below N, bit L of S (turned
     into an all-ones or zero mask) selects whether X is replaced by
     X >> K.  Composing these steps shifts by the low bits of S without
     a data-dependent branch in this code. */
  int K, L;
  for (L = 0,K = 1;K < N;++L,K *= 2)
    X ^= (X ^ (X >> K)) & TYPE_bitinrangepublicpos_mask(S,L);
#endif
  return X;
}

UNSIGNED:
TYPE TYPE_shrmod(TYPE X,TYPE S) {
  /* Returns X shifted right (logical shift) by S reduced modulo the bit
     width.  The 8- and 16-bit lines mask S explicitly (7 or 15) before
     shifting.
     NOTE(review): several 32- and 64-bit lines carry no explicit mask,
     presumably because the underlying shift instruction already reduces
     the count -- confirm per architecture.  The readasm strings are
     expanded elsewhere. */
#if amd64
 8:  S &= 7;
 8:  readasm("amd64; int8 X S; X unsigned>>= S");
16:  S &= 15;
16:  readasm("amd64; int16 X S; X unsigned>>= S");
32:  readasm("amd64; int32 X S; X unsigned>>= S");
64:  readasm("amd64; int64 X S; X unsigned>>= S");
#elif arm64
 8:  S &= 7;
 8:  readasm("arm64; int8 X S; X = (uint8) X; X = X unsigned>> S");
16:  S &= 15;
16:  readasm("arm64; int16 X S; X = (uint16) X; X = X unsigned>> S");
32:  readasm("arm64; int32 X S; X = X unsigned>> S");
64:  readasm("arm64; int64 X S; X = X unsigned>> S");
#elif arm32
 8:  S &= 7;
 8:  readasm("arm32; int8 X S; X = (uint8) X; X = X unsigned>> S");
16:  S &= 15;
16:  readasm("arm32; int16 X S; X = (uint16) X; X = X unsigned>> S");
32:  S &= 31;
32:  readasm("arm32; int32 X S; X = X unsigned>> S");
64:  readasm("arm32; int64 X S; S.lo = S.lo & 63; X.lo = X.lo unsigned>> S.lo; S.hi = 32 - S.lo; X.lo = X.lo | (X.hi << S.hi); S.hi = S.lo - 32; X.lo = X.lo | (X.hi unsigned>> S.hi); X.hi = X.hi unsigned>> S.lo");
#elif sparc32
 8:  S &= 7;
 8:  readasm("sparc32; int8 X S; X = X unsigned>> S");
16:  S &= 15;
16:  readasm("sparc32; int16 X S; X = X unsigned>> S");
32:  readasm("sparc32; int32 X S; X = X unsigned>> S");
64:  TYPE Y, Z;
64:  readasm("sparc32; int64 X Y Z S; S.hi = ~S.lo; Z.hi = S.lo << 26; Z.lo = X.hi << 1; Y.lo = X.lo unsigned>> S.lo; Z.lo = Z.lo << S.hi; Y.hi = X.hi unsigned>> S.lo; Z.hi = Z.hi signed>> 31; Y.lo = Y.lo | Z.lo; Z.lo = Y.lo ^ Y.hi; X.hi = Z.hi & Y.hi; Z.lo = Z.hi & Z.lo; X.hi = X.hi ^ Y.hi; X.lo = Y.lo ^ Z.lo");
#else
  /* Portable path: for each L with K = 2^L below N, bit L of S (turned
     into an all-ones or zero mask) selects whether X is replaced by
     X >> K.  Composing these steps shifts by the low bits of S without
     a data-dependent branch in this code. */
  int K, L;
  for (L = 0,K = 1;K < N;++L,K *= 2)
    X ^= (X ^ (X >> K)) & TYPE_bitinrangepublicpos_mask(S,L);
#endif
  return X;
}

TYPE TYPE_bitmod_mask(TYPE X,TYPE S) {
  /* Shifts X right with the modular shift helper (so S is reduced
     modulo the bit width) and returns the bottom-bit mask of the
     result: all ones when the selected bit of X is set, else 0. */
  X = TYPE_shrmod(X,S);
  return TYPE_bottombit_mask(X);
}

TYPE TYPE_bitmod_01(TYPE X,TYPE S) {
  /* Shifts X right with the modular shift helper (so S is reduced
     modulo the bit width) and returns bit 0 of the result: 1 when the
     selected bit of X is set, else 0. */
  X = TYPE_shrmod(X,S);
  return TYPE_bottombit_01(X);
}

TYPE TYPE_nonzero_mask(TYPE X) {
  /* Returns an all-ones value (-1) when X is nonzero and 0 when X is
     zero.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into inline assembly using a flag test plus conditional
     move or select) -- confirm against the generator. */
#if amd64
  /* The 8-bit lines stage X in 32-bit temporaries and copy the 32-bit
     result back into Z. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Z32; Z32 = 0; Q32 = -1; X32 & (int8) X32; Z32 = Q32 if !=");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Z; Z = 0; Q = -1; X & X; Z = Q if !=");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Z; Z = 0; Q = -1; X & X; Z = Q if !=");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Z; Z = 0; Q = -1; X & X; Z = Q if !=");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Z; X & 255; Z = -1 if != else 0");
16:  readasm("arm64; int16 X Z; X & 65535; Z = -1 if != else 0");
32:  readasm("arm64; int32 X Z; X - 0; Z = -1 if != else 0");
64:  readasm("arm64; int64 X Z; X - 0; Z = -1 if != else 0");
  return Z;
#elif arm32
  /* Here X is overwritten in place: it becomes -1 when nonzero and is
     otherwise left at its (zero) value. */
 8:  readasm("arm32; int8 X; X = (uint8) X; X - 0; X = -1 if !=");
16:  readasm("arm32; int16 X; X = (uint16) X; X - 0; X = -1 if !=");
32:  readasm("arm32; int32 X; X - 0; X = -1 if !=");
64:  readasm("arm32; int64 X; flags, X.lo = X.lo | X.hi; X.lo = -1 if !=; X.hi = X.lo");
  return X;
#elif sparc32
  TYPE Z;
 8:  readasm("sparc32; int8 X Z; X = (uint8) X; 0 - X; Z = -carry");
16:  readasm("sparc32; int16 X Z; X = (uint16) X; 0 - X; Z = -carry");
32:  readasm("sparc32; int32 X Z; 0 - X; Z = -carry");
64:  readasm("sparc32; int64 X Z; Z.lo = X.lo | X.hi; 0 - Z.lo; Z.lo = -carry; Z.hi = Z.lo");
  return Z;
#else
  /* Portable path: for nonzero X at least one of X and -X has the top
     bit set, so X | -X has its top bit set exactly when X is nonzero;
     the negative-mask helper then spreads that bit. */
  X |= -X;
  return SIGNED_negative_mask(X);
#endif
}

TYPE TYPE_nonzero_01(TYPE X) {
  /* Returns 1 when X is nonzero and 0 when X is zero.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into inline assembly using a flag test plus conditional
     move or select) -- confirm against the generator. */
#if amd64
  /* The 8-bit lines stage X in 32-bit temporaries and copy the 32-bit
     result back into Z. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Z32; Z32 = 0; Q32 = 1; X32 & (int8) X32; Z32 = Q32 if !=");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Z; Z = 0; Q = 1; X & X; Z = Q if !=");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Z; Z = 0; Q = 1; X & X; Z = Q if !=");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Z; Z = 0; Q = 1; X & X; Z = Q if !=");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Z; X & 255; Z = 1 if != else 0");
16:  readasm("arm64; int16 X Z; X & 65535; Z = 1 if != else 0");
32:  readasm("arm64; int32 X Z; X - 0; Z = 1 if != else 0");
64:  readasm("arm64; int64 X Z; X - 0; Z = 1 if != else 0");
  return Z;
#elif arm32
  /* Here X is overwritten in place: it becomes 1 when nonzero and is
     otherwise left at its (zero) value. */
 8:  readasm("arm32; int8 X; X = (uint8) X; X - 0; X = 1 if !=");
16:  readasm("arm32; int16 X; X = (uint16) X; X - 0; X = 1 if !=");
32:  readasm("arm32; int32 X; X - 0; X = 1 if !=");
64:  readasm("arm32; int64 X; flags, X.lo = X.lo | X.hi; X.lo = 1 if !=; X.hi = 0");
  return X;
#elif sparc32
  TYPE Z;
 8:  readasm("sparc32; int8 X Z; X = (uint8) X; 0 - X; Z = carry");
16:  readasm("sparc32; int16 X Z; X = (uint16) X; 0 - X; Z = carry");
32:  readasm("sparc32; int32 X Z; 0 - X; Z = carry");
64:  readasm("sparc32; int64 X Z; Z.lo = X.lo | X.hi; 0 - Z.lo; Z.lo = carry; Z.hi = 0");
  return Z;
#else
  /* Portable path: for nonzero X at least one of X and -X has the top
     bit set, so X | -X has its top bit set exactly when X is nonzero;
     the top-bit helper then extracts that bit as 0 or 1. */
  X |= -X;
  return UNSIGNED_topbit_01(X);
#endif
}

SIGNED:
TYPE TYPE_positive_mask(TYPE X) {
  /* Returns -1 (all bits set) when X is strictly greater than zero and
     0 otherwise.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into inline assembly) -- confirm against the generator. */
#if amd64
  /* The 8-bit lines stage X in 32-bit temporaries and copy the 32-bit
     result back into Z. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Z32; Z32 = 0; Q32 = -1; X32 & (int8) X32; Z32 = Q32 if signed>");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Z; Z = 0; Q = -1; X & X; Z = Q if signed>");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Z; Z = 0; Q = -1; X & X; Z = Q if signed>");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Z; Z = 0; Q = -1; X & X; Z = Q if signed>");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Z; Z = (int8) X; Z - 0; Z = -1 if signed> else 0");
16:  readasm("arm64; int16 X Z; Z = (int16) X; Z - 0; Z = -1 if signed> else 0");
32:  readasm("arm64; int32 X Z; X - 0; Z = -1 if signed> else 0");
64:  readasm("arm64; int64 X Z; X - 0; Z = -1 if signed> else 0");
  return Z;
#elif sparc32
  TYPE Z;
 8:  readasm("sparc32; int8 X Z; Z = X << 24; Z = Z signed>> 31; Z = Z - X; Z = Z signed>> 31");
16:  readasm("sparc32; int16 X Z; Z = X << 16; Z = Z signed>> 31; Z = Z - X; Z = Z signed>> 31");
32:  readasm("sparc32; int32 X Z; Z = X signed>> 31; Z = Z - X; Z = Z signed>> 31");
64:  readasm("sparc32; int64 X Z; Z.lo = X.hi signed>> 31; carry, Z.hi = Z.lo - X.lo; Z.hi = Z.lo - X.hi - carry; Z.hi = Z.hi signed>> 31; Z.lo = Z.hi");
  return Z;
#else
  /* Portable path: Z = -X is negative exactly for positive X, except
     that negating the most negative value can leave the top bit set.
     Clearing from Z every bit that is also set in X removes the top bit
     whenever X itself is negative, so the top bit of Z ends up set only
     for X > 0. */
  TYPE Z = -X;
  Z ^= X & Z;
  return TYPE_negative_mask(Z);
#endif
}

SIGNED:
TYPE TYPE_positive_01(TYPE X) {
  /* Returns 1 when X is strictly greater than zero and 0 otherwise.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into inline assembly) -- confirm against the generator. */
#if amd64
  /* The 8-bit lines stage X in 32-bit temporaries and copy the 32-bit
     result back into Z. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Z32; Z32 = 0; Q32 = 1; X32 & (int8) X32; Z32 = Q32 if signed>");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Z; Z = 0; Q = 1; X & X; Z = Q if signed>");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Z; Z = 0; Q = 1; X & X; Z = Q if signed>");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Z; Z = 0; Q = 1; X & X; Z = Q if signed>");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Z; Z = (int8) X; Z - 0; Z = 1 if signed> else 0");
16:  readasm("arm64; int16 X Z; Z = (int16) X; Z - 0; Z = 1 if signed> else 0");
32:  readasm("arm64; int32 X Z; X - 0; Z = 1 if signed> else 0");
64:  readasm("arm64; int64 X Z; X - 0; Z = 1 if signed> else 0");
  return Z;
#elif sparc32
  TYPE Z;
 8:  readasm("sparc32; int8 X Z; Z = X << 24; Z = Z signed>> 31; Z = Z - X; Z = Z unsigned>> 31");
16:  readasm("sparc32; int16 X Z; Z = X << 16; Z = Z signed>> 31; Z = Z - X; Z = Z unsigned>> 31");
32:  readasm("sparc32; int32 X Z; Z = X signed>> 31; Z = Z - X; Z = Z unsigned>> 31");
64:  readasm("sparc32; int64 X Z; Z.hi = X.hi signed>> 31; carry, Z.lo = Z.hi - X.lo; Z.lo = Z.hi - X.hi - carry; Z.lo = Z.lo unsigned>> 31; Z.hi = 0");
  return Z;
#else
  /* Portable path: Z = -X is negative exactly for positive X, except
     that negating the most negative value can leave the top bit set.
     Clearing from Z every bit that is also set in X removes the top bit
     whenever X itself is negative, so the top bit of Z ends up set only
     for X > 0; that bit is then returned as 0 or 1. */
  TYPE Z = -X;
  Z ^= X & Z;
  return UNSIGNED_topbit_01(Z);
#endif
}

TYPE TYPE_zero_mask(TYPE X) {
  /* Returns an all-ones value (-1) when X is zero and 0 when X is
     nonzero.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into inline assembly) -- confirm against the generator. */
#if amd64
  /* The 8-bit lines stage X in 32-bit temporaries and copy the 32-bit
     result back into Z. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Z32; Z32 = 0; Q32 = -1; X32 & (int8) X32; Z32 = Q32 if =");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Z; Z = 0; Q = -1; X & X; Z = Q if =");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Z; Z = 0; Q = -1; X & X; Z = Q if =");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Z; Z = 0; Q = -1; X & X; Z = Q if =");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Z; X & 255; Z = -1 if = else 0");
16:  readasm("arm64; int16 X Z; X & 65535; Z = -1 if = else 0");
32:  readasm("arm64; int32 X Z; X - 0; Z = -1 if = else 0");
64:  readasm("arm64; int64 X Z; X - 0; Z = -1 if = else 0");
  return Z;
#elif sparc32
  TYPE Z;
 8:  readasm("sparc32; int8 X Z; X = (uint8) X; 0 - X; Z = carry - 1");
16:  readasm("sparc32; int16 X Z; X = (uint16) X; 0 - X; Z = carry - 1");
32:  readasm("sparc32; int32 X Z; 0 - X; Z = carry - 1");
64:  readasm("sparc32; int64 X Z; Z.lo = X.lo | X.hi; 0 - Z.lo; Z.lo = carry - 1; Z.hi = Z.lo");
  return Z;
#else
  /* Portable path: bitwise complement of the nonzero mask. */
  return ~TYPE_nonzero_mask(X);
#endif
}

TYPE TYPE_zero_01(TYPE X) {
  /* Returns 1 when X is zero and 0 when X is nonzero.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into inline assembly) -- confirm against the generator. */
#if amd64
  /* The 8-bit lines stage X in 32-bit temporaries and copy the 32-bit
     result back into Z. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Z32; Z32 = 0; Q32 = 1; X32 & (int8) X32; Z32 = Q32 if =");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Z; Z = 0; Q = 1; X & X; Z = Q if =");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Z; Z = 0; Q = 1; X & X; Z = Q if =");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Z; Z = 0; Q = 1; X & X; Z = Q if =");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Z; X & 255; Z = 1 if = else 0");
16:  readasm("arm64; int16 X Z; X & 65535; Z = 1 if = else 0");
32:  readasm("arm64; int32 X Z; X - 0; Z = 1 if = else 0");
64:  readasm("arm64; int64 X Z; X - 0; Z = 1 if = else 0");
  return Z;
#elif sparc32
  TYPE Z;
 8:  readasm("sparc32; int8 X Z; X = (uint8) X; 0 - X; Z = 1 - carry");
16:  readasm("sparc32; int16 X Z; X = (uint16) X; 0 - X; Z = 1 - carry");
32:  readasm("sparc32; int32 X Z; 0 - X; Z = 1 - carry");
64:  readasm("sparc32; int64 X Z; Z.lo = X.lo | X.hi; 0 - Z.lo; Z.lo = 1 - carry; Z.hi = 0");
  return Z;
#else
  /* Portable path: one minus the 0/1 nonzero indicator. */
  return 1-TYPE_nonzero_01(X);
#endif
}

TYPE TYPE_unequal_mask(TYPE X,TYPE Y) {
  /* Returns an all-ones value (-1) when X differs from Y and 0 when
     they are equal.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into a compare plus conditional move or select) --
     confirm against the generator. */
#if amd64
  /* The 8-bit lines stage both operands in 32-bit temporaries and copy
     the 32-bit result back into Z. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Y32 = Y,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Y32 Z32; Z32 = 0; Q32 = -1; X32 - (int8) Y32; Z32 = Q32 if !=");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if !=");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if !=");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if !=");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Y Z; Z = (uint8) X; Z - (uint8) Y; Z = -1 if != else 0");
16:  readasm("arm64; int16 X Y Z; Z = (uint16) X; Z - (uint16) Y; Z = -1 if != else 0");
32:  readasm("arm64; int32 X Y Z; X - Y; Z = -1 if != else 0");
64:  readasm("arm64; int64 X Y Z; X - Y; Z = -1 if != else 0");
  return Z;
#else
  /* Portable path: X ^ Y is nonzero exactly when the inputs differ. */
  return TYPE_nonzero_mask(X ^ Y);
#endif
}

TYPE TYPE_unequal_01(TYPE X,TYPE Y) {
  /* Returns 1 when X differs from Y and 0 when they are equal.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into a compare plus conditional move or select) --
     confirm against the generator. */
#if amd64
  /* The 8-bit lines stage both operands in 32-bit temporaries and copy
     the 32-bit result back into Z. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Y32 = Y,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Y32 Z32; Z32 = 0; Q32 = 1; X32 - (int8) Y32; Z32 = Q32 if !=");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if !=");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if !=");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if !=");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Y Z; Z = (uint8) X; Z - (uint8) Y; Z = 1 if != else 0");
16:  readasm("arm64; int16 X Y Z; Z = (uint16) X; Z - (uint16) Y; Z = 1 if != else 0");
32:  readasm("arm64; int32 X Y Z; X - Y; Z = 1 if != else 0");
64:  readasm("arm64; int64 X Y Z; X - Y; Z = 1 if != else 0");
  return Z;
#else
  /* Portable path: X ^ Y is nonzero exactly when the inputs differ. */
  return TYPE_nonzero_01(X ^ Y);
#endif
}

TYPE TYPE_equal_mask(TYPE X,TYPE Y) {
  /* Returns an all-ones value (-1) when X equals Y and 0 when they
     differ.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into a compare plus conditional move or select) --
     confirm against the generator. */
#if amd64
  /* The 8-bit lines stage both operands in 32-bit temporaries and copy
     the 32-bit result back into Z. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Y32 = Y,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Y32 Z32; Z32 = 0; Q32 = -1; X32 - (int8) Y32; Z32 = Q32 if =");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if =");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if =");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if =");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Y Z; Z = (uint8) X; Z - (uint8) Y; Z = -1 if = else 0");
16:  readasm("arm64; int16 X Y Z; Z = (uint16) X; Z - (uint16) Y; Z = -1 if = else 0");
32:  readasm("arm64; int32 X Y Z; X - Y; Z = -1 if = else 0");
64:  readasm("arm64; int64 X Y Z; X - Y; Z = -1 if = else 0");
  return Z;
#else
  /* Portable path: X ^ Y is zero exactly when the inputs are equal. */
  return TYPE_zero_mask(X ^ Y);
#endif
}

TYPE TYPE_equal_01(TYPE X,TYPE Y) {
  /* Returns 1 when X equals Y and 0 when they differ.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into a compare plus conditional move or select) --
     confirm against the generator. */
#if amd64
  /* The 8-bit lines stage both operands in 32-bit temporaries and copy
     the 32-bit result back into Z. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Y32 = Y,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Y32 Z32; Z32 = 0; Q32 = 1; X32 - (int8) Y32; Z32 = Q32 if =");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if =");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if =");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if =");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Y Z; Z = (uint8) X; Z - (uint8) Y; Z = 1 if = else 0");
16:  readasm("arm64; int16 X Y Z; Z = (uint16) X; Z - (uint16) Y; Z = 1 if = else 0");
32:  readasm("arm64; int32 X Y Z; X - Y; Z = 1 if = else 0");
64:  readasm("arm64; int64 X Y Z; X - Y; Z = 1 if = else 0");
  return Z;
#else
  /* Portable path: X ^ Y is zero exactly when the inputs are equal. */
  return TYPE_zero_01(X ^ Y);
#endif
}

SIGNED:
TYPE TYPE_min(TYPE X,TYPE Y) {
  /* Returns the smaller of X and Y under signed comparison.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into a compare plus conditional move or select) --
     confirm against the generator. */
#if amd64
  /* The 8-bit lines stage both operands in 32-bit temporaries and copy
     the selected value back into X. */
 8:  int32_t X32 = X,Y32 = Y;
 8:  readasm("amd64; int32 X32 Y32; X32 - (int8) Y32; X32 = Y32 if signed>");
 8:  X = X32;
16:  readasm("amd64; int16 X Y; X - Y; X = Y if signed>");
32:  readasm("amd64; int32 X Y; X - Y; X = Y if signed>");
64:  readasm("amd64; int64 X Y; X - Y; X = Y if signed>");
  return X;
#elif arm64
 8:  readasm("arm64; int8 X Y; X = (int8) X; X - (int8) Y; X = X if signed< else Y");
16:  readasm("arm64; int16 X Y; X = (int16) X; X - (int16) Y; X = X if signed< else Y");
32:  readasm("arm64; int32 X Y; X - Y; X = X if signed< else Y");
64:  readasm("arm64; int64 X Y; X - Y; X = X if signed< else Y");
  return X;
#else
  /* Portable path.  R holds the bits in which the inputs differ and Z
     starts as Y - X.  When the signs of X and Y match, the top bit of
     Z already says whether Y < X; when they differ (top bit of R set)
     the correction step replaces the top bit of Z with the top bit of
     Y, which avoids trusting an overflowed difference.  After masking,
     Z equals R when Y < X and 0 otherwise, so X ^ Z yields Y or X. */
  TYPE R = Y ^ X;
  TYPE Z = Y - X;
  Z ^= R & (Z ^ Y);
  Z = TYPE_negative_mask(Z);
  Z &= R;
  return X ^ Z;
#endif
}

UNSIGNED:
TYPE TYPE_min(TYPE X,TYPE Y) {
  /* Returns the smaller of X and Y under unsigned comparison.
     NOTE(review): the readasm strings are expanded elsewhere
     (presumably into a compare plus conditional move or select) --
     confirm against the generator. */
#if amd64
  /* The 8-bit lines stage both operands in 32-bit temporaries and copy
     the selected value back into X. */
 8:  uint32_t X32 = X,Y32 = Y;
 8:  readasm("amd64; int32 X32 Y32; X32 - (int8) Y32; X32 = Y32 if unsigned>");
 8:  X = X32;
16:  readasm("amd64; int16 X Y; X - Y; X = Y if unsigned>");
32:  readasm("amd64; int32 X Y; X - Y; X = Y if unsigned>");
64:  readasm("amd64; int64 X Y; X - Y; X = Y if unsigned>");
  return X;
#elif arm64
 8:  readasm("arm64; int8 X Y; X = (uint8) X; X - (uint8) Y; X = X if unsigned< else Y");
16:  readasm("arm64; int16 X Y; X = (uint16) X; X - (uint16) Y; X = X if unsigned< else Y");
32:  readasm("arm64; int32 X Y; X - Y; X = X if unsigned< else Y");
64:  readasm("arm64; int64 X Y; X - Y; X = X if unsigned< else Y");
  return X;
#else
  /* Portable path.  R holds the bits in which the inputs differ and Z
     starts as Y - X (modular).  When the top bits of X and Y match, the
     top bit of Z already says whether Y < X; when they differ (top bit
     of R set) the correction step replaces the top bit of Z with the
     inverted top bit of Y, since the input with the clear top bit is
     the smaller one.  After masking, Z equals R when Y < X and 0
     otherwise, so X ^ Z yields Y or X. */
  TYPE R = Y ^ X;
  TYPE Z = Y - X;
  Z ^= R & (Z ^ Y ^ (((TYPE) 1) << (N-1)));
  Z = SIGNED_negative_mask(Z);
  Z &= R;
  return X ^ Z;
#endif
}

SIGNED:
TYPE TYPE_max(TYPE X,TYPE Y) {
  /* Returns the larger of X and Y, compared as signed values. */
  /* amd64 and arm64: each asm description compares X with Y and then selects; */
  /* the 8-bit amd64 lines do this on 32-bit copies, comparing against (int8) Y32. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
#if amd64
 8:  int32_t X32 = X,Y32 = Y;
 8:  readasm("amd64; int32 X32 Y32; X32 - (int8) Y32; X32 = Y32 if signed<");
 8:  X = X32;
16:  readasm("amd64; int16 X Y; X - Y; X = Y if signed<");
32:  readasm("amd64; int32 X Y; X - Y; X = Y if signed<");
64:  readasm("amd64; int64 X Y; X - Y; X = Y if signed<");
  return X;
#elif arm64
 8:  readasm("arm64; int8 X Y; X = (int8) X; X - (int8) Y; X = Y if signed< else X");
16:  readasm("arm64; int16 X Y; X = (int16) X; X - (int16) Y; X = Y if signed< else X");
32:  readasm("arm64; int32 X Y; X - Y; X = Y if signed< else X");
64:  readasm("arm64; int64 X Y; X - Y; X = Y if signed< else X");
  return X;
#else
  /* Portable path: no branch on X or Y. R marks the bits where X and Y differ. */
  TYPE R = Y ^ X;
  TYPE Z = Y - X;
  /* Wherever X and Y differ, replace the bit of Z by the bit of Y. The top bit of Z */
  /* is then the sign of Y when the signs differ, and the sign of Y - X otherwise. */
  Z ^= R & (Z ^ Y);
  /* Spread the top bit: Z becomes all ones when Y < X, else 0. */
  Z = TYPE_negative_mask(Z);
  /* Z is now R when Y < X, else 0, so Y ^ Z is X when Y < X, else Y. */
  Z &= R;
  return Y ^ Z;
#endif
}

UNSIGNED:
TYPE TYPE_max(TYPE X,TYPE Y) {
  /* Returns the larger of X and Y, compared as unsigned values. */
  /* amd64 and arm64: each asm description compares X with Y and then selects; */
  /* the 8-bit amd64 lines do this on 32-bit copies, comparing against (int8) Y32. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
#if amd64
 8:  uint32_t X32 = X,Y32 = Y;
 8:  readasm("amd64; int32 X32 Y32; X32 - (int8) Y32; X32 = Y32 if unsigned<");
 8:  X = X32;
16:  readasm("amd64; int16 X Y; X - Y; X = Y if unsigned<");
32:  readasm("amd64; int32 X Y; X - Y; X = Y if unsigned<");
64:  readasm("amd64; int64 X Y; X - Y; X = Y if unsigned<");
  return X;
#elif arm64
 8:  readasm("arm64; int8 X Y; X = (uint8) X; X - (uint8) Y; X = Y if unsigned< else X");
16:  readasm("arm64; int16 X Y; X = (uint16) X; X - (uint16) Y; X = Y if unsigned< else X");
32:  readasm("arm64; int32 X Y; X - Y; X = Y if unsigned< else X");
64:  readasm("arm64; int64 X Y; X - Y; X = Y if unsigned< else X");
  return X;
#else
  /* Portable path: no branch on X or Y. R marks the bits where X and Y differ. */
  TYPE R = Y ^ X;
  TYPE Z = Y - X;
  /* Wherever X and Y differ, replace the bit of Z by the bit of Y, except that the */
  /* top bit gets the flipped bit of Y (the top bit of X). The top bit of Z is then */
  /* the top bit of X when the top bits differ, and the top bit of Y - X otherwise. */
  Z ^= R & (Z ^ Y ^ (((TYPE) 1) << (N-1)));
  /* Spread the top bit: Z becomes all ones when Y < X, else 0. */
  Z = SIGNED_negative_mask(Z);
  /* Z is now R when Y < X, else 0, so Y ^ Z is X when Y < X, else Y. */
  Z &= R;
  return Y ^ Z;
#endif
}

SIGNED:
void TYPE_minmax(TYPE *P,TYPE *Q) {
  /* Orders the pair in place under signed comparison: on return *P holds the */
  /* smaller and *Q the larger of the two values read from *P and *Q. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
  TYPE X = *P;
  TYPE Y = *Q;
#if amd64
  /* Compare once, keep a copy of X in a temporary, then exchange X and Y when X > Y. */
  /* The 8-bit lines do this on 32-bit copies, comparing against (int8) Y32. */
 8:  int32_t X32 = X,Y32 = Y,Z32;
 8:  readasm("amd64; int32 X32 Y32 Z32; X32 - (int8) Y32; Z32 = X32; X32 = Y32 if signed>; Y32 = Z32 if signed>");
 8:  X = X32; Y = Y32;
16:  TYPE Z;
16:  readasm("amd64; int16 X Y Z; X - Y; Z = X; X = Y if signed>; Y = Z if signed>");
32:  TYPE Z;
32:  readasm("amd64; int32 X Y Z; X - Y; Z = X; X = Y if signed>; Y = Z if signed>");
64:  TYPE Z;
64:  readasm("amd64; int64 X Y Z; X - Y; Z = X; X = Y if signed>; Y = Z if signed>");
  *P = X;
  *Q = Y;
#elif arm64
  /* Compare once, then select the smaller value into R and the larger into S. */
  TYPE R, S;
 8:  readasm("arm64; int8 X Y R S; X = (int8) X; X - (int8) Y; R = X if signed< else Y; S = Y if signed< else X");
16:  readasm("arm64; int16 X Y R S; X = (int16) X; X - (int16) Y; R = X if signed< else Y; S = Y if signed< else X");
32:  readasm("arm64; int32 X Y R S; X - Y; R = X if signed< else Y; S = Y if signed< else X");
64:  readasm("arm64; int64 X Y R S; X - Y; R = X if signed< else Y; S = Y if signed< else X");
  *P = R;
  *Q = S;
#else
  /* Portable path: no branch on X or Y. R marks the bits where X and Y differ. */
  TYPE R = Y ^ X;
  TYPE Z = Y - X;
  /* Wherever X and Y differ, replace the bit of Z by the bit of Y. The top bit of Z */
  /* is then the sign of Y when the signs differ, and the sign of Y - X otherwise. */
  Z ^= R & (Z ^ Y);
  /* Spread the top bit: Z becomes all ones when Y < X, else 0. */
  Z = TYPE_negative_mask(Z);
  /* Z is now R when Y < X, else 0; xoring it into both values exchanges them */
  /* exactly when Y < X. */
  Z &= R;
  X ^= Z;
  Y ^= Z;
  *P = X;
  *Q = Y;
#endif
}

UNSIGNED:
void TYPE_minmax(TYPE *P,TYPE *Q) {
  /* Orders the pair in place under unsigned comparison: on return *P holds the */
  /* smaller and *Q the larger of the two values read from *P and *Q. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
  TYPE X = *P;
  TYPE Y = *Q;
#if amd64
  /* Compare once, keep a copy of X in a temporary, then exchange X and Y when X > Y. */
  /* The 8-bit lines do this on 32-bit copies, comparing against (int8) Y32. */
 8:  uint32_t X32 = X,Y32 = Y,Z32;
 8:  readasm("amd64; int32 X32 Y32 Z32; X32 - (int8) Y32; Z32 = X32; X32 = Y32 if unsigned>; Y32 = Z32 if unsigned>");
 8:  X = X32; Y = Y32;
16:  TYPE Z;
16:  readasm("amd64; int16 X Y Z; X - Y; Z = X; X = Y if unsigned>; Y = Z if unsigned>");
32:  TYPE Z;
32:  readasm("amd64; int32 X Y Z; X - Y; Z = X; X = Y if unsigned>; Y = Z if unsigned>");
64:  TYPE Z;
64:  readasm("amd64; int64 X Y Z; X - Y; Z = X; X = Y if unsigned>; Y = Z if unsigned>");
  *P = X;
  *Q = Y;
#elif arm64
  /* Compare once, then select the smaller value into R and the larger into S. */
  TYPE R, S;
 8:  readasm("arm64; int8 X Y R S; X = (uint8) X; X - (uint8) Y; R = X if unsigned< else Y; S = Y if unsigned< else X");
16:  readasm("arm64; int16 X Y R S; X = (uint16) X; X - (uint16) Y; R = X if unsigned< else Y; S = Y if unsigned< else X");
32:  readasm("arm64; int32 X Y R S; X - Y; R = X if unsigned< else Y; S = Y if unsigned< else X");
64:  readasm("arm64; int64 X Y R S; X - Y; R = X if unsigned< else Y; S = Y if unsigned< else X");
  *P = R;
  *Q = S;
#else
  /* Portable path: no branch on X or Y. R marks the bits where X and Y differ. */
  TYPE R = Y ^ X;
  TYPE Z = Y - X;
  /* Wherever X and Y differ, replace the bit of Z by the bit of Y, except that the */
  /* top bit gets the flipped bit of Y (the top bit of X). The top bit of Z is then */
  /* the top bit of X when the top bits differ, and the top bit of Y - X otherwise. */
  Z ^= R & (Z ^ Y ^ (((TYPE) 1) << (N-1)));
  /* Spread the top bit: Z becomes all ones when Y < X, else 0. */
  Z = SIGNED_negative_mask(Z);
  /* Z is now R when Y < X, else 0; xoring it into both values exchanges them */
  /* exactly when Y < X. */
  Z &= R;
  X ^= Z;
  Y ^= Z;
  *P = X;
  *Q = Y;
#endif
}

SIGNED:
TYPE TYPE_smaller_mask(TYPE X,TYPE Y) {
  /* Returns -1 (all bits set) when X < Y under signed comparison, else 0. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
#if amd64
  /* Start from Z = 0 with Q = -1 on the side, compare, then move Q into Z when X < Y. */
  /* The 8-bit lines do this on 32-bit copies, comparing against (int8) Y32. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Y32 = Y,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Y32 Z32; Z32 = 0; Q32 = -1; X32 - (int8) Y32; Z32 = Q32 if signed<");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if signed<");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if signed<");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if signed<");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Y Z; Z = (int8) X; Z - (int8) Y; Z = -1 if signed< else 0");
16:  readasm("arm64; int16 X Y Z; Z = (int16) X; Z - (int16) Y; Z = -1 if signed< else 0");
32:  readasm("arm64; int32 X Y Z; X - Y; Z = -1 if signed< else 0");
64:  readasm("arm64; int64 X Y Z; X - Y; Z = -1 if signed< else 0");
  return Z;
#else
  /* Portable path: no branch on X or Y. R marks the bits where X and Y differ. */
  TYPE R = X ^ Y;
  TYPE Z = X - Y;
  /* Wherever X and Y differ, replace the bit of Z by the bit of X. The top bit of Z */
  /* is then the sign of X when the signs differ, and the sign of X - Y otherwise, */
  /* so it is set exactly when X < Y; the mask helper spreads it to every bit. */
  Z ^= R & (Z ^ X);
  return TYPE_negative_mask(Z);
#endif
}

SIGNED:
TYPE TYPE_smaller_01(TYPE X,TYPE Y) {
  /* Returns 1 when X < Y under signed comparison, else 0. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
#if amd64
  /* Start from Z = 0 with Q = 1 on the side, compare, then move Q into Z when X < Y. */
  /* The 8-bit lines do this on 32-bit copies, comparing against (int8) Y32. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Y32 = Y,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Y32 Z32; Z32 = 0; Q32 = 1; X32 - (int8) Y32; Z32 = Q32 if signed<");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if signed<");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if signed<");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if signed<");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Y Z; Z = (int8) X; Z - (int8) Y; Z = 1 if signed< else 0");
16:  readasm("arm64; int16 X Y Z; Z = (int16) X; Z - (int16) Y; Z = 1 if signed< else 0");
32:  readasm("arm64; int32 X Y Z; X - Y; Z = 1 if signed< else 0");
64:  readasm("arm64; int64 X Y Z; X - Y; Z = 1 if signed< else 0");
  return Z;
#else
  /* Portable path: no branch on X or Y. R marks the bits where X and Y differ. */
  TYPE R = X ^ Y;
  TYPE Z = X - Y;
  /* Wherever X and Y differ, replace the bit of Z by the bit of X. The top bit of Z */
  /* is then the sign of X when the signs differ, and the sign of X - Y otherwise, */
  /* so it is set exactly when X < Y; the topbit helper extracts it as 0 or 1. */
  Z ^= R & (Z ^ X);
  return UNSIGNED_topbit_01(Z);
#endif
}

UNSIGNED:
TYPE TYPE_smaller_mask(TYPE X,TYPE Y) {
  /* Returns a value with all bits set when X < Y under unsigned comparison, else 0. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
#if amd64
  /* Start from Z = 0 with Q = -1 on the side, compare, then move Q into Z when X < Y. */
  /* The 8-bit lines do this on 32-bit copies, comparing against (int8) Y32. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Y32 = Y,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Y32 Z32; Z32 = 0; Q32 = -1; X32 - (int8) Y32; Z32 = Q32 if unsigned<");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if unsigned<");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if unsigned<");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if unsigned<");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Y Z; Z = (uint8) X; Z - (uint8) Y; Z = -1 if unsigned< else 0");
16:  readasm("arm64; int16 X Y Z; Z = (uint16) X; Z - (uint16) Y; Z = -1 if unsigned< else 0");
32:  readasm("arm64; int32 X Y Z; X - Y; Z = -1 if unsigned< else 0");
64:  readasm("arm64; int64 X Y Z; X - Y; Z = -1 if unsigned< else 0");
  return Z;
#else
  /* Portable path: no branch on X or Y. R marks the bits where X and Y differ. */
  TYPE R = X ^ Y;
  TYPE Z = X - Y;
  /* Wherever X and Y differ, replace the bit of Z by the bit of X, except that the */
  /* top bit gets the flipped bit of X (the top bit of Y). The top bit of Z is then */
  /* the top bit of Y when the top bits differ, and the top bit of X - Y otherwise, */
  /* so it is set exactly when X < Y; the mask helper spreads it to every bit. */
  Z ^= R & (Z ^ X ^ (((TYPE) 1) << (N-1)));
  return SIGNED_negative_mask(Z);
#endif
}

UNSIGNED:
TYPE TYPE_smaller_01(TYPE X,TYPE Y) {
  /* Returns 1 when X < Y under unsigned comparison, else 0. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
#if amd64
  /* Start from Z = 0 with Q = 1 on the side, compare, then move Q into Z when X < Y. */
  /* The 8-bit lines do this on 32-bit copies, comparing against (int8) Y32. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Y32 = Y,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Y32 Z32; Z32 = 0; Q32 = 1; X32 - (int8) Y32; Z32 = Q32 if unsigned<");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if unsigned<");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if unsigned<");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if unsigned<");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Y Z; Z = (uint8) X; Z - (uint8) Y; Z = 1 if unsigned< else 0");
16:  readasm("arm64; int16 X Y Z; Z = (uint16) X; Z - (uint16) Y; Z = 1 if unsigned< else 0");
32:  readasm("arm64; int32 X Y Z; X - Y; Z = 1 if unsigned< else 0");
64:  readasm("arm64; int64 X Y Z; X - Y; Z = 1 if unsigned< else 0");
  return Z;
#else
  /* Portable path: no branch on X or Y. R marks the bits where X and Y differ. */
  TYPE R = X ^ Y;
  TYPE Z = X - Y;
  /* Wherever X and Y differ, replace the bit of Z by the bit of X, except that the */
  /* top bit gets the flipped bit of X (the top bit of Y). The top bit of Z is then */
  /* the top bit of Y when the top bits differ, and the top bit of X - Y otherwise, */
  /* so it is set exactly when X < Y; the topbit helper extracts it as 0 or 1. */
  Z ^= R & (Z ^ X ^ (((TYPE) 1) << (N-1)));
  return UNSIGNED_topbit_01(Z);
#endif
}

SIGNED:
TYPE TYPE_leq_mask(TYPE X,TYPE Y) {
  /* Returns -1 (all bits set) when X <= Y under signed comparison, else 0. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
#if amd64
  /* Start from Z = 0 with Q = -1 on the side, compare, then move Q into Z when X <= Y. */
  /* The 8-bit lines do this on 32-bit copies, comparing against (int8) Y32. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Y32 = Y,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Y32 Z32; Z32 = 0; Q32 = -1; X32 - (int8) Y32; Z32 = Q32 if signed<=");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if signed<=");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if signed<=");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if signed<=");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Y Z; Z = (int8) X; Z - (int8) Y; Z = -1 if signed<= else 0");
16:  readasm("arm64; int16 X Y Z; Z = (int16) X; Z - (int16) Y; Z = -1 if signed<= else 0");
32:  readasm("arm64; int32 X Y Z; X - Y; Z = -1 if signed<= else 0");
64:  readasm("arm64; int64 X Y Z; X - Y; Z = -1 if signed<= else 0");
  return Z;
#else
  /* Portable path: X <= Y is the negation of Y < X, so complement that mask. */
  return ~TYPE_smaller_mask(Y,X);
#endif
}

SIGNED:
TYPE TYPE_leq_01(TYPE X,TYPE Y) {
  /* Returns 1 when X <= Y under signed comparison, else 0. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
#if amd64
  /* Start from Z = 0 with Q = 1 on the side, compare, then move Q into Z when X <= Y. */
  /* The 8-bit lines do this on 32-bit copies, comparing against (int8) Y32. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Y32 = Y,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Y32 Z32; Z32 = 0; Q32 = 1; X32 - (int8) Y32; Z32 = Q32 if signed<=");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if signed<=");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if signed<=");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if signed<=");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Y Z; Z = (int8) X; Z - (int8) Y; Z = 1 if signed<= else 0");
16:  readasm("arm64; int16 X Y Z; Z = (int16) X; Z - (int16) Y; Z = 1 if signed<= else 0");
32:  readasm("arm64; int32 X Y Z; X - Y; Z = 1 if signed<= else 0");
64:  readasm("arm64; int64 X Y Z; X - Y; Z = 1 if signed<= else 0");
  return Z;
#else
  /* Portable path: X <= Y is the negation of Y < X, so flip that 0/1 result. */
  return 1-TYPE_smaller_01(Y,X);
#endif
}

UNSIGNED:
TYPE TYPE_leq_mask(TYPE X,TYPE Y) {
  /* Returns a value with all bits set when X <= Y under unsigned comparison, else 0. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
#if amd64
  /* Start from Z = 0 with Q = -1 on the side, compare, then move Q into Z when X <= Y. */
  /* The 8-bit lines do this on 32-bit copies, comparing against (int8) Y32. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Y32 = Y,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Y32 Z32; Z32 = 0; Q32 = -1; X32 - (int8) Y32; Z32 = Q32 if unsigned<=");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if unsigned<=");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if unsigned<=");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Y Z; Z = 0; Q = -1; X - Y; Z = Q if unsigned<=");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Y Z; Z = (uint8) X; Z - (uint8) Y; Z = -1 if unsigned<= else 0");
16:  readasm("arm64; int16 X Y Z; Z = (uint16) X; Z - (uint16) Y; Z = -1 if unsigned<= else 0");
32:  readasm("arm64; int32 X Y Z; X - Y; Z = -1 if unsigned<= else 0");
64:  readasm("arm64; int64 X Y Z; X - Y; Z = -1 if unsigned<= else 0");
  return Z;
#else
  /* Portable path: X <= Y is the negation of Y < X, so complement that mask. */
  return ~TYPE_smaller_mask(Y,X);
#endif
}

UNSIGNED:
TYPE TYPE_leq_01(TYPE X,TYPE Y) {
  /* Returns 1 when X <= Y under unsigned comparison, else 0. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
#if amd64
  /* Start from Z = 0 with Q = 1 on the side, compare, then move Q into Z when X <= Y. */
  /* The 8-bit lines do this on 32-bit copies, comparing against (int8) Y32. */
 8:  TYPE Z;
 8:  int32_t X32 = X,Y32 = Y,Q32,Z32;
 8:  readasm("amd64; int32 X32 Q32 Y32 Z32; Z32 = 0; Q32 = 1; X32 - (int8) Y32; Z32 = Q32 if unsigned<=");
 8:  Z = Z32;
16:  TYPE Q,Z;
16:  readasm("amd64; int16 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if unsigned<=");
32:  TYPE Q,Z;
32:  readasm("amd64; int32 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if unsigned<=");
64:  TYPE Q,Z;
64:  readasm("amd64; int64 X Q Y Z; Z = 0; Q = 1; X - Y; Z = Q if unsigned<=");
  return Z;
#elif arm64
  TYPE Z;
 8:  readasm("arm64; int8 X Y Z; Z = (uint8) X; Z - (uint8) Y; Z = 1 if unsigned<= else 0");
16:  readasm("arm64; int16 X Y Z; Z = (uint16) X; Z - (uint16) Y; Z = 1 if unsigned<= else 0");
32:  readasm("arm64; int32 X Y Z; X - Y; Z = 1 if unsigned<= else 0");
64:  readasm("arm64; int64 X Y Z; X - Y; Z = 1 if unsigned<= else 0");
  return Z;
#else
  /* Portable path: X <= Y is the negation of Y < X, so flip that 0/1 result. */
  return 1-TYPE_smaller_01(Y,X);
#endif
}

int TYPE_ones_num(TYPE X) {
  /* Returns the number of 1 bits in X, computed by summing adjacent bit fields */
  /* of doubling width; there is no branch or table lookup on X. */
  /* The work is done on a copy of type UNSIGNED, so the right shifts below */
  /* bring in zeros rather than copies of the top bit. */
  /* NOTE(review): the width prefixes are resolved by tooling not visible here; */
  /* presumably a prefixed line is kept only for that value of N -- confirm. */
  UNSIGNED Y = X;
  /* C0, C1, C2 select the low 1, 2 and 4 bits of every 2-, 4- and 8-bit field. */
 8:  const TYPE C0 = 0x55;
16:  const TYPE C0 = 0x5555;
32:  const TYPE C0 = 0x55555555;
64:  const TYPE C0 = 0x5555555555555555;
 8:  const TYPE C1 = 0x33;
16:  const TYPE C1 = 0x3333;
32:  const TYPE C1 = 0x33333333;
64:  const TYPE C1 = 0x3333333333333333;
 8:  const TYPE C2 = 0x0f;
16:  const TYPE C2 = 0x0f0f;
32:  const TYPE C2 = 0x0f0f0f0f;
64:  const TYPE C2 = 0x0f0f0f0f0f0f0f0f;
  /* Each 2-bit field becomes the count of ones it contained. */
  Y -= ((Y >> 1) & C0);
  /* Each 4-bit field becomes the sum of its two 2-bit counts. */
  Y = (Y & C1) + ((Y >> 2) & C1);
  /* Each byte becomes the sum of its two 4-bit counts (at most 8). */
  Y = (Y + (Y >> 4)) & C2;
  /* For widths above 8, add the per-byte counts together; the final mask keeps */
  /* only the low byte, which holds the total. */
16:  Y = (Y + (Y >> 8)) & 0xff;
32:  Y += Y >> 8;
32:  Y = (Y + (Y >> 16)) & 0xff;
64:  Y += Y >> 8;
64:  Y += Y >> 16;
64:  Y = (Y + (Y >> 32)) & 0xff;
  return Y;
}

int TYPE_bottomzeros_num(TYPE X) {
  /* Returns the number of consecutive 0 bits at the bottom of X; the portable */
  /* path below returns N when X is 0. */
  /* NOTE(review): the width prefixes and asm descriptions are expanded by tooling */
  /* not visible here; presumably a prefixed line is kept only for that N -- confirm. */
#if amd64
  /* fallback holds N and replaces the result on the "=" condition that follows */
  /* numbottomzeros_tricky; presumably that condition means the input was 0 -- confirm. */
  /* The 8-bit lines work on a 32-bit copy of X. */
 8:  int32_t fallback = N;
 8:  int32_t X32 = X;
 8:  readasm("amd64; int32 X32 fallback; X32 = numbottomzeros_tricky X32; X32 = fallback if =");
 8:  X = X32;
16:  TYPE fallback = N;
16:  readasm("amd64; int16 X fallback; X = numbottomzeros_tricky X; X = fallback if =");
32:  TYPE fallback = N;
32:  readasm("amd64; int32 X fallback; X = numbottomzeros_tricky X; X = fallback if =");
64:  TYPE fallback = N;
64:  readasm("amd64; int64 X fallback; X = numbottomzeros_tricky X; X = fallback if =");
  return X;
#elif arm64
  /* The 8- and 16-bit lines first set every bit above the low N bits (| -256, */
  /* | -65536), then all widths reverse the bit order before counting. */
  /* NOTE(review): read literally, counting bottom zeros after a bit reversal counts */
  /* zeros from the top end of the input, and for the 8- and 16-bit lines the bits set */
  /* by the OR land at the bottom after the reversal, which would force a count of 0. */
  /* Confirm how the translator defines numbottomzeros on arm64 (a count of top zeros */
  /* after the reversal would match the portable path). */
  int64_t Y;
 8:  readasm("arm64; int8 X Y; Y = X | -256; Y = bitrev32 Y; Y = numbottomzeros Y");
16:  readasm("arm64; int16 X Y; Y = X | -65536; Y = bitrev32 Y; Y = numbottomzeros Y");
32:  readasm("arm64; int32 X Y; Y = bitrev32 X; Y = numbottomzeros Y");
64:  readasm("arm64; int64 X Y; Y = bitrev64 X; Y = numbottomzeros Y");
  return Y;
#else
  /* Portable path: no branch on X. */
  /* Y gets ones from bit 0 up to and including the lowest set bit of X */
  /* (every bit when X is 0). */
  TYPE Y = X ^ (X-1);
  /* Drop one of those ones. The cast to SIGNED is there so that the shift keeps */
  /* the top bit; an all-ones Y (X is 0, or X is only the top bit) stays all ones. */
  Y = ((SIGNED) Y) >> 1;
  /* If X has its top bit set, clear the top bit of Y; this turns the all-ones */
  /* case for X equal to only the top bit into N-1 ones. */
  Y &= ~(X & (((TYPE) 1) << (N-1)));
  /* The remaining ones are exactly the bottom zeros of X. */
  return TYPE_ones_num(Y);
#endif
}
