diff --git a/lib/builtins/CMakeLists.txt b/lib/builtins/CMakeLists.txt
index 8ba3a4e26..329ff05e6 100644
--- a/lib/builtins/CMakeLists.txt
+++ b/lib/builtins/CMakeLists.txt
@@ -181,7 +181,12 @@ set(MSVC_SOURCES
   divxc3.c
   mulsc3.c
   muldc3.c
-  mulxc3.c)
+  mulxc3.c
+  divti3.c
+  modti3.c
+  udivti3.c
+  umodti3.c
+  udivmodti4.c)
 
 
 if(APPLE)
diff --git a/lib/builtins/divti3.c b/lib/builtins/divti3.c
index c73eae28f..f055614ef 100644
--- a/lib/builtins/divti3.c
+++ b/lib/builtins/divti3.c
@@ -18,6 +18,28 @@
 
 /* Returns: a / b */
 
+#if defined(_MSC_VER)
+COMPILER_RT_ABI ti_int
+__divti3(ti_int a, ti_int b)
+{
+    ti_int s_a = { 0, 0 };                  /* becomes all ones if a < 0 */
+    ti_int s_b = { 0, 0 };                  /* becomes all ones if b < 0 */
+    if (a.high < 0) {
+        s_a.high = -1;
+        s_a.low = (du_int)(-1);
+        a = subti3(xorti3(a, s_a), s_a);    /* negate a */
+    }
+    if (b.high < 0) {
+        s_b.high = -1;
+        s_b.low = (du_int)(-1);
+        b = subti3(xorti3(b, s_b), s_b);    /* negate b */
+    }
+    s_a = xorti3(s_a, s_b);                 /* sign of quotient */
+    tu_int tu_a = titotu(a);
+    tu_int tu_b = titotu(b);
+    return subti3(xorti3(tutoti(__udivmodti4(tu_a, tu_b, (tu_int*)0)), s_a), s_a); /* negate if s_a == -1 */
+}
+#else
 COMPILER_RT_ABI ti_int
 __divti3(ti_int a, ti_int b)
 {
@@ -29,5 +51,6 @@ __divti3(ti_int a, ti_int b)
     s_a ^= s_b; /* sign of quotient */
     return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */
 }
+#endif
 
 #endif /* CRT_HAS_128BIT */
diff --git a/lib/builtins/int_lib.h b/lib/builtins/int_lib.h
index 39eee18d9..d94d57685 100644
--- a/lib/builtins/int_lib.h
+++ b/lib/builtins/int_lib.h
@@ -126,4 +126,147 @@ uint32_t __inline __builtin_clzll(uint64_t value) {
 #define __builtin_clzl __builtin_clzll
 #endif /* defined(_MSC_VER) && !defined(__clang__) */
 
+#if defined(CRT_HAS_128BIT)
+#if defined(_MSC_VER) && !defined(__clang__)
+/*
+ * Under _MSC_VER, int_types.h declares ti_int/tu_int as {low, high} structs,
+ * so the helpers below stand in for the built-in 128-bit operators.
+ * NOTE(review): int_types.h and the *ti3.c files test only _MSC_VER while this
+ * block also requires !__clang__ -- confirm the clang-cl configuration.
+ */
+
+/* Index of the lowest set bit of x; r is left unset when x == 0. */
+unsigned long long __inline __builtin_ctzll(unsigned long long x) {
+  unsigned long r;
+  _BitScanForward64(&r, x);
+  return r;
+}
+
+/* Returns a - b modulo 2^128. */
+tu_int __inline subtu3(tu_int a, tu_int b) {
+  tu_int diff;
+  diff.low = a.low - b.low;
+  /* Borrow from the high word exactly when the low subtraction wrapped. */
+  diff.high = a.high - b.high - (a.low < b.low ? 1 : 0);
+  return diff;
+}
+
+/* Signed wrapper around subtu3: subtracts the raw bit patterns. */
+ti_int __inline subti3(ti_int a, ti_int b) {
+  tu_int u_a;
+  u_a.high = (du_int)(a.high);
+  u_a.low = a.low;
+
+  tu_int u_b;
+  u_b.high = (du_int)(b.high);
+  u_b.low = b.low;
+
+  tu_int sum = subtu3(u_a, u_b);
+
+  ti_int ret;
+  ret.high = (di_int)(sum.high);
+  ret.low = sum.low;
+  return ret;
+}
+
+/* Bitwise a ^ b. */
+ti_int __inline xorti3(ti_int a, ti_int b) {
+  ti_int ret;
+  ret.high = (di_int)((du_int)(a.high) ^ (du_int)(b.high));
+  ret.low = a.low ^ b.low;
+  return ret;
+}
+
+/* Arithmetic right shift of ti by shift bits; requires shift < 128. */
+ti_int __inline rshiftti3(ti_int ti, unsigned char shift) {
+  ti_int ret;
+  if (shift >= 64) {
+    /* __shiftright128 takes its count modulo 64, so shifts that cross the
+     * word boundary are built from the high word alone. */
+    ret.low = (du_int)(ti.high >> (shift - 64));
+    ret.high = ti.high >> 63;
+  } else {
+    ret.high = ti.high >> shift;
+    ret.low = (du_int)(__shiftright128(ti.low, (unsigned __int64)(ti.high), shift));
+  }
+  return ret;
+}
+
+/* Left shift of tu by shift bits; requires shift < 64. */
+tu_int __inline lshifttu3(tu_int tu, unsigned char shift) {
+  tu_int ret;
+  ret.high = (du_int)(__shiftleft128(tu.low, (unsigned __int64)(tu.high), shift));
+  ret.low = tu.low << shift;
+  return ret;
+}
+
+/* Sign-extends a signed 64-bit value to 128 bits. */
+tu_int __inline ditotu(di_int di) {
+  tu_int tu;
+  tu.high = di < 0 ? (du_int)(-1) : 0;
+  tu.low = di;
+  return tu;
+}
+
+/* Reinterprets a signed 128-bit value as unsigned. */
+tu_int __inline titotu(ti_int ti) {
+  tu_int tu;
+  tu.high = (du_int)(ti.high);
+  tu.low = ti.low;
+  return tu;
+}
+
+/* Reinterprets an unsigned 128-bit value as signed. */
+ti_int __inline tutoti(tu_int tu) {
+  ti_int ti;
+  ti.high = (di_int)(tu.high);
+  ti.low = tu.low;
+  return ti;
+}
+
+/* Zero-extends an unsigned 64-bit value to 128 bits. */
+tu_int __inline dutotu(du_int du) {
+  tu_int tu;
+  tu.high = 0;
+  tu.low = du;
+  return tu;
+}
+
+/* Sign-extends an int to an unsigned 128-bit value. */
+tu_int __inline itotu(int i) {
+  tu_int tu;
+  tu.high = i < 0 ? (du_int)(-1) : 0;
+  tu.low = i;
+  return tu;
+}
+
+/* Sign-extends an int to a signed 128-bit value. */
+ti_int __inline itoti(int i) {
+  ti_int ti;
+  ti.high = i < 0 ? -1 : 0;
+  ti.low = i;
+  return ti;
+}
+#else
+/* With a native 128-bit type the helpers reduce to implicit conversions.
+ * They are static because a plain C99 inline definition in a header provides
+ * no out-of-line symbol for calls the compiler chooses not to inline. */
+static __inline tu_int ditotu(di_int di) {
+  return di;
+}
+
+static __inline tu_int titotu(ti_int ti) {
+  return ti;
+}
+
+static __inline tu_int dutotu(du_int du) {
+  return du;
+}
+
+static __inline tu_int itotu(int i) {
+  return i;
+}
+#endif /* defined(_MSC_VER) && !defined(__clang__) */
+#endif /* defined(CRT_HAS_128BIT) */
+
 #endif /* INT_LIB_H */
diff --git a/lib/builtins/int_types.h b/lib/builtins/int_types.h
index 660385ecd..8e33b6b4c 100644
--- a/lib/builtins/int_types.h
+++ b/lib/builtins/int_types.h
@@ -67,8 +67,29 @@ typedef union
 #endif
 
 #ifdef CRT_HAS_128BIT
+#if defined(_MSC_VER)
+typedef struct {
+#if _YUGA_LITTLE_ENDIAN
+    du_int low;
+    di_int high;
+#else
+    di_int high;
+    du_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+} ti_int;
+typedef struct {
+#if _YUGA_LITTLE_ENDIAN
+    du_int low;
+    du_int high;
+#else
+    du_int high;
+    du_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+} tu_int;
+#else
 typedef int ti_int __attribute__ ((mode (TI)));
 typedef unsigned tu_int __attribute__ ((mode (TI)));
+#endif
 
 typedef union
 {
diff --git a/lib/builtins/modti3.c b/lib/builtins/modti3.c
index d505c07ac..b8b1925bd 100644
--- a/lib/builtins/modti3.c
+++ b/lib/builtins/modti3.c
@@ -18,6 +18,29 @@
 
 /*Returns: a % b */
 
+#if defined(_MSC_VER)
+COMPILER_RT_ABI ti_int
+__modti3(ti_int a, ti_int b)
+{
+    ti_int s_a = { 0, 0 };                  /* becomes all ones if a < 0 */
+    ti_int s_b = { 0, 0 };                  /* becomes all ones if b < 0 */
+    if (a.high < 0) {
+        s_a.high = -1;
+        s_a.low = (du_int)(-1);
+        a = subti3(xorti3(a, s_a), s_a);    /* negate a */
+    }
+    if (b.high < 0) {
+        s_b.high = -1;
+        s_b.low = (du_int)(-1);
+        b = subti3(xorti3(b, s_b), s_b);    /* negate b */
+    }
+    tu_int r;
+    tu_int tu_a = titotu(a);
+    tu_int tu_b = titotu(b);
+    __udivmodti4(tu_a, tu_b, &r);
+    return subti3(xorti3(tutoti(r), s_a), s_a); /* negate if s_a == -1 */
+}
+#else
 COMPILER_RT_ABI ti_int
 __modti3(ti_int a, ti_int b)
 {
@@ -30,5 +53,6 @@ __modti3(ti_int a, ti_int b)
     __udivmodti4(a, b, &r);
     return ((ti_int)r ^ s) - s; /* negate if s == -1 */
 }
+#endif
 
 #endif /* CRT_HAS_128BIT */
diff --git a/lib/builtins/udivmodti4.c b/lib/builtins/udivmodti4.c
index 803168849..d0878f25c 100644
--- a/lib/builtins/udivmodti4.c
+++ b/lib/builtins/udivmodti4.c
@@ -44,16 +44,16 @@ __udivmodti4(tu_int a, tu_int b, tu_int* rem)
              * 0 X
              */
             if (rem)
-                *rem = n.s.low % d.s.low;
-            return n.s.low / d.s.low;
+                *rem = dutotu(n.s.low % d.s.low); /* zero-extend: operands are unsigned */
+            return dutotu(n.s.low / d.s.low);
         }
         /* 0 X
          * ---
          * K X
          */
         if (rem)
-            *rem = n.s.low;
-        return 0;
+            *rem = dutotu(n.s.low);
+        return itotu(0);
     }
     /* n.s.high != 0 */
     if (d.s.low == 0)
@@ -65,8 +65,8 @@ __udivmodti4(tu_int a, tu_int b, tu_int* rem)
              * 0 0
              */
             if (rem)
-                *rem = n.s.high % d.s.low;
-            return n.s.high / d.s.low;
+                *rem = dutotu(n.s.high % d.s.low);
+            return dutotu(n.s.high / d.s.low);
         }
         /* d.s.high != 0 */
         if (n.s.low == 0)
@@ -81,7 +81,7 @@ __udivmodti4(tu_int a, tu_int b, tu_int* rem)
                 r.s.low = 0;
                 *rem = r.all;
             }
-            return n.s.high / d.s.high;
+            return dutotu(n.s.high / d.s.high);
         }
         /* K K
          * ---
@@ -95,7 +95,7 @@ __udivmodti4(tu_int a, tu_int b, tu_int* rem)
                 r.s.high = n.s.high & (d.s.high - 1);
                 *rem = r.all;
             }
-            return n.s.high >> __builtin_ctzll(d.s.high);
+            return dutotu(n.s.high >> __builtin_ctzll(d.s.high));
         }
         /* K K
          * ---
@@ -107,7 +107,7 @@ __udivmodti4(tu_int a, tu_int b, tu_int* rem)
         {
             if (rem)
                 *rem = n.all;
-            return 0;
+            return itotu(0);
         }
         ++sr;
         /* 1 <= sr <= n_udword_bits - 1 */
@@ -129,7 +129,7 @@ __udivmodti4(tu_int a, tu_int b, tu_int* rem)
             if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */
             {
                 if (rem)
-                    *rem = n.s.low & (d.s.low - 1);
+                    *rem = dutotu(n.s.low & (d.s.low - 1));
                 if (d.s.low == 1)
                     return n.all;
                 sr = __builtin_ctzll(d.s.low);
@@ -182,7 +182,7 @@ __udivmodti4(tu_int a, tu_int b, tu_int* rem)
             {
                 if (rem)
                     *rem = n.all;
-                return 0;
+                return itotu(0);
             }
             ++sr;
             /* 1 <= sr <= n_udword_bits
@@ -225,11 +225,30 @@ __udivmodti4(tu_int a, tu_int b, tu_int* rem)
          *      carry = 1;
          * }
          */
+#if defined(_MSC_VER)
+        ti_int sub = tutoti(subtu3(subtu3(d.all, r.all), itotu(1)));
+        unsigned char shift = (unsigned char)(n_utword_bits - 1);
+        ti_int s = rshiftti3(sub, shift);   /* sign of (d - r - 1) spread over all bits */
+
+        carry = s.low & 1;
+
+        tu_int mask;
+        mask.high = d.all.high & (du_int)(s.high);
+        mask.low = d.all.low & s.low;
+        r.all = subtu3(r.all, mask);        /* r -= d & s */
+#else
         const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
         carry = s & 1;
         r.all -= d.all & s;
+#endif
     }
+#if defined(_MSC_VER)
+    tu_int shifted = lshifttu3(q.all, 1);
+    q.all.high = shifted.high;
+    q.all.low = shifted.low | carry;        /* q = (q << 1) | carry */
+#else
     q.all = (q.all << 1) | carry;
+#endif
     if (rem)
         *rem = r.all;
     return q.all;